In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os
# Load two submission CSVs. Only `df2` is referenced by the later cells shown here,
# where its `image_id` and `PredictionString` columns are read.
df = pd.read_csv("../input/results-vinbin/results/tmp_debug/submission.csv")
df2 = pd.read_csv("../input/241solution/submission.csv")
# Last expression of the cell: display the second submission.
df2

# ConvXray

In [None]:
########################################################################################################################
#                                                      IMPORTS                                                         #
########################################################################################################################

import os
from PIL import Image
import pandas as pd
from tqdm.auto import tqdm
from collections import Counter
from typing import Any, Dict
import cv2
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import torch
import pydicom
from pydicom.pixel_data_handlers.util import apply_voi_lut


########################################################################################################################
#                                                Class definition                                                      #
########################################################################################################################


# ------------------------------------#
#         USEFUL FUNCTIONS            #
# ------------------------------------#

def read_xray(path, voi_lut=True, fix_monochrome=True):
    """Load a DICOM file and return its pixels as an 8-bit grayscale array.

    Original from: https://www.kaggle.com/raddar/convert-dicom-to-np-array-the-correct-way

    Args:
        path: location of the DICOM file.
        voi_lut: when True, pass the raw pixels through ``apply_voi_lut`` (the VOI LUT,
            if provided by the DICOM device, gives a "human-friendly" view).
        fix_monochrome: when True, invert images whose PhotometricInterpretation is
            "MONOCHROME1" so that they do not look inverted.

    Returns:
        A ``np.uint8`` array min-max scaled to [0, 255]. A constant image yields all zeros.
    """
    # `dcmread` is the supported reader; `read_file` is a deprecated alias of it.
    dicom = pydicom.dcmread(path)
    if voi_lut:
        data = apply_voi_lut(dicom.pixel_array, dicom)
    else:
        data = dicom.pixel_array
    # depending on this value, X-ray may look inverted - fix that:
    if fix_monochrome and dicom.PhotometricInterpretation == "MONOCHROME1":
        data = np.amax(data) - data
    data = data - np.min(data)
    peak = np.max(data)
    # A constant image has peak == 0 after the shift; dividing would produce NaN
    # and an undefined uint8 cast, so leave it as all zeros.
    if peak > 0:
        data = data / peak
    data = (data * 255).astype(np.uint8)
    return data


def resize(array, size, keep_ratio=False, resample=Image.LANCZOS):
    """Turn a pixel array into a PIL image fitted to a ``size`` x ``size`` square.

    Original from: https://www.kaggle.com/xhlulu/vinbigdata-process-and-resize-to-image

    With ``keep_ratio`` the image is shrunk in place with ``Image.thumbnail``;
    otherwise it is resized to exactly ``(size, size)``. ``resample`` is the
    PIL resampling filter used in both cases.
    """
    picture = Image.fromarray(array)
    target = (size, size)
    if not keep_ratio:
        return picture.resize(target, resample)
    picture.thumbnail(target, resample)
    return picture


# DRAW BOUNDING-BOXES #
def draw_bboxes(img, tl, br, rgb, score, label="", label_location="tl", opacity=0.1, line_thickness=0, font_scale=0.2,
                font_thickness=1):
    """Draw one translucent bounding box, with optional outline and caption, on an image.

    Args:
        img: 3-channel image array (height, width, 3); it is modified in place and returned.
        tl: (x, y) of the top-left corner of the box.
        br: (x, y) of the bottom-right corner of the box.
        rgb: 3-tuple colour used for the fill, the outline and the caption.
        score: number appended to the caption, rounded to 2 decimals.
        label: caption text (upper-cased); nothing is written when empty.
        label_location: corner the caption is anchored to: "tl", "tr", "bl" or "br".
        opacity: weight of the colour fill when blended with the image.
        line_thickness: outline thickness; no outline is drawn when 0.
        font_scale: caption font scale passed to OpenCV.
        font_thickness: caption stroke thickness passed to OpenCV.

    Returns:
        The image with the box drawn.

    Raises:
        KeyError: if ``label_location`` is not one of the four supported corners.
    """
    height, width = img.shape[:2]
    # Clamp the corners to the image. numpy silently clips an out-of-range slice while the
    # colour overlay keeps the requested size, which made cv2.addWeighted fail on a shape
    # mismatch for any box touching the border.
    x0, y0 = max(0, min(int(tl[0]), width)), max(0, min(int(tl[1]), height))
    x1, y1 = max(0, min(int(br[0]), width)), max(0, min(int(br[1]), height))
    tl, br = (x0, y0), (x1, y1)
    if x1 > x0 and y1 > y0:  # nothing to fill for a degenerate box
        box = np.uint8(np.ones((y1 - y0, x1 - x0, 3)) * rgb)
        sub_combo = cv2.addWeighted(img[y0:y1, x0:x1, :], 1 - opacity, box, opacity, 1.0)
        img[y0:y1, x0:x1, :] = sub_combo
    if line_thickness > 0:
        img = cv2.rectangle(img, tl, br, rgb, line_thickness)
    if label:
        font = cv2.FONT_HERSHEY_SIMPLEX
        font_line_type = cv2.LINE_AA
        # Measure the text that is actually drawn (label + score) so that right-anchored
        # captions are aligned on the box edge instead of overflowing by the score width.
        caption = label.upper() + "(" + str(round(score, 2)) + ")"
        text_width, text_height = cv2.getTextSize(caption, font, font_scale, font_thickness)[0]
        label_origin = {"tl": tl, "br": br, "tr": (br[0], tl[1]), "bl": (tl[0], br[1])}[label_location]
        label_offset = {
            "tl": (0, -10), "br": (-text_width, text_height + 10),
            "tr": (-text_width, -10), "bl": (0, text_height + 10)
        }[label_location]
        # OpenCV expects plain Python ints for the text origin.
        origin = (int(label_origin[0] + label_offset[0]), int(label_origin[1] + label_offset[1]))
        img = cv2.putText(img, caption, origin, font, font_scale, rgb, font_thickness, font_line_type)
    return img


def show_xray(*img, title: "list | str" = "", axis: bool = False, size: tuple = (20, 13)):
    """Show every image passed as argument in a single matplotlib figure.

    Args:
        *img: images accepted by ``plt.imshow``; they are rendered with a gray colormap.
        title: either one string used for every image, or a list/tuple holding
            exactly one title per image.
        axis: forwarded to ``plt.axis`` (False hides the axes).
        size: figure size forwarded to ``plt.figure``.

    Raises:
        ValueError: if ``title`` is neither a string nor a sequence with one entry per image.
    """
    # Resolve the titles before touching matplotlib so that a bad call fails early.
    # The previous check (`type(title) == "array"`) could never be true, so any list
    # of titles was rejected.
    if isinstance(title, str):
        titles = [title] * len(img)
    elif isinstance(title, (list, tuple)) and len(title) == len(img):
        titles = list(title)
    else:
        raise ValueError("number of titles don't match with the number of Xray")
    plt.figure(figsize=size)
    for n, picture in enumerate(img):
        plt.subplot(len(img), 2, n + 1)
        plt.axis(axis)
        plt.imshow(picture, cmap="gray")
        plt.title(titles[n])
    plt.show()


def predict_bbox(image_to_predict, predictor, resized_width=256, resized_height=256):
    """Run the detector on one image and return its labels, boxes and scores.

    Args:
        image_to_predict: image array of shape (height, width, 3).
        predictor: object exposing ``input_format`` ("RGB" triggers a channel flip) and a
            ``model`` callable taking a list of ``{"image", "height", "width"}`` dicts.
        resized_width: width used to rescale the predicted x coordinates.
        resized_height: height used to rescale the predicted y coordinates.

    Returns:
        ``(pred_classes, pred_boxes, pred_scores)`` as numpy arrays of shape (n,), (n, 4)
        and (n,). When the model finds nothing, a single placeholder detection is
        returned: class 14, box [0, 0, 1, 1], score 1.0.
    """
    img = image_to_predict.copy()
    if predictor.input_format == "RGB":
        img = img[:, :, ::-1]
    height, width = img.shape[:2]
    # Build the channel-first float32 tensor handed to the model. The original code
    # referenced an undefined name `image` here. astype() copies, so the reversed
    # view produced by the channel flip is never handed to torch directly.
    image = torch.as_tensor(img.astype("float32").transpose(2, 0, 1))
    inputs = {"image": image, "height": height, "width": width}
    with torch.no_grad():
        predictions = predictor.model([inputs])
    instances = predictions[0]["instances"]
    if len(instances) == 0:
        # Same array layout as the non-empty case so that callers can iterate over it.
        pred_classes = np.array([14])
        pred_boxes = np.array([[0.0, 0.0, 1.0, 1.0]], dtype=np.float32)
        pred_scores = np.array([1.0])
    else:
        fields: Dict[str, Any] = instances.get_fields()
        pred_classes, pred_scores, pred_boxes = fields["pred_classes"], fields["scores"], fields["pred_boxes"].tensor
        # NOTE(review): presumably the boxes come back in the resized_width x resized_height
        # frame and are mapped to the input image size here - confirm against the model.
        h_ratio, w_ratio = height / resized_height, width / resized_width
        pred_boxes[:, [0, 2]] *= w_ratio
        pred_boxes[:, [1, 3]] *= h_ratio
        pred_classes, pred_boxes, pred_scores = pred_classes.cpu().numpy(), pred_boxes.cpu().numpy(), pred_scores.cpu().numpy()
    return pred_classes, pred_boxes, pred_scores




# ------------------------------------#
#             XRAY CLASS              #
# ------------------------------------#

class Xray:
    """A chest X-ray loaded from disk, with helpers to predict and draw bounding boxes.

    An instance is built either from a complete ``path`` or from ``folder``, ``name`` and
    ``extension``; any of the three that is left empty is derived from ``path``.

    Attributes:
        path: file location ("folder/name.extension" when it is not given explicitly).
        folder, name, extension: the components of the path.
        image: pixel array loaded from ``path`` (cv2 for png/jpg, read_xray for dicom).
        shape, height, width: dimensions of ``image``.
        th: default score threshold used when drawing predictions.
        palette: 15 RGB tuples (one per class) built from a seaborn palette.
        predictor: detector used by ``predict_bbox``; False when none is set.
    """

    # Extensions the class knows how to load.
    POSSIBLE_EXTENSIONS = ("png", "dicom", "jpg")

    # Class id -> caption used when drawing predictions.
    mapping = {0: 'Aortic enlargement', 1: 'Atelectasis', 2: 'Calcification', 3: 'Cardiomegaly',
               4: 'Consolidation', 5: 'ILD', 6: 'Infiltration', 7: 'Lung Opacity', 8: 'Nodule/Mass',
               9: 'Other lesion', 10: 'Pleural effusion', 11: 'Pleural thickening', 12: 'Pneumothorax',
               13: 'Pulmonary fibrosis', 14: 'Normal'}

    def __init__(self, path: str = "", folder: str = "", name: str = "", extension: str = "",
                 th: float = 0.25, palette: str = "icefire", predictor=False):
        # Each component falls back to the one parsed from `path` when not given.
        self.extension = extension if extension != "" else path
        self.name = name if name != "" else path
        self.folder = folder if folder != "" else path
        # Must come after the three components: an empty path is rebuilt from them.
        self.path = path
        self.image = self.extension
        self.shape = self.image.shape
        self.height, self.width = self.shape[0], self.shape[1]
        self.th = th
        self.palette = [tuple([int(x) for x in np.array(c) * (255, 255, 255)]) for c in sns.color_palette(palette, 15)]
        self.predictor = predictor

    @property
    def image(self):
        """Pixel array of the X-ray."""
        return self._image

    @image.setter
    def image(self, ext):
        """Load the image found at ``self.path`` according to the extension ``ext``."""
        if ext in ("png", "jpg"):
            # "jpg" is accepted by the extension setter, so it has to be loadable too.
            loaded = cv2.imread(self.path)
            if loaded is None:
                # cv2.imread reports failure by returning None instead of raising.
                raise OSError("cv2.imread could not read " + str(self.path))
        elif ext == "dicom":
            loaded = read_xray(self.path)
        else:
            raise ValueError("extention is not possible")
        self._image = loaded

    @property
    def path(self):
        """Location of the image file."""
        return self._path

    @path.setter
    def path(self, value):
        """Store ``value``, or build "folder/name.extension" when ``value`` is empty."""
        if value == "":
            if self.folder != "" and self.name != "" and self.extension != "":
                self._path = self.folder + "/" + self.name + "." + self.extension
            else:
                raise ValueError("Please provide a complete path or name, folder and extension values")
        else:
            self._path = value

    @property
    def extension(self):
        """File extension without the leading dot."""
        return self._extension

    @extension.setter
    def extension(self, value):
        """Accept either a bare extension ("png") or a path ending with one."""
        # Text after the last dot, or the whole value when there is no dot. The previous
        # code compared a list to the allowed strings and so rejected a bare "png".
        ext = value.rsplit(".", 1)[-1]
        if ext in self.POSSIBLE_EXTENSIONS:
            self._extension = ext
        else:
            raise ValueError("Please enter a valid extension (possible extensions : "
                             + ", ".join(self.POSSIBLE_EXTENSIONS) + ")")

    @property
    def name(self):
        """File name without folder nor extension."""
        return self._name

    @name.setter
    def name(self, value):
        """Accept either a bare name or a path, and keep only the file stem as a string."""
        self._name = os.path.splitext(os.path.basename(value))[0]

    @property
    def folder(self):
        """Folder containing the image file."""
        return self._folder

    @folder.setter
    def folder(self, value):
        """Accept either a folder or the path of an image file located in it."""
        extension = os.path.splitext(value)[1][1:]
        if extension in self.POSSIBLE_EXTENSIONS:
            # A file path: keep its directory. Using os.path keeps relative
            # prefixes such as "../", which splitting on "." used to destroy.
            self._folder = os.path.dirname(value)
        else:
            # Already a folder: keep it, minus any trailing separator.
            self._folder = value.rstrip("/")

    @property
    def predictor(self):
        """Detector used to predict bounding boxes (False when none is set)."""
        return self._predictor

    @predictor.setter
    def predictor(self, value):
        self._predictor = value

    def show(self):
        """Display the X-ray with its name as title."""
        show_xray(self.image, title=self.name)

    def predict_bbox(self, resized_width: int = 256, resized_height: int = 256):
        """Return ``(labels, boxes, scores)`` predicted for this image.

        Raises:
            ValueError: if no predictor has been set.
        """
        if not self.predictor:
            raise ValueError('Predictor is missing. Please provide one using Xray.predictor = predictor')
        return predict_bbox(self.image, self.predictor, resized_width, resized_height)

    def process_prediction(self, th=False):
        """Predict the boxes and re-weight their scores with hand-written rules.

        Rules applied to each detection:
            * aortic enlargement (0): when there are several, only the best one keeps its score;
            * cardiomegaly (3): score halved, or zeroed when a pleural effusion (10) is present;
            * other lesion (9): score divided by 1.3.

        Args:
            th: kept for backward compatibility; no thresholding is applied here.

        Returns:
            ``(labels, boxes, scores)`` as three lists aligned by detection.
        """
        labels, boxes, scores = self.predict_bbox()
        # Tolerate a predictor returning scalars for a single detection.
        labels, scores, boxes = np.atleast_1d(labels), np.atleast_1d(scores), np.atleast_2d(boxes)
        # Always built: it used to be undefined when there was only one detection.
        count_dict = Counter(labels.tolist())
        processed_scores, processed_labels, processed_boxes = [], [], []
        for score, box, label in zip(scores, boxes, labels):
            label_id = int(label)
            score_i = score
            # aortic enlargement
            if label_id == 0 and count_dict[label_id] != 1:
                best_score = np.max(scores[labels == label])  # best score for aortic enlargement
                if score < best_score:
                    score_i = 0
            # cardiomegaly
            if label_id == 3:
                score_i = score / 2
                if np.any(labels == 10):  # cardiomegaly + pleural effusion => no cardiomegaly
                    score_i = 0
            if label_id == 9:  # other lesion
                score_i = score / 1.3
            # str(label): the label is an integer and cannot be concatenated as is.
            print(str(label) + " : " + str(score) + ' ( ' + str(score_i) + ' ) ')
            processed_scores.append(score_i)
            processed_labels.append(label)
            processed_boxes.append(box)
        return processed_labels, processed_boxes, processed_scores

    def predicted_image(self, labels, boxes, scores):
        """Return a copy of the image with every detection scoring above ``self.th`` drawn.

        When no detection passes the threshold, a "NORMAL" banner is drawn at the
        bottom of the image instead.

        Args:
            labels: class ids, used to pick the colour and the caption.
            boxes: (xmin, ymin, xmax, ymax) for each detection.
            scores: score of each detection.
        """
        predicted_img = self.image.copy()
        if predicted_img.ndim == 2:
            # draw_bboxes indexes three dimensions; DICOM images are loaded as 2-D grayscale.
            predicted_img = cv2.cvtColor(predicted_img, cv2.COLOR_GRAY2RGB)
        nb_box = 0
        for label, box, score in zip(labels, boxes, scores):
            if score > self.th:
                predicted_img = draw_bboxes(predicted_img, (int(box[0]), int(box[1])), (int(box[2]), int(box[3])),
                                            self.palette[int(label)], float(score),
                                            label=self.mapping[int(label)],
                                            label_location="tr",
                                            opacity=0.2, line_thickness=1)
                nb_box += 1
        if nb_box == 0:
            normal_score = 1 - float(scores[0]) if len(scores) else 1.0
            predicted_img = draw_bboxes(predicted_img, (40, self.height - 40), (self.width - 40, self.height),
                                        self.palette[14],
                                        normal_score, label="NORMAL", label_location="tl", opacity=0.2,
                                        line_thickness=1)
        return predicted_img


    
class Xray_dataset:
    """Collection of Xray objects, one per file, all built at construction time."""

    def __init__(self, files):
        loaded = []
        for file in files:
            loaded.append(Xray(file))
        # Same order as `files`.
        self.files = loaded

        
    
    
    
    
    
    
    
    
    
from typing import Any
import yaml

def save_yaml(filepath: str, content: Any, width: int = 120):
    """Dump ``content`` as YAML into ``filepath`` (overwritten if it exists).

    ``width`` is forwarded to ``yaml.dump``.
    """
    with open(filepath, "w") as stream:
        yaml.dump(content, stream=stream, width=width)
    
    
    
    
    
from dataclasses import dataclass, field
from typing import Dict, Any, Tuple, Union, List


@dataclass
class Flags:
    """Run configuration: one typed field per setting, each with a default value."""

    # --- General ---
    debug: bool = True
    outdir: str = "results/det"
    device: str = "cuda:0"

    # --- Data ---
    imgdir_name: str = "vinbigdata-chest-xray-resized-png-256x256"
    # split_mode: str = "all_train"  # all_train or valid20
    seed: int = 111
    target_fold: int = 0  # 0~4
    label_smoothing: float = 0.0

    # --- Model ---
    model_name: str = "resnet18"
    model_mode: str = "normal"  # normal, cnn_fixed supported

    # --- Training ---
    epoch: int = 20
    batchsize: int = 8
    valid_batchsize: int = 16
    num_workers: int = 4
    snapshot_freq: int = 5
    ema_decay: float = 0.999  # a negative value deactivates ema
    scheduler_type: str = ""
    scheduler_kwargs: Dict[str, Any] = field(default_factory=dict)
    scheduler_trigger: List[Union[int, str]] = field(default_factory=lambda: [1, "iteration"])
    aug_kwargs: Dict[str, Dict[str, Any]] = field(default_factory=dict)
    mixup_prob: float = -1.0  # mixup augmentation is applied when a positive value is set

    def update(self, param_dict: Dict) -> "Flags":
        """Overwrite the fields named in ``param_dict`` and return ``self``.

        Keys are handled in order; the first one that is not an existing attribute
        raises ``ValueError``, after the keys preceding it have been applied.
        """
        for key in param_dict:
            if hasattr(self, key):
                setattr(self, key, param_dict[key])
                continue
            raise ValueError(f"[ERROR] Unexpected key for flag = {key}")
        return self
    
    
    
#####################################################
#  TESTS


# Placeholder: no test is actually executed, a message is simply printed
# when this code runs as the main module.
if __name__ == "__main__":
    print('tests went good')

In [None]:
# Row of df2 to inspect.
n=15
name=df2.loc[n,"image_id"]
pred = df2.loc[n,"PredictionString"]
# Load the DICOM file named after the image id and display it.
xray = Xray("../input/vinbigdata-chest-xray-abnormalities-detection/test/"+name+".dicom")
xray.show()

# The prediction string is read as consecutive groups of 6 space-separated tokens:
# label, score, xmin, ymin, xmax, ymax.
splited = pred.split(" ")
result = {"label" : [],"score" : [],'xmin' : [],"ymin" : [],"xmax" : [], "ymax" : []}

# NOTE: `n` is reused as the loop variable, so it no longer holds the row index afterwards.
for n in range(len(splited)//6):
    result["label"].append(splited[n*6])
    result["score"].append(splited[n*6+1])
    result["xmin"].append(splited[n*6+2])
    result["ymin"].append(splited[n*6+3])
    result["xmax"].append(splited[n*6+4])
    result["ymax"].append(splited[n*6+5])
    

# One row per detection; every value is still a string at this point.
result_df = pd.DataFrame(result)
result_df
# NOTE(review): this assignment comes after the last expression, so the cell shows no
# output; its purpose is not visible from this cell - confirm whether it is still needed.
n=1

In [None]:

from collections import namedtuple


# Axis-aligned box described by its minimum and maximum coordinates on each axis.
Rectangle = namedtuple('Rectangle', 'xmin ymin xmax ymax')

# Two sample boxes passed to intersection() as a quick manual check.
ra = Rectangle(3., 3., 5., 5.)
rb = Rectangle(1., 1., 4., 3.5)
# intersection here is (3, 3, 4, 3.5), or an area of 1*.5=.5

def intersection(a, b,normalisation=1):
    """Return the overlap area of two boxes divided by ``normalisation``.

    Args:
        a, b: objects exposing ``xmin``, ``ymin``, ``xmax`` and ``ymax``.
        normalisation: divisor applied to the area. Pass the area of one of the
            boxes to get the covered fraction of that box.

    Returns:
        ``overlap_area / normalisation``, or 0 when the boxes do not overlap or when
        ``normalisation`` is 0 (a degenerate reference box has no measurable coverage).
    """
    dx = min(a.xmax, b.xmax) - max(a.xmin, b.xmin)
    dy = min(a.ymax, b.ymax) - max(a.ymin, b.ymin)
    # Guard against a zero divisor: callers normalise by a box area, which is 0
    # for a degenerate box.
    if dx < 0 or dy < 0 or normalisation == 0:
        return 0
    return dx * dy / normalisation
    

# Expected output: 0.5 (overlap area of ra and rb with the default normalisation of 1).
print(intersection(ra, rb))

In [None]:
# Starting row index in df2 for the search loop of the next cell. That loop increments
# k before reading a row, so the first row inspected is k + 1.
k = 90

In [None]:
import seaborn as sns

mode = 'compet'
normale = True

# --- Step 1: move k forward until an X-ray worth displaying is found -------------------
while normale:
    normale = False
    k +=1
    print(k)
    name=df2.loc[k,"image_id"]
    pred = df2.loc[k,"PredictionString"]
    xray = Xray("../input/vinbigdata-chest-xray-abnormalities-detection/test/"+name+".dicom")

    # One RGB colour per class id (0..14).
    palette = "icefire"
    palette = [tuple([int(x) for x in np.array(c) * (255, 255, 255)]) for c in sns.color_palette(palette, 15)]
    # Colour copy of the grayscale image on which the boxes are drawn.
    predicted = xray.image
    predicted = cv2.cvtColor(predicted,cv2.COLOR_GRAY2RGB)

    # Class id -> caption.
    mapping = {0: 'Elargissement aortique', 1: 'Atelectasie', 2: 'Calcification', 3: 'Cardiomegalie',
               4: 'Sd alveolaire', 5: 'Sd Interstitiel',6: 'Infiltration', 7: 'Opacite', 8: 'Nodule/Masse',
               9: 'Autre', 10: 'Pleuresie', 11: 'Epaississement pleural',12: 'Pneumothorax',
               13: 'Fibrose', 14: 'normal'}
    # Not read anywhere in this cell.
    decision = {(0,0) : 'fusion',(0,1):'possibleifBig&Right',(0,2):'possible',(0,3):'specificCMG',(0,4):"?",(0,5):'?',(0,6):'?',(0,7):'?',(0,8):"?",(0,9):"possible",(0,10):"impossible",(0,11):"?",(0,12):"possible",(0,13):"possible",
               (1,0):'-',(1,2):"?",(1,3):'possibleifBig&Side'}

    # The prediction string is a sequence of 6-token groups:
    # label, score, xmin, ymin, xmax, ymax.
    splited = pred.split(" ")

    result = {"label" : [],"score" : [],'xmin' : [],"ymin" : [],"xmax" : [], "ymax" : []}
    for n in range(len(splited)//6):
        result["label"].append(splited[n*6])
        result["score"].append(splited[n*6+1])
        result["xmin"].append(splited[n*6+2])
        result["ymin"].append(splited[n*6+3])
        result["xmax"].append(splited[n*6+4])
        result["ymax"].append(splited[n*6+5])

    result_df = pd.DataFrame(result)

    ####################

    # NOTE(review): TH is set to 0.3 but the filter below uses 0.4 - confirm which is intended.
    TH = 0.3
    nb = 0
    # Convert every column once (the score included: it is compared to numbers below),
    # then keep the confident detections.
    result_df_th = result_df.astype({"label": "int", "score": "float", "xmin": "int", "ymin": "int",
                                     "xmax": "int", "ymax": "int"})
    result_df_th = result_df_th[result_df_th["score"] >= 0.4].reset_index(drop=True)

    # Several "normal" rows and nothing else: drop the "normal" rows whose score is not 1.
    is_normal = result_df_th["label"] == 14
    if is_normal.sum() > 1 and (~is_normal).sum() < 1:
        result_df_th = result_df_th[~(is_normal & (result_df_th["score"] != 1))].reset_index(drop=True)

    # Skip this X-ray when it has exactly one "normal" row, more than 10 findings,
    # or fewer than 2 rows that are not aortic enlargement.
    if len(result_df_th[result_df_th['label'] == 14]) == 1 or len(result_df_th[result_df_th['label'] != 14]) > 10 or len(result_df_th[result_df_th['label'] != 0]) < 2:
        print('NORMAL, next Xray')
        normale = True


# --- Step 2: resolve overlapping boxes, then draw the remaining ones --------------------
if len(result_df_th) >= 1:
    # Rows to remove are collected here and dropped once the scan is over, so that
    # the positional labels used by .loc stay valid while iterating.
    dropped = set()
    for n in range(len(result_df_th)):
        if n in dropped:
            continue
        for j in range(len(result_df_th)):
            if j == n or j in dropped:
                continue
            area_j = (result_df_th.loc[j,"xmax"] - result_df_th.loc[j,"xmin"])*(result_df_th.loc[j,"ymax"] - result_df_th.loc[j,"ymin"])
            if area_j == 0:
                continue  # degenerate box: its covered fraction is undefined
            # Fraction of box j that is covered by box n.
            inter = intersection(Rectangle(result_df_th.loc[j,"xmin"],result_df_th.loc[j,"ymin"],result_df_th.loc[j,"xmax"],result_df_th.loc[j,"ymax"]),
                                 Rectangle(result_df_th.loc[n,"xmin"],result_df_th.loc[n,"ymin"],result_df_th.loc[n,"xmax"],result_df_th.loc[n,"ymax"]),
                                 area_j)
            if inter == 0:
                continue
            print(f'intersection of {inter} between {mapping[result_df_th.loc[n,"label"]]} ({result_df_th.loc[n,"score"]}) and {mapping[result_df_th.loc[j,"label"]]} ({result_df_th.loc[j,"score"]})')

            # "Other lesion" overlapping a much more likely finding: delete the "other lesion".
            if inter > 0.6:
                if result_df_th.loc[n,"label"] == 9 and result_df_th.loc[j,"score"] > 0.5 and result_df_th.loc[j,"score"] > 2*result_df_th.loc[n,"score"] :
                    print(f'this {mapping[result_df_th.loc[n,"label"]]} is deleted')
                    dropped.add(n)
                    break  # row n is gone, nothing else to compare it with

            # Pleural effusion: grow its box over the overlapping one and delete the latter.
            if inter > 0.8:
                if result_df_th.loc[n,"label"] == 10:
                    result_df_th.loc[n,"xmin"] = min(result_df_th.loc[n,"xmin"],result_df_th.loc[j,"xmin"])
                    result_df_th.loc[n,"ymin"] = min(result_df_th.loc[n,"ymin"],result_df_th.loc[j,"ymin"])
                    result_df_th.loc[n,"xmax"] = max(result_df_th.loc[n,"xmax"],result_df_th.loc[j,"xmax"])
                    result_df_th.loc[n,"ymax"] = max(result_df_th.loc[n,"ymax"],result_df_th.loc[j,"ymax"])
                    dropped.add(j)

    result_df_th = result_df_th.drop(index=sorted(dropped)).reset_index(drop=True)

    # Draw only the boxes that survived the overlap rules.
    for n in range(len(result_df_th)):
        predicted = draw_bboxes(predicted,
                            tl=(int(result_df_th.loc[n,"xmin"]),int(result_df_th.loc[n,"ymin"])),
                            br=(int(result_df_th.loc[n,"xmax"]),int(result_df_th.loc[n,"ymax"])),
                            rgb=palette[int(result_df_th.loc[n,"label"])],
                            score=float(result_df_th.loc[n,"score"]),
                            label=mapping[int(result_df_th.loc[n,"label"])],
                            label_location="tl",
                            opacity=0.5,
                            line_thickness=0,
                            font_scale=2,
                            font_thickness=5)
else:
    print('NORMAL')


# Original image next to the annotated one, then the table of kept detections.
show_xray(xray.image,predicted,size=(30,30))
result_df_th

# New prediction

In [None]:
from collections import Counter
from tqdm import tqdm
# Detections whose adjusted score is not strictly above TH are removed from the submission.
TH = 0.05

mapping = {0: 'Aortic enlargement', 1: 'Atelectasis', 2: 'Calcification', 3: 'Cardiomegaly', 4: 'Consolidation', 5: 'ILD',6: 'Infiltration', 7: 'Lung Opacity', 8: 'Nodule/Mass', 9: 'Other lesion', 10: 'Pleural effusion', 11: 'Pleural thickening',12: 'Pneumothorax', 13: 'Pulmonary fibrosis'}


def _adjust_prediction_string(pred, th=TH):
    """Re-weight the scores of one prediction string and drop the weak detections.

    ``pred`` is a sequence of space-separated 6-token groups:
    label, score, xmin, ymin, xmax, ymax.

    Rules (all of them look at the scores found in ``pred``, never at adjusted ones,
    so the result does not depend on the order of the detections):
        * labels 0 and 14: when the label appears several times, only its best score is kept;
        * label 3 (cardiomegaly): removed when a pleural effusion (10) is present or when
          it is not the best cardiomegaly box, otherwise its score is halved;
        * label 9 (other lesion): score divided by 4.

    Returns:
        The prediction string rebuilt from the detections whose adjusted score is
        strictly above ``th`` ("" when none is left).
    """
    tokens = pred.split(" ")
    rows = [tokens[p * 6:p * 6 + 6] for p in range(len(tokens) // 6)]
    labels = [int(row[0]) for row in rows]
    scores = [float(row[1]) for row in rows]

    count_dict = Counter(labels)
    best_scores = {}
    for label, score in zip(labels, scores):
        best_scores[label] = max(score, best_scores.get(label, score))
    # Labels are compared as integers: the former `labels == 10` compared a list of
    # strings with an int and was always False, so this rule was never applied.
    has_effusion = count_dict[10] > 0

    kept = []
    for row, label, score in zip(rows, labels, scores):
        score_i = score
        if label in (0, 14):
            if count_dict[label] != 1 and score < best_scores[label]:
                score_i = 0.0
        elif label == 3:
            if has_effusion or score < best_scores[label]:
                score_i = 0.0
            else:
                score_i = score / 2
        elif label == 9:
            score_i = score / 4
        if score_i > th:
            kept.append(" ".join([row[0], str(score_i)] + row[2:]))
    return " ".join(kept)


# NOTE: df2 is rewritten in place, so running this cell twice applies the rules twice.
for i in tqdm(range(len(df2))):
    df2.loc[i,"PredictionString"] = _adjust_prediction_string(df2.loc[i,"PredictionString"])
    


In [None]:
# NOTE: this expression is not the last one of the cell, so it produces no output.
df2 

# Write df2 to the working directory, without the index column.
df2.to_csv("./submission.csv",index=False)