In [None]:
# Copy the bundled GDCM archive into the working directory, unpack it and
# install the contained conda package without network access (--offline).
!cp ../input/gdcm-conda-install/gdcm.tar .
!tar -xvzf gdcm.tar
!conda install --offline ./gdcm/gdcm-2.8.9-py37h71b2a6d_0.tar.bz2

In [None]:
# Install MONAI 0.3.0 from the wheel shipped in the input dataset (quiet mode).
!pip install -q ../input/monai030/monai-0.3.0-202010042353-py3-none-any.whl

In [None]:
# Unpack the source archive into the current directory; the inference script
# invoked further down is run from here (presumably it ships in this archive).
! tar xvf ../input/rsna-src/workdir.tar_ 

In [None]:
import pandas as pd
import os
# Create the local "cache" directory; exist_ok=True makes re-running this cell harmless.
os.makedirs("cache", exist_ok=True)

In [None]:
# Run the inference script; the next cell expects it to leave a submission.csv
# in the working directory.
! python3 ./sub_b3_monai_position_1026_ensemble.py  --mode private   --skip

In [None]:
# Keep the raw predictions under a different name; submission.csv is only
# re-created at the end of the notebook, after the consistency check passes.
! mv submission.csv submission_pre.csv

# Requirement: label consistency check

In [None]:
# sub: raw predictions written by the inference script (columns used below: id, label).
# test: competition test.csv, which maps each image (SOPInstanceUID) to its
# exam (StudyInstanceUID) and series (SeriesInstanceUID).
sub = pd.read_csv("./submission_pre.csv")
test = pd.read_csv("../input/rsna-str-pulmonary-embolism-detection/test.csv")

In [None]:
# Preview the raw predictions before any consistency adjustment is applied.
sub

In [None]:
def check_consistency(sub, test):
    
    '''
    Checks label consistency and returns the errors
    
    Every exam is classified as positive when at least one of its images has
    a predicted pe_present_on_image > 0.5, otherwise as negative. The
    exam-level labels are then checked against these rules (a label counts
    as "set" when its prediction is > 0.5):
    
    Positive exams
      1a  exactly one of rv_lv_ratio_lt_1 / rv_lv_ratio_gte_1 is set
      1b  at least one of central_pe / rightsided_pe / leftsided_pe is set
      1c  acute_and_chronic_pe and chronic_pe are not both set
      1d  neither indeterminate nor negative_exam_for_pe is set
    Negative exams
      2a  exactly one of indeterminate / negative_exam_for_pe is set
      2b  none of the rv/lv, location or acute/chronic labels is set
    
    Args:
    sub   = submission dataframe (pandas) with columns 'id' and 'label';
            exam-level ids look like '<StudyInstanceUID>_<label name>',
            image-level ids are SOPInstanceUIDs
    test  = test.csv dataframe (pandas) with columns 'StudyInstanceUID',
            'SeriesInstanceUID' and 'SOPInstanceUID'
    
    Returns:
    DataFrame with one row per (image row, broken rule); the rule id is
    stored in the 'broken_rule' column. Empty when everything is consistent.
    Neither input frame is modified.
    '''
    
    # EXAM LEVEL
    # Split every id once at its first underscore instead of scanning the whole
    # submission once per study. Matching the prefix exactly (rather than by
    # substring) keeps unrelated ids out of the exam table.
    study_uids  = test['StudyInstanceUID'].unique()
    id_parts    = sub['id'].str.partition('_')
    is_exam_row = id_parts[0].isin(study_uids) & (id_parts[1] == '_')
    df_exam = pd.DataFrame({
        'StudyInstanceUID': id_parts.loc[is_exam_row, 0],
        'label_type':       id_parts.loc[is_exam_row, 2],
        'label':            sub.loc[is_exam_row, 'label'],
    }).pivot(index = 'StudyInstanceUID', columns = 'label_type', values = 'label')
    
    # IMAGE LEVEL
    df_image = (
        sub.loc[sub['id'].isin(test['SOPInstanceUID'])]
        .merge(test, how = 'left', left_on = 'id', right_on = 'SOPInstanceUID')
        .rename(columns = {'label': 'pe_present_on_image'})
        .drop(columns = 'id')
    )
    
    # MERGER
    # reset_index() turns the study UID into a regular column so the merge is
    # an unambiguous column-to-column join.
    df = df_exam.reset_index().merge(df_image, how = 'left', on = 'StudyInstanceUID')
    ids    = ['StudyInstanceUID', 'SeriesInstanceUID', 'SOPInstanceUID']
    labels = [c for c in df.columns if c not in ids]
    df = df[ids + labels]
    
    # SPLIT NEGATIVE AND POSITIVE EXAMS
    df['positive_images_in_exam'] = df.groupby('StudyInstanceUID')['pe_present_on_image'].transform('max')
    df_pos = df.loc[df.positive_images_in_exam >  0.5]
    df_neg = df.loc[df.positive_images_in_exam <= 0.5]
    
    def _violations(frame, mask, rule):
        # Rows of `frame` selected by `mask`, tagged with the rule they break.
        return frame.loc[mask].reset_index(drop = True).assign(broken_rule = rule)
    
    # CHECKING CONSISTENCY OF POSITIVE EXAM LABELS
    rule1a = _violations(df_pos,
                         ((df_pos.rv_lv_ratio_lt_1  >  0.5) & (df_pos.rv_lv_ratio_gte_1 >  0.5)) |
                         ((df_pos.rv_lv_ratio_lt_1  <= 0.5) & (df_pos.rv_lv_ratio_gte_1 <= 0.5)),
                         '1a')
    rule1b = _violations(df_pos,
                         (df_pos.central_pe    <= 0.5) &
                         (df_pos.rightsided_pe <= 0.5) &
                         (df_pos.leftsided_pe  <= 0.5),
                         '1b')
    rule1c = _violations(df_pos,
                         (df_pos.acute_and_chronic_pe > 0.5) & (df_pos.chronic_pe > 0.5),
                         '1c')
    rule1d = _violations(df_pos,
                         (df_pos.indeterminate > 0.5) | (df_pos.negative_exam_for_pe > 0.5),
                         '1d')

    # CHECKING CONSISTENCY OF NEGATIVE EXAM LABELS
    rule2a = _violations(df_neg,
                         ((df_neg.indeterminate >  0.5) & (df_neg.negative_exam_for_pe >  0.5)) |
                         ((df_neg.indeterminate <= 0.5) & (df_neg.negative_exam_for_pe <= 0.5)),
                         '2a')
    rule2b = _violations(df_neg,
                         (df_neg.rv_lv_ratio_lt_1     > 0.5) |
                         (df_neg.rv_lv_ratio_gte_1    > 0.5) |
                         (df_neg.central_pe           > 0.5) |
                         (df_neg.rightsided_pe        > 0.5) |
                         (df_neg.leftsided_pe         > 0.5) |
                         (df_neg.acute_and_chronic_pe > 0.5) |
                         (df_neg.chronic_pe           > 0.5),
                         '2b')
    
    # MERGING INCONSISTENT PREDICTIONS
    errors = pd.concat([rule1a, rule1b, rule1c, rule1d, rule2a, rule2b], axis = 0)
    
    # OUTPUT
    print('Found', len(errors), 'inconsistent predictions')
    return errors

In [None]:
# Baseline: list the rule violations in the raw predictions. Copies are passed,
# so sub and test stay exactly as loaded.
check_consistency(sub.copy(), test.copy())

In [None]:
# Build `df`: one row per test image carrying both the image-level prediction
# (pe_present_on_image) and the exam-level predictions of the exam it belongs to.

# EXAM LEVEL
# Exam-level ids look like '<StudyInstanceUID>_<label name>'. Split every id
# once at its first underscore and keep the rows whose prefix is a test study,
# instead of scanning the whole submission once per study and growing a frame
# with repeated pd.concat calls.
study_uids  = test['StudyInstanceUID'].unique()
id_parts    = sub['id'].str.partition('_')
is_exam_row = id_parts[0].isin(study_uids) & (id_parts[1] == '_')

df_exam = pd.DataFrame({
    'StudyInstanceUID': id_parts.loc[is_exam_row, 0],
    'label_type':       id_parts.loc[is_exam_row, 2],
    'label':            sub.loc[is_exam_row, 'label'],
}).pivot(index = 'StudyInstanceUID', columns = 'label_type', values = 'label')


# IMAGE LEVEL
df_image = (
    sub.loc[sub['id'].isin(test['SOPInstanceUID'])]
    .merge(test, how = 'left', left_on = 'id', right_on = 'SOPInstanceUID')
    .rename(columns = {'label': 'pe_present_on_image'})
    .drop(columns = 'id')
)

# MERGER
# reset_index() makes the study UID a regular column so the merge is an
# unambiguous column-to-column join.
df = df_exam.reset_index().merge(df_image, how = 'left', on = 'StudyInstanceUID')
ids    = ['StudyInstanceUID', 'SeriesInstanceUID', 'SOPInstanceUID']
labels = [c for c in df.columns if c not in ids]
df = df[ids + labels]
df.head()

In [None]:
from tqdm import tqdm
import numpy as np

In [None]:
#%%capture

# Post-process the exam-level predictions in `sub` so that every exam satisfies
# the rules verified by check_consistency(). An exam is treated as positive when
# any of its images has pe_present_on_image > 0.5. Labels are only moved as far
# as needed: just across 0.5 (to lower_bound / upper_bound).

upper_bound = 0.500001  # value assigned when a label must end up > 0.5
lower_bound = 0.499999  # value assigned / clip ceiling when a label must end up <= 0.5

# Label groups and the per-label weights used to decide which label of a group
# is cheapest to move across 0.5.
# NOTE(review): the weights look like per-label metric weights — confirm the source.
position_labels       = ["rightsided_pe", "leftsided_pe", "central_pe"]
position_weights      = np.array([0.0625, 0.0625, 0.1877])
rv_lv_labels          = ["rv_lv_ratio_lt_1", "rv_lv_ratio_gte_1"]
rv_lv_weights         = np.array([0.0782, 0.2346])
acute_chronic_labels  = ["acute_and_chronic_pe", "chronic_pe"]
acute_chronic_weights = np.array([0.104294, 0.104294])

for i in tqdm(df.StudyInstanceUID.unique()):
    sel = df[df.StudyInstanceUID == i].copy().reset_index(drop = True)

    # Exam-level values are repeated on every image row, so row 0 is enough.
    old_negative_exam_for_pe = sel.negative_exam_for_pe[0]
    old_position      = np.array([sel[c][0] for c in position_labels], dtype = float)
    old_rv_lv         = np.array([sel[c][0] for c in rv_lv_labels], dtype = float)
    old_acute_chronic = np.array([sel[c][0] for c in acute_chronic_labels], dtype = float)

    position_arr      = old_position.copy()
    rv_lv_arr         = old_rv_lv.copy()
    acute_chronic_arr = old_acute_chronic.copy()

    image_level_bool = (sel.pe_present_on_image > 0.5).any()

    if image_level_bool:
        # Exam has at least one positive image -> exam must be positive.
        new_negative_exam_for_pe = np.clip(old_negative_exam_for_pe, 0, lower_bound)

        # Position: at least one of rightsided, leftsided, central has to be > 0.5.
        # If none is, raise the one with the smallest weighted distance to 0.5.
        if not (position_arr > 0.5).any():
            position_arr[np.argmin((0.5 - position_arr) * position_weights)] = upper_bound

        # RV/LV: exactly one of the two has to be > 0.5.
        if (rv_lv_arr > 0.5).all():
            # both set -> lower the one that is cheapest to lower
            rv_lv_arr[np.argmin((rv_lv_arr - 0.5) * rv_lv_weights)] = lower_bound
        elif (rv_lv_arr <= 0.5).all():
            # none set (<= matches the checker, which treats exactly 0.5 as not set)
            # -> raise the one that is cheapest to raise
            rv_lv_arr[np.argmin((0.5 - rv_lv_arr) * rv_lv_weights)] = upper_bound

        # Acute/chronic: the two labels must not both be > 0.5.
        if (acute_chronic_arr > 0.5).all():
            acute_chronic_arr[np.argmin((acute_chronic_arr - 0.5) * acute_chronic_weights)] = lower_bound
    else:
        # Exam has no positive image -> exam must be negative and every
        # PE-specific exam label must stay at or below 0.5.
        new_negative_exam_for_pe = np.clip(old_negative_exam_for_pe, upper_bound, 1)
        position_arr      = np.clip(position_arr, 0, lower_bound)
        rv_lv_arr         = np.clip(rv_lv_arr, 0, lower_bound)
        acute_chronic_arr = np.clip(acute_chronic_arr, 0, lower_bound)

    # Report which label groups were touched for this exam.
    if not (acute_chronic_arr == old_acute_chronic).all():
        print("ACUTE/CHRONIC changed")

    if not (rv_lv_arr == old_rv_lv).all():
        print("RVLV changed")

    if not (position_arr == old_position).all():
        print("POSITION changed")

    if image_level_bool:
        print(i, acute_chronic_arr, rv_lv_arr, position_arr)

    # Write everything back with a single .loc assignment per label (chained
    # sub["label"][mask] = ... is not guaranteed to modify `sub`). The
    # acute/chronic values are written too, otherwise their fix would be lost.
    # indeterminate is capped at 0.5 for every exam.
    adjusted = {
        "negative_exam_for_pe": new_negative_exam_for_pe,
        "rightsided_pe":        position_arr[0],
        "leftsided_pe":         position_arr[1],
        "central_pe":           position_arr[2],
        "rv_lv_ratio_lt_1":     rv_lv_arr[0],
        "rv_lv_ratio_gte_1":    rv_lv_arr[1],
        "acute_and_chronic_pe": acute_chronic_arr[0],
        "chronic_pe":           acute_chronic_arr[1],
        "indeterminate":        np.clip(sel.indeterminate[0], 0, 0.5),
    }
    for label_name, value in adjusted.items():
        sub.loc[sub.id == i + "_" + label_name, "label"] = value

    print(i, sub.loc[sub.id == i + "_rv_lv_ratio_lt_1", "label"], sub.loc[sub.id == i + "_rv_lv_ratio_gte_1", "label"])

In [None]:
# Re-run the consistency check on the adjusted predictions; `check` holds the
# remaining violations (empty when everything is consistent).
check = check_consistency(sub, test)

In [None]:
# Number of remaining inconsistent rows; the final cell only writes the
# submission when this is 0.
len(check)

In [None]:
# Only write the final submission when no inconsistent prediction is left.
# The raw file was renamed to submission_pre.csv earlier, so if the check fails
# there is no submission.csv at all; report that instead of finishing silently.
if len(check) == 0:
    sub.to_csv("submission.csv", index = False)
else:
    print("submission.csv NOT written:", len(check), "inconsistent predictions remain")