In [None]:
1. Imports
2. Model initialization and loading

In-sample set: 
    1. Model predictions and storing Image_top10%prediction dictionary/csv ✔
    2. Use the actual threshold given by the data scientist to categorize or classify images (TP, FP, FN, TN) ✔
    3. Find the best threshold and fill in the respective columns.
    4. Segregate images into positive and negative on the basis of GT, arrange them in descending order of probability, and fill in the columns.
    5. Tag the images with their buckets, then use the 50 top, 50 bottom and 50 randomly sampled images from the middle bucket to create a separate CSV.
    6. Plot the GT, Radiologist Annotation and AI prediction images, and use interactive fields to update the CSV with Radiologist comments.

# Importing Libraries and DS_utils

In [None]:
import sys
sys.path.append('/opt/bucketdata/Users/Rohit/') 

In [None]:
import ast
import os
import cv2
import pydicom          
import numpy as np
import pandas as pd
from glob import glob
import tensorflow as tf
from random import shuffle
from tqdm.notebook import tqdm
import matplotlib.pyplot as plt
import tensorflow.keras.backend as K
from sklearn.metrics import roc_auc_score
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import regularizers 
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.callbacks import ModelCheckpoint, LearningRateScheduler
from tensorflow.keras.callbacks import Callback
from tensorflow.keras.losses import binary_crossentropy
from tensorflow.keras.applications import Xception
from tensorflow.keras.utils import Sequence
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, concatenate, Conv2DTranspose, Dropout,\
    UpSampling2D, LeakyReLU, Add, ZeroPadding2D, GlobalAveragePooling2D, Dense

import ds_utils
from ds_utils import utils

# Tell ds_utils to use the "keras" framework.
# NOTE(review): presumably this switches the ds_utils helpers to their Keras code paths — confirm in ds_utils.
ds_utils.set_framework("keras")
# Echo the framework ds_utils reports so the active setting is visible in the cell output.
print(ds_utils.framework())

# Custom Imports

In [None]:
from Model_Audits.normal_abnormal_2.scripts import pathology_pipeline_2 as model_pipeline
# from Model_Audits.normal_abnormal_1.scripts import abnormality_pipeline as abnormality_pipeline

# Utility functions

In [None]:
def preprocess_groundtruth(region_attribute, mask_temp, color=(255,255,255)):
    """Draw one annotated region onto ``mask_temp`` and return the result.

    Two annotation layouts are handled:

    * rectangle -- scalar ``x``, ``y``, ``width`` and ``height`` entries, drawn as a
      5-pixel-wide outline with ``cv2.rectangle``;
    * polygon -- ``x`` and ``y`` are equal-length sequences of vertex coordinates,
      drawn as a closed 16-pixel-wide outline with ``cv2.polylines``.

    Args:
        region_attribute: Mapping describing a single region (see the layouts above).
        mask_temp: Image array handed to the OpenCV drawing call.
        color: Colour tuple used for the outline.

    Returns:
        Whatever the OpenCV drawing call returns for ``mask_temp``.

    Raises:
        KeyError, TypeError, ValueError: if the region matches neither layout.
    """
    try:
        x = region_attribute['x']
        y = region_attribute['y']
        w = region_attribute['width']
        h = region_attribute['height']
        top_left = (int(x), int(y))
        bottom_right = (int(x)+int(w), int(y)+int(h))
    except (KeyError, TypeError, ValueError):
        # Not a rectangle (missing width/height, or x/y are sequences): treat it as a polygon.
        # Only the parsing above is guarded, so a failure inside OpenCV is no longer
        # silently reinterpreted as "this must be a polygon".
        xs = list(region_attribute['x'])
        ys = list(region_attribute['y'])
        vertices = np.array(list(zip(xs, ys)), np.int32).reshape((-1, 1, 2))
        # polylines draws only the boundary; cv2.fillPoly would give filled masks instead.
        return cv2.polylines(mask_temp, [vertices], True, color, 16)

    return cv2.rectangle(mask_temp, top_left, bottom_right, color, 5)


def preprocess_wrapper(region_attributes, mask, color=(255,255,255)):
    """Draw every region in ``region_attributes`` onto ``mask``.

    Each region is passed to ``preprocess_groundtruth``; a region that cannot be
    drawn is skipped so that one malformed annotation does not discard the rest.

    Args:
        region_attributes: Iterable of region descriptions.
        mask: Image array to draw on.
        color: Colour tuple forwarded to ``preprocess_groundtruth``.

    Returns:
        The mask after all drawable regions have been applied.
    """
    for region in region_attributes:
        try:
            mask = preprocess_groundtruth(region, mask, color)
        except Exception:
            # Best-effort: skip the malformed region. `Exception` rather than a bare
            # `except` so that KeyboardInterrupt/SystemExit still stop the notebook.
            continue

    return mask

In [None]:
def get_results_table(threshold, ground_truth, ai_probability):
    """Summarise classification quality at ``threshold`` as a one-row DataFrame.

    Args:
        threshold: Probabilities strictly greater than this value are labelled 1.
        ground_truth: Sequence of 0/1 labels.
        ai_probability: Sequence (list, array or Series) of predicted probabilities.

    Returns:
        A one-row DataFrame holding whatever ``utils.compute_metrics`` reports plus
        ``roc_auc`` (all formatted as strings with three decimals), followed by the
        ``true_positives``, ``true_negatives``, ``false_positives`` and
        ``false_negatives`` counts.
    """
    # Threshold once and reuse the labels for both the metrics and the confusion matrix.
    predicted_labels = np.where(np.asarray(ai_probability) > threshold, 1, 0)

    metrics = utils.compute_metrics(ground_truth, predicted_labels, print_metrics=False)
    try:
        metrics["roc_auc"] = roc_auc_score(ground_truth, ai_probability)
    except ValueError:
        # ROC-AUC is undefined when ground_truth contains a single class.
        metrics["roc_auc"] = float("nan")

    df_2 = pd.DataFrame([metrics])
    # Column-wise Series.map instead of the deprecated DataFrame.applymap.
    df_2 = df_2.apply(lambda column: column.map("{0:.3f}".format))

    # labels=[0, 1] pins the matrix to 2x2 even when one class is absent from the data,
    # so the [row][col] lookups below cannot go out of range.
    matrix = confusion_matrix(ground_truth, predicted_labels, labels=[0, 1])
    df_2['true_positives'] = matrix[1][1]
    df_2['true_negatives'] = matrix[0][0]
    df_2['false_positives'] = matrix[0][1]
    df_2['false_negatives'] = matrix[1][0]
    return df_2

In [None]:
def make_required_directories(output_path):
    """Create the output sub-folders used by this notebook under ``output_path``.

    ``output_path`` is used as a plain string prefix (the same way the writers in
    this notebook build their paths), so it should end with a path separator.
    Folders that already exist are left untouched, which makes the call safe to
    repeat on notebook re-runs.
    """
    sub_directories = (
        'heatmaps/',
        'predicted_masks/',
        'ground_truth_pathology_nopathology/',
        'original_images/',
        'ground_truth_all_pathologies/',
        'abnormality_masks/',
        'legends/',
    )
    for sub_dir in sub_directories:
        os.makedirs(output_path+sub_dir, exist_ok=True)

In [None]:
def get_legends(groundtruth_csv, image_name):
    """Render a legend image listing the pathologies annotated for ``image_name``.

    Args:
        groundtruth_csv: DataFrame with an ``image_path`` column; columns from
            position 9 onward are treated as per-pathology annotation strings.
        image_name: Value of ``image_path`` identifying the row to describe.

    Returns:
        A 768x1536 3-channel uint8 image (grey value 100) with one "---<pathology>"
        line per annotated pathology, drawn in that pathology's colour from
        ``sushrut_pathology_color_mapping``. Entries are laid out six per column.
    """
    # Look the row up by the argument; the previous code read a global loop variable `i`.
    specific_row = groundtruth_csv.loc[groundtruth_csv['image_path'] == image_name]

    # An annotation cell longer than 6 characters is treated as non-empty.
    pathologies = [column for column in specific_row.columns[9:]
                   if len(specific_row[column].values[0]) > 6]

    image = np.full((768, 1536, 3), 100, dtype='uint8')
    rows_per_column = 6
    drawn = 0
    for label, label_color in sushrut_pathology_color_mapping.items():
        if label not in pathologies:
            continue
        column_index, row_index = divmod(drawn, rows_per_column)
        # First column starts at x=10, later columns at multiples of 700; rows are 100px apart.
        # (The earlier modulo formula placed the 13th entry at y=0, i.e. above the canvas.)
        x_origin = 10 if column_index == 0 else 700 * column_index
        y_origin = 100 * (row_index + 1)
        image = cv2.putText(image, "---"+label, (x_origin, y_origin),
                            cv2.FONT_HERSHEY_SIMPLEX, 3, label_color, 2)
        drawn += 1

    return image

In [None]:
def ground_truth_mask(name, image, groundtruth_csv): #All the pathologies
    """Overlay every annotated pathology for ``name`` on ``image`` and save the result.

    Args:
        name: Value of ``image_path`` identifying the row in ``groundtruth_csv``;
            its last '/'-separated component is used as the output file name.
        image: Image array the outlines are blended onto.
        groundtruth_csv: DataFrame whose columns from position 9 onward hold
            per-pathology annotation strings (Python-literal lists of regions).

    Side effects:
        Writes a 768x768 image to ``output_path + 'ground_truth_all_pathologies/'``.

    Raises:
        ValueError, SyntaxError: if an annotation string is not a valid Python literal.
    """
    specific_row = groundtruth_csv.loc[groundtruth_csv['image_path'] == name]

    semi_gt_mask = np.zeros((image.shape[0], image.shape[1], 3), dtype='uint8')

    for j in specific_row.columns[9:]:
        annotation = specific_row[j].values[0]
        # A cell longer than 6 characters is treated as a non-empty annotation; only
        # pathologies that have a colour assigned are drawn.
        if len(annotation) > 6 and j in sushrut_pathology_color_mapping:
            print (j, annotation)
            # literal_eval parses the stored list without executing arbitrary code from
            # the CSV (the previous `eval` would have run whatever the cell contained).
            regions = ast.literal_eval(annotation)
            semi_gt_mask = preprocess_wrapper(regions, semi_gt_mask, sushrut_pathology_color_mapping[j])

    gt_mask = semi_gt_mask.astype('uint8')

    # Additive blend: image and outlines both at full weight, output forced to 8-bit.
    gt_mask_final = cv2.addWeighted(image, 1, gt_mask, 1, 0, dtype=0)

    cv2.imwrite(output_path+'ground_truth_all_pathologies/'+name.split('/')[-1], cv2.resize(gt_mask_final, (768,768)))

def render_ground_truth_masks(filenames, base_path, groundtruth_csv):
    """Write the all-pathology ground-truth overlay for every image in ``filenames``.

    This used to be a bare loop at this position, where ``filenames``, ``base_path``
    and ``ground_truth_csv`` are not yet defined on a fresh kernel. It is now a
    function, to be called as
    ``render_ground_truth_masks(filenames, base_path, ground_truth_csv)`` once those
    inputs have been loaded.

    Args:
        filenames: Iterable of image paths relative to ``base_path``.
        base_path: String prefix prepended to each file name.
        groundtruth_csv: DataFrame passed through to ``ground_truth_mask``.

    Returns:
        List of file names that could not be read (``cv2.imread`` returned None).
    """
    unreadable = []
    for idx, i in enumerate(filenames):
        print (idx, i)
        image = cv2.imread(base_path+i)
        if image is None:
            # cv2.imread signals an unreadable file with None rather than an exception.
            unreadable.append(i)
            continue
        print ("Image:", image.shape)
        ground_truth_mask(i, image, groundtruth_csv)
    return unreadable

# Important Variables

In [None]:
# Root folder that the image paths listed in the CSV are relative to.
base_path='/opt/bucketdata/ORIGINAL_DATA/MASTER_DATA/'
# Prefix for every image this notebook writes; it is concatenated as a plain string,
# so keep the trailing slash.
output_path='../saved_images/'
# Prefix for the CSV inputs/outputs and the radiologist audit folders.
parent_directory='../'

IMG_SIZE=768

# Colour tuple used when drawing each pathology's annotations with OpenCV.
# NOTE(review): some entries share a colour ('pneumonia'/'covid', 'pleural effusion'/'hernia')
# and cannot be told apart in the overlays — confirm this is intended.
sushrut_pathology_color_mapping={
    'pleural effusion': (255, 255,255),
    'pneumonia': (0,255,0),
    'covid': (0,255,0),
    'edema': (0,0,255),
    'atelectasis': (0,255,255),
    'tuberculosis': (255, 0, 255),
    'fibrosis': (236, 252, 0),
    'cardiomegaly': (25, 110, 230),
    'lung mass': (100, 0, 0),
    'surgical emphysema': (0, 100, 0),
    'nodule': (0, 0, 100),
    'pneumothorax': (0, 57, 122),
    'scoliosis': (120,0,120),
    'opaque hemithorax': (146, 156, 0),
    'hernia': (255,255,255),
    'pleural thickening': (125,125,125),
}

# Names of the ground-truth annotation columns. An image counts as positive when any of
# these columns holds a non-empty annotation. (An earlier `pathology='normal'` assignment
# was dead code: it was always overwritten by this list.)
pathology=list(sushrut_pathology_color_mapping.keys())

# Read CSV from Sushrut 

In [None]:
# Read the annotation CSV once. `ground_truth_csv` stays the complete table used for
# drawing overlays and legends; `df_test` is an independent copy that later cells sub-sample.
ground_truth_csv=pd.read_csv(parent_directory+'/miscellaneous/output.csv')
df_test=ground_truth_csv.copy()
filenames=list(df_test['image_path'])

df_test.head()

In [None]:
# Number of image paths listed in the CSV.
len(filenames)

In [None]:
# Work on at most 2500 randomly chosen rows. The size is capped at the table length so the
# cell does not raise on smaller CSVs, and the seed is fixed so a re-run selects the same rows.
df_test=df_test.sample(n=min(2500, len(df_test)), random_state=42)

In [None]:
# Refresh the list of image paths so it matches the sampled rows.
filenames=df_test['image_path'].tolist()

# Load Model and weights from Developer's script

In [None]:
# Build the model through the developer's pipeline module (imported above as `model_pipeline`).
# NOTE(review): the section title says weights are loaded as well; presumably
# abnormality_model() takes care of that — confirm in the pipeline script.
model=model_pipeline.abnormality_model()

# Prediction probability and other details for each image

# Keep only the first 200 image paths.
# NOTE(review): other cells in this notebook slice with 2500 — confirm which size is intended.
filenames=filenames[:200]

In [None]:
# Per-image results, filled in the same order as `filenames`.
AI_prediction=[]
AI_probability=[]
Ground_truth=[]
Prediction_class=[]

# cv2.imwrite reports a missing target directory by returning False rather than raising,
# so make sure every folder written to from this cell exists before the loop starts.
for sub_dir in ('heatmaps/', 'predicted_masks/', 'ground_truth_pathology_nopathology/',
                'original_images/', 'ground_truth_all_pathologies/'):
    os.makedirs(output_path+sub_dir, exist_ok=True)
print ("Directories created")

for idx, i in enumerate(filenames):
    flag=0

    # cv2.imread returns None for an unreadable path instead of raising, so test for None
    # explicitly and fall back to the 'BIMCV_disk/' sub-folder.
    image=cv2.imread(base_path+i)
    if image is None:
        image=cv2.imread(base_path+'BIMCV_disk/'+i)
        # NOTE(review): kept from the original code — setting flag here labels every image
        # loaded from the fallback folder as ground-truth positive, regardless of its
        # annotations. Confirm this is intended and not an accidental reuse of `flag`.
        flag=1
    if image is None:
        raise FileNotFoundError("Could not read image {0} under {1} or its BIMCV_disk/ sub-folder".format(i, base_path))
    print (image.shape)

    # Per-region ground-truth drawing is currently disabled here, so this mask stays empty
    # and the 'ground_truth_pathology_nopathology' image is just a dimmed copy of the input.
    semi_gt_mask=np.zeros((image.shape[0], image.shape[1]), dtype='uint8')

    # Ground truth: positive as soon as any pathology column holds a non-empty annotation.
    image_row=df_test.loc[df_test['image_path']==i]
    for j in pathology:
        try:
            if len(ast.literal_eval(image_row[j].values[0]))>0:
                flag=1
                break
        except Exception:
            # Missing column or unparsable cell: treat as "no annotation" for this pathology.
            continue

    if flag==0:
        Ground_truth.append(0)
    else:
        Ground_truth.append(1)

    # Replicate the single-channel mask into three channels so it can be blended with the image.
    gt_mask=np.dstack((semi_gt_mask, semi_gt_mask, semi_gt_mask))
    gt_mask=cv2.addWeighted(image, 0.7, gt_mask, 0.5, 0)
    gt_mask=cv2.resize(gt_mask, (768,768))

    # Writes the all-pathology overlay for this image (uses the full-resolution image).
    ground_truth_mask(i, image, ground_truth_csv)

    image=cv2.resize(image, (768,768))

    print ("Predicting on Image No: {0}, Filepath: {1}".format(idx,(base_path+i)) )
    prediction=model.predict(model_pipeline.preprocessing(image), verbose=1)
    print ("Model Inference done. Adding Entry to the Excel sheet")

    original_threshold, predicted_probability, predicted_class, predicted_mask, heatmap = model_pipeline.postprocessing(prediction)

    print ("Predicted Mask", predicted_mask.dtype, np.max(predicted_mask), predicted_mask.shape)

    predicted_mask=utils.overlay_mask_on_image(image, predicted_mask)

    AI_probability.append(predicted_probability)
    AI_prediction.append(predicted_class)

    print ("Heatmap Mask", heatmap.dtype, np.max(heatmap), heatmap.shape)

    heatmap = cv2.addWeighted(image,1.0,heatmap,0.2,0,dtype=0)

    # Confusion-matrix bucket for this image at the developer's original threshold.
    if AI_prediction[-1]==1 and Ground_truth[-1]==1:
        Prediction_class.append('True Positive')
    elif AI_prediction[-1]==0 and Ground_truth[-1]==1:
        Prediction_class.append('False Negative')
    elif AI_prediction[-1]==1 and Ground_truth[-1]==0:
        Prediction_class.append('False Positive')
    else:
        Prediction_class.append('True Negative')

    print ("Image", image.dtype, np.max(image), image.shape)

    output_name=i.split('/')[-1]
    cv2.imwrite(output_path+'heatmaps/'+output_name, heatmap)
    cv2.imwrite(output_path+'predicted_masks/'+output_name, predicted_mask)
    cv2.imwrite(output_path+'ground_truth_pathology_nopathology/'+output_name, gt_mask)
    cv2.imwrite(output_path+'original_images/'+output_name, image)
            

# Collect the per-image results of the prediction loop into one table.
# The table is built once, with every length taken from the data itself. The earlier
# second build hard-coded 2500 rows and raised a length mismatch for any other run size.
df=pd.DataFrame()
df['image_name']=filenames
df['AI_probability']=AI_probability
df['AI_prediction_original_threshold']=AI_prediction
df['Ground_truth']=Ground_truth
df['Original_threshold']=[original_threshold]*len(Ground_truth)
df['AI_Prediction_Class']=Prediction_class

# NOTE(review): destructive shell command. It removes 'pleural_effusion/saved_images/' relative to
# the kernel's working directory, which is not the `output_path` ('../saved_images/') used by the
# rest of this notebook — confirm the target is intended before running.
! rm -rf pleural_effusion/saved_images/

In [None]:
# exist_ok lets the cell be re-run; index=False keeps the positional index out of the file
# so it does not come back as an 'Unnamed: 0' column when the CSV is reloaded.
os.makedirs(parent_directory+'/predicted_csvs/', exist_ok=True)
df.to_csv(parent_directory+'/predicted_csvs/prediction.csv', index=False)

In [None]:
# Preview the first rows of the prediction table.
df.head()

In [None]:
# cv2.imwrite returns False rather than raising when the target folder is missing,
# so create the legends folder before writing into it.
os.makedirs(output_path+'legends/', exist_ok=True)

# Render and save one legend image per file, listing the pathologies annotated for it.
for idx, i in enumerate(filenames[0:2500]):
    print(idx, i)
    legend=get_legends(ground_truth_csv, i)

    cv2.imwrite(output_path+'legends/'+i.split('/')[-1], legend)

# Find out best threshold

In [None]:
# Reload the prediction table from the CSV path written earlier in this notebook.
# NOTE(review): presumably this lets the threshold analysis start from the saved file
# without repeating inference — confirm.
df=pd.read_csv(parent_directory+'/predicted_csvs/prediction.csv')

In [None]:
# Number of rows in the reloaded prediction table.
len(df)

In [None]:
from IPython.display import display, Markdown, Latex
from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets
from sklearn.metrics import confusion_matrix

def _metrics_at_threshold(x):
    """Return the results table for threshold ``x`` over the current prediction table."""
    return get_results_table(x, list(df['Ground_truth']), (df['AI_probability']))

# Slider over [0, 1] in steps of 0.01, starting at 0.5; moving it recomputes the table.
threshold_slider=widgets.FloatSlider(min=0, max=1, step=0.01, value=0.5)
a=widgets.interact(_metrics_at_threshold, x=threshold_slider)

In [None]:
# Input box for the threshold chosen with the slider above.
# The initial value must lie inside [min, max]. The previous 7.5 was outside the range and
# was clamped by the widget to 1.0. 0.5 matches the slider's starting point.
best_threshold=widgets.BoundedFloatText(
    value=0.5,
    min=0,
    max=1.0,
    step=0.001,
    description='Best Threshold:',
    disabled=False
)

best_threshold

In [None]:
# Show the value currently entered in the threshold widget.
print ("The best threshold is: ",best_threshold.value)

In [None]:
# Record the chosen threshold on every row (a scalar is broadcast to the whole column).
df['Best_Threshold']=best_threshold.value

In [None]:
# Preview only the first rows instead of rendering the whole table.
df.head()

In [None]:
# index=False: do not write the positional index, otherwise every save/load round trip
# adds another 'Unnamed: N' column to the file.
df.to_csv(parent_directory+'/predicted_csvs/prediction.csv', index=False)

# Segregate Dataframes into positive and negative

In [None]:
# Columns available at this point: Image_name, AI_probability, AI_prediction_original_threshold,
# Ground_truth, Original_threshold
# Columns added further down: Percentile (computed per class)

# Split the table by ground-truth label: 1 -> positive, 0 -> negative.
df_positive=df[df['Ground_truth'].eq(1)]
df_negative=df[df['Ground_truth'].eq(0)]

In [None]:
# Order both classes from lowest to highest predicted probability.
# `ascending` must be a real bool. The string 'True' is rejected by pandas versions that
# validate the argument and only worked on older ones because it is truthy.
df_positive=df_positive.sort_values(by=['AI_probability'], ascending=True)
df_negative=df_negative.sort_values(by=['AI_probability'], ascending=True)

# NOTE(review): kept from the original code — this appends the negatives to `df_positive`, so
# every later "positive" percentile and bin also contains ground-truth negatives. That
# contradicts the positive/negative split above; confirm whether it is intended.
df_positive=pd.concat([df_positive, df_negative])

# Add Percentile and AI confidence

In [None]:
# Percentile = row position in the probability-sorted table, scaled to 0-100
# (lowest probability -> 0). Confidence bucket: >70 High, >30 Medium, otherwise Low.
n_rows=len(df_positive)
positive_percentile=[(rank/n_rows)*100 for rank in range(n_rows)]
ai_confidence=["High" if pct>70 else "Medium" if pct>30 else "Low"
               for pct in positive_percentile]

df_positive['Percentile']=positive_percentile
df_positive['AI_Confidence']=ai_confidence

In [None]:
# For negatives the scale is inverted: the lowest-probability row gets percentile 100 and
# the value decreases with row position. Confidence bucket: >70 High, >30 Medium, otherwise Low.
n_rows=len(df_negative)
negative_percentile=[(1-rank/n_rows)*100 for rank in range(n_rows)]
ai_confidence=["High" if pct>70 else "Medium" if pct>30 else "Low"
               for pct in negative_percentile]

df_negative['Percentile']=negative_percentile
df_negative['AI_Confidence']=ai_confidence

# Sample Images According to the buckets

In [None]:
# Number of images drawn for each bin below.
# NOTE(review): the outline at the top of this notebook mentions 50 images per bucket — confirm which is intended.
num_samples=75

In [None]:
# Number of rows available for the positive bins.
len(df_positive)

In [None]:
# Positive images
# Top bin: the highest-probability rows; bottom bin: the lowest-probability rows.
top_positive_bin=df_positive.tail(num_samples)
# Middle bin: random draw from the rows positioned between 40% and 70% of the sorted table.
# The draw is capped at the pool size so small tables do not raise, and seeded so it is repeatable.
mid_positive_pool=df_positive.iloc[int(len(df_positive)*0.4):int(len(df_positive)*0.7)]
mid_positive_bin=mid_positive_pool.sample(n=min(num_samples, len(mid_positive_pool)), random_state=42)
bottom_positive_bin=df_positive.head(num_samples)

In [None]:
# Bin size for the negative bins; re-assigns the same value that was set for the positive bins.
num_samples=75

In [None]:
# Negative Images

# Top bin: the lowest-probability negatives; bottom bin: the highest-probability negatives.
top_negative_bin=df_negative.head(num_samples)
# Middle bin: random draw from the rows positioned between 40% and 70% of the sorted table.
# The draw is capped at the pool size so small tables do not raise, and seeded so it is repeatable.
mid_negative_pool=df_negative.iloc[int(len(df_negative)*0.4):int(len(df_negative)*0.7)]
mid_negative_bin=mid_negative_pool.sample(n=min(num_samples, len(mid_negative_pool)), random_state=42)
bottom_negative_bin=df_negative.tail(num_samples)

In [None]:
# Inspect the bin built from the tail of the probability-sorted negatives.
bottom_negative_bin

## Plot and Save the Grids

In [None]:
def plot_grids_and_save_images(bin_name, image_names, save_path):
    """Assemble and save a 2x2 review grid for every image in ``image_names``.

    Grid layout (read from the folders under ``output_path``):

        original image  | ground truth (all pathologies)
        predicted mask  | heatmap

    Args:
        bin_name: Label used only in the progress messages.
        image_names: Image paths; the last '/'-separated component is the file name.
        save_path: Folder prefix the grids are written to (created if missing;
            should end with a path separator because it is concatenated as a string).

    Images with an unreadable panel are reported and skipped.
    """
    # exist_ok so that re-running the notebook does not fail on folders from a previous run.
    os.makedirs(save_path, exist_ok=True)
    for idx, i in enumerate(image_names):
        print(bin_name, str(idx*100/len(image_names))+"% Done")
        file_name = i.split('/')[-1]
        original_image = cv2.imread(output_path+'/original_images/'+file_name)
        ground_truth = cv2.imread(output_path+'/ground_truth_all_pathologies/'+file_name)
        abnormality_mask = cv2.imread(output_path+'/predicted_masks/'+file_name)
        heatmap = cv2.imread(output_path+'/heatmaps/'+file_name)

        # cv2.imread returns None for a missing/unreadable file; concatenating None would
        # fail with an unhelpful numpy error, so report the file and move on.
        if any(panel is None for panel in (original_image, ground_truth, abnormality_mask, heatmap)):
            print(bin_name, "skipping "+file_name+": one or more panels could not be read")
            continue

        upper = np.concatenate((original_image, ground_truth), axis=1)
        lower = np.concatenate((abnormality_mask, heatmap), axis=1)
        final_grid = np.concatenate((upper, lower), axis=0)

        cv2.imwrite(save_path+file_name, final_grid)

In [None]:
# (progress label, bin table, output folder) for each positive bin.
positive_grid_jobs=[
    ("Top Positive Bin", top_positive_bin, '/radiologist_audit/highly_confident_positive_images/'),
    ("Medium Positive Bin", mid_positive_bin, '/radiologist_audit/medium_confident_positive_images/'),
    ("Least Positive Bin", bottom_positive_bin, '/radiologist_audit/low_confident_positive_images/'),
]
for grid_label, bin_frame, grid_folder in positive_grid_jobs:
    plot_grids_and_save_images(grid_label, list(bin_frame['image_name']), parent_directory+grid_folder )

In [None]:
# (progress label, bin table, output folder) for each negative bin.
negative_grid_jobs=[
    ("Top Negative Bin", top_negative_bin, '/radiologist_audit/highly_confident_negative_images/'),
    ("Medium Negative Bin", mid_negative_bin, '/radiologist_audit/medium_confident_negative_images/'),
    ("Least Negative Bin", bottom_negative_bin, '/radiologist_audit/low_confident_negative_images/'),
]
for grid_label, bin_frame, grid_folder in negative_grid_jobs:
    plot_grids_and_save_images(grid_label, list(bin_frame['image_name']), parent_directory+grid_folder )