In [None]:
!cp /kaggle/input/gdcm-conda-install/gdcm.tar .
!tar -xvzf gdcm.tar
!conda install --offline ./gdcm/gdcm-2.8.9-py37h71b2a6d_0.tar.bz2
!rm -rf ./gdcm.tar
!pip install pandarallel

print("... PIP/APT INSTALLS COMPLETE ...\n")


In [None]:
# Other Competition Related Imports
import pydicom
from pydicom.pixel_data_handlers.util import apply_voi_lut
from pandarallel import pandarallel; pandarallel.initialize();
import tensorflow as tf
import tensorflow_addons as tfa
import os
import shutil
import gc
import cv2
import random
import glob
import numpy as np
import pandas as pd
from PIL import Image
from tqdm import tqdm
from shutil import copyfile
import matplotlib.pyplot as plt

In [None]:
### Seeding - Attempt to be reproducible ###

def seed_it_all(seed=7):
    """Seed every random number generator this notebook uses.

    Sets the PYTHONHASHSEED environment variable and seeds Python's `random`,
    NumPy's global generator and TensorFlow's global generator with `seed`.

    Args:
        seed (int): value handed to each generator (default 7).
    """
    # Exported for any child process; the value is stored as a string.
    os.environ['PYTHONHASHSEED'] = str(seed)
    for seeder in (random.seed, np.random.seed, tf.random.set_seed):
        seeder(seed)

seed_it_all()

Connect the Dataset

In [None]:
base_path = '../input/siim-covid19-detection'

# Split the top-level entries of the competition folder into directories and files.
directory_names = [name for name in os.listdir(base_path)
                   if os.path.isdir(os.path.join(base_path, name))]
file_names = [name for name in os.listdir(base_path)
              if not os.path.isdir(os.path.join(base_path, name))]

print('Directories:')
print('\n'.join(directory_names))
print('\nFiles:')
print('\n'.join(file_names))

TRAIN_PATH = os.path.join(base_path, "train")
TEST_PATH = os.path.join(base_path, "test")

# Test DICOMs sit three levels below the test folder: <study>/<series>/<image>.
image_test_paths = glob.glob(os.path.join(TEST_PATH, "*/*/*"))
print(len(image_test_paths))

In [None]:
### read sample submission csv ### 
# Read the submisison file
sub_df = pd.read_csv('/kaggle/input/siim-covid19-detection/sample_submission.csv')
print(len(sub_df))
sub_df.head()

In [None]:
## we're mainly just focusing on the study-levels ## 
studylvl_df = sub_df.loc[sub_df.id.str.contains('_study')]
print(f"Amount of Study rows in Sample Submission:          {len(studylvl_df)}")

# but we'll concatenate this later 
imagelvl_df = sub_df.loc[sub_df.id.str.contains('_image')]
print(f"Amount of Image-level rows in Sample Submission:    {len(imagelvl_df)}")

In [None]:
####### IMAGE PREPARATION FUNCTIONS ##########

### * dicom2array(): converts dicoms to arrays
### * reshape_square(): reshapes each example into a N x N square

#Define a function that uses `pydicom` to properly turn dicom files 
#into numpy arrays that are viewable in matplotlib. Code stolen from Darien Schettler

def dicom2array(path, voi_lut=True, fix_monochrome=True):
    """ Convert dicom file to a numpy array min-max scaled to [0, 1]

    Args:
        path (str): Path to the dicom file to be converted
        voi_lut (bool): Whether to apply the file's VOI LUT / windowing
            (via `apply_voi_lut`) instead of using the raw pixel values
        fix_monochrome (bool): Whether to invert images whose
            PhotometricInterpretation is "MONOCHROME1"

    Returns:
        Float numpy array of the respective dicom file. Values are scaled so
        the minimum is 0 and the maximum is 1; an image whose pixels are all
        identical is returned as all zeros instead of NaN.

    """
    # `dcmread` is the current name of the reader; `read_file` is its deprecated alias.
    dicom = pydicom.dcmread(path)

    # VOI LUT (if available by DICOM device) is used to
    # transform raw DICOM data to "human-friendly" view
    raw_pixels = dicom.pixel_array
    data = apply_voi_lut(raw_pixels, dicom) if voi_lut else raw_pixels

    # MONOCHROME1 stores the image inverted; flip it around its maximum.
    if fix_monochrome and dicom.PhotometricInterpretation == "MONOCHROME1":
        data = np.amax(data) - data

    data = np.asarray(data, dtype=np.float64)
    lowest, highest = np.min(data), np.max(data)

    # A constant image has zero range; dividing would produce NaN everywhere.
    if highest == lowest:
        return np.zeros_like(data)

    return (data - lowest) / (highest - lowest)

def resize_square_from_array(array, pixels, save_dir=None):

    """
    Scale an image so its shorter side equals `pixels`, then centre-crop the
    longer side so the result is exactly `pixels` x `pixels`.

    The aspect ratio is preserved while scaling. Unlike `Image.thumbnail`
    (which only ever shrinks), the image is resized in either direction, so
    inputs smaller than `pixels` are enlarged rather than returned undersized.

    ARGS:

    array (np.ndarray):  A greyscale image with values in [0, 1]; it is
                         multiplied by 255 and converted to uint8.
    pixels (int):        the side length of the square image you want.

    KWARGS:
    save_dir (path):     accepted for backward compatibility; currently unused
                         (the image is returned, not saved).

    RETURNS:
    PIL.Image.Image of size (pixels, pixels) in mode 'L'.

    """
    if pixels <= 0:
        raise ValueError(f"pixels must be positive, got {pixels}")

    image = Image.fromarray(np.uint8(array*255),mode='L')

    orig_width, orig_height = image.size

    # scale from the shorter side so neither side ends up below `pixels`
    smaller_side = min(orig_width, orig_height)
    scale_factor = pixels/smaller_side

    # round instead of truncating, and clamp: float error must not yield pixels-1
    new_width = max(pixels, int(round(scale_factor*orig_width)))
    new_height = max(pixels, int(round(scale_factor*orig_height)))
    image = image.resize((new_width, new_height), Image.BICUBIC)

    # centre crop with integer offsets; the box is always pixels x pixels
    left = (new_width - pixels)//2
    top = (new_height - pixels)//2
    image = image.crop((left, top, left + pixels, top + pixels))

    return image


In [None]:
IMG_SIZE = 512
test_folder = '../input/siim-covid19-detection/test'
new_test_folder = './test_images'

def all_in_one_preparations(test_folder, new_test_folder):

    """
    Walk the test folder (study / series / image), convert every dicom into a
    square png inside `new_test_folder`, and collect per-study bookkeeping.

    ARGS:

    test_folder (path): Where all the test dicoms are held (from dataset)
    new_test_folder (path): Where to put all the converted images (created if missing)

    RETURNS:

    Four lists with one entry per study, all in the same (sorted) study order:
      * study ids (folder names)
      * list of image ids in that study (file names without '.dcm')
      * list of paths of the converted pngs
      * list of the original array shapes returned by dicom2array

    """
    master_study_ids = []
    master_image_ids = []
    master_imglevel_paths = []
    master_img_size = []

    os.makedirs(new_test_folder, exist_ok=True)

    # os.listdir returns entries in arbitrary order; sort for reproducible output
    for study in tqdm(sorted(os.listdir(test_folder))):
        study_dir = os.path.join(test_folder, study)
        master_study_ids.append(study)

        # a study may hold more than one image, so collect per-study lists
        study_images = []
        study_img_paths = []
        img_sizes = []

        for series in sorted(os.listdir(study_dir)):
            series_dir = os.path.join(study_dir, series)

            for image in sorted(os.listdir(series_dir)):
                study_images.append(image.replace('.dcm', ''))

                # dicom -> normalised array -> IMG_SIZE x IMG_SIZE square image
                xray = dicom2array(os.path.join(series_dir, image))
                img_sizes.append(xray.shape)
                square_image = resize_square_from_array(xray, pixels=IMG_SIZE)

                # pngs are written flat into new_test_folder, named after the image id
                new_image_path = os.path.join(new_test_folder, image.replace('.dcm', '.png'))
                square_image.save(new_image_path)
                study_img_paths.append(new_image_path)

        master_image_ids.append(study_images)
        master_imglevel_paths.append(study_img_paths)
        master_img_size.append(img_sizes)

    return master_study_ids, master_image_ids, master_imglevel_paths, master_img_size
             
                
study_ids, image_ids, imglevel_paths, image_sizes = all_in_one_preparations(test_folder, new_test_folder)

In [None]:
amount_of_classes_should_be = len(os.listdir(test_folder))
amount_of_transfered_images = len(os.listdir('./test_images'))

print(f"Amount of Images Transfered: {amount_of_transfered_images} images for {amount_of_classes_should_be} classes")

In [None]:
########## create a master csv with the following columns ##########

# * study ID 
# * one image associated with the study (if multiple, chose one)
# * path to that chosen image. 
dhruv_master_df = pd.DataFrame(list(zip(study_ids, image_ids, imglevel_paths,image_sizes)),
                         columns =['study_id', 'image_id', 'image_paths','og_image_sizes'])
master_df = pd.DataFrame(list(zip(study_ids, image_ids, imglevel_paths)),
                         columns =['study_id', 'image_id', 'image_paths'])

# since there will be a few duplicate rows in the dataframe since 
# the iterator goes by image id and not by class 


print(f"Amount of Rows in Dataset (should be {amount_of_classes_should_be}): {len(master_df.index)}\n")

print("Master Dataset Preview\n===============================================================================")
master_df

In [None]:
dhruv_master_df

In [None]:
# Import under an alias: a bare `Image` here would replace PIL's `Image`,
# which resize_square_from_array() relies on if it is called again.
from IPython.display import Image as IPyImage

# Preview one converted image. Fall back to the first available png when the
# hard-coded example is not part of the current test set.
preview_path = './test_images/0026720152f5.png'
if not os.path.exists(preview_path):
    available_pngs = sorted(glob.glob('./test_images/*.png'))
    preview_path = available_pngs[0] if available_pngs else None
IPyImage(filename=preview_path) if preview_path else None

In [None]:
###########################################
####### STUDY LEVEL CLASSIFICATION ########
###########################################

#### Make a copy of the master_csv with only one image per study ####
# to make things easy, just pick the first image if there are multiple images for a study.


study_df = master_df.copy()

# just chose the first image for each study
study_df["image_id"] = study_df.apply(lambda x: x["image_id"][0], axis=1)

# just chose the first path for each study (corresponds to the first image)
study_df["image_paths"] = study_df.apply(lambda x: x["image_paths"][0], axis=1)

print("Edited Master Dataframe for Study-Level Classification")
print("========================================================")
display(study_df)

#### Create a Test tf.Data.Dataset from File Paths ####

# this will obviously have no augments or even batches since we are not training
# we are just evaluating

test_img_paths = list(study_df["image_paths"])

### DEFINE A FUNCITON TO LOAD IMAGES FROM PATH #######
# more informatino on this functino can be found on accompanying colab notebooks

INPUT_SHAPE = (512, 512, 3)

def tf_load_image(path, resize_to=INPUT_SHAPE):
    """Read a png from `path` and return it as a uint8 tensor.

    Args:
        path: file path of the png to read.
        resize_to: (height, width, channels); the last entry is the number of
            channels to decode, the leading entries are the resize target.

    Returns:
        The decoded image resized to `resize_to[:-1]` and cast to tf.uint8.
    """
    target_size, n_channels = resize_to[:-1], resize_to[-1]
    decoded = tf.image.decode_png(tf.io.read_file(path), channels=n_channels)
    resized = tf.image.resize(decoded, target_size)
    return tf.cast(resized, tf.uint8)

test_img_ds = tf.data.Dataset.from_tensor_slices(test_img_paths)
test_img_ds = test_img_ds.map(lambda x: (tf_load_image(x, resize_to=INPUT_SHAPE)))

In [None]:
### COMPILING THE DATASET (NO SHUFFLING) ###

AUTOTUNE = tf.data.AUTOTUNE
batch_size = 16 

# we already mapped load_image
test_img_ds = test_img_ds.batch(batch_size).prefetch(AUTOTUNE)

## **Putting the Test Data through a Model**

In [None]:
###############################################
##### access model from dataset uploaded  #####
###############################################

# get the sharing url of the model folder on gdrive (must be a ZIP file)
model_path = '../input/model-2/epoch_15--val_loss_0.89--val_acc_0.66--val_AUC_0.87'

In [None]:
# load the model from the folder
best_model = tf.keras.models.load_model(model_path)

In [None]:
# Run the study-level classifier over the whole batched test dataset.
# The dataset was built from one image path per study, so the output has one
# row per study (presumably one score per class in each row -- confirm against the model).
model_predict = best_model.predict(test_img_ds)

### **Making the Submission String(s)**

In [None]:
#########################################################
##### Begin Making (Study-Level) Submission Strings #####
#########################################################

class_dict = {0: 'atypical',
              1: 'indeterminate',
              2: 'negative',
              3: 'typical'
             }

def study_predictions_to_submission_format(model_predict, study_level_sample, class_dict, one_or_nothing=True):
    """
    Build the study-level half of the submission: one row per study holding
    its id and a prediction string of the form '<class name> 1 0 0 1 1'.

    Rows are paired by POSITION: row i of `model_predict` is written next to
    the i-th id of `study_level_sample`, regardless of that frame's index
    labels. The caller is responsible for both being in the same study order.

    ARGS:
    model_predict (np.ndarray)         : The outputs of your model on the test data. Should be (amt_test_exmpls, n_classes)
    study_level_sample (pd.DataFrame)  : The study level section of the sample submission csv; only its 'id' column is used
    class_dict (dict)                  : A dictionary mapping integer labels to word labels.

    KWARGS:
    one_or_nothing (bool, dflt True)   : Currently unused; kept so existing calls keep working.

    RETURNS:
    pd.DataFrame with columns ['id', 'PredictionString'] and a fresh 0..n-1 index.

    RAISES:
    ValueError if the number of prediction rows differs from the number of ids.
    """
    scores = np.asarray(model_predict)

    # drop the original index labels so the pairing below is purely positional
    class_ids = study_level_sample['id'].reset_index(drop=True)

    if len(scores) != len(class_ids):
        raise ValueError(
            f"Got {len(scores)} prediction rows for {len(class_ids)} study ids"
        )

    if len(class_ids) == 0:
        return pd.DataFrame({'id': class_ids, 'PredictionString': pd.Series([], dtype=object)})

    # highest-scoring class per row -> class name
    int_predict = scores.argmax(axis=1)

    # attach the meaningless bounding box filler (just to keep same format with image-level)
    p_strings = pd.Series([class_dict[int(label)] + ' 1 0 0 1 1' for label in int_predict])

    return pd.DataFrame({'id': class_ids, 'PredictionString': p_strings})
    
    
# The predictions follow the order of `study_ids` (directory listing order), while the
# ids written into the submission come from the sample submission. Reorder the
# prediction rows by study id so each id is paired with its own prediction.
predictions_by_study = pd.DataFrame(model_predict, index=[f"{study}_study" for study in study_ids])
ids_without_prediction = set(studylvl_df['id']) - set(predictions_by_study.index)
if ids_without_prediction:
    raise ValueError(
        f"{len(ids_without_prediction)} sample-submission study ids have no prediction, "
        f"e.g. {sorted(ids_without_prediction)[:3]}"
    )
ordered_predictions = predictions_by_study.reindex(studylvl_df['id'].tolist()).to_numpy()

study_submissions = study_predictions_to_submission_format(ordered_predictions, studylvl_df, class_dict)

In [None]:
print(study_submissions)

## Image-level predictions


In [None]:
image_df = dhruv_master_df.set_index(['study_id']).apply(pd.Series.explode).reset_index()
image_df

In [None]:
# Reduce the study-level submission to (study_id, class): the study id is the part
# of 'id' before the first underscore, the class is the first word of the prediction string.
mod_study_sub = (
    study_submissions
    .copy()
    .assign(
        study_id=lambda frame: frame['id'].apply(lambda x: x.split('_')[0]),
        **{'class': lambda frame: frame['PredictionString'].apply(lambda x: x.split(' ')[0])},
    )
    .drop(columns=['PredictionString', 'id'])
)
mod_study_sub

In [None]:
image_df = image_df.merge(mod_study_sub,how='left',on='study_id')
image_df

In [None]:
'''test_img_paths = list(image_df["image_paths"])

### DEFINE A FUNCITON TO LOAD IMAGES FROM PATH #######
# more informatino on this functino can be found on accompanying colab notebooks

INPUT_SHAPE = (512, 512, 3)

def tf_load_image(path, resize_to=INPUT_SHAPE):
    img_bytes = tf.io.read_file(path)
    img = tf.image.decode_png(img_bytes, channels=resize_to[-1])
    img = tf.image.resize(img, resize_to[:-1])
    img = tf.cast(img, tf.uint8)
    return img

test_img_ds = tf.data.Dataset.from_tensor_slices(test_img_paths)
test_img_ds = test_img_ds.map(lambda x: (tf_load_image(x, resize_to=INPUT_SHAPE)))'''

In [None]:
'''### COMPILING THE DATASET (NO SHUFFLING) ###

AUTOTUNE = tf.data.AUTOTUNE
batch_size = 16 

# we already mapped load_image
test_img_ds = test_img_ds.batch(batch_size).prefetch(AUTOTUNE)'''

In [None]:
# Import the required libraries for object detection inference
import time
import tensorflow as tf
#from object_detection.utils import label_map_util
#from object_detection.utils import visualization_utils as viz_utils
import os
import cv2
import matplotlib.pyplot as plt
#%matplotlib inline
# (disabled) minimum confidence threshold:
#MIN_CONF_THRESH=.6
# Location of the exported detection model (SavedModel directory)
PATH_TO_SAVED_MODEL =r'../input/tf-obj-api-effdetd0/TF_ObjDetAPI_EffDet_D0/saved_model'
print('Loading model...', end='')
# Load the saved model as the detection function and time how long it takes
start_time = time.time()
detect_fn = tf.saved_model.load(PATH_TO_SAVED_MODEL)
end_time = time.time()
elapsed_time = end_time - start_time
print('Done! Took {} seconds'.format(elapsed_time))
# NOTE: no label-map file is loaded in this cell.

In [None]:
def plot_image(image, boxes=None, size=(5,5), title=None, columns=4):
    """Show one image, or a grid of images, optionally with boxes drawn on top.

    Args:
        image: a single image (array, or a path string) or a list of them.
        boxes: optional iterable of dicts with 'x1', 'y1', 'x2', 'y2' keys;
            the same boxes are drawn on every image.
        size: figure size passed to `plt.figure`.
        title: title for a single image, or a sequence indexed per image
            when `image` is a list.
        columns: maximum number of columns in the grid.
    """
    def plot_img(image, boxes=None, title=None):
        if isinstance(image, str):
            # NOTE(review): `df_image`, `string2boxes` and `read_dicom_image` are not
            # defined in the visible part of this notebook -- this branch presumably
            # needs them from elsewhere; confirm before passing path strings.
            image_id = os.path.splitext(os.path.split(image)[1])[0]
            df = df_image.loc[df_image['id'] == image_id + '_image']
            boxes = string2boxes(df['label'].iloc[0]) if len(df) > 0 else None
            image = read_dicom_image(image)
        # replicate the image along a new last axis to get 3 channels
        image = np.stack([image] * 3, axis=-1)
        if boxes is not None:
            for box in boxes:
                image = cv2.rectangle(image, (int(box['x1']), int(box['y1'])), (int(box['x2']), int(box['y2'])), [0, 255, 0], 10)
        plt.axis('on')
        plt.imshow(image, cmap='gray')
        if title is not None:
            plt.title(title)

    plt.figure(figsize=size)
    if isinstance(image, list):
        num = len(image)
        # at least one column so an empty list does not divide by zero
        columns = max(1, min(columns, num))
        # ceiling division without needing the (never imported) math module
        rows = -(-num // columns)

        for index, single_image in enumerate(image):
            plt.subplot(rows, columns, index + 1)
            plot_img(single_image, boxes=boxes, title=None if title is None else title[index])
    else:
        plot_img(image, boxes=boxes, title=title)
    plt.show()

In [None]:
def get_prediction(IMAGE_PATH):
    """Run the loaded detection model (`detect_fn`) on one image file.

    Args:
        IMAGE_PATH: path of the image to read with OpenCV.

    Returns:
        dict of numpy arrays, one per model output, each with the batch
        dimension removed and cut to the first `num_detections` entries,
        plus 'num_detections' (int). 'detection_classes' is cast to int64.

    Raises:
        OSError: if OpenCV cannot read an image from IMAGE_PATH.
    """
    bgr_image = cv2.imread(IMAGE_PATH)
    # cv2.imread signals failure by returning None rather than raising
    if bgr_image is None:
        raise OSError(f"cv2.imread could not read an image from {IMAGE_PATH!r}")

    # OpenCV loads BGR; convert to RGB, shape (height, width, 3)
    image_np = np.array(cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB))

    # The model is called on a batch, so add a leading batch axis.
    input_tensor = tf.convert_to_tensor(image_np)[tf.newaxis, ...]
    detections = detect_fn(input_tensor)

    # All outputs are batched tensors: take index [0] to drop the batch
    # dimension and keep only the first num_detections entries.
    num_detections = int(detections.pop('num_detections'))
    detections = {key: value[0, :num_detections].numpy()
                  for key, value in detections.items()}
    detections['num_detections'] = num_detections

    # detection_classes should be ints.
    detections['detection_classes'] = detections['detection_classes'].astype(np.int64)
    return detections

In [None]:
image_df

In [None]:
image_df.groupby('class').size()

In [None]:
list_img_id = list(image_df['image_id'])
list_image_paths = list(image_df['image_paths'])
list_og_image_sizes = list(image_df['og_image_sizes'])
list_class = list(image_df['class'])





In [None]:
MAX_BOXES_PER_IMAGE = 3  # keep only the first few detections of each image


def _boxes_to_original_pixels(norm_boxes, og_size):
    """Turn normalised [ymin, xmin, ymax, xmax] boxes into 'x1 y1 x2 y2' pixel strings.

    The detector sees the square png made by resize_square_from_array, i.e. the
    central square (side = shorter original side) of the original image. The boxes
    are therefore scaled by that side and shifted by the cropped margin, instead of
    being stretched over the full original width/height.

    og_size is the original array shape, (height, width).
    """
    height, width = og_size[0], og_size[1]
    side = min(height, width)
    x_margin = (width - side) / 2
    y_margin = (height - side) / 2

    box_strings = []
    for ymin, xmin, ymax, xmax in norm_boxes:
        corners = (xmin * side + x_margin, ymin * side + y_margin,
                   xmax * side + x_margin, ymax * side + y_margin)
        # round every coordinate the same way (no truncation before rounding)
        box_strings.append(" ".join(str(int(np.round(value))) for value in corners))
    return box_strings


# Collect plain records and build the frame once: DataFrame.append inside a loop
# copies the whole frame every iteration and no longer exists in pandas 2.x.
image_records = []
for img_id, path, size, img_class in tqdm(zip(list_img_id, list_image_paths, list_og_image_sizes, list_class),
                                          total=len(list_img_id)):
    # studies classified as negative get the 'none' placeholder without running the detector
    detections = None if img_class == 'negative' else get_prediction(path)

    if detections is not None and detections['num_detections'] > 0:
        det_scores = [str(score) for score in detections['detection_scores'][:MAX_BOXES_PER_IMAGE]]
        det_boxes = _boxes_to_original_pixels(detections['detection_boxes'][:MAX_BOXES_PER_IMAGE], size)
        image_records.append({'image_id': img_id,
                              'class': ['opacity' for _ in det_scores],
                              'confidence': det_scores,
                              'co-ordinates': det_boxes})
    else:
        image_records.append({'image_id': img_id, 'class': 'none',
                              'confidence': str('1.0'), 'co-ordinates': str('0 0 1 1')})

sub_df = pd.DataFrame(image_records, columns=['image_id','class','confidence','co-ordinates'])

            
        
    



In [None]:
sub_df

## Convert the predictions in the dataframe to the submission CSV format

In [None]:
def make_image_prediction_string(row):
    """Build the prediction string for one image row.

    Reads row['class'], row['confidence'] and row['co-ordinates']. Each may be a
    single value or a list; single values are treated as one-element lists. The
    three are zipped and every triple is written as '<class> <confidence> <coords>',
    with triples separated by single spaces.

    The row is only read, never modified.

    Args:
        row: mapping-like object (dict or pandas Series) with the three keys above.

    Returns:
        str: the space-joined prediction string, stripped of outer whitespace.
    """
    def _as_list(value):
        # wrap scalars without touching the caller's object
        return value if isinstance(value, list) else [value]

    triples = zip(_as_list(row['class']),
                  _as_list(row['confidence']),
                  _as_list(row['co-ordinates']))

    parts = [" ".join([str(c), str(con), str(cord)]) for c, con, cord in triples]
    return " ".join(parts).strip()
        
sub_img_df  = sub_df.copy()
#Id,PredictionString
sub_img_df['Id'] = sub_img_df['image_id']
sub_img_df['Id'] = sub_img_df.Id + "_image"
sub_img_df.drop('image_id', axis=1, inplace=True)
sub_img_df['PredictionString']  = sub_img_df.apply(lambda row: make_image_prediction_string(row),axis=1)
sub_img_df = sub_img_df[['Id','PredictionString']]
sub_img_df

In [None]:
# Build the final submission: the study-level rows stacked on top of the
# image-level rows (sub_img_df). The id column is renamed first so both
# frames share the column names 'Id' and 'PredictionString'.
study_submissions.rename(columns={'id': 'Id'}, inplace=True)


final_submission = pd.concat((study_submissions,sub_img_df ), axis=0)
# Disabled alternative: use the image-level rows of the sample submission instead.
#final_submission = pd.concat((study_submissions,imagelvl_df ), axis=0)

print("FINAL SUBMISSION\n=========================================")
display(final_submission)

print("\nSAVING AS CSV...")
# index=False: the row index is not written to the csv
final_submission.to_csv("submission.csv", index=False)

In [None]:
sub_img_df['class'] = sub_img_df.PredictionString.apply(lambda x: x.split(" ")[0])
sub_img_df
sub_img_df.groupby('class').size()