In [None]:
from functools import partial
from collections import defaultdict
import pydicom
import os
import glob
import numpy as np
import pandas as pd
from joblib import Parallel, delayed
from lightgbm import LGBMClassifier
from sklearn.model_selection import KFold
from sklearn.metrics import roc_auc_score
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import seaborn as sns
from tqdm import tqdm
sns.set_style('whitegrid')
%matplotlib inline

np.warnings.filterwarnings('ignore')

In [None]:
# Load the training label table and the detailed per-patient class table.
labels = pd.read_csv('../input/rsna-pneumonia-detection-challenge/stage_2_train_labels.csv')
details = pd.read_csv('../input/rsna-pneumonia-detection-challenge/stage_2_detailed_class_info.csv')

# Per the original author's check, rows repeated for one patientId all carry the
# same class, so keeping only the first occurrence loses no information.
details = details.drop_duplicates(subset='patientId')
details = details.reset_index(drop=True)

# Attach the class info to every label row that has a matching patientId.
labels_w_class = pd.merge(labels, details, on='patientId', how='inner')

In [None]:
# Display the merged label / class table.
labels_w_class

In [None]:
# Collect all train/test DICOM file paths.
# glob returns matches in arbitrary (filesystem-dependent) order, so the lists are
# sorted to make the fixed-size subsets below the same on every run and machine.
train_dcm_fps = sorted(glob.glob('../input/rsna-pneumonia-detection-challenge/stage_2_train_images/*.dcm'))
test_dcm_fps = sorted(glob.glob('../input/rsna-pneumonia-detection-challenge/stage_2_test_images/*.dcm'))

# Keep at most 11000 training and 3000 test files.
train_dcm_fps = train_dcm_fps[:11000]
test_dcm_fps = test_dcm_fps[:3000]

# Read each file's header into a list; stop_before_pixels=True skips the image
# data for speed and memory savings.
# `dcmread` is the supported reader; `read_file` is a deprecated alias that newer
# pydicom releases no longer provide.
train_dcms = [pydicom.dcmread(x, stop_before_pixels=True) for x in tqdm(train_dcm_fps)]
test_dcms = [pydicom.dcmread(x, stop_before_pixels=True) for x in tqdm(test_dcm_fps)]

In [None]:
# Number of training files kept after the subset slice.
len(train_dcm_fps)

In [None]:
# Example path of one training DICOM file.
train_dcm_fps[0]

In [None]:
# Header-only dataset of one training file (pixel data was skipped when reading).
train_dcms[1]

In [None]:
def parse_dcm_metadata(dcm):
    """Flatten one DICOM dataset into plain dictionaries.

    Parameters
    ----------
    dcm : iterable object that also exposes ``items()`` yielding ``(tag, element)``
        pairs, where ``tag`` has ``group``/``elem`` attributes and ``element`` has
        ``keyword``/``value`` attributes (a pydicom dataset in this notebook).

    Returns
    -------
    tuple of (dict, dict)
        ``keyword -> value`` for every element, and
        ``(tag.group, tag.elem) -> keyword`` for every element. If two elements
        share a keyword, the later one overwrites the earlier one.
    """
    # Walk the dataset once and discard the results: per the original author's
    # note this forces lazily-loaded raw elements to be converted to full
    # data elements before items() is read below.
    for _ in dcm:
        pass

    keyword_to_value = {}
    tag_to_keyword = {}
    for tag, element in dcm.items():
        name = element.keyword
        tag_to_keyword[(tag.group, tag.elem)] = name
        keyword_to_value[name] = element.value
    return keyword_to_value, tag_to_keyword

# Parse every header. Each call returns a (metadata dict, tag -> keyword dict) pair;
# zip(*...) regroups those pairs into two parallel tuples per split.
train_meta_dicts, tag_to_keyword_train = zip(*map(parse_dcm_metadata, tqdm(train_dcms)))
test_meta_dicts, tag_to_keyword_test = zip(*map(parse_dcm_metadata, tqdm(test_dcms)))

In [None]:
#tag_to_keyword_train[0]
# Peek at the parsed keyword -> value dict of the first training file.
train_meta_dicts[0]

In [None]:
# Collapse the per-file tag -> keyword maps into a single dict per split
# (when a tag appears in several files, the last file's keyword is kept).
unified_tag_to_key_train = dict(
    pair for per_file in tag_to_keyword_train for pair in per_file.items()
)
unified_tag_to_key_test = dict(
    pair for per_file in tag_to_keyword_test for pair in per_file.items()
)

# Sanity check: train and test must expose exactly the same set of tags.
assert set(unified_tag_to_key_test) == set(unified_tag_to_key_train)

# Combined lookup; train entries take precedence over test entries.
tag_to_key = dict(unified_tag_to_key_test)
tag_to_key.update(unified_tag_to_key_train)
tag_to_key

In [None]:
# Build one row per DICOM file directly from the sequence of metadata dicts
# (dict keys become columns; values may be scalars or iterables).
# Each frame also gets a 'dataset' column naming the split it came from.
train_df = pd.DataFrame(train_meta_dicts).assign(dataset='train')
test_df = pd.DataFrame(test_meta_dicts).assign(dataset='test')
#df = pd.concat([train_df, test_df])
# The rest of the notebook refers to the two splits as df (train) and df2 (test).
df = train_df
df2 = test_df

In [None]:
# Display the training metadata frame.
df

In [None]:
# Display the test metadata frame.
df2

In [None]:
#[1,0] for PA and [0,1] for AP
# y=df['SeriesDescription']=='view: PA'
# number_of_images = len(y)
# train_Y = np.zeros((number_of_images,2))
# for i in range(0,number_of_images):
#     if(y[i] == True):
#         train_Y[i] = [1,0]
#     else:
#         train_Y[i] = [0,1]
        
# train_Y


In [None]:
# Binary training target: 1.0 where the DICOM SeriesDescription equals 'view: PA',
# 0.0 for every other value.
y = df['SeriesDescription'] == 'view: PA'
number_of_images = len(y)
# Vectorised cast instead of a Python loop over y[i]: the loop looked rows up by
# index *label*, so it only worked while the frame kept a default 0..n-1 index
# (it would break on the concatenated frame, for example).
train_Y = y.to_numpy(dtype=np.float64)

train_Y

In [None]:
# Confirm there is one label per training file.
train_Y.shape

In [None]:
# y_test = df2['SeriesDescription']=='view: PA'
# number_of_images = len(y_test)
# test_Y = np.zeros((number_of_images,2))
# for i in range(0,number_of_images):
#     if(y_test[i] == True):
#         test_Y[i] = [1,0]
#     else:
#         test_Y[i] = [0,1]
        
# test_Y

In [None]:
# Binary test target: 1.0 where the DICOM SeriesDescription equals 'view: PA',
# 0.0 for every other value.
y_test = df2['SeriesDescription'] == 'view: PA'
number_of_images = len(y_test)
# Vectorised cast instead of a Python loop over y_test[i]: the loop looked rows up
# by index *label*, so it only worked while the frame kept a default 0..n-1 index.
test_Y = y_test.to_numpy(dtype=np.float64)

test_Y

In [None]:
import cv2

# Load the pixel data of every training file, in the same order as train_dcm_fps
# (and therefore as the rows of the metadata frame), resized to 128x128.
train_X=[]
for x in tqdm(train_dcm_fps):
    # `dcmread` is the supported reader; `read_file` is a deprecated alias that
    # newer pydicom releases no longer provide.
    img = pydicom.dcmread(x).pixel_array
    img = cv2.resize(img, (128, 128))
    # Cast to float32 before scaling: plain `img/255` yields float64, doubling the
    # memory of the full image stack for no benefit to the network.
    # NOTE(review): dividing by 255 assumes 8-bit pixel data — confirm for this dataset.
    img = img.astype(np.float32)/255
    train_X.append(img)

In [None]:
# Stack the per-image arrays into one ndarray. This rebinds train_X from the
# Python list built above, so the cell is meant to be run once per load.
train_X = np.array(train_X)

In [None]:
# Inspect the scaled pixel values of the first training image.
train_X[0]

In [None]:
# Visual sanity check of a single training image, rendered in greyscale.
plt.imshow(train_X[170],cmap = 'gray')

In [None]:
# Copy the single greyscale channel into three identical channels on a new last
# axis, because the network below is built with a 3-channel input shape.
train_X_rgb = np.stack((train_X, train_X, train_X), axis=-1)
print(train_X_rgb.shape)

In [None]:
# Load the pixel data of every test file, in the same order as test_dcm_fps
# (and therefore as the rows of the test metadata frame), resized to 128x128.
test_X=[]
for x in tqdm(test_dcm_fps):
    # `dcmread` is the supported reader; `read_file` is a deprecated alias that
    # newer pydicom releases no longer provide.
    img_test = pydicom.dcmread(x).pixel_array
    img_test = cv2.resize(img_test, (128, 128))
    # Cast to float32 before scaling: plain `img_test/255` yields float64, doubling
    # the memory of the full image stack for no benefit to the network.
    # NOTE(review): dividing by 255 assumes 8-bit pixel data — confirm for this dataset.
    img_test = img_test.astype(np.float32)/255
    test_X.append(img_test)

In [None]:
# Stack the per-image arrays into one ndarray. This rebinds test_X from the
# Python list built above, so the cell is meant to be run once per load.
test_X = np.array(test_X)

In [None]:
# Copy the single greyscale channel into three identical channels on a new last
# axis, matching the 3-channel input shape the network is built with.
test_X_rgb = np.stack((test_X, test_X, test_X), axis=-1)
print(test_X_rgb.shape)

In [None]:
import tensorflow as tf
from tensorflow.keras.layers import *
from tensorflow.keras import Model
from tensorflow.keras.applications.vgg16 import VGG16

# ResNet50 with ImageNet weights and without its classification top,
# taking 128x128 three-channel inputs.
backbone = tf.keras.applications.resnet50.ResNet50(
    weights='imagenet',
    include_top=False,
    input_shape=(128, 128, 3),
)
#model = VGG16(include_top=False, weights='imagenet', input_shape=(256,256,3))

# Head: flatten the backbone features, a 32-unit dense layer, then a single
# sigmoid unit producing the probability of the positive ('view: PA') class.
# NOTE(review): Dense(32) is given no activation, so it is a purely linear layer
# feeding another dense layer — confirm that a non-linearity was not intended.
flat_features = Flatten()(backbone.output)
hidden = Dense(32)(flat_features)
pa_probability = Dense(1, activation='sigmoid')(hidden)

model = Model(inputs=backbone.inputs, outputs=pa_probability)

In [None]:
# Print the layer-by-layer structure and parameter counts of the assembled model.
model.summary()

In [None]:
# histories = []
# losses = []
# accuracies = []

# Train the binary view classifier (PA = 1) and score it on the held-out test images.
model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])
# validation_split holds out the last 15% of the provided samples for validation.
model.fit(train_X_rgb, train_Y,  epochs=30, validation_split = 0.15)
# Ask Keras for a {metric name: value} dict directly. Zipping the returned list
# with model.metrics_names is fragile: on newer Keras versions metrics_names can
# report a generic 'compile_metrics' entry instead of the individual metric names.
results = model.evaluate(test_X_rgb, test_Y, return_dict=True)

# histories.append(history)
# accuracies.append(results['seg_seg_binary_accuracy'])    
# losses.append(results['seg_loss'])

In [None]:
# Show the test-set evaluation metrics keyed by metric name.
print(results)

In [None]:
# Predict on the first ten test images only, for a quick qualitative look.
pred = model.predict(test_X_rgb[:10])

In [None]:
# Raw sigmoid outputs for those images.
pred

In [None]:
# Ground-truth labels for the same images (1 = 'view: PA', 0 = anything else).
test_Y[:10]

In [None]:
# Qualitative check: print the true and predicted view for each predicted image
# and show the image itself.

# Probability above which a sample is reported as PA here.
# NOTE(review): Keras' "accuracy" metric for a sigmoid/binary-crossentropy model
# thresholds at 0.5 by default, so labels printed with 0.7 can disagree with the
# reported accuracy — confirm 0.7 is intended.
pa_threshold = 0.7

# Iterate over however many predictions exist instead of a hard-coded 10, so the
# loop stays in step with the slice used when `pred` was computed.
for i in range(len(pred)):
    if(test_Y[i]) == 1:
        print("Label : PA")
    else:
        print("Label : AP")

    # Each row of `pred` holds a single sigmoid output; take it as a plain float
    # rather than relying on the truth value of a one-element array.
    score = float(np.ravel(pred[i])[0])
    if score > pa_threshold:
        print("Prediction : PA")
    else:
        print("Prediction : AP")

    print("Test Image")
    plt.imshow(test_X_rgb[i])

    plt.show()