In [None]:
import os
import shutil
import pydicom
import matplotlib.pyplot as plt
import scipy.io
import numpy as np
import cv2
from PIL import Image
import pandas as pd
import gc
from tqdm import tqdm


import tensorflow as tf
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras import layers
from tensorflow.keras import applications
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping, ModelCheckpoint
from tensorflow.keras.layers import Dense, Conv2D , MaxPool2D , Flatten , Dropout , BatchNormalization

from sklearn.model_selection import RepeatedKFold, cross_val_score, train_test_split
from sklearn.metrics import confusion_matrix, accuracy_score

In [None]:
# Load one sample slice and preview it, to sanity-check the DICOM decoding.
sample_path = "/kaggle/input/rsna-str-pulmonary-embolism-detection/train/0003b3d648eb/d2b2960c2bbf/00ac73cfc372.dcm"
ds = pydicom.dcmread(sample_path)
dcm_sample = ds.pixel_array.astype('float32')
# Clip negative intensities to zero, then divide by the slice maximum.
non_negative = np.maximum(dcm_sample, 0)
scaled_image = non_negative / dcm_sample.max()
plt.imshow(scaled_image)

In [None]:
# not_normal: keep only the slices on which a pulmonary embolism is visible.
all_rows = pd.read_csv("/kaggle/input/rsna-str-pulmonary-embolism-detection/train.csv")
pe_on_image = all_rows["pe_present_on_image"] == 1
df = all_rows.loc[pe_on_image, :].reset_index(drop=True)
print(len(df))
df.tail()

In [None]:
# normal: slices without PE that also belong to an exam labelled negative for PE.
df1 = pd.read_csv("/kaggle/input/rsna-str-pulmonary-embolism-detection/train.csv")
no_pe_on_image = df1["pe_present_on_image"] == 0
exam_is_negative = df1["negative_exam_for_pe"] == 1
df1 = df1.loc[no_pe_on_image & exam_is_negative, :].reset_index(drop=True)
print(len(df1))
df1.head()

In [None]:
# Create the <split>/<class> directory tree that flow_from_directory reads.
# os.makedirs(..., exist_ok=True) creates missing parents and does not fail
# when the cell is re-run, unlike a bare `mkdir`.
for split_name in ("train", "valid", "test"):
    for class_name in ("normal", "not_normal"):
        os.makedirs("/kaggle/data/" + split_name + "/" + class_name, exist_ok=True)

In [None]:
#Train not_normal
# Export rows 0..9999 of the PE-positive frame `df` as 256x256 RGB JPEGs.
for i in tqdm(range(10000)):
    dcm = pydicom.dcmread("/kaggle/input/rsna-str-pulmonary-embolism-detection/train/"+df.loc[i,'StudyInstanceUID']+'/'+df.loc[i,'SeriesInstanceUID']+'/'+df.loc[i,'SOPInstanceUID']+'.dcm')
    dc_image = dcm.pixel_array.astype('float32')
    # A float array becomes a mode "F" image, and converting that to RGB clamps
    # every value to 0..255, so raw CT intensities would saturate. Rescale to
    # 0..255 first: clip negatives to 0 and divide by the slice maximum.
    peak = dc_image.max()
    if peak > 0:
        scaled_image = np.maximum(dc_image, 0) / peak * 255.0
    else:
        # Empty or all-non-positive slice: avoid dividing by zero.
        scaled_image = np.zeros_like(dc_image)
    im = Image.fromarray(scaled_image.astype('uint8')).convert('RGB').resize((256,256))
    im.save("/kaggle/data/train/not_normal/"+str(i)+".jpg")
    # Names are rebound on every pass, so the previous slice is released
    # without forcing a full gc.collect() on each iteration.

In [None]:
#Train normal
# Export rows 0..9999 of the negative frame `df1` as 256x256 RGB JPEGs.
for i in tqdm(range(10000)):
    dcm = pydicom.dcmread("/kaggle/input/rsna-str-pulmonary-embolism-detection/train/"+df1.loc[i,'StudyInstanceUID']+'/'+df1.loc[i,'SeriesInstanceUID']+'/'+df1.loc[i,'SOPInstanceUID']+'.dcm')
    dc_image = dcm.pixel_array.astype('float32')
    # A float array becomes a mode "F" image, and converting that to RGB clamps
    # every value to 0..255, so raw CT intensities would saturate. Rescale to
    # 0..255 first: clip negatives to 0 and divide by the slice maximum.
    peak = dc_image.max()
    if peak > 0:
        scaled_image = np.maximum(dc_image, 0) / peak * 255.0
    else:
        # Empty or all-non-positive slice: avoid dividing by zero.
        scaled_image = np.zeros_like(dc_image)
    im = Image.fromarray(scaled_image.astype('uint8')).convert('RGB').resize((256,256))
    im.save("/kaggle/data/train/normal/"+str(i)+".jpg")
    # Names are rebound on every pass, so the previous slice is released
    # without forcing a full gc.collect() on each iteration.

In [None]:
#Valid not_normal
# Export rows 10000..11999 of the PE-positive frame `df` as 256x256 RGB JPEGs.
# NOTE(review): splits are taken by row position, so slices from one study may
# land in more than one split -- consider splitting by StudyInstanceUID.
for i in tqdm(range(10000,12000)):
    dcm = pydicom.dcmread("/kaggle/input/rsna-str-pulmonary-embolism-detection/train/"+df.loc[i,'StudyInstanceUID']+'/'+df.loc[i,'SeriesInstanceUID']+'/'+df.loc[i,'SOPInstanceUID']+'.dcm')
    dc_image = dcm.pixel_array.astype('float32')
    # A float array becomes a mode "F" image, and converting that to RGB clamps
    # every value to 0..255, so raw CT intensities would saturate. Rescale to
    # 0..255 first: clip negatives to 0 and divide by the slice maximum.
    peak = dc_image.max()
    if peak > 0:
        scaled_image = np.maximum(dc_image, 0) / peak * 255.0
    else:
        # Empty or all-non-positive slice: avoid dividing by zero.
        scaled_image = np.zeros_like(dc_image)
    im = Image.fromarray(scaled_image.astype('uint8')).convert('RGB').resize((256,256))
    im.save("/kaggle/data/valid/not_normal/"+str(i)+".jpg")
    # Names are rebound on every pass, so the previous slice is released
    # without forcing a full gc.collect() on each iteration.

In [None]:
#Valid normal
# Export rows 10000..11999 of the negative frame `df1` as 256x256 RGB JPEGs.
# NOTE(review): splits are taken by row position, so slices from one study may
# land in more than one split -- consider splitting by StudyInstanceUID.
for i in tqdm(range(10000,12000)):
    dcm = pydicom.dcmread("/kaggle/input/rsna-str-pulmonary-embolism-detection/train/"+df1.loc[i,'StudyInstanceUID']+'/'+df1.loc[i,'SeriesInstanceUID']+'/'+df1.loc[i,'SOPInstanceUID']+'.dcm')
    dc_image = dcm.pixel_array.astype('float32')
    # A float array becomes a mode "F" image, and converting that to RGB clamps
    # every value to 0..255, so raw CT intensities would saturate. Rescale to
    # 0..255 first: clip negatives to 0 and divide by the slice maximum.
    peak = dc_image.max()
    if peak > 0:
        scaled_image = np.maximum(dc_image, 0) / peak * 255.0
    else:
        # Empty or all-non-positive slice: avoid dividing by zero.
        scaled_image = np.zeros_like(dc_image)
    im = Image.fromarray(scaled_image.astype('uint8')).convert('RGB').resize((256,256))
    im.save("/kaggle/data/valid/normal/"+str(i)+".jpg")
    # Names are rebound on every pass, so the previous slice is released
    # without forcing a full gc.collect() on each iteration.

In [None]:
#Test not_normal
# Export rows 12000..13999 of the PE-positive frame `df` as 256x256 RGB JPEGs.
# NOTE(review): splits are taken by row position, so slices from one study may
# land in more than one split -- consider splitting by StudyInstanceUID.
for i in tqdm(range(12000,14000)):
    dcm = pydicom.dcmread("/kaggle/input/rsna-str-pulmonary-embolism-detection/train/"+df.loc[i,'StudyInstanceUID']+'/'+df.loc[i,'SeriesInstanceUID']+'/'+df.loc[i,'SOPInstanceUID']+'.dcm')
    dc_image = dcm.pixel_array.astype('float32')
    # A float array becomes a mode "F" image, and converting that to RGB clamps
    # every value to 0..255, so raw CT intensities would saturate. Rescale to
    # 0..255 first: clip negatives to 0 and divide by the slice maximum.
    peak = dc_image.max()
    if peak > 0:
        scaled_image = np.maximum(dc_image, 0) / peak * 255.0
    else:
        # Empty or all-non-positive slice: avoid dividing by zero.
        scaled_image = np.zeros_like(dc_image)
    im = Image.fromarray(scaled_image.astype('uint8')).convert('RGB').resize((256,256))
    im.save("/kaggle/data/test/not_normal/"+str(i)+".jpg")
    # Names are rebound on every pass, so the previous slice is released
    # without forcing a full gc.collect() on each iteration.

In [None]:
#Test normal
# Export rows 12000..13999 of the negative frame `df1` as 256x256 RGB JPEGs.
# NOTE(review): splits are taken by row position, so slices from one study may
# land in more than one split -- consider splitting by StudyInstanceUID.
for i in tqdm(range(12000,14000)):
    dcm = pydicom.dcmread("/kaggle/input/rsna-str-pulmonary-embolism-detection/train/"+df1.loc[i,'StudyInstanceUID']+'/'+df1.loc[i,'SeriesInstanceUID']+'/'+df1.loc[i,'SOPInstanceUID']+'.dcm')
    dc_image = dcm.pixel_array.astype('float32')
    # A float array becomes a mode "F" image, and converting that to RGB clamps
    # every value to 0..255, so raw CT intensities would saturate. Rescale to
    # 0..255 first: clip negatives to 0 and divide by the slice maximum.
    peak = dc_image.max()
    if peak > 0:
        scaled_image = np.maximum(dc_image, 0) / peak * 255.0
    else:
        # Empty or all-non-positive slice: avoid dividing by zero.
        scaled_image = np.zeros_like(dc_image)
    im = Image.fromarray(scaled_image.astype('uint8')).convert('RGB').resize((256,256))
    im.save("/kaggle/data/test/normal/"+str(i)+".jpg")
    # Names are rebound on every pass, so the previous slice is released
    # without forcing a full gc.collect() on each iteration.

In [None]:
# Training-time generator: pixel rescaling plus random geometric augmentation.
# Dataset-level and per-sample normalisation are explicitly switched off.
normalisation_options = {
    "featurewise_center": False,
    "samplewise_center": False,
    "featurewise_std_normalization": False,
    "samplewise_std_normalization": False,
}
augmentation_options = {
    "rotation_range": 20,
    "width_shift_range": 0.2,
    "height_shift_range": 0.2,
    "shear_range": 0.2,
    "zoom_range": 0.2,
    "horizontal_flip": True,
    "vertical_flip": True,
    "fill_mode": 'nearest',
}
train_datagen = ImageDataGenerator(
    rescale=1./255,
    **normalisation_options,
    **augmentation_options,
)

In [None]:
# Stream augmented training batches from the class sub-folders of the train split.
train_flow_options = {
    "target_size": (256, 256),
    "batch_size": 64,
    "class_mode": 'binary',
}
train_generator = train_datagen.flow_from_directory('/kaggle/data/train', **train_flow_options)

In [None]:
# Validation images are only rescaled to [0, 1]; no augmentation is applied.
valid_datagen = ImageDataGenerator(rescale=1./255)

In [None]:
# Stream validation batches from the class sub-folders of the valid split.
valid_flow_options = {
    "target_size": (256, 256),
    "batch_size": 64,
    "class_mode": 'binary',
}
valid_generator = valid_datagen.flow_from_directory('/kaggle/data/valid', **valid_flow_options)

In [None]:
# Test images are only rescaled to [0, 1]; no augmentation is applied.
test_datagen = ImageDataGenerator(rescale=1./255)

In [None]:
# Stream test batches from the class sub-folders of the test split.
# Built from test_datagen (the generator defined for this split) rather than
# valid_datagen, so later changes to validation preprocessing cannot silently
# alter how test images are prepared.
test_generator = test_datagen.flow_from_directory(
        '/kaggle/data/test',
        target_size=(256, 256),
        batch_size=64,
        class_mode='binary')

In [None]:
# Disabled TPU setup, kept as a bare string literal so none of it executes.
# NOTE(review): nothing else in this notebook references tpu_strategy; remove
# this cell if TPU training is not planned.
"""tpu = tf.distribute.cluster_resolver.TPUClusterResolver.connect()

tpu_strategy = tf.distribute.experimental.TPUStrategy(tpu)"""

In [None]:
# Binary classifier: five Conv -> (Dropout) -> BatchNorm -> MaxPool stages
# followed by a small dense head with a sigmoid output.
model = Sequential([
    # Stage 1: 32 filters on the 256x256 RGB input
    Conv2D(32, (3, 3), strides=1, padding='same', activation='relu', input_shape=(256, 256, 3)),
    BatchNormalization(),
    MaxPool2D((2, 2), strides=2, padding='same'),
    # Stage 2: 64 filters with light dropout
    Conv2D(64, (3, 3), strides=1, padding='same', activation='relu'),
    Dropout(0.1),
    BatchNormalization(),
    MaxPool2D((2, 2), strides=2, padding='same'),
    # Stage 3: 64 filters
    Conv2D(64, (3, 3), strides=1, padding='same', activation='relu'),
    BatchNormalization(),
    MaxPool2D((2, 2), strides=2, padding='same'),
    # Stage 4: 128 filters with dropout
    Conv2D(128, (3, 3), strides=1, padding='same', activation='relu'),
    Dropout(0.2),
    BatchNormalization(),
    MaxPool2D((2, 2), strides=2, padding='same'),
    # Stage 5: 256 filters with dropout
    Conv2D(256, (3, 3), strides=1, padding='same', activation='relu'),
    Dropout(0.2),
    BatchNormalization(),
    MaxPool2D((2, 2), strides=2, padding='same'),
    # Classification head
    Flatten(),
    Dense(units=128, activation='relu'),
    Dropout(0.2),
    Dense(units=1, activation='sigmoid'),
])

In [None]:
# Directory for per-epoch checkpoints; exist_ok keeps the cell safe to re-run.
os.makedirs("/kaggle/models", exist_ok=True)

In [None]:
# Compile with binary cross-entropy and Adam, tracking binary accuracy.
loss = tf.keras.losses.BinaryCrossentropy()
model.compile(
    optimizer='Adam',
    loss=loss,
    metrics=['binary_accuracy'],
)

# Multiply the learning rate by 0.3 after 2 epochs without improvement in
# validation accuracy, never going below 1e-6.
learning_rate_reduction = ReduceLROnPlateau(
    monitor='val_binary_accuracy',
    factor=0.3,
    patience=2,
    min_lr=0.000001,
    verbose=1,
)

# Write a checkpoint after every epoch (not only the best one); the file name
# records the epoch number and that epoch's validation accuracy.
filepath = "/kaggle/models/saved-model-{epoch:02d}-{val_binary_accuracy:.2f}.hdf5"
checkpoint = ModelCheckpoint(
    filepath,
    monitor='val_loss',
    save_best_only=False,
    save_freq='epoch',
    verbose=1,
)


In [None]:
# Train for 25 epochs, checkpointing every epoch and reducing the learning
# rate when validation accuracy plateaus.
# - model.fit accepts generators directly; fit_generator is deprecated.
# - validation_steps is left unset so the whole validation generator is scored
#   each epoch. The previous validation_steps=4 used only 4 batches of 64,
#   which made the monitored val_binary_accuracy noisy.
history = model.fit(
      train_generator,
      epochs=25,
      validation_data=valid_generator,
      callbacks=[checkpoint,learning_rate_reduction],
      verbose=1,
      )

In [None]:
# Plot training and validation accuracy per epoch.
fig, ax = plt.subplots()
accuracy_curves = (
    ('binary_accuracy', 'The score of correct predictions on the training set'),
    ('val_binary_accuracy', 'The score of correct predictions on the val set'),
)
for metric_key, curve_label in accuracy_curves:
    ax.plot(history.history[metric_key], label=curve_label)
ax.set_xlabel('Epoch')
ax.set_ylabel('Score correct answers')
ax.legend()
plt.show()

In [None]:
# Pick the checkpoint with the highest accuracy on the validation set.
# Selection uses the validation split, not the test split: choosing a model on
# the test data and then reporting test accuracy gives an optimistically
# biased score. The whole generator is evaluated (no `steps` cap) so the
# comparison does not depend on which few shuffled batches were drawn.
best_acc = 0
best_model = ""
for checkpoint_name in sorted(os.listdir("/kaggle/models")):
    if not checkpoint_name.endswith(".hdf5"):
        # Ignore anything that is not one of the saved checkpoints.
        continue
    model.load_weights("/kaggle/models/"+checkpoint_name)
    # model.evaluate accepts generators; evaluate_generator is deprecated.
    loss, acc = model.evaluate(valid_generator, verbose=0)
    if acc > best_acc:
        best_model = checkpoint_name
        best_acc = acc

In [None]:
# Report accuracy of the selected checkpoint on the held-out test split.
if not best_model:
    # load_weights would otherwise be pointed at the directory itself.
    raise RuntimeError("No checkpoint was selected; run the checkpoint-selection cell first.")
model.load_weights("/kaggle/models/"+best_model)
# Evaluate every test batch; the previous steps=3 scored only 3 shuffled
# batches of 64 images. evaluate_generator is deprecated in favour of
# model.evaluate, which accepts generators.
loss, acc = model.evaluate(test_generator, verbose=0)
acc = acc *100
print(f"accuracy is: {acc:.2f}%")