In [1]:
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [2]:
!pip install pydicom

Collecting pydicom
[?25l  Downloading https://files.pythonhosted.org/packages/f4/15/df16546bc59bfca390cf072d473fb2c8acd4231636f64356593a63137e55/pydicom-2.1.2-py3-none-any.whl (1.9MB)
[K     |████████████████████████████████| 1.9MB 4.3MB/s 
[?25hInstalling collected packages: pydicom
Successfully installed pydicom-2.1.2


In [3]:
import numpy as np
import matplotlib.pyplot as plt
import pydicom
from pydicom.data import get_testdata_file
import pandas as pd

In [4]:
!unzip -q "/content/gdrive/My Drive/GL_AIML/Kaggle/rsna-pneumonia-detection-challenge.zip"

In [5]:
train_files_path = '/content/stage_2_train_images'
test_files_path = '/content/stage_2_test_images'

In [6]:
import os
train_file_names = os.listdir(train_files_path)
test_file_names = os.listdir(test_files_path)

In [7]:
print('Number of training images we have: '+ str(len(train_file_names)))
print('Number of testing images we have: '+ str(len(test_file_names)))

Number of training images we have: 26684
Number of testing images we have: 3000


In [8]:
train_patient_ids = []
for file_name in train_file_names:
    name,extn = file_name.split('.')
    train_patient_ids.append(name)
    if(extn != 'dcm'):
        print(name+' is not a dcm file')

In [9]:
test_patient_ids = []
for file_name in test_file_names:
    name,extn = file_name.split('.')
    test_patient_ids.append(name)
    if(extn != 'dcm'):
        print(name+' is not a dcm file')

In [10]:
train_patient_ids.sort()
test_patient_ids.sort()

In [11]:
train_labels_df = pd.read_csv('/content/stage_2_train_labels.csv',index_col = 'patientId')

In [12]:
train_labels_df.sort_values("patientId", inplace = True)

In [13]:
train_labels_df.head(10)

Unnamed: 0_level_0,x,y,width,height,Target
patientId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0004cfab-14fd-4e49-80ba-63a80b6bddd6,,,,,0
000924cf-0f8d-42bd-9158-1af53881a557,,,,,0
000db696-cf54-4385-b10b-6b16fbb3f985,316.0,318.0,170.0,478.0,1
000db696-cf54-4385-b10b-6b16fbb3f985,660.0,375.0,146.0,402.0,1
000fe35a-2649-43d4-b027-e67796d412e0,570.0,282.0,269.0,409.0,1
000fe35a-2649-43d4-b027-e67796d412e0,83.0,227.0,296.0,438.0,1
001031d9-f904-4a23-b3e5-2c088acd19c6,66.0,160.0,373.0,608.0,1
001031d9-f904-4a23-b3e5-2c088acd19c6,552.0,164.0,376.0,676.0,1
0010f549-b242-4e94-87a8-57d79de215fc,,,,,0
001916b8-3d30-4935-a5d1-8eaddb1646cd,198.0,375.0,114.0,206.0,1


In [14]:
train_labels_df.shape

(30227, 5)

In [15]:
train_labels_df2 = train_labels_df.copy(deep=True)
train_labels_df2.reset_index(inplace=True)
patient_classfification = train_labels_df2.drop_duplicates(subset=["patientId"])

In [16]:
patient_classfification.set_index('patientId',inplace=True)

In [17]:
patient_classfification.head(10)

Unnamed: 0_level_0,x,y,width,height,Target
patientId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0004cfab-14fd-4e49-80ba-63a80b6bddd6,,,,,0
000924cf-0f8d-42bd-9158-1af53881a557,,,,,0
000db696-cf54-4385-b10b-6b16fbb3f985,316.0,318.0,170.0,478.0,1
000fe35a-2649-43d4-b027-e67796d412e0,570.0,282.0,269.0,409.0,1
001031d9-f904-4a23-b3e5-2c088acd19c6,66.0,160.0,373.0,608.0,1
0010f549-b242-4e94-87a8-57d79de215fc,,,,,0
001916b8-3d30-4935-a5d1-8eaddb1646cd,198.0,375.0,114.0,206.0,1
0022073f-cec8-42ec-ab5f-bc2314649235,161.0,230.0,223.0,486.0,1
0022995a-45eb-4cfa-9a59-cd15f5196c64,,,,,0
0025d2de-bd78-4d36-9f72-e15a5e22ca82,,,,,0


In [18]:
patient_classfification.drop(columns=['x','y','width','height'],inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,


In [19]:
patient_classfification.shape

(26684, 1)

In [20]:
len(train_patient_ids)

26684

In [21]:
patient_classfification.loc[str('000db696-cf54-4385-b10b-6b16fbb3f985')].values[0]

1

In [22]:
import cv2
count=0
for i in range(len(train_patient_ids)):
  print('Processing image:'+str(i))
  patient_id = train_patient_ids[i]
  img_file_name = train_files_path+'/'+patient_id+'.dcm'
  img_dicom = pydicom.dcmread(img_file_name)
  img = img_dicom.pixel_array
  res_img = cv2.resize(img, dsize=(224, 224), interpolation=cv2.INTER_CUBIC)  
  res_img_col=cv2.merge(( res_img,res_img,res_img))
  if(patient_classfification.loc[str(patient_id)].values[0]==0):
    cv2.imwrite('/content/gdrive/MyDrive/GL_AIML/Capstone_Project/Data_for_DenseNet/train_dataset/class_0/'+patient_id+'.jpg', res_img_col)
    count=count+1
  if(patient_classfification.loc[str(patient_id)].values[0]==1):
    cv2.imwrite('/content/gdrive/MyDrive/GL_AIML/Capstone_Project/Data_for_DenseNet/train_dataset/class_1/'+patient_id+'.jpg', res_img_col)
    count=count+1
print('Saved Images:'+str(count))

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Processing image:21685
Processing image:21686
Processing image:21687
Processing image:21688
Processing image:21689
Processing image:21690
Processing image:21691
Processing image:21692
Processing image:21693
Processing image:21694
Processing image:21695
Processing image:21696
Processing image:21697
Processing image:21698
Processing image:21699
Processing image:21700
Processing image:21701
Processing image:21702
Processing image:21703
Processing image:21704
Processing image:21705
Processing image:21706
Processing image:21707
Processing image:21708
Processing image:21709
Processing image:21710
Processing image:21711
Processing image:21712
Processing image:21713
Processing image:21714
Processing image:21715
Processing image:21716
Processing image:21717
Processing image:21718
Processing image:21719
Processing image:21720
Processing image:21721
Processing image:21722
Processing image:21723
Processing image:21724
Processing imag

In [1]:
import os
train_files_path_0 = '/content/gdrive/MyDrive/GL_AIML/Capstone_Project/Data_for_DenseNet/train_dataset/class_0/'
tf= os.listdir(train_files_path_0)
print(len(tf))

20672


In [2]:
import os
train_files_path_1 = '/content/gdrive/MyDrive/GL_AIML/Capstone_Project/Data_for_DenseNet/train_dataset/class_1/'
tf= os.listdir(train_files_path_1)
print(len(tf))

6012


In [5]:
from PIL import Image
import numpy as np
from tensorflow.keras.applications.densenet import preprocess_input
tfp1 = os.listdir(train_files_path_0)
X_train = []
count=0
for i in tfp1:
  count=count+1
  img =Image.open(train_files_path_0+i)
  X_train.append(preprocess_input(np.array(img,dtype=np.float32)))
  if(count==10000):
    break
  

In [6]:
tfp2 = os.listdir(train_files_path_1)
for i in tfp2:
  img =Image.open(train_files_path_1+i)
  X_train.append(preprocess_input(np.array(img,dtype=np.float32)))

In [7]:
len(X_train)

16012

In [8]:
X_train = np.array(X_train)

In [9]:
X_train.shape

(16012, 224, 224, 3)

In [20]:
y_train = np.zeros(16012)
for i in range(10000,16012):
  y_train[i] = 1.0

In [21]:
y_train.shape

(16012,)

In [22]:
from sklearn.metrics import confusion_matrix
from sklearn.preprocessing import LabelBinarizer
from sklearn.model_selection import train_test_split

import matplotlib.pyplot as plt

from tensorflow.keras.layers import Dense,GlobalAveragePooling2D,Convolution2D,BatchNormalization
from tensorflow.keras.layers import Flatten,MaxPooling2D,Dropout

from tensorflow.keras.applications import DenseNet121
from tensorflow.keras.applications.densenet import preprocess_input

from tensorflow.keras.preprocessing import image
from tensorflow.keras.preprocessing.image import ImageDataGenerator,img_to_array

from tensorflow.keras.models import Model

from tensorflow.keras.optimizers import Adam

from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau

import warnings
warnings.filterwarnings("ignore")

In [23]:
model_d=DenseNet121(weights='imagenet',include_top=False, input_shape=(224, 224, 3)) 
for layer in model_d.layers[:-10]:
    layer.trainable = False
for layer in model_d.layers[-10:]:
    layer.trainable = True
x=model_d.output

x= GlobalAveragePooling2D()(x)
x= BatchNormalization()(x)
x= Dropout(0.5)(x)
x= Dense(1024,activation='relu')(x) 
x= Dense(512,activation='relu')(x) 
x= BatchNormalization()(x)
x= Dropout(0.5)(x)

preds=Dense(1,activation='sigmoid')(x) #FC-layer

In [24]:

model=Model(inputs=model_d.input,outputs=preds)
model.summary()

Model: "model_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_2 (InputLayer)            [(None, 224, 224, 3) 0                                            
__________________________________________________________________________________________________
zero_padding2d_2 (ZeroPadding2D (None, 230, 230, 3)  0           input_2[0][0]                    
__________________________________________________________________________________________________
conv1/conv (Conv2D)             (None, 112, 112, 64) 9408        zero_padding2d_2[0][0]           
__________________________________________________________________________________________________
conv1/bn (BatchNormalization)   (None, 112, 112, 64) 256         conv1/conv[0][0]                 
____________________________________________________________________________________________

In [25]:
from tensorflow.keras.optimizers import Adam
optimizer = Adam(learning_rate=1e-3, beta_1=0.9, beta_2=0.999, epsilon=1e-8, decay=0.0, amsgrad=False)
model.compile(optimizer=optimizer,loss='binary_crossentropy',metrics=['accuracy'])

In [26]:
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau

checkpoint = ModelCheckpoint("/content/drive/MyDrive/GL_AIML/Capstone_Project/densenet_model.h5", monitor="accuracy", verbose=1, save_best_only=True, save_weights_only=True)

stop = EarlyStopping(monitor="accuracy", patience=5)

In [27]:
#X_train = X_train.reshape(-1, 224, 224, 3)
#import tensorflow
#y_train = tensorflow.keras.utils.to_categorical(y_train)
model.fit(X_train, y_train,epochs=20, batch_size=32, verbose=1)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<tensorflow.python.keras.callbacks.History at 0x7f35fc9c6f50>