In [30]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
import zipfile

zip_ref = zipfile.ZipFile("/content/drive/My Drive/Colab Notebooks/Final Project/chest_xray.zip", 'r')

zip_ref.extractall("/tmp")
zip_ref.close()

In [3]:
import os
import keras

Using TensorFlow backend.


In [4]:
!ls "/tmp/chest_xray/"

test  train  val


In [16]:
base_dir = '/tmp/chest_xray/'

#the original dataset interchanged 'test' with 'validation,' with the latter being the 'holdout' dataset
# train_dir = os.path.join(base_dir, 'train')
# test_dir = os.path.join(base_dir, 'test')
# validation_dir = os.path.join(base_dir, 'val')

In [17]:
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D
from keras.layers import Activation, Dropout, Flatten, Dense
from keras import backend as K

In [18]:
# dimensions of our images.
img_width, img_height = 150, 150

train_dir = os.path.join(base_dir, 'train')
test_dir = os.path.join(base_dir, 'test')
val_dir = os.path.join(base_dir, 'val')
nb_train_samples = 2000
nb_validation_samples = 800
epochs = 50
batch_size = 16

if K.image_data_format() == 'channels_first':
    input_shape = (3, img_width, img_height)
else:
    input_shape = (img_width, img_height, 1)

In [19]:
model = Sequential()
model.add(Conv2D(32, (3, 3), input_shape=input_shape))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Conv2D(32, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Conv2D(64, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Flatten())
model.add(Dense(64))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(2))
model.add(Activation('sigmoid'))

model.compile(loss='categorical_crossentropy',
              optimizer='rmsprop',
              metrics=['accuracy'])

In [20]:
# this is the augmentation configuration we will use for training
train_datagen = ImageDataGenerator(
    rescale=1. / 255,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True)

# this is the augmentation configuration we will use for testing:
# only rescaling
test_datagen = ImageDataGenerator(rescale=1. / 255)

train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=(img_width, img_height),
    batch_size=batch_size,
    color_mode='grayscale',
    class_mode='categorical')

test_generator = test_datagen.flow_from_directory(
    test_dir,
    target_size=(img_width, img_height),
    batch_size=batch_size,
    color_mode='grayscale',
    class_mode='categorical')

Found 5216 images belonging to 2 classes.
Found 624 images belonging to 2 classes.


In [21]:
history = model.fit_generator(train_generator,
                              steps_per_epoch=nb_train_samples // batch_size,
                              epochs=epochs,
                              validation_data=test_generator,
                              validation_steps=nb_validation_samples // batch_size)


Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [22]:
import pandas as pd
import numpy as np

In [23]:
history_df = pd.DataFrame(history.history)

history_df

Unnamed: 0,val_loss,val_accuracy,loss,accuracy
0,0.590772,0.68125,0.55269,0.7355
1,0.322793,0.8525,0.367792,0.847
2,0.323459,0.76,0.297791,0.881
3,0.262259,0.8275,0.304968,0.8845
4,0.634335,0.75,0.270081,0.89
5,0.806834,0.83875,0.261103,0.91
6,0.253095,0.835,0.247176,0.9075
7,0.385289,0.85125,0.226595,0.9165
8,1.011089,0.805,0.209623,0.932
9,0.463437,0.845,0.217738,0.9235


In [24]:
val_generator = test_datagen.flow_from_directory(
    val_dir,
    target_size=(img_width, img_height),
    batch_size=batch_size,
    color_mode='grayscale',
    class_mode='categorical')


test_generator.reset()

pred = model.predict_generator(val_generator,verbose=1)

predicted_class_indices=np.argmax(pred,axis=1)

labels = (val_generator.class_indices)
labels = dict((v,k) for k,v in labels.items())
predictions = [labels[k] for k in predicted_class_indices]

filenames=val_generator.filenames
results=pd.DataFrame({"Filename":filenames,
                      "Predictions":predictions})

print(results.to_string())

Found 16 images belonging to 2 classes.
                                   Filename Predictions
0          NORMAL/NORMAL2-IM-1427-0001.jpeg   PNEUMONIA
1          NORMAL/NORMAL2-IM-1430-0001.jpeg   PNEUMONIA
2          NORMAL/NORMAL2-IM-1431-0001.jpeg   PNEUMONIA
3          NORMAL/NORMAL2-IM-1436-0001.jpeg   PNEUMONIA
4          NORMAL/NORMAL2-IM-1437-0001.jpeg   PNEUMONIA
5          NORMAL/NORMAL2-IM-1438-0001.jpeg   PNEUMONIA
6          NORMAL/NORMAL2-IM-1440-0001.jpeg   PNEUMONIA
7          NORMAL/NORMAL2-IM-1442-0001.jpeg   PNEUMONIA
8   PNEUMONIA/person1946_bacteria_4874.jpeg   PNEUMONIA
9   PNEUMONIA/person1946_bacteria_4875.jpeg   PNEUMONIA
10  PNEUMONIA/person1947_bacteria_4876.jpeg      NORMAL
11  PNEUMONIA/person1949_bacteria_4880.jpeg      NORMAL
12  PNEUMONIA/person1950_bacteria_4881.jpeg   PNEUMONIA
13  PNEUMONIA/person1951_bacteria_4882.jpeg   PNEUMONIA
14  PNEUMONIA/person1952_bacteria_4883.jpeg   PNEUMONIA
15  PNEUMONIA/person1954_bacteria_4886.jpeg   PNEUMONIA


In [31]:
from tensorflow.keras.utils import plot_model
from tensorflow.keras.models import model_from_json

# save_path = '/content/drive/My Drive/Colab Notebooks/Final Project/models/chollet_imbalanced/'

# dot_img_file = '/content/drive/My Drive/Colab Notebooks/Final Project/images/Chollet_imbalanced.png'
# plot_model(model, to_file=dot_img_file, show_shapes=True)

results.to_csv('/content/drive/My Drive/Colab Notebooks/Final Project/models/chollet_imbalanced/chollet_imbal_eval.csv', index=False)
history_df.to_csv('/content/drive/My Drive/Colab Notebooks/Final Project/models/chollet_imbalanced/chollet_imbal_acc.csv', index=False)

model_json = model.to_json()
with open('/content/drive/My Drive/Colab Notebooks/Final Project/models/chollet_imbalanced/chollet_imbal_model.json', 'w') as json_file:
    json_file.write(model_json)
# serialize weights to HDF5
model.save_weights('/content/drive/My Drive/Colab Notebooks/Final Project/models/chollet_imbalanced/chollet_imbal_model.h5')