Discuss how the first model behaves and the next steps

In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from glob import glob
from tensorflow import keras
from sklearn.model_selection import train_test_split
from tensorflow.keras.applications import VGG16
from tensorflow.keras import layers
from sklearn.preprocessing import OneHotEncoder
from skimage.transform import resize
import matplotlib.pyplot as plt

In [2]:
from util import getXY, dicom2df

In [3]:
# Read the run-length-encoded mask annotations and normalise the header names.
# NOTE(review): presumably the raw CSV header differs from these names
# (e.g. stray whitespace) — confirm against the file.
rle_csv_path = 'train-rle.csv'
rle_column_names = ['ImageId', 'EncodedPixels']
rle_df = pd.read_csv(rle_csv_path)
rle_df.columns = rle_column_names

In [4]:
# Gather every training DICOM path; sorting makes the file order deterministic.
dicom_glob_pattern = 'dicom-images-train/*/*/*.dcm'
train_file_list = sorted(glob(dicom_glob_pattern))

# Build the per-image metadata table from the files and the RLE annotations.
metadata_df = dicom2df(train_file_list, rle_df)

100%|██████████| 12089/12089 [01:18<00:00, 153.69it/s]


**Problem here**: loading the whole data set into memory at once would raise a memory error!

In [5]:
# Extract the image array and the labels for the full metadata table.
# This is the slow step of the notebook (several minutes on the full set).
images_and_labels = getXY(metadata_df, verbose=True)
x, y = images_and_labels

12089it [14:20, 14.04it/s]


12089 images extracted of shape (256, 256, 3)
Found 2711 positive cases and 9378 negative cases


**Problem here**: labels. According to the website, "Images without pneumothorax have a mask value of -1." We therefore set `label = 0` if and only if the `encoded_pixels_list` entry equals `['-1']`.

In [None]:
# Optional: work on the first 1000 rows only, as a smaller data set.
# Running this cell overwrites the `x` and `y` produced from the full table.
metadata_subset = metadata_df.head(1000)
x, y = getXY(metadata_subset, verbose=True)

In [6]:
# Hold out 20% of the samples for evaluation; the fixed seed keeps the
# split reproducible across runs.
split_arrays = train_test_split(x, y, test_size=0.2, random_state=9001)
x_train, x_test, y_train, y_test = split_arrays

In [7]:
# One-hot encode the labels.
# The encoder is fitted on the training labels only and then reused for the
# test labels, so both matrices are guaranteed to share the same columns in
# the same order. Fitting a second encoder on the test labels would derive
# its categories from the test split alone and could yield a different layout.
# NOTE: this cell reassigns y_train / y_test, so it must be run exactly once
# after the split cell; re-running it would re-encode already encoded arrays.
label_encoder = OneHotEncoder()
y_train = label_encoder.fit_transform(y_train.reshape(-1, 1)).toarray()
y_test = label_encoder.transform(y_test.reshape(-1, 1)).toarray()

In [9]:
# Transfer-learning classifier: a frozen VGG16 convolutional base followed by
# a small dense head ending in a 2-way softmax.
# NOTE(review): ImageNet VGG16 weights are normally paired with
# keras.applications.vgg16.preprocess_input — confirm how getXY scales pixels.
base_model = VGG16(include_top=False, weights='imagenet',
                   input_shape=(256, 256, 3))
base_model.trainable = False  # only the dense head below is trained

inputs = keras.Input(shape=(256, 256, 3))
# The intermediate tensors get their own name so that the image array `x`
# created by the data-loading cell is not overwritten; otherwise the
# train/test split cell could no longer be re-run after this one.
features = base_model(inputs)
features = layers.Flatten()(features)
features = layers.Dense(256, activation='relu')(features)
features = layers.Dense(64, activation='relu')(features)
outputs = layers.Dense(2, activation='softmax')(features)
model = keras.Model(inputs, outputs)

Downloading data from https://github.com/fchollet/deep-learning-models/releases/download/v0.1/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5


In [11]:
# List each top-level layer with its trainable flag, then print the
# full architecture summary.
for layer in model.layers:
    print(layer.name, layer.trainable)
model.summary()

input_2 True
vgg16 False
flatten True
dense True
dense_1 True
dense_2 True
Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         [(None, 256, 256, 3)]     0         
_________________________________________________________________
vgg16 (Model)                (None, 8, 8, 512)         14714688  
_________________________________________________________________
flatten (Flatten)            (None, 32768)             0         
_________________________________________________________________
dense (Dense)                (None, 256)               8388864   
_________________________________________________________________
dense_1 (Dense)              (None, 64)                16448     
_________________________________________________________________
dense_2 (Dense)              (None, 2)                 130       
Total params: 23,120,130
Trainable params: 8,405,442

In [12]:
# The network ends in a 2-unit softmax and is trained on one-hot targets, so
# the matching loss is categorical cross-entropy. With this loss the
# "accuracy" metric is resolved by Keras to categorical accuracy.
model.compile(
    optimizer=tf.keras.optimizers.SGD(learning_rate=0.001),
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)

In [13]:
# Train the model on the training split: 3 passes over the data in
# mini-batches of 32 samples.
model.fit(
    x=x_train,
    y=y_train,
    batch_size=32,
    epochs=3,
)

Train on 9671 samples
Epoch 1/10
 736/9671 [=>............................] - ETA: 1:07:48 - loss: 0.5728 - accuracy: 0.7301

KeyboardInterrupt: 

In [None]:
# Predict on the held-out split so that `pred` refers to the same samples
# that are scored below (predicting on x_train would not match the score).
pred = model.predict(x_test)

# With metrics=["accuracy"] at compile time, evaluate() returns [loss, accuracy].
score = model.evaluate(x_test, y_test, verbose=0)
print(score[0], score[1])