# Trial 1

Architecture:

- 2 Convolutional Layers (each followed by max pooling) and 1 Fully-Connected hidden Layer, followed by a single-unit sigmoid output layer

Results:

- Best Validation Loss: 0.53 (Epoch 7/35)
- ~20 mins per epoch

In [None]:
import os
import pandas as pd
import math
import numpy as np
import tensorflow as tf

from datetime import datetime
from keras.models import Sequential
from keras.layers import Activation, BatchNormalization, Conv2D, Dense, Dropout, Flatten, MaxPooling2D
from keras import backend as K
from keras import regularizers
from keras.callbacks import Callback
from keras.callbacks import ModelCheckpoint
from keras.optimizers import Adam
from keras.preprocessing.image import ImageDataGenerator
from matplotlib.pyplot import imshow
from PIL import Image
from sklearn.metrics import roc_auc_score

# Sanity check before training: ask the Keras TensorFlow backend which GPUs it
# can see. As the last expression in the cell, the return value is displayed as
# the cell output.
# NOTE(review): `_get_available_gpus` is underscore-prefixed, i.e. a private
# helper of `keras.backend.tensorflow_backend`; presumably it returns the list
# of visible GPU device names - confirm it exists in the installed Keras version.
K.tensorflow_backend._get_available_gpus()

In [None]:
# Image pipeline shared by the generators below: multiplies every pixel value
# by 1/255 and enables random horizontal flipping.
datagen = ImageDataGenerator(horizontal_flip=True, rescale=1./255)

# train.csv leaves cells blank where a 0 is meant, so every blank (NaN) in the
# frame is replaced with 0. The CSV paths are then prefixed with '../' so they
# resolve relative to this notebook's directory.
train_labels = (
    pd.read_csv("../CheXpert-v1.0-small/train.csv")
    .fillna(0)
    .assign(Path=lambda frame: '../' + frame["Path"])
)

# valid.csv is loaded as-is (no blank filling); only the path prefix is applied.
validation_labels = (
    pd.read_csv('../CheXpert-v1.0-small/valid.csv')
    .assign(Path=lambda frame: '../' + frame["Path"])
)

In [None]:
# Keep frontal views only; lateral views are dropped here because the plan is
# to train one model per view (frontal and lateral).
train_is_frontal = train_labels['Frontal/Lateral'].eq('Frontal')
validation_is_frontal = validation_labels['Frontal/Lateral'].eq('Frontal')

# Training rows whose "Lung Opacity" label is uncertain (-1.0) are excluded as
# well. The validation set contains no uncertain labels, so it only needs the
# frontal filter.
train_is_certain = train_labels["Lung Opacity"].ne(-1.0)

frontal_train_labels = train_labels[train_is_frontal & train_is_certain]
frontal_validation_labels = validation_labels[validation_is_frontal]

In [None]:
# Settings common to the training and validation iterators: read image paths
# from the "Path" column, use the raw numeric "Lung Opacity" column as the
# target, and load every image as a 100x100 single-channel (grayscale) array
# in batches of 32.
flow_kwargs = dict(directory=".",
                   x_col="Path",
                   y_col=['Lung Opacity'],
                   class_mode="raw",
                   color_mode='grayscale',
                   target_size=(100, 100),
                   batch_size=32)

# Training batches come from the augmenting generator (rescale + random
# horizontal flip).
train_datagen = datagen.flow_from_dataframe(dataframe=frontal_train_labels,
                                            **flow_kwargs)

# Validation must not be augmented: with random flips the validation loss is
# computed on randomly altered images and differs from run to run. Use a
# dedicated generator that only applies the same 1/255 rescaling as training,
# and disable shuffling so batches are produced in dataframe order.
validation_image_generator = ImageDataGenerator(rescale=1./255)
validation_datagen = validation_image_generator.flow_from_dataframe(
    dataframe=frontal_validation_labels,
    shuffle=False,
    **flow_kwargs)

In [None]:
# The network input must match what the generators emit: 100x100 images with a
# single grayscale channel (target_size=(100, 100), color_mode='grayscale').
# A mismatching input_shape makes fitting fail on the first batch.
INPUT_SHAPE = (100, 100, 1)

# Feature extractor: two convolution + 2x2 max-pooling stages, then flatten.
classifier = Sequential()
classifier.add(Conv2D(32, (5, 5), input_shape=INPUT_SHAPE, activation='relu'))
classifier.add(MaxPooling2D(pool_size=(2, 2)))
classifier.add(Conv2D(64, (3, 3), activation='relu'))
classifier.add(MaxPooling2D(pool_size=(2, 2)))
classifier.add(Flatten())

# Classifier head: one 128-unit hidden layer and a single sigmoid unit that
# outputs the probability of the binary "Lung Opacity" label.
classifier.add(Dense(activation="relu", units=128))
classifier.add(Dense(activation="sigmoid", units=1))

classifier.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# len() of a Keras iterator is its number of batches per pass, so deriving the
# step counts from it makes one epoch exactly one pass over the training data
# and evaluates each validation image once per epoch, instead of relying on
# hard-coded counts that silently go stale when the data or batch size change.
# NOTE(review): epochs=120 is kept from the original cell, but the results
# summary at the top of the notebook reports "Epoch 7/35" - confirm which
# epoch budget is intended.
classifier.fit_generator(
    train_datagen,
    steps_per_epoch=len(train_datagen),
    epochs=120,
    validation_data=validation_datagen,
    validation_steps=len(validation_datagen),
    workers=4,
    verbose=2)