In [1]:
%load_ext tensorboard
import pandas as pd
from keras.preprocessing.image import ImageDataGenerator
from keras import Sequential, optimizers
from keras.layers import Conv2D, Activation, MaxPooling2D, Dropout, Flatten, Dense
import tensorflow as tf
from zipfile import ZipFile
from PIL import Image
from keras.callbacks import History
import matplotlib.pyplot as plt
from keras.callbacks import TensorBoard
import time
history = History()

In [2]:
#filename = 'train.zip'
#with ZipFile(filename, 'r') as zip:
#   print('Extracting files')
#    zip.extractall("./all_train_images")
#    print('Done')

#filename = 'test.zip'
#with ZipFile(filename, 'r') as zip:
#    print('Extracting files')
#    zip.extractall("./all_test_images")
#    print('Done')

In [3]:
labels = pd.read_csv("train_labels.csv")
labels["label"] = labels["label"].replace({0: '0', 1: '1'})
labels["id"] = labels["id"].astype(str) + '.tif'
labels.head()

Unnamed: 0,id,label
0,f38a6374c348f90b587e046aac6079959adf3835.tif,0
1,c18f2d887b7ae4f6742ee445113fa1aef383ed77.tif,1
2,755db6279dae599ebb4d39a9123cce439965282d.tif,0
3,bc3f0c64fb968ff4a8bd33af6971ecae77c75e08.tif,0
4,068aba587a4950175d04c680d38943fd488d6a9d.tif,0


In [4]:
train_datagen = ImageDataGenerator(
    rotation_range=15,
    rescale=1./255,
    shear_range=0.1,
    zoom_range=0.2,
    horizontal_flip=True,
    width_shift_range=0.1,
    height_shift_range=0.1,
    validation_split = 0.3
)

train_generator = train_datagen.flow_from_dataframe(
    labels, 
    "./all_train_images/train", 
    x_col='id',
    y_col='label',
    target_size=(96,96),
    class_mode='binary',
    batch_size=18,
    subset = 'training'
)#batch_size=36 got a 77% accuracy

validation_generator = train_datagen.flow_from_dataframe(
        labels,
        "./all_train_images/train",
        x_col='id',
        y_col='label',
        target_size=(96, 96),
        batch_size=18,
        class_mode='binary',
        subset='validation'
    )

Found 154018 validated image filenames belonging to 2 classes.
Found 66007 validated image filenames belonging to 2 classes.


In [5]:
model = Sequential()
model.add(Conv2D(32, (3, 3), padding='valid',
                 input_shape=(96,96,3)))
model.add(Activation('relu'))
model.add(Conv2D(32, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Conv2D(64, (3, 3), padding='valid'))
model.add(Activation('relu'))
model.add(Conv2D(64, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Dense(512))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Flatten())
model.add(Dense(1, activation='sigmoid'))

In [6]:
NAME = 'malignant-cell-detection-cnn-{}'.format(int(time.time()))
tensorboard = TensorBoard(log_dir='./logs/{}'.format(NAME))

In [7]:
%tensorboard --logdir logs

Reusing TensorBoard on port 6006 (pid 8584), started 0:26:30 ago. (Use '!kill 8584' to kill it.)

In [8]:
model.compile(loss="binary_crossentropy", 
              metrics = ['accuracy', "Precision", "Recall"], 
              optimizer='adam')

In [9]:
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 94, 94, 32)        896       
_________________________________________________________________
activation (Activation)      (None, 94, 94, 32)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 92, 92, 32)        9248      
_________________________________________________________________
activation_1 (Activation)    (None, 92, 92, 32)        0         
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 46, 46, 32)        0         
_________________________________________________________________
dropout (Dropout)            (None, 46, 46, 32)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 44, 44, 64)        1

In [10]:
history = model.fit(train_generator, 
                    validation_data = validation_generator, 
                    epochs=10, shuffle = True, 
                    callbacks=[tensorboard])

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [11]:
history.history

{'loss': [0.4582623243331909,
  0.39752790331840515,
  0.37003210186958313,
  0.3503762483596802,
  0.33863821625709534,
  0.33214983344078064,
  0.32497403025627136,
  0.32440462708473206,
  0.3204171359539032,
  0.3155997395515442],
 'accuracy': [0.7850576043128967,
  0.8237608671188354,
  0.8397979736328125,
  0.8494786620140076,
  0.8561336994171143,
  0.8591268658638,
  0.8641976714134216,
  0.8642236590385437,
  0.8663922548294067,
  0.8682621717453003],
 'precision': [0.7703388333320618,
  0.8107908368110657,
  0.8349007964134216,
  0.8471871018409729,
  0.8558348417282104,
  0.8598010540008545,
  0.8671750426292419,
  0.8661092519760132,
  0.8699688911437988,
  0.8724363446235657],
 'recall': [0.6691284775733948,
  0.7371675968170166,
  0.753764808177948,
  0.7669336795806885,
  0.7756808996200562,
  0.7795097827911377,
  0.7851810455322266,
  0.7865748405456543,
  0.7881287932395935,
  0.7905318737030029],
 'val_loss': [0.49683040380477905,
  0.36643916368484497,
  0.403103560