In [73]:
%load_ext autoreload
%autoreload 2
%matplotlib inline

# Standard
import os

import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy
from PIL import Image
from scipy import signal

# # Tensorflow and Keras
from keras.datasets import mnist
from keras.models import Sequential, Model, Input
from keras.layers import Dense, Dropout, Activation
from keras.layers import Conv2D, MaxPooling2D, Flatten
from tensorflow.keras.optimizers import SGD
from keras.regularizers import l2
import tensorflow as tf

# Xception because other model not working
from tensorflow.keras.applications.xception import preprocess_input
from tensorflow.keras.applications import Xception
from tensorflow.keras.preprocessing.image import img_to_array, load_img
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import SGD, RMSprop

from keras_preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img

#os.chdir('../')
from src import image_preprocess as ip

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## Start with histology data
For each magnification (40X, 100X, 200X, 400X) there are 644 benign images and 1300 malignant images.

Augment our benign data since we only have ~650 images

In [28]:
# Histology slides are already fairly uniform, so only mild augmentation is
# applied: small shifts, shear and zoom plus horizontal mirroring. Pixels
# exposed by a transform are filled with the constant value 0.
datagen = ImageDataGenerator(
    horizontal_flip=True,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.1,
    zoom_range=0.1,
    fill_mode='constant',
    cval=0,
)

In [55]:
# Sanity check: confirm the working directory (all data paths below are
# relative to it) and look at the array shape of one sample image.
print(os.getcwd())
# Open the file in a context manager so its handle is released as soon as the
# pixel data has been copied into the array.
with Image.open('data/Histology/100X/benign/SOB_B_A-14-22549AB-100-001.png') as img:
    x = img_to_array(img)
x.shape

/home/maureen/Documents/Galvanize/Capstone1/Capstone3/Cancer_Prediction


(460, 700, 3)

In [50]:
# Augment every benign 100X image using the project helpers in
# src/image_preprocess (`ip.reshape_image` then `ip.create_new_images`).
benign_dir = 'data/Histology/100X/benign'

# Snapshot the directory listing once, keeping regular files only so that
# sub-directories (e.g. .ipynb_checkpoints) are not handed to Image.open.
files = [f for f in os.listdir(benign_dir)
         if os.path.isfile(os.path.join(benign_dir, f))]

for f in files:
    # The context manager closes each image file after it has been processed,
    # instead of leaving one open handle per image for the garbage collector.
    with Image.open(os.path.join(benign_dir, f)) as img:
        x = ip.reshape_image(img)
        ip.create_new_images(x)


In [40]:
# Project root. The notebook is run from the project root (the relative
# 'data/...' paths used throughout depend on that), so take it from the
# current working directory instead of hard-coding one machine's home path.
root_dir = os.getcwd()

## Make the model

In [93]:
# Baseline CNN adapted from the CNN lecture notes.
import keras
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D
from keras.layers import Activation, Dropout, Flatten, Dense

# Two conv/pool stages followed by a small dense classifier. Every trainable
# layer carries the same L2 weight penalty (0.01).
model = Sequential()
model.add(Conv2D(filters=8, kernel_size=(5, 5), strides=(1, 1),
                 activation='relu',
                 input_shape=(460, 700, 3),  # full-size RGB histology image
                 kernel_regularizer=l2(0.01)))

model.add(MaxPooling2D(pool_size=(2, 2),
                       strides=(2, 2)))


model.add(Conv2D(filters=32, kernel_size=(5, 5),
                 strides=(1, 1),
                 activation='relu',
                 kernel_regularizer=l2(0.01)))

model.add(MaxPooling2D(pool_size=(2, 2),
                       strides=(2, 2)))

model.add(Flatten())
model.add(Dense(128,
                activation='relu',
                kernel_regularizer=l2(0.01)))
# Two output units: benign vs. malignant. (The lecture version had 10 units
# for the MNIST digits, which does not match this dataset's labels.)
model.add(Dense(2,
                activation='softmax',
                kernel_regularizer=l2(0.01)))

# `learning_rate` replaces the deprecated `lr` keyword.
model.compile(loss=keras.losses.categorical_crossentropy,
              optimizer=keras.optimizers.Adam(learning_rate=0.001),
              metrics=['accuracy'])

In [None]:
# Train the model.
# NOTE(review): X_train / y_train / X_test / y_test are not created in any cell
# visible in this notebook — they must be built before this cell can run.
# The model was declared with a 3-channel input (460, 700, 3), so the image
# arrays are passed as-is: indexing with `[..., None]` appends a size-1 axis
# (useful only for grayscale data) and can never match that input shape.
model.fit(X_train, y_train,
          epochs=5,
          verbose=1,
          batch_size=50,
          validation_data=(X_test, y_test))

In [None]:
# Display the list of layer objects that make up the current `model`.
model.layers

In [None]:
# Grab the first weight array of the model's first layer and show its shape.
# NOTE(review): presumably this is the kernel of the first Conv2D layer of the
# Sequential CNN — that depends on which `model` is live in the kernel; confirm.
weights1 = model.layers[0].get_weights()[0]
weights1.shape

In [74]:
# Try new model because previous isn't working
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Flatten, Dropout
from tensorflow.keras.models import Model

def create_transfer_model(input_size, n_categories, weights = 'imagenet'):
    """Build an Xception-based classifier with a new softmax head.

    Args:
        input_size: Input image shape passed to Xception as ``input_shape``.
        n_categories: Number of units in the final softmax layer.
        weights: Weights argument forwarded to Xception
            (defaults to ``'imagenet'``).

    Returns:
        A Keras ``Model`` mapping the Xception input to the softmax output.
    """
    # include_top=False drops Xception's own classification head, so the
    # loaded weights cover the convolutional base only.
    backbone = Xception(include_top=False,
                        weights=weights,
                        input_shape=input_size)

    # New head: global average pooling followed by one softmax layer.
    pooled = GlobalAveragePooling2D()(backbone.output)
    predictions = Dense(n_categories, activation='softmax')(pooled)

    return Model(inputs=backbone.input, outputs=predictions)

In [81]:
# Xception transfer model for full-size RGB images and two output classes.
model = create_transfer_model(input_size=(460, 700, 3), n_categories=2)

In [82]:
def print_model_properties(model, indices = 0):
    """Print index, name and trainable flag for the layers of ``model``.

    Args:
        model: Object exposing a ``layers`` sequence whose items have
            ``name`` and ``trainable`` attributes.
        indices: Index of the first layer to print; the printed numbering
            starts from this value.
    """
    for position, layer in enumerate(model.layers[indices:], start=indices):
        print(position, layer.name, layer.trainable)

In [83]:
# List every layer of the model with its index, name and trainable flag.
print_model_properties(model)

0 input_5 True
1 block1_conv1 True
2 block1_conv1_bn True
3 block1_conv1_act True
4 block1_conv2 True
5 block1_conv2_bn True
6 block1_conv2_act True
7 block2_sepconv1 True
8 block2_sepconv1_bn True
9 block2_sepconv2_act True
10 block2_sepconv2 True
11 block2_sepconv2_bn True
12 conv2d_16 True
13 block2_pool True
14 batch_normalization_16 True
15 add_48 True
16 block3_sepconv1_act True
17 block3_sepconv1 True
18 block3_sepconv1_bn True
19 block3_sepconv2_act True
20 block3_sepconv2 True
21 block3_sepconv2_bn True
22 conv2d_17 True
23 block3_pool True
24 batch_normalization_17 True
25 add_49 True
26 block4_sepconv1_act True
27 block4_sepconv1 True
28 block4_sepconv1_bn True
29 block4_sepconv2_act True
30 block4_sepconv2 True
31 block4_sepconv2_bn True
32 conv2d_18 True
33 block4_pool True
34 batch_normalization_18 True
35 add_50 True
36 block5_sepconv1_act True
37 block5_sepconv1 True
38 block5_sepconv1_bn True
39 block5_sepconv2_act True
40 block5_sepconv2 True
41 block5_sepconv2_bn True

## Change head

In [85]:
def change_trainable_layers(model, trainable_index):
    """Freeze the layers before ``trainable_index`` and unfreeze the rest.

    Args:
        model: Object exposing a ``layers`` sequence; each layer's
            ``trainable`` attribute is overwritten.
        trainable_index: Slice boundary; ``layers[:trainable_index]`` are set
            to non-trainable and ``layers[trainable_index:]`` to trainable.

    Returns:
        None. The layers are modified in place.
    """
    frozen = model.layers[:trainable_index]
    unfrozen = model.layers[trainable_index:]
    for flag, group in ((False, frozen), (True, unfrozen)):
        for layer in group:
            layer.trainable = flag

In [88]:
# Freeze layers 0-131 and leave the layers from index 132 on (the pooling
# layer and the dense softmax head) trainable, then print the layers from
# index 130 on to verify the boundary.
# The call returns None, so its result is not bound to `_`: that would only
# shadow IPython's last-output variable.
# NOTE: Keras applies `trainable` changes when the model is compiled, so
# compile (or re-compile) the model after running this cell.
change_trainable_layers(model, 132)
print_model_properties(model, 130)

130 block14_sepconv2_bn False
131 block14_sepconv2_act False
132 global_average_pooling2d_4 True
133 dense_2 True


In [87]:
# Compile model. Run this after the layers' trainable flags have been set so
# that the frozen/unfrozen split takes effect.
# `learning_rate` replaces the deprecated `lr` keyword of the tf.keras optimizers.
model.compile(optimizer=RMSprop(learning_rate=0.0005), loss='categorical_crossentropy', metrics=['accuracy'])

In [None]:
# Train model
# NOTE(review): X_train / y_train / X_test / y_test are not created in any cell
# visible in this notebook — they must be built before this cell can run, and
# the labels presumably need to be one-hot encoded for categorical_crossentropy.
# The model takes 3-channel images of shape (460, 700, 3), so the arrays are
# passed as-is: `[..., None]` would append a size-1 axis that can never match
# that input shape.
model.fit(X_train, y_train,
          epochs=5,
          verbose=1,
          batch_size=50,
          validation_data=(X_test, y_test))

## Try fast AI

In [95]:
from fastai.vision import *

In [100]:
# Folder with the 100X histology images, relative to the working directory.
# (`Path` is presumably provided by the fastai star import.)
path = Path('data/Histology/100X')

In [102]:
# Go through each class folder and run fastai's verify_images on it, capping
# the image size at 500 px (max_size=500).
# NOTE(review): delete=True presumably removes files that fail verification,
# and the resize appears to be applied to the files in these folders
# themselves — confirm against the fastai docs before running, since the Keras
# models in this notebook are built for 460x700 inputs.
for folder in ['benign', 'malignant']:
    print(folder)
    verify_images(path/folder, delete=True, max_size=500)

benign


malignant


In [33]:
## For Mammograms

In [None]:
# Mammogram augmentation: the same mild shifts, shear, zoom and horizontal
# flip, plus rotations of up to 5 degrees. Pixels exposed by a transform are
# filled with the constant value 0. This rebinds `datagen`.
datagen = ImageDataGenerator(
    rotation_range=5,
    horizontal_flip=True,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.1,
    zoom_range=0.1,
    fill_mode='constant',
    cval=0,
)