In [19]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from IPython.display import display # Allows the use of display() for DataFrames
import seaborn as sb
%matplotlib inline

import keras
from keras import optimizers
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential, Model
from keras.layers import Input, Conv2D, MaxPooling2D, Activation, Dropout, Flatten, Dense, Add, Concatenate
from keras.layers.normalization import BatchNormalization
from keras.callbacks import ModelCheckpoint

In [2]:
## loading from dataframe https://medium.com/@vijayabhaskar96/tutorial-on-keras-flow-from-dataframe-1fd4493d237c

def generate_dataframe_from_csv(path):
    data = pd.read_csv(path)
    columns = (data.apply(lambda r: pd.Series(gen_image_paths(r)), axis=1)
        .stack()
        .rename("img_path")
        .reset_index(level=1, drop=True))
    data["sirna"] = data["sirna"].apply(lambda s: str(s))
    return data.join(columns).reset_index(drop=True)

def gen_image_paths(row):
    path_root = f"train/{row['experiment']}/Plate{row['plate']}/{row['well']}"
    return [f"{path_root}_s{site}_w{image}.png" for site in range(1, 3) for image in range(1,7)]

In [3]:
# from main import generate_dataframe_from_csv

df = generate_dataframe_from_csv("train.csv")
display(df)

Unnamed: 0,id_code,experiment,plate,well,sirna,img_path
0,HEPG2-01_1_B03,HEPG2-01,1,B03,513,train/HEPG2-01/Plate1/B03_s1_w1.png
1,HEPG2-01_1_B03,HEPG2-01,1,B03,513,train/HEPG2-01/Plate1/B03_s1_w2.png
2,HEPG2-01_1_B03,HEPG2-01,1,B03,513,train/HEPG2-01/Plate1/B03_s1_w3.png
3,HEPG2-01_1_B03,HEPG2-01,1,B03,513,train/HEPG2-01/Plate1/B03_s1_w4.png
4,HEPG2-01_1_B03,HEPG2-01,1,B03,513,train/HEPG2-01/Plate1/B03_s1_w5.png
5,HEPG2-01_1_B03,HEPG2-01,1,B03,513,train/HEPG2-01/Plate1/B03_s1_w6.png
6,HEPG2-01_1_B03,HEPG2-01,1,B03,513,train/HEPG2-01/Plate1/B03_s2_w1.png
7,HEPG2-01_1_B03,HEPG2-01,1,B03,513,train/HEPG2-01/Plate1/B03_s2_w2.png
8,HEPG2-01_1_B03,HEPG2-01,1,B03,513,train/HEPG2-01/Plate1/B03_s2_w3.png
9,HEPG2-01_1_B03,HEPG2-01,1,B03,513,train/HEPG2-01/Plate1/B03_s2_w4.png


In [4]:
train_datagen = ImageDataGenerator(
        rescale=1./255,
#         shear_range=0.2,
#         zoom_range=0.2,
#         horizontal_flip=True,
        validation_split=0.25)

train_generator = train_datagen.flow_from_dataframe(
        df,
        directory="./",
        x_col="img_path",
        y_col="sirna",
        target_size=(224, 224),
        batch_size=32,
        subset="training",
        class_mode='categorical')

valid_generator = train_datagen.flow_from_dataframe(
        df,
        directory="./",
        x_col="img_path",
        y_col="sirna",
        target_size=(224, 224),
        batch_size=32,
        subset="validation",
        class_mode='categorical')

Found 328635 images belonging to 1108 classes.
Found 109545 images belonging to 1108 classes.


In [43]:
# https://www.pyimagesearch.com/2019/02/04/keras-multiple-inputs-and-mixed-data/
from keras.backend import concatenate

# def build_cnn_layer(shape=(150,150,3,)):
#     inputlayer = Input(shape=shape, name="inputlayer")
#     x = Conv2D(filters=32, kernel_size=3, padding='same')(inputlayer)
#     x = BatchNormalization()(x)
#     x = Activation("relu")(x)
#     x = MaxPooling2D(pool_size=4)(x)
#     x = Conv2D(filters=64, kernel_size=3, padding='same')(x)
# #     model = Model(inputs=inputlayer, outputs=x)
#     return x #model

# def build_sequential_layer(previous_layers):
# #     combined = concatenate([x.output for x in previous_layers])
# #     combined = BatchNormalization()(combined)
# #     combined = Activation("relu")(combined)
# #     combined = Flatten()(combined)
# #     combined = Dense(1108, activation="softmax")(combined)
# #     return combined
#     combined = Add()(previous_layers)
# #     combined = BatchNormalization()(combined)
#     combined = Activation("relu")(combined)
# #     combined = Flatten()(combined)
#     combined = Dense(1108, activation="softmax")(combined)
#     return combined

# def build_model():
#     cnn_layers = []
#     for i in range(0,6):
#         layer = build_cnn_layer()
#         print(layer)
#         cnn_layers.append(layer)
    
# #     model = Model(inputs=[x.input for x in cnn_layers], outputs=build_sequential_layer(cnn_layers))
#     model = Model(inputs=cnn_layers, outputs=build_sequential_layer(cnn_layers))
#     optimizer = optimizers.Adam()    
#     model.compile(optimizer, loss='categorical_crossentropy')
#     return model


def build_cnn_layer(i, shape=(150,150,3,)):
    name = f"inputlayer_{i}"
    inputlayer = Input(shape=shape, name=name)
    x = Conv2D(filters=32, kernel_size=3, padding='same')(inputlayer)
    x = BatchNormalization(name=f"bn_cnn_{i}")(x)
    x = Activation("relu")(x)
    x = MaxPooling2D(pool_size=4)(x)
    x = Conv2D(filters=64, kernel_size=3, padding='same')(x)
    model = Model(inputs=inputlayer, outputs=x)
    return model

def build_sequential_layer(previous_layers):
    combined = Concatenate()([x.output for x in previous_layers])
    combined = BatchNormalization(name="batch_norm_1")(combined)
#     combined = Activation("relu", name="act_layer")(combined)
#     combined = Flatten(name="flattener")(combined)
#     z = Dense(2000, activation="softmax")(combined)
    z = Dense(1108, activation="softmax")(combined)
    return z
#     combined = Add()(previous_layers)
#     combined = BatchNormalization()(combined)
#     combined = Activation("relu")(combined)
#     combined = Flatten()(combined)
#     combined = Dense(1108, activation="softmax")(combined)
#     return combined

def build_model():
    cnn_layers = []
    for i in range(0,6):
        layer = build_cnn_layer(i)
        cnn_layers.append(layer)

    output_layer = build_sequential_layer(cnn_layers)
    print(output_layer)
    model = Model(inputs=[x.input for x in cnn_layers], outputs=output_layer)
#     model = Model(inputs=cnn_layers, outputs=build_sequential_layer(cnn_layers))
    optimizer = optimizers.Adam()    
    model.compile(optimizer, loss='categorical_crossentropy')
    return model

In [44]:
model = build_model()
model.summary()

Tensor("dense_15/truediv:0", shape=(?, 37, 37, 1108), dtype=float32)
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
inputlayer_0 (InputLayer)       (None, 150, 150, 3)  0                                            
__________________________________________________________________________________________________
inputlayer_1 (InputLayer)       (None, 150, 150, 3)  0                                            
__________________________________________________________________________________________________
inputlayer_2 (InputLayer)       (None, 150, 150, 3)  0                                            
__________________________________________________________________________________________________
inputlayer_3 (InputLayer)       (None, 150, 150, 3)  0                                            
________________________________________

In [10]:
## Simple model
# model = Sequential([
#     Conv2D(filters=32, kernel_size=2, padding='same', activation='relu', input_shape=(150, 150, 3)),
#     MaxPooling2D(pool_size=2),
#     Conv2D(filters=64, kernel_size=2, padding='same', activation='relu'),
#     MaxPooling2D(pool_size=2),
#     Conv2D(filters=128, kernel_size=2, padding='same', activation='relu'),
#     MaxPooling2D(pool_size=2),
#     Flatten(),
#     Dense(300, activation="relu"),
#     Dropout(.4),
#     Dense(1108, activation="softmax")    
# ])
## Model from dog project
model = Sequential([
    BatchNormalization(input_shape=(224, 224, 3)),
    Conv2D(filters=32, kernel_size=3, padding='same'),
    BatchNormalization(),
    Activation("relu"),
    MaxPooling2D(pool_size=4),
    Conv2D(filters=64, kernel_size=3, padding='same'),
    BatchNormalization(),
    Activation("relu"),
    Flatten(),
    Dense(1108, activation="softmax") 
])
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
batch_normalization_4 (Batch (None, 224, 224, 3)       12        
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 224, 224, 32)      896       
_________________________________________________________________
batch_normalization_5 (Batch (None, 224, 224, 32)      128       
_________________________________________________________________
activation_3 (Activation)    (None, 224, 224, 32)      0         
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 56, 56, 32)        0         
_________________________________________________________________
conv2d_4 (Conv2D)            (None, 56, 56, 64)        18496     
_________________________________________________________________
batch_normalization_6 (Batch (None, 56, 56, 64)        256       
__________

In [11]:
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
STEP_SIZE_TRAIN=train_generator.n//train_generator.batch_size
STEP_SIZE_VALID=valid_generator.n//valid_generator.batch_size

In [12]:
print(STEP_SIZE_TRAIN, STEP_SIZE_VALID)

10269 3423


In [13]:
epochs = 10

checkpointer = ModelCheckpoint(filepath='saved_models/weights.best.from_scratch.hdf5', 
                               verbose=1, save_best_only=True)

model.fit_generator(train_generator, 
                    steps_per_epoch=15,
                    validation_data=valid_generator,
                    validation_steps=15,                    
                    epochs=epochs, callbacks=[checkpointer], verbose=1)

Epoch 1/10

Epoch 00001: val_loss improved from inf to 16.08452, saving model to saved_models/weights.best.from_scratch.hdf5
Epoch 2/10

Epoch 00002: val_loss did not improve from 16.08452
Epoch 3/10

Epoch 00003: val_loss did not improve from 16.08452
Epoch 4/10

Epoch 00004: val_loss did not improve from 16.08452
Epoch 5/10

Epoch 00005: val_loss did not improve from 16.08452
Epoch 6/10

Epoch 00006: val_loss did not improve from 16.08452
Epoch 7/10

Epoch 00007: val_loss did not improve from 16.08452
Epoch 8/10

Epoch 00008: val_loss did not improve from 16.08452
Epoch 9/10

Epoch 00009: val_loss did not improve from 16.08452
Epoch 10/10

Epoch 00010: val_loss did not improve from 16.08452


<keras.callbacks.History at 0x1fb0f98d7f0>