In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from IPython.display import display # Allows the use of display() for DataFrames
import seaborn as sb
%matplotlib inline

import keras
from keras import optimizers
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential, Model
from keras.layers import Input, Conv2D, MaxPooling2D, Activation, Dropout, Flatten, Dense, Add, Concatenate
from keras.layers.normalization import BatchNormalization
from keras.callbacks import ModelCheckpoint
import timeit

Using TensorFlow backend.


In [2]:
## loading from dataframe https://medium.com/@vijayabhaskar96/tutorial-on-keras-flow-from-dataframe-1fd4493d237c

def generate_dataframe_from_csv(path):
    data = pd.read_csv(path)
    columns = (data.apply(lambda r: pd.Series(gen_image_paths(r)), axis=1)
        .stack()
        .rename("img_path")
        .reset_index(level=1, drop=True))
    data["sirna"] = data["sirna"].apply(lambda s: str(s))
    return data.join(columns).reset_index(drop=True)

def gen_image_paths(row):
    path_root = f"train/{row['experiment']}/Plate{row['plate']}/{row['well']}"
    return [f"{path_root}_s{site}_w{image}.png" for site in range(1, 3) for image in range(1,7)]


def generate_dataframe_from_csv2(path):
    data = pd.read_csv(path)
    columns = (data.apply(lambda r: pd.Series(gen_image_paths2(r)), axis=1)
        .stack()
        .rename("img_path")
        .reset_index(level=1, drop=True))
    data["sirna"] = data["sirna"].apply(lambda s: str(s))
    return data.join(columns).reset_index(drop=True)

def gen_image_paths2(row):
    path_root = f"train/{row['experiment']}/Plate{row['plate']}/{row['well']}"
    return [f"{path_root}_s{site}" for site in range(1, 3)] 

def generate_dataframe_from_csv3(path):
    data = pd.read_csv(path)
    columns = (data.apply(lambda r: pd.Series(gen_image_paths2(r)), axis=1)
        .stack()
        .rename("img_path")
        .reset_index(level=1, drop=True))
    data["sirna"] = data["sirna"].apply(lambda s: str(s))
    data = data.join(columns).reset_index(drop=True)
    
    for i in range(1,7):
        data[f"img_path_{i}"] = data.apply(lambda row: f"{row['img_path']}_w{i}.png", axis=1)
    return data
# def append_file(row):
#     path_root = row["img_path"]
#     return f"{path_root}_w{i}"
    
def gen_image_paths3(row):
    path_root = f"train/{row['experiment']}/Plate{row['plate']}/{row['well']}"
    return [f"{path_root}_s{site}" for site in range(1, 3)] 
 

In [3]:
def get_model_inputs(df):
    trainY = df["sirna"]
    im_paths = df["img_path"].apply(lambda r: [f"{r}_w{image}.png" for image in range(1,7)])
#     images = df["img_path"].apply(lambda r: np.array([image for image in range(1,7)]))
    splits = np.hsplit(np.stack(np.array(im_paths)), 6)
    
    images = [np.hstack(s) for s in splits]
    
    return (images, trainY) 

In [4]:
# from main import generate_dataframe_from_csv

df = generate_dataframe_from_csv3("train.csv")
# display(df)

In [5]:
(images, trainY) = get_model_inputs(df)

In [6]:
# https://github.com/keras-team/keras/issues/8130#issuecomment-336855177
# https://github.com/keras-team/keras/issues/3386
# def create_multi_generator(df):
#     train_generator = ImageDataGenerator(
#         rescale=1./255,
#         validation_split=0.25)
#     gens = []
#     valid_gens = []
#     for i in range(1,7):
#         gens.append(train_datagen.flow_from_dataframe(
#             df,
#             directory="./",
#             x_col=f"img_path_{i}",
#             y_col="sirna",
#             target_size=(150, 150),
#             batch_size=32,
#             subset="training",
#             class_mode='categorical'))
        
# #         valid_gens.append(train_datagen.flow_from_dataframe(
# #             df,
# #             directory="./",
# #             x_col=f"img_path_{i}",
# #             y_col="sirna",
# #             target_size=(150, 150),
# #             batch_size=32,
# #             subset="validation",
# #             class_mode='categorical'))
    
#     while True:
#         next_set = [gen.next() for gen in gens]
#         yield [x[0] for x in next_set], next_set[0][1]

def create_multi_generator(df, train_datagen, subset):
    gens = []
    for i in range(1,7):
        gens.append(train_datagen.flow_from_dataframe(
            df,
            directory="./",
            x_col=f"img_path_{i}",
            y_col="sirna",
            target_size=(224, 224),
            batch_size=32,
            subset=subset,
            class_mode='categorical'))
           
    while True:
        next_set = [gen.next() for gen in gens]
        yield [x[0] for x in next_set], next_set[0][1]
    

In [7]:
train_datagen = ImageDataGenerator(
        rescale=1./255,
#         shear_range=0.2,
#         zoom_range=0.2,
#         horizontal_flip=True,
        validation_split=0.25)

train_generator = create_multi_generator(df, train_datagen, "training") 
valid_generator = create_multi_generator(df, train_datagen, "validation") 
# train_generator = train_datagen.flow_from_dataframe(
#         df,
#         directory="./",
#         x_col="img_path",
#         y_col="sirna",
#         target_size=(224, 224),
#         batch_size=32,
#         subset="training",
#         class_mode='categorical')

# valid_generator = train_datagen.flow_from_dataframe(
#         df,
#         directory="./",
#         x_col="img_path",
#         y_col="sirna",
#         target_size=(224, 224),
#         batch_size=32,
#         subset="validation",
#         class_mode='categorical')

In [8]:
# https://www.pyimagesearch.com/2019/02/04/keras-multiple-inputs-and-mixed-data/
# from keras.backend import concatenate

# def build_cnn_layer(i, shape=(150,150,3,)):
#     name = f"inputlayer_{i}"
#     inputlayer = Input(shape=shape, name=name)
#     x = Conv2D(filters=32, kernel_size=3, padding='same')(inputlayer)
#     x = BatchNormalization(name=f"bn_cnn_{i}")(x)
#     x = Activation("relu")(x)
#     x = MaxPooling2D(pool_size=4)(x)
#     x = Conv2D(filters=64, kernel_size=3, padding='same')(x)
#     model = Model(inputs=inputlayer, outputs=x)
#     return model

def build_cnn_layer(i, shape=(224,224,3,)):
    name = f"inputlayer_{i}"
    inputlayer = Input(shape=shape, name=name)
    x = Conv2D(filters=64, kernel_size=3, padding='same')(inputlayer)
#     x = Conv2D(filters=64, kernel_size=3, padding='same')(inputlayer)
    x = Activation("relu")(x)
    x = BatchNormalization(name=f"bn_cnn_1_{i}")(x)
    x = MaxPooling2D(pool_size=2)(x)
    
    x = Conv2D(filters=128, kernel_size=3, padding='same')(x)
#     x = Conv2D(filters=128, kernel_size=3, padding='same')(x)
    x = Activation("relu")(x)
    x = BatchNormalization(name=f"bn_cnn_2_{i}")(x)
    x = MaxPooling2D(pool_size=2)(x)
    
    x = Conv2D(filters=256, kernel_size=3, padding='same')(x)
    x = Activation("relu")(x)
    x = BatchNormalization(name=f"bn_cnn_3_{i}")(x)
    x = MaxPooling2D(pool_size=2)(x)
    
    x = Conv2D(filters=512, kernel_size=3, padding='same')(x)
    x = Activation("relu")(x)
    x = BatchNormalization(name=f"bn_cnn_4_{i}")(x)
    x = MaxPooling2D(pool_size=2)(x)
    
    x = Conv2D(filters=1024, kernel_size=3, padding='same')(x)
    x = Activation("relu")(x)
    x = BatchNormalization(name=f"bn_cnn_5_{i}")(x)
    x = MaxPooling2D(pool_size=2)(x)

#     x = Conv2D(filters=256, kernel_size=3, padding='same')(x)
    x = Flatten(name=f"flattener_{i}")(x)
#     x = Dense(4000, activation="relu")(x)
#     x = BatchNormalization(name=f"bn_cnn_4_{i}")(x)
#     x = Dense(2000, activation="relu")(x)
#     x = BatchNormalization(name=f"bn_cnn_5_{i}")(x)
    x = Dense(1108, activation="softmax")(x)
    model = Model(inputs=inputlayer, outputs=x)
    return model

def build_sequential_layer(previous_layers):
    combined = Concatenate()([x.output for x in previous_layers])
    combined = BatchNormalization(name="batch_norm_1")(combined)
    combined = Activation("relu", name="act_layer")(combined)
#     combined = Flatten(name="flattener")(combined)
    z = Dense(2000, activation="softmax")(combined)
    z = Dense(1108, activation="softmax")(combined)
    return z
#     combined = Add()(previous_layers)
#     combined = BatchNormalization()(combined)
#     combined = Activation("relu")(combined)
#     combined = Flatten()(combined)
#     combined = Dense(1108, activation="softmax")(combined)
#     return combined

def build_model():
    cnn_layers = []
    for i in range(0,6):
        layer = build_cnn_layer(i)
        cnn_layers.append(layer)

    output_layer = build_sequential_layer(cnn_layers)
    model = Model(inputs=[x.input for x in cnn_layers], outputs=output_layer)
    optimizer = optimizers.Adam()    
    model.compile(optimizer, loss='categorical_crossentropy', metrics=['accuracy'])
    return model

In [9]:
model = build_model()
model.summary()

Instructions for updating:
Colocations handled automatically by placer.
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
inputlayer_0 (InputLayer)       (None, 224, 224, 3)  0                                            
__________________________________________________________________________________________________
inputlayer_1 (InputLayer)       (None, 224, 224, 3)  0                                            
__________________________________________________________________________________________________
inputlayer_2 (InputLayer)       (None, 224, 224, 3)  0                                            
__________________________________________________________________________________________________
inputlayer_3 (InputLayer)       (None, 224, 224, 3)  0                                            
_____________________________________

In [10]:
epochs = 10

checkpointer = ModelCheckpoint(filepath='saved_models/weights.best.from_scratch.hdf5', 
                               verbose=1, save_best_only=True)
model.fit_generator(train_generator, 
                    steps_per_epoch=15,
                    validation_data=valid_generator,
                    validation_steps=15,                    
                    epochs=epochs, callbacks=[checkpointer], verbose=1)


# model.fit(images, trainY,
# #                     steps_per_epoch=15,
# #                     validation_data=valid_generator,
# #                     validation_steps=15,                    
#                     epochs=epochs, callbacks=[checkpointer], verbose=1)

Instructions for updating:
Use tf.cast instead.
Epoch 1/10
Found 18257 images belonging to 1108 classes.
Found 54773 images belonging to 1108 classes.
Found 18257 images belonging to 1108 classes.
Found 54773 images belonging to 1108 classes.
Found 18257 images belonging to 1108 classes.
Found 54773 images belonging to 1108 classes.
Found 18257 images belonging to 1108 classes.
Found 54773 images belonging to 1108 classes.
Found 18257 images belonging to 1108 classes.
Found 54773 images belonging to 1108 classes.
Found 54773 images belonging to 1108 classes.
Found 18257 images belonging to 1108 classes.

Epoch 00001: val_loss improved from inf to 7.01838, saving model to saved_models/weights.best.from_scratch.hdf5


KeyboardInterrupt: 

In [10]:
## Simple model
# model = Sequential([
#     Conv2D(filters=32, kernel_size=2, padding='same', activation='relu', input_shape=(150, 150, 3)),
#     MaxPooling2D(pool_size=2),
#     Conv2D(filters=64, kernel_size=2, padding='same', activation='relu'),
#     MaxPooling2D(pool_size=2),
#     Conv2D(filters=128, kernel_size=2, padding='same', activation='relu'),
#     MaxPooling2D(pool_size=2),
#     Flatten(),
#     Dense(300, activation="relu"),
#     Dropout(.4),
#     Dense(1108, activation="softmax")    
# ])
## Model from dog project
# model = Sequential([
#     BatchNormalization(input_shape=(224, 224, 3)),
#     Conv2D(filters=32, kernel_size=3, padding='same'),
#     BatchNormalization(),
#     Activation("relu"),
#     MaxPooling2D(pool_size=4),
#     Conv2D(filters=64, kernel_size=3, padding='same'),
#     BatchNormalization(),
#     Activation("relu"),
#     Flatten(),
#     Dense(1108, activation="softmax") 
# ])
# model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
batch_normalization_4 (Batch (None, 224, 224, 3)       12        
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 224, 224, 32)      896       
_________________________________________________________________
batch_normalization_5 (Batch (None, 224, 224, 32)      128       
_________________________________________________________________
activation_3 (Activation)    (None, 224, 224, 32)      0         
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 56, 56, 32)        0         
_________________________________________________________________
conv2d_4 (Conv2D)            (None, 56, 56, 64)        18496     
_________________________________________________________________
batch_normalization_6 (Batch (None, 56, 56, 64)        256       
__________

In [11]:
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
STEP_SIZE_TRAIN=train_generator.n//train_generator.batch_size
STEP_SIZE_VALID=valid_generator.n//valid_generator.batch_size

In [12]:
print(STEP_SIZE_TRAIN, STEP_SIZE_VALID)

10269 3423


In [60]:
epochs = 10

checkpointer = ModelCheckpoint(filepath='saved_models/weights.best.from_scratch.hdf5', 
                               verbose=1, save_best_only=True)

model.fit_generator(train_generator, 
                    steps_per_epoch=15,
                    validation_data=valid_generator,
                    validation_steps=15,                    
                    epochs=epochs, callbacks=[checkpointer], verbose=1)

NameError: name 'train_generator' is not defined