In [1]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.callbacks import ModelCheckpoint

In [2]:
import os
import sys
# Resolve ../learning against the current working directory and put it on the
# import path once, so re-running this cell does not add duplicate entries.
# Presumably this is where the project's helper modules live -- confirm.
module_path = os.path.abspath(os.path.join(os.pardir, 'learning'))
already_importable = module_path in sys.path
if not already_importable:
    sys.path.append(module_path)

In [19]:
from os import makedirs, path

In [11]:
from constants import *
from preprocessing import *

In [4]:
# Switch the working directory to the plates root. The relative paths used
# further down ('csvs/' for the input CSVs, 'dnn_models' for checkpoints) are
# resolved against this directory.
# NOTE(review): this is a machine-specific absolute path; the notebook will not
# run elsewhere without editing it.
os.chdir(r'/storage/users/g-and-n/plates')

In [5]:
# File names of the 35 plate CSVs handed to split_train_test (order preserved).
csvs = [
    '24792.csv', '25912.csv', '24509.csv', '24633.csv', '25987.csv',
    '25680.csv', '25422.csv', '24517.csv', '25664.csv', '25575.csv',
    '26674.csv', '25945.csv', '24687.csv', '24752.csv', '24311.csv',
    '26622.csv', '26641.csv', '24594.csv', '25676.csv', '24774.csv',
    '26562.csv', '25997.csv', '26640.csv', '24562.csv', '25938.csv',
    '25708.csv', '24321.csv', '24735.csv', '26786.csv', '25571.csv',
    '26666.csv', '24294.csv', '24640.csv', '25985.csv', '24661.csv',
]

In [6]:
# Channel to predict: passed to split_train_test and, as `task_channel`, to
# create_model_dnn, where it also becomes part of the checkpoint file name.
channel = 'AGP'

In [7]:
# Plate CSV used as the test plate (it is one of the entries in `csvs`); its
# plate number also prefixes the saved model file name.
test_plate = '24509.csv'

In [8]:
# Load every plate CSV from 'csvs/' and split it into train / test frames for
# `channel`, with `test_plate` as the test plate. This is slow: the recorded
# run took about 18 minutes for the 35 files.
(
    df_test_mock_x,
    df_test_mock_y,
    df_test_treated_x,
    df_test_treated_y,
    df_train_x,
    df_train_y,
) = split_train_test('csvs/', csvs, test_plate, channel, inter_channel=False)

100%|██████████| 35/35 [18:23<00:00, 33.22s/it]


In [9]:
# Scaling method name handed to fit_scaler for both the inputs and the targets.
# NOTE(review): 'Std' presumably selects standard (zero-mean / unit-variance)
# scaling -- confirm against fit_scaler.
scale_method = 'Std'

In [12]:
# Fit one scaler for the inputs and one for the targets, using training data only.
x_scaler, y_scaler = (
    fit_scaler(df_train_x, scale_method),
    fit_scaler(df_train_y, scale_method),
)

# Apply the train-fitted scalers to the training set ...
df_train_x_scaled, df_train_y_scaled = (
    scale_data(df_train_x, x_scaler),
    scale_data(df_train_y, y_scaler),
)

# ... and reuse the same scalers for both test subsets (treated and mock),
# so the test data is transformed exactly like the training data.
df_test_treated_x_scaled, df_test_treated_y_scaled = (
    scale_data(df_test_treated_x, x_scaler),
    scale_data(df_test_treated_y, y_scaler),
)
df_test_mock_x_scaled, df_test_mock_y_scaled = (
    scale_data(df_test_mock_x, x_scaler),
    scale_data(df_test_mock_y, y_scaler),
)

In [13]:
def create_model_dnn(task_channel, df_train_X, df_train_Y, test_plate):
    """
    Build and train a multi-layer perceptron (referred to here as a deep neural network, DNN).

    NOTE: a later cell of this notebook defines ``create_model_dnn`` again with an extra
    ``inter_channel`` argument. When the cells are run top to bottom, that later definition
    replaces this one.

    task_channel: the channel we aim to predict; used here only to name the checkpoint file
    df_train_X: training inputs, 2-D, one column per input feature (``shape[1]`` sets the input width)
    df_train_Y: training targets, 2-D, one column per predicted feature (``shape[1]`` sets the output width)
    test_plate: the held-out plate, e.g. '24509.csv'; its file stem prefixes the checkpoint file name

    return: the trained dnn model (weights as of the final epoch)

    Side effects: creates the 'dnn_models' folder under the current working directory if needed and
    overwrites '<plate>_<task_channel>.h5' in it at the end of every epoch.
    """
    folder = 'dnn_models'
    makedirs(folder, exist_ok=True)

    # Structure of the network: a funnel of fully connected ReLU layers.
    inputs = Input(shape=(df_train_X.shape[1],))
    hidden = inputs
    for units in (512, 256, 128, 100, 50, 25, 10):
        hidden = Dense(units, activation='relu')(hidden)
    # NOTE(review): a sigmoid output can only produce values in (0, 1); confirm the targets are
    # scaled into that range before training with this head.
    predictions = Dense(df_train_Y.shape[1], activation='sigmoid')(hidden)

    # model compilation
    model = Model(inputs=inputs, outputs=predictions)
    model.compile(optimizer='adam', loss='mse')

    # model training
    # Take the file stem instead of a fixed 5-character prefix so plate IDs of any length
    # (or a plate given with a directory prefix) still yield the right checkpoint name.
    test_plate_number = path.splitext(path.basename(test_plate))[0]
    filepath = path.join(folder, f'{test_plate_number}_{task_channel}.h5')
    my_callbacks = [
        # The deprecated ``period=1`` argument is omitted: the callback's default already saves
        # at the end of every epoch. With save_best_only=False the file always holds the most
        # recent epoch, not the best one.
        ModelCheckpoint(filepath, monitor='val_loss', verbose=1, save_best_only=False, save_weights_only=False,
                        mode='auto')]
    model.fit(df_train_X, df_train_Y, epochs=5, batch_size=1024 * 8, verbose=1, shuffle=True, validation_split=0.2,
              callbacks=my_callbacks)
    return model

In [21]:
def create_model_dnn(task_channel, df_train_X, df_train_Y, test_plate, inter_channel=True, *, epochs=10,
                     batch_size=1024 * 8, validation_split=0.2):
    """
    Build and train a multi-layer perceptron (referred to here as a deep neural network, DNN).

    task_channel: the channel we aim to predict; used here only to name the checkpoint file
    df_train_X: training inputs, 2-D, one column per input feature (``shape[1]`` sets the input width)
    df_train_Y: training targets, 2-D, one column per predicted feature (``shape[1]`` sets the output width)
    test_plate: the held-out plate, e.g. '24509.csv'; its file stem prefixes the checkpoint file name
    inter_channel: selects the architecture.
        True  -> hidden layers 512-256-128-100-50-25-10 with a sigmoid output layer
        False -> hidden layers 64-32-16-8-16-32-64 with a linear output layer; '1to1' is appended
                 to the checkpoint name
    epochs: number of training epochs (keyword-only)
    batch_size: mini-batch size passed to ``fit`` (keyword-only)
    validation_split: fraction of the training data Keras holds out for validation (keyword-only)

    return: the trained dnn model (weights as of the final epoch)

    Side effects: creates the 'dnn_models' folder under the current working directory if needed and
    overwrites '<plate>_<task_channel>[1to1].h5' in it at the end of every epoch.
    """
    folder = 'dnn_models'
    makedirs(folder, exist_ok=True)

    # Structure of the network: both variants are plain stacks of ReLU layers and differ only
    # in the layer widths and in the activation of the output layer.
    if inter_channel:
        # NOTE(review): a sigmoid output can only produce values in (0, 1); confirm the targets
        # are scaled into that range before training with this head.
        hidden_units, output_activation = (512, 256, 128, 100, 50, 25, 10), 'sigmoid'
    else:
        hidden_units, output_activation = (64, 32, 16, 8, 16, 32, 64), 'linear'

    inputs = Input(shape=(df_train_X.shape[1],))
    hidden = inputs
    for units in hidden_units:
        hidden = Dense(units, activation='relu')(hidden)
    predictions = Dense(df_train_Y.shape[1], activation=output_activation)(hidden)

    # model compilation
    model = Model(inputs=inputs, outputs=predictions)
    model.compile(optimizer='adam', loss='mse')

    # model training
    # Take the file stem instead of a fixed 5-character prefix so plate IDs of any length
    # (or a plate given with a directory prefix) still yield the right checkpoint name.
    test_plate_number = path.splitext(path.basename(test_plate))[0]
    inter_str = '' if inter_channel else '1to1'
    filepath = path.join(folder, f'{test_plate_number}_{task_channel}{inter_str}.h5')
    my_callbacks = [
        # The deprecated ``period=1`` argument is omitted: the callback's default already saves
        # at the end of every epoch. With save_best_only=False the file always holds the most
        # recent epoch, not the best one.
        ModelCheckpoint(filepath, monitor='val_loss', verbose=1, save_best_only=False, save_weights_only=False,
                        mode='auto')]
    model.fit(df_train_X, df_train_Y, epochs=epochs, batch_size=batch_size, verbose=1, shuffle=True,
              validation_split=validation_split, callbacks=my_callbacks)
    return model

In [22]:
# Shape check on the scaled training targets and inputs (targets first). In the
# recorded run both have the same number of rows and 86 columns.
df_train_y_scaled.shape, df_train_x_scaled.shape

((1308762, 86), (1308762, 86))

In [24]:
# Train the inter_channel=False variant (narrow layers, linear output) on the
# scaled training data; the checkpoint goes to dnn_models/<plate>_<channel>1to1.h5.
model = create_model_dnn(
    channel,
    df_train_x_scaled,
    df_train_y_scaled,
    test_plate,
    inter_channel=False,
)

Epoch 1/10

Epoch 00001: saving model to dnn_models/24509_AGP1to1.h5
Epoch 2/10

Epoch 00002: saving model to dnn_models/24509_AGP1to1.h5
Epoch 3/10

Epoch 00003: saving model to dnn_models/24509_AGP1to1.h5
Epoch 4/10

Epoch 00004: saving model to dnn_models/24509_AGP1to1.h5
Epoch 5/10

Epoch 00005: saving model to dnn_models/24509_AGP1to1.h5
Epoch 6/10

Epoch 00006: saving model to dnn_models/24509_AGP1to1.h5
Epoch 7/10

Epoch 00007: saving model to dnn_models/24509_AGP1to1.h5
Epoch 8/10

Epoch 00008: saving model to dnn_models/24509_AGP1to1.h5
Epoch 9/10

Epoch 00009: saving model to dnn_models/24509_AGP1to1.h5
Epoch 10/10

Epoch 00010: saving model to dnn_models/24509_AGP1to1.h5


In [25]:
# Print the layer-by-layer architecture and parameter counts of the trained model.
model.summary()

Model: "model_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_4 (InputLayer)         [(None, 86)]              0         
_________________________________________________________________
dense_24 (Dense)             (None, 64)                5568      
_________________________________________________________________
dense_25 (Dense)             (None, 32)                2080      
_________________________________________________________________
dense_26 (Dense)             (None, 16)                528       
_________________________________________________________________
dense_27 (Dense)             (None, 8)                 136       
_________________________________________________________________
dense_28 (Dense)             (None, 16)                144       
_________________________________________________________________
dense_29 (Dense)             (None, 32)                544 