# Imports

In [6]:
# (Re)load IPython's autoreload extension; mode 2 re-imports edited modules
# before each cell is executed.
%reload_ext autoreload
%autoreload 2

In [7]:
import pickle
import os
from time import time
import numpy as np

from tensorflow.keras import backend as K
from tensorflow.keras.layers import Input, GlobalAveragePooling1D, MaxPooling1D, Concatenate
from tensorflow.keras.layers import Dense, Conv1D, Dropout, BatchNormalization, LeakyReLU, ELU
from tensorflow.keras import Model
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import Softmax

# Data loading

In [8]:
# Tree sizes (number of tips) to process. They are kept as strings because
# they are used as dictionary keys and concatenated into file names; they are
# converted with int() where a numeric input length is needed.
n_tips = [
    '674',
    '489',
    '87',
]

In [9]:
# Common prefix of the pickled datasets; the tree size and "_10k.pkl" are
# appended to build each file name.
pickle_base = '/workspace/deep_birth_death/deep_learning/pickles/testing_k/dataset_'

# One dataset per tree size, keyed by the (string) number of tips.
# NOTE: pickle.load can execute arbitrary code, so only load trusted files.
data = {}
for i in n_tips:
    pickle_path = pickle_base + i + "_10k.pkl"
    with open(pickle_path, 'rb') as f:
        data[i] = pickle.load(f)

# Model definition

In [5]:
def _conv_stage(x, filters):
    """Apply Conv1D -> ELU twice with `filters` channels, then max-pool by 2."""
    for _ in range(2):
        x = Conv1D(filters, kernel_size=3, padding='same')(x)
        x = ELU()(x)
    return MaxPooling1D(pool_size=2)(x)


def _dense_block(x):
    """Apply the Dense(32) -> ELU -> Dropout(0.3) block shared by trunk and heads."""
    x = Dense(32)(x)
    x = ELU()(x)
    return Dropout(0.3)(x)


def create_nn(n_out, n_tips, clas=False, div_scenario = None):
    """Build the 1D-convolutional Keras model used for classification or regression.

    The trunk is a stack of convolutional stages whose depth depends on the
    input length, followed by global average pooling, a dense block and a
    Dense layer named 'logits' with `n_out` units. The output head then
    depends on `clas` and `div_scenario`.

    Parameters
    ----------
    n_out : int
        Number of units of the 'logits' layer. For regression scenarios other
        than "SAT" it is also the number of single-unit output heads that are
        concatenated.
    n_tips : int
        Length of the input sequence; the model input has shape (n_tips, 1).
        One extra convolutional stage is added when n_tips > 256 and another
        one when n_tips > 512.
    clas : bool, default False
        If True, the output is a softmax over the 'logits' layer and
        `div_scenario` is ignored.
    div_scenario : str or None, default None
        Only used when `clas` is False:
        * "SAT": a single head ending in Dense(1) + LeakyReLU(alpha=10)
          (the output has one unit regardless of `n_out`).
        * "WW": `n_out` heads ending in a plain Dense(1), i.e. a linear output.
        * anything else: `n_out` heads ending in Dense(1) + LeakyReLU(alpha=10).

    Returns
    -------
    Model
        The uncompiled Keras model.

    Notes
    -----
    The "WW" case used to be unreachable: it was tested in an `elif` after
    `div_scenario != "SAT"`, which already matches "WW", and it called an
    undefined `Linear` name. It is now tested explicitly and implemented with
    Dense's default (linear) activation.
    NOTE(review): this changes the model built for "WW" from LeakyReLU heads to
    linear heads, which is presumably what was intended - confirm before
    comparing with previously trained "WW" models.
    """
    input_data = Input(shape=(n_tips, 1))

    # The first stage is always present; longer inputs get deeper trunks.
    x = _conv_stage(input_data, 16)
    final_filters = 128
    if n_tips > 256:
        final_filters = 64
        x = _conv_stage(x, 32)

        if n_tips > 512:
            final_filters = 128
            x = _conv_stage(x, 64)

    x = Conv1D(final_filters, kernel_size=3, padding='same')(x)
    x = ELU()(x)
    x = Dropout(0.3)(x)

    x = GlobalAveragePooling1D()(x)

    x = _dense_block(x)

    x = Dense(n_out, name='logits')(x)

    if clas:
        output_class = Softmax()(x)

    elif div_scenario == "SAT":
        # Single-output regression head.
        x = _dense_block(x)
        x = Dense(1)(x)
        output_class = LeakyReLU(alpha=10)(x)

    else:
        # One independent head per regression target.
        linear_heads = div_scenario == "WW"
        out_list = []

        for _ in range(n_out):
            y = _dense_block(x)
            y = Dense(1)(y)
            if not linear_heads:
                y = LeakyReLU(alpha=10)(y)
            out_list.append(y)

        output_class = Concatenate()(out_list)

    return Model(input_data, output_class)

# Training

In [6]:
# Early-stopping callback shared by every training run below: it monitors the
# validation loss with a patience of 100 epochs and restores the best weights.
callback = EarlyStopping(monitor='val_loss', patience=100, restore_best_weights=True)

# Root folder for the trained models; one sub-folder per task.
out_folder_path = "/workspace/deep_birth_death/deep_learning/models/testing_k_sims/"
for subfolder in ('class/', 'reg/'):
    os.makedirs(out_folder_path + subfolder, exist_ok=True)

## Classification training

In [7]:
# Train one classifier per tree size and store the model, its training
# history, and [parameter count, training time] next to each other.
for i in n_tips:
    print("\nClasification training", i, 'tips')

    X_class_train = data[i]['X_train']
    y_class_train = data[i]['y_class_train']

    # The number of classes is the width of the first target vector.
    nn_model = create_nn(len(y_class_train[0]), int(i), clas=True)
    nn_model.compile(
        optimizer=Adam(learning_rate=0.0001),
        loss="categorical_crossentropy",
        metrics=['accuracy'],
    )

    start = time()
    history = nn_model.fit(
        X_class_train, y_class_train,
        batch_size=128, epochs=1000, validation_split=0.1,
        callbacks=[callback], verbose=0,
    )
    elapsed_time = time() - start
    print('Elapsed time', elapsed_time)

    # The prefix has no trailing separator, so the files are named e.g.
    # "<tips>_classification_temperaturemodel.keras".
    save_path = out_folder_path + 'class/' + i + "_classification_temperature"

    nn_model.save(save_path + "model.keras")
    pickled_outputs = (
        ("history.pkl", history.history),
        ("model_data.pkl", [nn_model.count_params(), elapsed_time]),
    )
    for suffix, payload in pickled_outputs:
        with open(save_path + suffix, 'wb') as f:
            pickle.dump(payload, f)


Clasification training 674 tips
Elapsed time 358.1347794532776

Clasification training 489 tips
Elapsed time 282.8572881221771

Clasification training 87 tips
Elapsed time 1600.7302286624908


## Regression training

In [8]:
def _report_errors(title, pred, real):
    """Print `title`, the mean squared error of each output column and the overall mean.

    Returns the element-wise squared-error matrix so the caller can keep it.
    """
    error = (pred - real) ** 2
    print(title)
    for j in range(len(pred[0])):
        print(np.mean(error[:, j]))
    print('Total error:', np.mean(error))
    return error


# Train one regression model per (tree size, diversification scenario) and
# report its squared error on the matching test samples, first on the
# normalised targets and then after dividing by the test rescaling factors.
for i in n_tips:
    for label in np.unique(data[i]['div_info_train']):
        # The scenario name is the second path component of the label, up to
        # its first underscore.
        div_scenario = label.split('/')[1].split('_')[0]

        # Boolean selectors for the samples of this scenario.
        train_mask = data[i]['div_info_train'] == label
        test_mask = data[i]['div_info_test'] == label

        for data_norm in ['_norm']:

            # Get regression values of the corresponding scenario
            X_train = data[i]['X_train'][train_mask]
            y_reg_train = np.array(
                [np.array(elem) for elem in data[i]['y_reg' + data_norm + '_train'][train_mask]]
            )

            print("\nRegression training", i, 'tips,', div_scenario, 'model,', data_norm, 'norm')
            nn_model = create_nn(len(y_reg_train[0]),
                                 int(i), div_scenario=div_scenario)
            nn_model.compile(loss="mae", optimizer=Adam(learning_rate=0.001),
                             metrics=['mse'])

            start = time()
            history = nn_model.fit(np.expand_dims(X_train, axis=2),
                                   np.expand_dims(y_reg_train, axis=2),
                                   batch_size=128, epochs=1000, validation_split=0.1,
                                   callbacks=[callback], verbose=0)
            elapsed_time = time()-start
            print('Elapsed time', elapsed_time)

            save_path = out_folder_path + 'reg/' + div_scenario + '/'
            os.makedirs(save_path, exist_ok=True)
            save_path +=  i + "_regression" + data_norm + '_'

            nn_model.save(save_path + "model.keras")
            with open(save_path + "history.pkl", 'wb') as f:
                pickle.dump(history.history, f)
            with open(save_path + "model_data.pkl", 'wb') as f:
                pickle.dump([nn_model.count_params(), elapsed_time], f)

            # Evaluate on the test samples of the same scenario.
            pred = nn_model.predict(np.expand_dims(data[i]['X_test'][test_mask], axis=2))
            y_reg_test = [np.array(elem)
                          for elem in data[i]['y_reg' + data_norm + '_test'][test_mask]]
            resc_factor_test = data[i]['resc_factor_test'][test_mask]

            # Use an explicit array: the targets are a list of per-sample
            # arrays, and arithmetic on a plain list only works through
            # NumPy's reflected operators.
            real = np.array(y_reg_test)

            # NOTE(review): the recorded run printed nan for every SAT metric;
            # check the SAT predictions, targets and rescaling factors for
            # non-finite values - the cause is not visible from this cell.
            error = _report_errors('Errors', pred, real)

            # Divide predictions and targets by the per-sample rescaling
            # factor. `pred` is updated in place (as before) so it keeps the
            # dtype returned by predict().
            resc_column = resc_factor_test[:, np.newaxis]
            pred /= resc_column
            real = real / resc_column
            error = _report_errors('Denorm Errors', pred, real)


Regression training 674 tips, BD model, _norm norm
Elapsed time 66.30570125579834
Errors
0.001032213610016864
0.007684610870764718
Total error: 0.0043584122403907905
Denorm Errors
0.027478449686707943
0.20180106036907244
Total error: 0.1146397550278902

Regression training 674 tips, HE model, _norm norm
Elapsed time 107.69273853302002
Errors
0.00041024855124178485
0.0004688418829981688
Total error: 0.0004395452171199768
Denorm Errors
0.0849473976509293
0.09522789095646356
Total error: 0.09008764430369641

Regression training 674 tips, ME model, _norm norm
Elapsed time 98.31474280357361
Errors
0.0034132949877734363
0.009675997254262565
129.50510261214487
0.0034088083949885667
Total error: 32.38040017819547
Denorm Errors
0.0037703681893708142
0.013530973562615052
61.597438524892446
0.0039956478203202975
Total error: 15.404683878616186

Regression training 674 tips, SAT model, _norm norm
Elapsed time 123.03559184074402
Errors
nan
Total error: nan
Denorm Errors
nan
Total error: nan

Regre