In [1]:
import pandas as pd 
import numpy as np 

df_train = pd.read_csv('clean_data/train.csv')
df_test = pd.read_csv('clean_data/test.csv')

target_cols = ['EC1', 'EC2']
num_cols = ['BertzCT', 'Chi1', 'Chi1n', 'Chi1v', 'Chi2n', 'Chi2v', 'Chi3v',
            'Chi4n', 'EState_VSA1', 'EState_VSA2', 'ExactMolWt', 'FpDensityMorgan1',
            'FpDensityMorgan2', 'FpDensityMorgan3', 'HallKierAlpha',
            'HeavyAtomMolWt', 'Kappa3', 'MaxAbsEStateIndex', 'MinEStateIndex',
            'NumHeteroatoms', 'PEOE_VSA10', 'PEOE_VSA14', 'PEOE_VSA6', 'PEOE_VSA7',
            'PEOE_VSA8', 'SMR_VSA10', 'SMR_VSA5', 'SlogP_VSA3', 'VSA_EState9',
            'fr_COO', 'fr_COO2']
binary_cols = ['EC3', 'EC4', 'EC5', 'EC6']

x_train = df_train[num_cols].to_numpy()
y_train = df_train[target_cols].to_numpy()

x_test = df_test[num_cols].to_numpy()

In [2]:
from sklearn.model_selection import train_test_split

x_train, x_cv, y_train, y_cv = train_test_split(x_train, y_train, test_size=0.2, random_state=42)

In [3]:
import tensorflow as tf
from sklearn.metrics import roc_auc_score
import optuna

def objective_ec1(trial):
    # Define the hyperparameters to be tuned
    num_layers = trial.suggest_int('num_layers', 1, 3)
    num_units = trial.suggest_int('num_units', 16, 128)
    regularisation = trial.suggest_float('regularisation', 0.0, 0.1)
    epochs = trial.suggest_int('epochs', 5, 20)
    batch_size = trial.suggest_categorical('batch_size', [16, 32, 64])

    # Define the model architecture
    model = tf.keras.Sequential()
    model.add(tf.keras.layers.Dense(num_units, activation='relu', input_shape=(x_train.shape[1],)))
    for _ in range(num_layers):
        model.add(tf.keras.layers.Dense(num_units, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(regularisation)))
    model.add(tf.keras.layers.Dense(1, activation='sigmoid'))

    # Compile the model with AUC-ROC as the metric
    model.compile(optimizer='adam',
                  loss='binary_crossentropy',
                  metrics=[tf.keras.metrics.AUC(curve='ROC')])

    # Train the model
    model.fit(x_train, y_train[:, 0], epochs=epochs, batch_size=batch_size, validation_data=(x_cv, y_cv[:, 0]), verbose=0)

    # Calculate the AUC-ROC score on the validation set
    y_pred = model.predict(x_cv)
    auc_roc = roc_auc_score(y_cv[:, 0], y_pred)

    return auc_roc

def objective_ec2(trial):
    # Define the hyperparameters to be tuned
    num_layers = trial.suggest_int('num_layers', 1, 3)
    num_units = trial.suggest_int('num_units', 16, 128)
    regularisation = trial.suggest_float('regularisation', 0.0, 0.1)
    epochs = trial.suggest_int('epochs', 5, 20)
    batch_size = trial.suggest_categorical('batch_size', [16, 32, 64])

    # Define the model architecture
    model = tf.keras.Sequential()
    model.add(tf.keras.layers.Dense(num_units, activation='relu', input_shape=(x_train.shape[1],)))
    for _ in range(num_layers):
        model.add(tf.keras.layers.Dense(num_units, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(regularisation)))
    model.add(tf.keras.layers.Dense(1, activation='sigmoid'))

    # Compile the model with AUC-ROC as the metric
    model.compile(optimizer='adam',
                  loss='binary_crossentropy',
                  metrics=[tf.keras.metrics.AUC(curve='ROC')])

    # Train the model
    model.fit(x_train, y_train[:, 1], epochs=epochs, batch_size=batch_size, validation_data=(x_cv, y_cv[:, 1]), verbose=0)

    # Calculate the AUC-ROC score on the validation set
    y_pred = model.predict(x_cv)
    auc_roc = roc_auc_score(y_cv[:, 1], y_pred)

    return auc_roc


In [4]:
# Create Optuna study for model_ec1
study_ec1 = optuna.create_study(direction='maximize')
study_ec1.optimize(objective_ec1, n_trials=50)

# Get the best hyperparameters and best AUC-ROC score for model_ec1
best_params_ec1 = study_ec1.best_params
best_auc_roc_ec1 = study_ec1.best_value

# Define the best model architecture for model_ec1
best_model_ec1 = tf.keras.Sequential()
best_model_ec1.add(tf.keras.layers.Dense(best_params_ec1['num_units'], activation='relu', input_shape=(x_train.shape[1],)))
for _ in range(best_params_ec1['num_layers']):
    best_model_ec1.add(tf.keras.layers.Dense(best_params_ec1['num_units'], activation='relu', kernel_regularizer=tf.keras.regularizers.l2(best_params_ec1['regularisation'])))
best_model_ec1.add(tf.keras.layers.Dense(1, activation='sigmoid'))

# Compile the best model for model_ec1
best_model_ec1.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=[tf.keras.metrics.AUC(curve='ROC')])

# Train the best model for model_ec1
best_model_ec1.fit(x_train, y_train[:, 0], epochs=best_params_ec1['epochs'], batch_size=best_params_ec1['batch_size'], validation_data=(x_cv, y_cv[:, 0]), verbose=0)

# Get the predicted probabilities on the test set for model_ec1
cv_y_pred_prob_1 = best_model_ec1.predict(x_cv)
train_y_pred_prob_1 = best_model_ec1.predict(x_train)

# Calculate AUC-ROC score on the test set for model_ec1
train_auc_roc_1 = roc_auc_score(y_train[:, 0], train_y_pred_prob_1)
cv_auc_roc_1 = roc_auc_score(y_cv[:, 0], cv_y_pred_prob_1)


# Create Optuna study for model_ec2
study_ec2 = optuna.create_study(direction='maximize')
study_ec2.optimize(objective_ec2, n_trials=50)

# Get the best hyperparameters and best AUC-ROC score for model_ec2
best_params_ec2 = study_ec2.best_params
best_auc_roc_ec2 = study_ec2.best_value

# Define the best model architecture for model_ec2
best_model_ec2 = tf.keras.Sequential()
best_model_ec2.add(tf.keras.layers.Dense(best_params_ec2['num_units'], activation='relu', input_shape=(x_train.shape[1],)))
for _ in range(best_params_ec2['num_layers']):
    best_model_ec2.add(tf.keras.layers.Dense(best_params_ec2['num_units'], activation='relu', kernel_regularizer=tf.keras.regularizers.l2(best_params_ec2['regularisation'])))
best_model_ec2.add(tf.keras.layers.Dense(1, activation='sigmoid'))

# Compile the best model for model_ec2
best_model_ec2.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=[tf.keras.metrics.AUC(curve='ROC')])

# Train the best model for model_ec2
best_model_ec2.fit(x_train, y_train[:, 1], epochs=best_params_ec2['epochs'], batch_size=best_params_ec2['batch_size'], validation_data=(x_cv, y_cv[:, 1]), verbose=0)

# Get the predicted probabilities on the test set for model_ec2
cv_y_pred_prob_2 = best_model_ec2.predict(x_cv)
train_y_pred_prob_2 = best_model_ec2.predict(x_train)

# Calculate AUC-ROC score on the test set for model_ec2
train_auc_roc_2 = roc_auc_score(y_train[:, 1], train_y_pred_prob_2)
cv_auc_roc_2 = roc_auc_score(y_cv[:, 1], cv_y_pred_prob_2)


print('\n', '='*150, '\n')

print("Train AUC-ROC score 1:", train_auc_roc_1)
print("Train AUC-ROC score 2:", train_auc_roc_2)

print("CV AUC-ROC score 1:", cv_auc_roc_1)
print("CV AUC-ROC score 2:", cv_auc_roc_2)

print("\nAvg AUC-ROC score: ", (cv_auc_roc_1 + cv_auc_roc_2)/2)


[I 2023-07-10 14:27:06,874] A new study created in memory with name: no-name-5973ce17-f41b-4eab-a73d-7a681bf62197




[I 2023-07-10 14:27:15,493] Trial 0 finished with value: 0.705157721048127 and parameters: {'num_layers': 2, 'num_units': 70, 'regularisation': 0.08792828892304248, 'epochs': 8, 'batch_size': 32}. Best is trial 0 with value: 0.705157721048127.




[I 2023-07-10 14:27:33,873] Trial 1 finished with value: 0.6999756196918823 and parameters: {'num_layers': 1, 'num_units': 19, 'regularisation': 0.026257927246995463, 'epochs': 16, 'batch_size': 16}. Best is trial 0 with value: 0.705157721048127.




[I 2023-07-10 14:27:39,128] Trial 2 finished with value: 0.6891670164263612 and parameters: {'num_layers': 1, 'num_units': 121, 'regularisation': 0.026583004922898557, 'epochs': 7, 'batch_size': 32}. Best is trial 0 with value: 0.705157721048127.




[I 2023-07-10 14:27:48,079] Trial 3 finished with value: 0.7078135801402332 and parameters: {'num_layers': 3, 'num_units': 128, 'regularisation': 0.06563922762943102, 'epochs': 11, 'batch_size': 32}. Best is trial 3 with value: 0.7078135801402332.




[I 2023-07-10 14:27:54,985] Trial 4 finished with value: 0.707047452103496 and parameters: {'num_layers': 2, 'num_units': 124, 'regularisation': 0.06855503803204444, 'epochs': 9, 'batch_size': 32}. Best is trial 3 with value: 0.7078135801402332.




[I 2023-07-10 14:28:08,915] Trial 5 finished with value: 0.7020291720159326 and parameters: {'num_layers': 1, 'num_units': 100, 'regularisation': 0.09557302902433483, 'epochs': 12, 'batch_size': 16}. Best is trial 3 with value: 0.7078135801402332.




[I 2023-07-10 14:28:19,641] Trial 6 finished with value: 0.7081561388998616 and parameters: {'num_layers': 2, 'num_units': 37, 'regularisation': 0.09165296557728086, 'epochs': 17, 'batch_size': 32}. Best is trial 6 with value: 0.7081561388998616.




[I 2023-07-10 14:28:32,523] Trial 7 finished with value: 0.6882416962933702 and parameters: {'num_layers': 3, 'num_units': 53, 'regularisation': 0.00024135024390965488, 'epochs': 19, 'batch_size': 32}. Best is trial 6 with value: 0.7081561388998616.




[I 2023-07-10 14:28:37,590] Trial 8 finished with value: 0.5 and parameters: {'num_layers': 3, 'num_units': 49, 'regularisation': 0.09478991023648008, 'epochs': 6, 'batch_size': 32}. Best is trial 6 with value: 0.7081561388998616.




[I 2023-07-10 14:28:41,874] Trial 9 finished with value: 0.7048303356870103 and parameters: {'num_layers': 3, 'num_units': 85, 'regularisation': 0.05226854364731538, 'epochs': 8, 'batch_size': 64}. Best is trial 6 with value: 0.7081561388998616.




[I 2023-07-10 14:28:47,795] Trial 10 finished with value: 0.7064963234083876 and parameters: {'num_layers': 2, 'num_units': 16, 'regularisation': 0.07736816580577575, 'epochs': 15, 'batch_size': 64}. Best is trial 6 with value: 0.7081561388998616.




[I 2023-07-10 14:28:55,654] Trial 11 finished with value: 0.7008878238363289 and parameters: {'num_layers': 2, 'num_units': 42, 'regularisation': 0.07065112374611728, 'epochs': 12, 'batch_size': 32}. Best is trial 6 with value: 0.7081561388998616.




[I 2023-07-10 14:29:09,564] Trial 12 finished with value: 0.5 and parameters: {'num_layers': 3, 'num_units': 101, 'regularisation': 0.08083871539979537, 'epochs': 20, 'batch_size': 32}. Best is trial 6 with value: 0.7081561388998616.




[I 2023-07-10 14:29:15,316] Trial 13 finished with value: 0.702903314075976 and parameters: {'num_layers': 2, 'num_units': 35, 'regularisation': 0.09997651588842807, 'epochs': 15, 'batch_size': 64}. Best is trial 6 with value: 0.7081561388998616.




[I 2023-07-10 14:29:26,789] Trial 14 finished with value: 0.7015446519685299 and parameters: {'num_layers': 3, 'num_units': 65, 'regularisation': 0.06126100286031808, 'epochs': 17, 'batch_size': 32}. Best is trial 6 with value: 0.7081561388998616.




[I 2023-07-10 14:29:39,005] Trial 15 finished with value: 0.7075283716834552 and parameters: {'num_layers': 2, 'num_units': 88, 'regularisation': 0.0827978861316466, 'epochs': 10, 'batch_size': 16}. Best is trial 6 with value: 0.7081561388998616.




[I 2023-07-10 14:29:48,146] Trial 16 finished with value: 0.7026932010830205 and parameters: {'num_layers': 3, 'num_units': 33, 'regularisation': 0.0640564575295418, 'epochs': 13, 'batch_size': 32}. Best is trial 6 with value: 0.7081561388998616.




[I 2023-07-10 14:29:59,439] Trial 17 finished with value: 0.7077768039370598 and parameters: {'num_layers': 1, 'num_units': 109, 'regularisation': 0.08681688561474564, 'epochs': 18, 'batch_size': 32}. Best is trial 6 with value: 0.7081561388998616.




[I 2023-07-10 14:30:16,208] Trial 18 finished with value: 0.7030406461913226 and parameters: {'num_layers': 2, 'num_units': 62, 'regularisation': 0.05193299349614561, 'epochs': 14, 'batch_size': 16}. Best is trial 6 with value: 0.7081561388998616.




[I 2023-07-10 14:30:21,597] Trial 19 finished with value: 0.7062280885838436 and parameters: {'num_layers': 3, 'num_units': 87, 'regularisation': 0.07474545626879907, 'epochs': 11, 'batch_size': 64}. Best is trial 6 with value: 0.7081561388998616.




[I 2023-07-10 14:30:25,854] Trial 20 finished with value: 0.70299872646323 and parameters: {'num_layers': 2, 'num_units': 77, 'regularisation': 0.0995460215454539, 'epochs': 5, 'batch_size': 32}. Best is trial 6 with value: 0.7081561388998616.




[I 2023-07-10 14:30:37,661] Trial 21 finished with value: 0.7049864416683127 and parameters: {'num_layers': 1, 'num_units': 113, 'regularisation': 0.08722978316489689, 'epochs': 18, 'batch_size': 32}. Best is trial 6 with value: 0.7081561388998616.




[I 2023-07-10 14:30:49,567] Trial 22 finished with value: 0.6907350199980249 and parameters: {'num_layers': 1, 'num_units': 110, 'regularisation': 0.08818811960709641, 'epochs': 18, 'batch_size': 32}. Best is trial 6 with value: 0.7081561388998616.




[I 2023-07-10 14:31:02,353] Trial 23 finished with value: 0.6982340221541905 and parameters: {'num_layers': 1, 'num_units': 128, 'regularisation': 0.07777598286951115, 'epochs': 20, 'batch_size': 32}. Best is trial 6 with value: 0.7081561388998616.




[I 2023-07-10 14:31:13,404] Trial 24 finished with value: 0.6879760332312858 and parameters: {'num_layers': 1, 'num_units': 115, 'regularisation': 0.0899030345022471, 'epochs': 17, 'batch_size': 32}. Best is trial 6 with value: 0.7081561388998616.




[I 2023-07-10 14:31:23,437] Trial 25 finished with value: 0.7048578535453289 and parameters: {'num_layers': 2, 'num_units': 101, 'regularisation': 0.0823731808569969, 'epochs': 14, 'batch_size': 32}. Best is trial 6 with value: 0.7081561388998616.




[I 2023-07-10 14:31:33,737] Trial 26 finished with value: 0.7035619424418988 and parameters: {'num_layers': 1, 'num_units': 107, 'regularisation': 0.07178638130227866, 'epochs': 16, 'batch_size': 32}. Best is trial 6 with value: 0.7081561388998616.




[I 2023-07-10 14:31:41,072] Trial 27 finished with value: 0.7031051974290605 and parameters: {'num_layers': 2, 'num_units': 27, 'regularisation': 0.06340781558879875, 'epochs': 10, 'batch_size': 32}. Best is trial 6 with value: 0.7081561388998616.




[I 2023-07-10 14:31:50,487] Trial 28 finished with value: 0.5 and parameters: {'num_layers': 3, 'num_units': 120, 'regularisation': 0.08862843126257722, 'epochs': 19, 'batch_size': 64}. Best is trial 6 with value: 0.7081561388998616.




[I 2023-07-10 14:32:06,996] Trial 29 finished with value: 0.5 and parameters: {'num_layers': 3, 'num_units': 73, 'regularisation': 0.09294776571901499, 'epochs': 13, 'batch_size': 16}. Best is trial 6 with value: 0.7081561388998616.




[I 2023-07-10 14:32:14,423] Trial 30 finished with value: 0.7080406667654224 and parameters: {'num_layers': 1, 'num_units': 94, 'regularisation': 0.08420738008963202, 'epochs': 11, 'batch_size': 32}. Best is trial 6 with value: 0.7081561388998616.




[I 2023-07-10 14:32:21,860] Trial 31 finished with value: 0.7026646545197183 and parameters: {'num_layers': 1, 'num_units': 95, 'regularisation': 0.08493191289435766, 'epochs': 11, 'batch_size': 32}. Best is trial 6 with value: 0.7081561388998616.




[I 2023-07-10 14:32:28,267] Trial 32 finished with value: 0.70279272829021 and parameters: {'num_layers': 1, 'num_units': 117, 'regularisation': 0.07671816218214646, 'epochs': 9, 'batch_size': 32}. Best is trial 6 with value: 0.7081561388998616.




[I 2023-07-10 14:32:35,549] Trial 33 finished with value: 0.7043653610343011 and parameters: {'num_layers': 1, 'num_units': 106, 'regularisation': 0.08405562460990211, 'epochs': 11, 'batch_size': 32}. Best is trial 6 with value: 0.7081561388998616.




[I 2023-07-10 14:32:45,982] Trial 34 finished with value: 0.7054398433899532 and parameters: {'num_layers': 1, 'num_units': 127, 'regularisation': 0.0923921306289461, 'epochs': 16, 'batch_size': 32}. Best is trial 6 with value: 0.7081561388998616.




[I 2023-07-10 14:32:51,695] Trial 35 finished with value: 0.7088448568865626 and parameters: {'num_layers': 1, 'num_units': 94, 'regularisation': 0.06883398438166553, 'epochs': 8, 'batch_size': 32}. Best is trial 35 with value: 0.7088448568865626.




[I 2023-07-10 14:33:00,663] Trial 36 finished with value: 0.7074368169398908 and parameters: {'num_layers': 2, 'num_units': 81, 'regularisation': 0.06896459605716851, 'epochs': 7, 'batch_size': 16}. Best is trial 35 with value: 0.7088448568865626.




[I 2023-07-10 14:33:06,269] Trial 37 finished with value: 0.7087887924649418 and parameters: {'num_layers': 1, 'num_units': 94, 'regularisation': 0.062005522976151244, 'epochs': 8, 'batch_size': 32}. Best is trial 35 with value: 0.7088448568865626.




[I 2023-07-10 14:33:12,276] Trial 38 finished with value: 0.7030787082757259 and parameters: {'num_layers': 1, 'num_units': 94, 'regularisation': 0.06074414286409703, 'epochs': 8, 'batch_size': 32}. Best is trial 35 with value: 0.7088448568865626.




[I 2023-07-10 14:33:17,427] Trial 39 finished with value: 0.7079684002403054 and parameters: {'num_layers': 1, 'num_units': 58, 'regularisation': 0.056885511058906935, 'epochs': 7, 'batch_size': 32}. Best is trial 35 with value: 0.7088448568865626.




[I 2023-07-10 14:33:23,630] Trial 40 finished with value: 0.7077374559714267 and parameters: {'num_layers': 1, 'num_units': 93, 'regularisation': 0.04010078733157288, 'epochs': 9, 'batch_size': 32}. Best is trial 35 with value: 0.7088448568865626.




[I 2023-07-10 14:33:27,996] Trial 41 finished with value: 0.7102204926262426 and parameters: {'num_layers': 1, 'num_units': 51, 'regularisation': 0.05838135765602229, 'epochs': 6, 'batch_size': 32}. Best is trial 41 with value: 0.7102204926262426.




[I 2023-07-10 14:33:31,730] Trial 42 finished with value: 0.7005696968200672 and parameters: {'num_layers': 1, 'num_units': 43, 'regularisation': 0.06745502397973904, 'epochs': 5, 'batch_size': 32}. Best is trial 41 with value: 0.7102204926262426.




[I 2023-07-10 14:33:36,099] Trial 43 finished with value: 0.6990430986240042 and parameters: {'num_layers': 1, 'num_units': 50, 'regularisation': 0.07150284826906866, 'epochs': 6, 'batch_size': 32}. Best is trial 41 with value: 0.7102204926262426.




[I 2023-07-10 14:33:41,527] Trial 44 finished with value: 0.7075198848673382 and parameters: {'num_layers': 1, 'num_units': 22, 'regularisation': 0.0566765603294165, 'epochs': 8, 'batch_size': 32}. Best is trial 41 with value: 0.7102204926262426.




[I 2023-07-10 14:33:44,543] Trial 45 finished with value: 0.6929053303377444 and parameters: {'num_layers': 1, 'num_units': 42, 'regularisation': 0.07916813050053909, 'epochs': 6, 'batch_size': 64}. Best is trial 41 with value: 0.7102204926262426.




[I 2023-07-10 14:33:49,763] Trial 46 finished with value: 0.7053994667193364 and parameters: {'num_layers': 1, 'num_units': 66, 'regularisation': 0.044666651093722365, 'epochs': 7, 'batch_size': 32}. Best is trial 41 with value: 0.7102204926262426.




[I 2023-07-10 14:33:55,982] Trial 47 finished with value: 0.7080566116926723 and parameters: {'num_layers': 1, 'num_units': 79, 'regularisation': 0.07467119737035621, 'epochs': 9, 'batch_size': 32}. Best is trial 41 with value: 0.7102204926262426.




[I 2023-07-10 14:34:03,746] Trial 48 finished with value: 0.6937036054052275 and parameters: {'num_layers': 2, 'num_units': 74, 'regularisation': 0.07308013799752147, 'epochs': 6, 'batch_size': 16}. Best is trial 41 with value: 0.7102204926262426.




[I 2023-07-10 14:34:10,112] Trial 49 finished with value: 0.7043172690763052 and parameters: {'num_layers': 1, 'num_units': 81, 'regularisation': 0.06866297801238404, 'epochs': 9, 'batch_size': 32}. Best is trial 41 with value: 0.7102204926262426.




[I 2023-07-10 14:34:15,841] A new study created in memory with name: no-name-04a661b8-beb9-46c4-976a-37d7a8c165cc




[I 2023-07-10 14:34:19,271] Trial 0 finished with value: 0.5 and parameters: {'num_layers': 3, 'num_units': 76, 'regularisation': 0.06572062341245898, 'epochs': 5, 'batch_size': 64}. Best is trial 0 with value: 0.5.




[I 2023-07-10 14:34:39,960] Trial 1 finished with value: 0.5 and parameters: {'num_layers': 3, 'num_units': 55, 'regularisation': 0.038846953221454705, 'epochs': 17, 'batch_size': 16}. Best is trial 0 with value: 0.5.




[I 2023-07-10 14:34:55,311] Trial 2 finished with value: 0.5 and parameters: {'num_layers': 3, 'num_units': 69, 'regularisation': 0.0873037825158332, 'epochs': 12, 'batch_size': 16}. Best is trial 0 with value: 0.5.




[I 2023-07-10 14:35:18,982] Trial 3 finished with value: 0.5 and parameters: {'num_layers': 3, 'num_units': 22, 'regularisation': 0.06269889113773404, 'epochs': 20, 'batch_size': 16}. Best is trial 0 with value: 0.5.




[I 2023-07-10 14:35:23,439] Trial 4 finished with value: 0.5 and parameters: {'num_layers': 3, 'num_units': 63, 'regularisation': 0.049140770119614735, 'epochs': 5, 'batch_size': 32}. Best is trial 0 with value: 0.5.




[I 2023-07-10 14:35:34,702] Trial 5 finished with value: 0.4941519953051643 and parameters: {'num_layers': 3, 'num_units': 25, 'regularisation': 0.026305230048325003, 'epochs': 17, 'batch_size': 32}. Best is trial 0 with value: 0.5.




[I 2023-07-10 14:35:38,037] Trial 6 finished with value: 0.5943188820422536 and parameters: {'num_layers': 1, 'num_units': 102, 'regularisation': 0.07262524372650488, 'epochs': 7, 'batch_size': 64}. Best is trial 6 with value: 0.5943188820422536.




[I 2023-07-10 14:35:44,791] Trial 7 finished with value: 0.5425520833333334 and parameters: {'num_layers': 3, 'num_units': 115, 'regularisation': 0.03434918409361816, 'epochs': 8, 'batch_size': 32}. Best is trial 6 with value: 0.5943188820422536.




[I 2023-07-10 14:35:53,042] Trial 8 finished with value: 0.5 and parameters: {'num_layers': 3, 'num_units': 122, 'regularisation': 0.04170579662912688, 'epochs': 16, 'batch_size': 64}. Best is trial 6 with value: 0.5943188820422536.




[I 2023-07-10 14:36:06,288] Trial 9 finished with value: 0.5883626760563381 and parameters: {'num_layers': 2, 'num_units': 74, 'regularisation': 0.026200176172021583, 'epochs': 20, 'batch_size': 32}. Best is trial 6 with value: 0.5943188820422536.




[I 2023-07-10 14:36:10,525] Trial 10 finished with value: 0.5727890258215962 and parameters: {'num_layers': 1, 'num_units': 99, 'regularisation': 0.0027211037631403867, 'epochs': 10, 'batch_size': 64}. Best is trial 6 with value: 0.5943188820422536.




[I 2023-07-10 14:36:19,341] Trial 11 finished with value: 0.5829760856807512 and parameters: {'num_layers': 1, 'num_units': 90, 'regularisation': 0.09817240429594198, 'epochs': 14, 'batch_size': 32}. Best is trial 6 with value: 0.5943188820422536.




[I 2023-07-10 14:36:27,412] Trial 12 finished with value: 0.5887613703051643 and parameters: {'num_layers': 2, 'num_units': 101, 'regularisation': 0.016555139890791333, 'epochs': 20, 'batch_size': 64}. Best is trial 6 with value: 0.5943188820422536.




[I 2023-07-10 14:36:31,405] Trial 13 finished with value: 0.5896944688967136 and parameters: {'num_layers': 2, 'num_units': 104, 'regularisation': 0.0019125256253641875, 'epochs': 8, 'batch_size': 64}. Best is trial 6 with value: 0.5943188820422536.




[I 2023-07-10 14:36:35,007] Trial 14 finished with value: 0.5938398620892019 and parameters: {'num_layers': 1, 'num_units': 109, 'regularisation': 0.0009759819686778334, 'epochs': 8, 'batch_size': 64}. Best is trial 6 with value: 0.5943188820422536.




[I 2023-07-10 14:36:38,753] Trial 15 finished with value: 0.5816978433098592 and parameters: {'num_layers': 1, 'num_units': 123, 'regularisation': 0.06203116020335183, 'epochs': 8, 'batch_size': 64}. Best is trial 6 with value: 0.5943188820422536.




[I 2023-07-10 14:36:42,917] Trial 16 finished with value: 0.5880820862676057 and parameters: {'num_layers': 1, 'num_units': 88, 'regularisation': 0.07582306158335769, 'epochs': 10, 'batch_size': 64}. Best is trial 6 with value: 0.5943188820422536.




[I 2023-07-10 14:36:45,819] Trial 17 finished with value: 0.5831598444835681 and parameters: {'num_layers': 1, 'num_units': 42, 'regularisation': 0.05171027305551369, 'epochs': 6, 'batch_size': 64}. Best is trial 6 with value: 0.5943188820422536.




[I 2023-07-10 14:36:50,677] Trial 18 finished with value: 0.5871288145539906 and parameters: {'num_layers': 1, 'num_units': 111, 'regularisation': 0.07655282529112317, 'epochs': 11, 'batch_size': 64}. Best is trial 6 with value: 0.5943188820422536.




[I 2023-07-10 14:36:54,275] Trial 19 finished with value: 0.5887782423708919 and parameters: {'num_layers': 2, 'num_units': 89, 'regularisation': 0.014183953510946241, 'epochs': 7, 'batch_size': 64}. Best is trial 6 with value: 0.5943188820422536.




[I 2023-07-10 14:37:11,737] Trial 20 finished with value: 0.5 and parameters: {'num_layers': 2, 'num_units': 127, 'regularisation': 0.05201525030221402, 'epochs': 14, 'batch_size': 16}. Best is trial 6 with value: 0.5943188820422536.




[I 2023-07-10 14:37:16,509] Trial 21 finished with value: 0.5858670774647887 and parameters: {'num_layers': 2, 'num_units': 103, 'regularisation': 0.00038764835036880546, 'epochs': 9, 'batch_size': 64}. Best is trial 6 with value: 0.5943188820422536.




[I 2023-07-10 14:37:20,004] Trial 22 finished with value: 0.5823870305164318 and parameters: {'num_layers': 1, 'num_units': 111, 'regularisation': 0.0078114863595340886, 'epochs': 7, 'batch_size': 64}. Best is trial 6 with value: 0.5943188820422536.




[I 2023-07-10 14:37:24,324] Trial 23 finished with value: 0.5870312500000001 and parameters: {'num_layers': 2, 'num_units': 93, 'regularisation': 0.01121556160580133, 'epochs': 9, 'batch_size': 64}. Best is trial 6 with value: 0.5943188820422536.




[I 2023-07-10 14:37:27,769] Trial 24 finished with value: 0.5910735768779343 and parameters: {'num_layers': 1, 'num_units': 108, 'regularisation': 0.02041680568085662, 'epochs': 7, 'batch_size': 64}. Best is trial 6 with value: 0.5943188820422536.




[I 2023-07-10 14:37:30,799] Trial 25 finished with value: 0.5843973738262911 and parameters: {'num_layers': 1, 'num_units': 80, 'regularisation': 0.018722710546967586, 'epochs': 6, 'batch_size': 64}. Best is trial 6 with value: 0.5943188820422536.




[I 2023-07-10 14:37:33,968] Trial 26 finished with value: 0.5698081719483568 and parameters: {'num_layers': 1, 'num_units': 114, 'regularisation': 0.023486033770728207, 'epochs': 6, 'batch_size': 64}. Best is trial 6 with value: 0.5943188820422536.




[I 2023-07-10 14:37:38,834] Trial 27 finished with value: 0.5890724031690141 and parameters: {'num_layers': 1, 'num_units': 85, 'regularisation': 0.009396249412452116, 'epochs': 12, 'batch_size': 64}. Best is trial 6 with value: 0.5943188820422536.




[I 2023-07-10 14:37:47,862] Trial 28 finished with value: 0.5912323943661971 and parameters: {'num_layers': 1, 'num_units': 117, 'regularisation': 0.032244968297167, 'epochs': 7, 'batch_size': 16}. Best is trial 6 with value: 0.5943188820422536.




[I 2023-07-10 14:37:54,381] Trial 29 finished with value: 0.5785416666666666 and parameters: {'num_layers': 1, 'num_units': 118, 'regularisation': 0.03093338244768823, 'epochs': 5, 'batch_size': 16}. Best is trial 6 with value: 0.5943188820422536.




[I 2023-07-10 14:38:06,112] Trial 30 finished with value: 0.5653858568075117 and parameters: {'num_layers': 1, 'num_units': 128, 'regularisation': 0.031641740226641124, 'epochs': 10, 'batch_size': 16}. Best is trial 6 with value: 0.5943188820422536.




[I 2023-07-10 14:38:14,857] Trial 31 finished with value: 0.5904221684272299 and parameters: {'num_layers': 1, 'num_units': 107, 'regularisation': 0.019637160844143603, 'epochs': 7, 'batch_size': 16}. Best is trial 6 with value: 0.5943188820422536.




[I 2023-07-10 14:38:21,217] Trial 32 finished with value: 0.5856099618544601 and parameters: {'num_layers': 1, 'num_units': 99, 'regularisation': 0.02341110755588008, 'epochs': 5, 'batch_size': 16}. Best is trial 6 with value: 0.5943188820422536.




[I 2023-07-10 14:38:32,289] Trial 33 finished with value: 0.5706294014084506 and parameters: {'num_layers': 1, 'num_units': 118, 'regularisation': 0.00853804286473351, 'epochs': 9, 'batch_size': 16}. Best is trial 6 with value: 0.5943188820422536.




[I 2023-07-10 14:38:35,587] Trial 34 finished with value: 0.5790089495305165 and parameters: {'num_layers': 1, 'num_units': 96, 'regularisation': 0.03957608255088334, 'epochs': 7, 'batch_size': 64}. Best is trial 6 with value: 0.5943188820422536.




[I 2023-07-10 14:38:43,063] Trial 35 finished with value: 0.5841175176056338 and parameters: {'num_layers': 1, 'num_units': 65, 'regularisation': 0.017526337205205154, 'epochs': 6, 'batch_size': 16}. Best is trial 6 with value: 0.5943188820422536.




[I 2023-07-10 14:38:52,789] Trial 36 finished with value: 0.5893100792253521 and parameters: {'num_layers': 1, 'num_units': 53, 'regularisation': 0.013954755254137051, 'epochs': 8, 'batch_size': 16}. Best is trial 6 with value: 0.5943188820422536.




[I 2023-07-10 14:38:57,865] Trial 37 finished with value: 0.5868353873239437 and parameters: {'num_layers': 2, 'num_units': 110, 'regularisation': 0.03608392637762361, 'epochs': 11, 'batch_size': 64}. Best is trial 6 with value: 0.5943188820422536.




[I 2023-07-10 14:39:01,723] Trial 38 finished with value: 0.5811263937793427 and parameters: {'num_layers': 1, 'num_units': 80, 'regularisation': 0.028279592780944465, 'epochs': 5, 'batch_size': 32}. Best is trial 6 with value: 0.5943188820422536.




[I 2023-07-10 14:39:05,822] Trial 39 finished with value: 0.5861175909624413 and parameters: {'num_layers': 1, 'num_units': 122, 'regularisation': 0.04515510274451426, 'epochs': 9, 'batch_size': 64}. Best is trial 6 with value: 0.5943188820422536.




[I 2023-07-10 14:39:14,937] Trial 40 finished with value: 0.5857808832159624 and parameters: {'num_layers': 2, 'num_units': 107, 'regularisation': 0.0359506610551478, 'epochs': 13, 'batch_size': 32}. Best is trial 6 with value: 0.5943188820422536.




[I 2023-07-10 14:39:23,523] Trial 41 finished with value: 0.5862367957746479 and parameters: {'num_layers': 1, 'num_units': 106, 'regularisation': 0.022049682412355356, 'epochs': 7, 'batch_size': 16}. Best is trial 6 with value: 0.5943188820422536.




[I 2023-07-10 14:39:32,129] Trial 42 finished with value: 0.5615338908450704 and parameters: {'num_layers': 1, 'num_units': 115, 'regularisation': 0.02859089535626047, 'epochs': 7, 'batch_size': 16}. Best is trial 6 with value: 0.5943188820422536.




[I 2023-07-10 14:39:41,504] Trial 43 finished with value: 0.592959580399061 and parameters: {'num_layers': 1, 'num_units': 96, 'regularisation': 0.0055088623191913225, 'epochs': 8, 'batch_size': 16}. Best is trial 6 with value: 0.5943188820422536.




[I 2023-07-10 14:39:49,044] Trial 44 finished with value: 0.5947014377934272 and parameters: {'num_layers': 1, 'num_units': 95, 'regularisation': 0.005652188067914251, 'epochs': 6, 'batch_size': 16}. Best is trial 44 with value: 0.5947014377934272.




[I 2023-07-10 14:39:58,785] Trial 45 finished with value: 0.584975792253521 and parameters: {'num_layers': 1, 'num_units': 96, 'regularisation': 0.004349795637134183, 'epochs': 8, 'batch_size': 16}. Best is trial 44 with value: 0.5947014377934272.




[I 2023-07-10 14:40:17,598] Trial 46 finished with value: 0.585712294600939 and parameters: {'num_layers': 1, 'num_units': 82, 'regularisation': 0.005129323689953162, 'epochs': 17, 'batch_size': 16}. Best is trial 44 with value: 0.5947014377934272.




[I 2023-07-10 14:40:24,937] Trial 47 finished with value: 0.5922725938967137 and parameters: {'num_layers': 1, 'num_units': 69, 'regularisation': 0.0003642891551854724, 'epochs': 6, 'batch_size': 16}. Best is trial 44 with value: 0.5947014377934272.




[I 2023-07-10 14:40:32,301] Trial 48 finished with value: 0.582918867370892 and parameters: {'num_layers': 1, 'num_units': 73, 'regularisation': 0.0005064917731468646, 'epochs': 6, 'batch_size': 16}. Best is trial 44 with value: 0.5947014377934272.




[I 2023-07-10 14:40:39,147] Trial 49 finished with value: 0.5827167693661972 and parameters: {'num_layers': 2, 'num_units': 58, 'regularisation': 0.0049495648841783955, 'epochs': 5, 'batch_size': 16}. Best is trial 44 with value: 0.5947014377934272.




Train AUC-ROC score 1: 0.6996886791584781
Train AUC-ROC score 2: 0.6026649061206346
CV AUC-ROC score 1: 0.6942426468167754
CV AUC-ROC score 2: 0.5863134536384976

Avg AUC-ROC score:  0.6402780502276365


In [5]:
y_pred_1, y_pred_2 = best_model_ec1.predict(x_test), best_model_ec2.predict(x_test)



In [6]:
# Flattening the multidimensional arrays
y_pred_1_flat = y_pred_1.flatten()
y_pred_2_flat = y_pred_2.flatten()

# Creating the DataFrames
df_y_pred_1 = pd.DataFrame({'EC1': y_pred_1_flat})
df_y_pred_2 = pd.DataFrame({'EC2': y_pred_2_flat})

# Rest of the code remains the same
ids = df_test['id']
df_ids = pd.DataFrame({'id': ids})

result = pd.concat([df_ids, df_y_pred_1, df_y_pred_2], axis=1)

result.to_csv('submissions/submission_2_neural_net_3.csv', index=False)
