## BIOINFORMATICS THESIS: MULTIMODAL NEURAL NETWORK

# CELL LINE: A549

In [24]:
import pandas as pd 
import numpy as np
import os

import torch
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.optim as optim
from collections import defaultdict, OrderedDict
import pickle

import sqlite3
from sqlalchemy import create_engine

# Train on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

In [25]:
# create a database to store optuna studies with sqlite backend
#engine = create_engine('sqlite:///BIOINF_optuna_tuning.db')

In [26]:
from BIOINF_tesi.data_pipe import CELL_LINES, TASKS

In [27]:
# Use the first entry of CELL_LINES as the cell line for every run in this notebook;
# the bare name on the last line displays the selected value.
cell_line = CELL_LINES[0]
cell_line

'A549'

---

In [28]:
from BIOINF_tesi.data_pipe import Load_Create_Task
from BIOINF_tesi.data_pipe import Build_DataLoader_Pipeline

In [29]:
from BIOINF_tesi.models import FFNN, CNN, CNN_LSTM
from BIOINF_tesi.models.utils import fit, Param_Search, Kfold_CV

In [None]:
# Load the scores saved by earlier runs so that new results are added to them.
# On a first run (or a fresh checkout) the file does not exist yet, so start
# from an empty mapping instead of failing with FileNotFoundError.
# NOTE: pickle.load can execute arbitrary code; only load a results file that
# was written by this notebook.
results_path = 'results_dict.pickle'
if os.path.exists(results_path):
    with open(results_path, 'rb') as fin:
        saved_results = pickle.load(fin)
else:
    saved_results = {}

# Wrap in nested defaultdicts so results_dict[cell_line][task][model_key] can be
# assigned directly without creating the intermediate levels by hand.
results_dict = defaultdict(lambda: defaultdict(dict), saved_results)

## 1) ACTIVE ENHANCERS vs INACTIVE ENHANCERS

In [31]:
# Select the first task in TASKS. The cells below use `task` to locate the task
# pickle and to tag study names, checkpoint paths and result keys.
task = TASKS[0]
task

'active_E_vs_inactive_E'

In [32]:
# Build the data-loading pipeline from the pickle file named after the current task.
# The resulting object is handed to every Kfold_CV run of this task.
pipe_data_load = Build_DataLoader_Pipeline(path_name=f'{task}.pickle')

Data Preprocessing Done!


### 1. FFNN

In [27]:
# Model class for the runs below; its __name__ tags study names, checkpoint paths and result keys.
model=FFNN

In [28]:
# IMBALANCED
# Augmentation type forwarded to Kfold_CV; it also tags the study name and the
# checkpoint paths of this run.
type_augm_genfeatures = 'smote'

kf_CV = Kfold_CV()

# Collect the run configuration first, then launch the cross-validation with it.
cv_settings = dict(
    build_dataloader_pipeline=pipe_data_load,
    num_epochs=100,
    n_folds=3,
    cell_line=cell_line,
    sequence=False,
    augmentation=True,
    type_augm_genfeatures=type_augm_genfeatures,
    model=model,
    device=device,
    task=task,
    sampler='TPE',
    study_name=f'{cell_line}_{task}_{model.__name__}_{type_augm_genfeatures}',
    hp_model_path=f'{cell_line}_{task}_{model.__name__}_{type_augm_genfeatures}_HP',
    test_model_path=f'{cell_line}_{task}_{model.__name__}_{type_augm_genfeatures}_TEST',
)
kf_CV(**cv_settings)

  pruner=optuna.pruners.PatientPruner(optuna.pruners.MedianPruner(), patience=2),
[32m[I 2021-10-03 20:18:10,020][0m Using an existing study with name 'A549_active_E_vs_inactive_E_FFNN_smote_1' instead of creating a new one.[0m


>>> ITERATION N. 1





Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-03 20:19:16,489][0m Trial 1 finished with value: 0.05580985915492958 and parameters: {'n_layers': 2, 'n_units_l0': 256, 'dropout_l0': 0.0, 'n_units_l1': 16, 'dropout_l1': 0.0, 'optimizer': 'Adam', 'lr': 0.00021735193110052726, 'weight_decay': 0.007555933237491655}. Best is trial 1 with value: 0.05580985915492958.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-03 20:20:57,626][0m Trial 2 finished with value: 0.05595950704225351 and parameters: {'n_layers': 4, 'n_units_l0': 128, 'dropout_l0': 0.4, 'n_units_l1': 16, 'dropout_l1': 0.4, 'n_units_l2': 16, 'dropout_l2': 0.4, 'n_units_l3': 32, 'dropout_l3': 0.4, 'optimizer': 'RMSprop', 'lr': 2.7822665047156823e-05, 'weight_decay': 0.0041818396326109035}. Best is trial 2 with value: 0.05595950704225351.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

	add(Number alpha, Tensor other)
Consider using one of the following signatures instead:
	add(Tensor other, *, Number alpha) (Triggered internally at  ../torch/csrc/utils/python_arg_parser.cpp:1025.)
  grad = grad.add(group['weight_decay'], p.data)


EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-03 20:22:07,955][0m Trial 3 finished with value: 0.056998239436619705 and parameters: {'n_layers': 3, 'n_units_l0': 256, 'dropout_l0': 0.2, 'n_units_l1': 64, 'dropout_l1': 0.3, 'n_units_l2': 64, 'dropout_l2': 0.4, 'optimizer': 'Nadam', 'lr': 0.0003250707125537707, 'weight_decay': 0.0011787209188903456}. Best is trial 3 with value: 0.056998239436619705.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training
Study statistics: 
  Number of finished trials:  4
  Number of pruned trials:  0
  Number of complete trials:  3
Best trial:
  Value:  0.056998239436619705
  Params: 
    dropout_l0: 0.2
    dropout_l1: 0.3
    dropout_l2: 0.4
    lr: 0.0003250707125537707
    n_layers: 3
    n_units_l0: 256
    n_units_l1: 64
    n_units_l2: 64
    optimizer: Nadam
    weight_decay: 0.0011787209188903456



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


  pruner=optuna.pruners.PatientPruner(optuna.pruners.MedianPruner(), patience=2),
[32m[I 2021-10-03 20:23:52,791][0m A new study created in RDB with name: A549_active_E_vs_inactive_E_FFNN_smote_1_2[0m


EarlyStopping counter: 5 out of 5
Early stopping the training
AUPRC test score: 0.0932457576995809


>>> ITERATION N. 2



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-03 20:25:57,354][0m Trial 0 finished with value: 0.05764084507042251 and parameters: {'n_layers': 4, 'n_units_l0': 128, 'dropout_l0': 0.3, 'n_units_l1': 128, 'dropout_l1': 0.2, 'n_units_l2': 4, 'dropout_l2': 0.5, 'n_units_l3': 4, 'dropout_l3': 0.0, 'optimizer': 'RMSprop', 'lr': 1.1144985724816305e-05, 'weight_decay': 0.0006920569873982636}. Best is trial 0 with value: 0.05764084507042251.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-03 20:27:01,933][0m Trial 1 finished with value: 0.05779049295774647 and parameters: {'n_layers': 3, 'n_units_l0': 32, 'dropout_l0': 0.3, 'n_units_l1': 32, 'dropout_l1': 0.3, 'n_units_l2': 4, 'dropout_l2': 0.0, 'optimizer': 'Adam', 'lr': 0.05133572629314612, 'weight_decay': 0.00017967912023946235}. Best is trial 1 with value: 0.05779049295774647.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 1 out of 5


  recall = tps / tps[-1]


EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-03 20:30:17,437][0m Trial 2 finished with value: 0.057341549295774646 and parameters: {'n_layers': 2, 'n_units_l0': 32, 'dropout_l0': 0.0, 'n_units_l1': 64, 'dropout_l1': 0.0, 'optimizer': 'Adam', 'lr': 0.06946739310481091, 'weight_decay': 0.002182589678726423}. Best is trial 1 with value: 0.05779049295774647.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training
Study statistics: 
  Number of finished trials:  3
  Number of pruned trials:  0
  Number of complete trials:  3
Best trial:
  Value:  0.05779049295774647
  Params: 
    dropout_l0: 0.3
    dropout_l1: 0.3
    dropout_l2: 0.0
    lr: 0.05133572629314612
    n_layers: 3
    n_units_l0: 32
    n_units_l1: 32
    n_units_l2: 4
    optimizer: Adam
    weight_decay: 0.00017967912023946235



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


  pruner=optuna.pruners.PatientPruner(optuna.pruners.MedianPruner(), patience=2),
[32m[I 2021-10-03 20:32:54,935][0m A new study created in RDB with name: A549_active_E_vs_inactive_E_FFNN_smote_1_2_3[0m


EarlyStopping counter: 5 out of 5
Early stopping the training
AUPRC test score: 0.05743545183714002


>>> ITERATION N. 3



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-03 20:34:08,269][0m Trial 0 finished with value: 0.07387081619475988 and parameters: {'n_layers': 2, 'n_units_l0': 256, 'dropout_l0': 0.2, 'n_units_l1': 16, 'dropout_l1': 0.3, 'optimizer': 'RMSprop', 'lr': 0.0016970758230422669, 'weight_decay': 0.0016414778265749026}. Best is trial 0 with value: 0.07387081619475988.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-03 20:35:10,083][0m Trial 1 finished with value: 0.052852112676056315 and parameters: {'n_layers': 2, 'n_units_l0': 32, 'dropout_l0': 0.2, 'n_units_l1': 64, 'dropout_l1': 0.4, 'optimizer': 'RMSprop', 'lr': 0.0001019850693489737, 'weight_decay': 0.024812758003474045}. Best is trial 0 with value: 0.07387081619475988.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5


  recall = tps / tps[-1]


EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-03 20:36:32,802][0m Trial 2 finished with value: 0.05270246478873238 and parameters: {'n_layers': 3, 'n_units_l0': 64, 'dropout_l0': 0.2, 'n_units_l1': 32, 'dropout_l1': 0.3, 'n_units_l2': 4, 'dropout_l2': 0.5, 'optimizer': 'Adam', 'lr': 0.0009634372344764329, 'weight_decay': 0.0009393966982712533}. Best is trial 0 with value: 0.07387081619475988.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training
Study statistics: 
  Number of finished trials:  3
  Number of pruned trials:  0
  Number of complete trials:  3
Best trial:
  Value:  0.07387081619475988
  Params: 
    dropout_l0: 0.2
    dropout_l1: 0.3
    lr: 0.0016970758230422669
    n_layers: 2
    n_units_l0: 256
    n_units_l1: 16
    optimizer: RMSprop
    weight_decay: 0.0016414778265749026



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5
EarlyStopping counter: 5 out of 5
Early stopping the training
AUPRC test score: 0.08063164642666949



3-FOLD CROSS-VALIDATION AUPRC TEST SCORE: 0.0771


In [29]:
# File the scores of the run above under cell line -> task -> "<model>_<augmentation type>".
fold_scores = kf_CV.scores_dict
task_results = results_dict[cell_line][task]
task_results[f'{model.__name__}_{type_augm_genfeatures}'] = fold_scores

In [30]:
# Persist the accumulated scores. The top-level mapping is copied into an
# OrderedDict before pickling (the defaultdict built at load time carries a
# lambda factory, which pickle cannot serialise).
with open('results_dict.pickle', 'wb') as results_file:
    results_snapshot = OrderedDict(results_dict)
    pickle.dump(results_snapshot, results_file)

In [31]:
# IMBALANCED
# Augmentation type forwarded to Kfold_CV; it also tags the study name and the
# checkpoint paths of this run.
type_augm_genfeatures = 'double'

kf_CV = Kfold_CV()

# NOTE(review): this is the only run in the notebook whose checkpoint paths end
# in '.pt'; the other runs pass paths without an extension. Confirm which form
# Kfold_CV expects.
cv_settings = dict(
    build_dataloader_pipeline=pipe_data_load,
    num_epochs=100,
    n_folds=3,
    cell_line=cell_line,
    sequence=False,
    augmentation=True,
    type_augm_genfeatures=type_augm_genfeatures,
    model=model,
    device=device,
    task=task,
    sampler='TPE',
    study_name=f'{cell_line}_{task}_{model.__name__}_{type_augm_genfeatures}',
    hp_model_path=f'{cell_line}_{task}_{model.__name__}_{type_augm_genfeatures}_HP.pt',
    test_model_path=f'{cell_line}_{task}_{model.__name__}_{type_augm_genfeatures}_TEST.pt',
)
kf_CV(**cv_settings)

  pruner=optuna.pruners.PatientPruner(optuna.pruners.MedianPruner(), patience=2),
[32m[I 2021-10-03 20:38:08,825][0m A new study created in RDB with name: A549_active_E_vs_inactive_E_FFNN_double_1[0m


>>> ITERATION N. 1



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-03 20:39:43,680][0m Trial 0 finished with value: 0.056109154929577444 and parameters: {'n_layers': 3, 'n_units_l0': 256, 'dropout_l0': 0.0, 'n_units_l1': 16, 'dropout_l1': 0.2, 'n_units_l2': 16, 'dropout_l2': 0.5, 'optimizer': 'Nadam', 'lr': 4.2085758024902316e-05, 'weight_decay': 0.0005540997678870126}. Best is trial 0 with value: 0.056109154929577444.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-03 20:41:05,644][0m Trial 1 finished with value: 0.05566021126760562 and parameters: {'n_layers': 1, 'n_units_l0': 32, 'dropout_l0': 0.3, 'optimizer': 'RMSprop', 'lr': 0.03683023742189593, 'weight_decay': 0.04661540715848799}. Best is trial 0 with value: 0.056109154929577444.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-03 20:43:35,164][0m Trial 2 finished with value: 0.05566021126760566 and parameters: {'n_layers': 3, 'n_units_l0': 128, 'dropout_l0': 0.4, 'n_units_l1': 128, 'dropout_l1': 0.2, 'n_units_l2': 32, 'dropout_l2': 0.4, 'optimizer': 'RMSprop', 'lr': 0.0007501586991241983, 'weight_decay': 0.004548431874520829}. Best is trial 0 with value: 0.056109154929577444.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training
Study statistics: 
  Number of finished trials:  3
  Number of pruned trials:  0
  Number of complete trials:  3
Best trial:
  Value:  0.056109154929577444
  Params: 
    dropout_l0: 0.0
    dropout_l1: 0.2
    dropout_l2: 0.5
    lr: 4.2085758024902316e-05
    n_layers: 3
    n_units_l0: 256
    n_units_l1: 16
    n_units_l2: 16
    optimizer: Nadam
    weight_decay: 0.0005540997678870126



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


  pruner=optuna.pruners.PatientPruner(optuna.pruners.MedianPruner(), patience=2),
[32m[I 2021-10-03 20:46:45,753][0m A new study created in RDB with name: A549_active_E_vs_inactive_E_FFNN_double_1_2[0m


EarlyStopping counter: 5 out of 5
Early stopping the training
AUPRC test score: 0.055543694141012886


>>> ITERATION N. 2



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-03 20:48:08,870][0m Trial 0 finished with value: 0.05734154929577463 and parameters: {'n_layers': 2, 'n_units_l0': 64, 'dropout_l0': 0.4, 'n_units_l1': 32, 'dropout_l1': 0.3, 'optimizer': 'Nadam', 'lr': 0.0007888841763635341, 'weight_decay': 0.02160333445544555}. Best is trial 0 with value: 0.05734154929577463.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-03 20:49:34,066][0m Trial 1 finished with value: 0.07838651372102076 and parameters: {'n_layers': 2, 'n_units_l0': 256, 'dropout_l0': 0.3, 'n_units_l1': 64, 'dropout_l1': 0.3, 'optimizer': 'Adam', 'lr': 6.317152382884526e-05, 'weight_decay': 0.002051677533249752}. Best is trial 1 with value: 0.07838651372102076.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-03 20:50:36,245][0m Trial 2 finished with value: 0.07962141829219295 and parameters: {'n_layers': 2, 'n_units_l0': 32, 'dropout_l0': 0.4, 'n_units_l1': 128, 'dropout_l1': 0.3, 'optimizer': 'Nadam', 'lr': 0.0024463740487153172, 'weight_decay': 0.00020354409029732702}. Best is trial 2 with value: 0.07962141829219295.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training
Study statistics: 
  Number of finished trials:  3
  Number of pruned trials:  0
  Number of complete trials:  3
Best trial:
  Value:  0.07962141829219295
  Params: 
    dropout_l0: 0.4
    dropout_l1: 0.3
    lr: 0.0024463740487153172
    n_layers: 2
    n_units_l0: 32
    n_units_l1: 128
    optimizer: Nadam
    weight_decay: 0.00020354409029732702



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


  pruner=optuna.pruners.PatientPruner(optuna.pruners.MedianPruner(), patience=2),
[32m[I 2021-10-03 20:53:13,659][0m A new study created in RDB with name: A549_active_E_vs_inactive_E_FFNN_double_1_2_3[0m


EarlyStopping counter: 5 out of 5
Early stopping the training
AUPRC test score: 0.1038407451600395


>>> ITERATION N. 3



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


  recall = tps / tps[-1]
[32m[I 2021-10-03 20:54:25,685][0m Trial 0 finished with value: 0.10595655909036192 and parameters: {'n_layers': 2, 'n_units_l0': 64, 'dropout_l0': 0.2, 'n_units_l1': 32, 'dropout_l1': 0.0, 'optimizer': 'Adam', 'lr': 0.005770934152207585, 'weight_decay': 0.00011086806083352249}. Best is trial 0 with value: 0.10595655909036192.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-03 20:55:27,107][0m Trial 1 finished with value: 0.0855964985718507 and parameters: {'n_layers': 1, 'n_units_l0': 32, 'dropout_l0': 0.4, 'optimizer': 'Adam', 'lr': 0.0009706442070192908, 'weight_decay': 0.0018958543751050115}. Best is trial 0 with value: 0.10595655909036192.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-03 20:56:29,642][0m Trial 2 finished with value: 0.08883954714008567 and parameters: {'n_layers': 1, 'n_units_l0': 256, 'dropout_l0': 0.3, 'optimizer': 'Nadam', 'lr': 0.00012213011653329847, 'weight_decay': 0.00021342594321345}. Best is trial 0 with value: 0.10595655909036192.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training
Study statistics: 
  Number of finished trials:  3
  Number of pruned trials:  0
  Number of complete trials:  3
Best trial:
  Value:  0.10595655909036192
  Params: 
    dropout_l0: 0.2
    dropout_l1: 0.0
    lr: 0.005770934152207585
    n_layers: 2
    n_units_l0: 64
    n_units_l1: 32
    optimizer: Adam
    weight_decay: 0.00011086806083352249



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5
EarlyStopping counter: 5 out of 5
Early stopping the training
AUPRC test score: 0.09819916724985658



3-FOLD CROSS-VALIDATION AUPRC TEST SCORE: 0.08586


In [32]:
# File the scores of the run above under cell line -> task -> "<model>_<augmentation type>".
fold_scores = kf_CV.scores_dict
task_results = results_dict[cell_line][task]
task_results[f'{model.__name__}_{type_augm_genfeatures}'] = fold_scores

In [33]:
# Persist the accumulated scores. The top-level mapping is copied into an
# OrderedDict before pickling (the defaultdict built at load time carries a
# lambda factory, which pickle cannot serialise).
with open('results_dict.pickle', 'wb') as results_file:
    results_snapshot = OrderedDict(results_dict)
    pickle.dump(results_snapshot, results_file)

---
### 2. CNN

In [12]:
# Model class for the runs below; its __name__ tags study names, checkpoint paths and result keys.
model=CNN

In [None]:
# IMBALANCED
kf_CV = Kfold_CV()

# Collect the run configuration first, then launch the cross-validation with it.
# This run sets sequence=True and passes no type_augm_genfeatures.
cv_settings = dict(
    build_dataloader_pipeline=pipe_data_load,
    num_epochs=100,
    n_folds=3,
    cell_line=cell_line,
    sequence=True,
    augmentation=True,
    model=model,
    device=device,
    task=task,
    sampler='TPE',
    study_name=f'{cell_line}_{task}_{model.__name__}',
    hp_model_path=f'{cell_line}_{task}_{model.__name__}_HP',
    test_model_path=f'{cell_line}_{task}_{model.__name__}_TEST',
)
kf_CV(**cv_settings)

In [None]:
# File the scores of the run above under cell line -> task -> model class name.
fold_scores = kf_CV.scores_dict
task_results = results_dict[cell_line][task]
task_results[model.__name__] = fold_scores

In [None]:
# Persist the accumulated scores. The top-level mapping is copied into an
# OrderedDict before pickling (the defaultdict built at load time carries a
# lambda factory, which pickle cannot serialise).
with open('results_dict.pickle', 'wb') as results_file:
    results_snapshot = OrderedDict(results_dict)
    pickle.dump(results_snapshot, results_file)

---
### 3. CNN-LSTM

In [33]:
# Model class for the runs below; its __name__ tags study names, checkpoint paths and result keys.
model=CNN_LSTM

In [None]:
# IMBALANCED
kf_CV = Kfold_CV()

# Collect the run configuration first, then launch the cross-validation with it.
# This run sets sequence=True and passes no type_augm_genfeatures.
cv_settings = dict(
    build_dataloader_pipeline=pipe_data_load,
    num_epochs=100,
    n_folds=3,
    cell_line=cell_line,
    sequence=True,
    augmentation=True,
    model=model,
    device=device,
    task=task,
    sampler='TPE',
    study_name=f'{cell_line}_{task}_{model.__name__}',
    hp_model_path=f'{cell_line}_{task}_{model.__name__}_HP',
    test_model_path=f'{cell_line}_{task}_{model.__name__}_TEST',
)
kf_CV(**cv_settings)

  pruner=optuna.pruners.PatientPruner(optuna.pruners.MedianPruner(), patience=2),
[32m[I 2021-10-05 11:16:35,891][0m A new study created in RDB with name: A549_active_E_vs_inactive_E_CNN_LSTM_1[0m


>>> ITERATION N. 1



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

	add(Number alpha, Tensor other)
Consider using one of the following signatures instead:
	add(Tensor other, *, Number alpha) (Triggered internally at  ../torch/csrc/utils/python_arg_parser.cpp:1025.)
  grad = grad.add(group['weight_decay'], p.data)


In [None]:
# File the scores of the run above under cell line -> task -> model class name.
fold_scores = kf_CV.scores_dict
task_results = results_dict[cell_line][task]
task_results[model.__name__] = fold_scores

In [None]:
# Persist the accumulated scores. The top-level mapping is copied into an
# OrderedDict before pickling (the defaultdict built at load time carries a
# lambda factory, which pickle cannot serialise).
with open('results_dict.pickle', 'wb') as results_file:
    results_snapshot = OrderedDict(results_dict)
    pickle.dump(results_snapshot, results_file)

---

## 2) ACTIVE PROMOTERS vs INACTIVE PROMOTERS

In [None]:
# Switch to the second task in TASKS. The cells below use `task` to locate the
# task pickle and to tag study names, checkpoint paths and result keys.
task = TASKS[1]
task

In [None]:
# Rebuild the data-loading pipeline from the pickle file named after the new task.
# This replaces the pipeline object created for the previous task.
pipe_data_load = Build_DataLoader_Pipeline(path_name=f'{task}.pickle')

---

### 1. FFNN

In [36]:
# Model class for the runs below; its __name__ tags study names, checkpoint paths and result keys.
model=FFNN

In [37]:
kf_CV = Kfold_CV()

# Collect the run configuration first, then launch the cross-validation with it.
# This run disables augmentation and sets sequence=False.
cv_settings = dict(
    build_dataloader_pipeline=pipe_data_load,
    num_epochs=100,
    n_folds=3,
    cell_line=cell_line,
    sequence=False,
    augmentation=False,
    model=model,
    device=device,
    task=task,
    sampler='TPE',
    study_name=f'{cell_line}_{task}_{model.__name__}',
    hp_model_path=f'{cell_line}_{task}_{model.__name__}_HP',
    test_model_path=f'{cell_line}_{task}_{model.__name__}_TEST',
)
kf_CV(**cv_settings)

  pruner=optuna.pruners.PatientPruner(optuna.pruners.MedianPruner(), patience=2),
[32m[I 2021-10-03 20:59:37,330][0m A new study created in RDB with name: A549_active_P_vs_inactive_P_FFNN_1[0m


>>> ITERATION N. 1



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-03 21:02:12,184][0m Trial 0 finished with value: 0.10343997058282774 and parameters: {'n_layers': 1, 'n_units_l0': 32, 'dropout_l0': 0.0, 'optimizer': 'Nadam', 'lr': 0.01137443006308322, 'weight_decay': 0.048396022007748}. Best is trial 0 with value: 0.10343997058282774.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-03 21:03:48,959][0m Trial 1 finished with value: 0.10344364772936208 and parameters: {'n_layers': 2, 'n_units_l0': 128, 'dropout_l0': 0.2, 'n_units_l1': 128, 'dropout_l1': 0.4, 'optimizer': 'Nadam', 'lr': 0.074346379168385, 'weight_decay': 0.0904213229439284}. Best is trial 1 with value: 0.10344364772936208.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-03 21:07:21,784][0m Trial 2 finished with value: 0.10344272844272843 and parameters: {'n_layers': 4, 'n_units_l0': 256, 'dropout_l0': 0.2, 'n_units_l1': 64, 'dropout_l1': 0.4, 'n_units_l2': 4, 'dropout_l2': 0.0, 'n_units_l3': 16, 'dropout_l3': 0.4, 'optimizer': 'Nadam', 'lr': 0.00018673597709379993, 'weight_decay': 0.00026192128171035126}. Best is trial 1 with value: 0.10344364772936208.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training
Study statistics: 
  Number of finished trials:  3
  Number of pruned trials:  0
  Number of complete trials:  3
Best trial:
  Value:  0.10344364772936208
  Params: 
    dropout_l0: 0.2
    dropout_l1: 0.4
    lr: 0.074346379168385
    n_layers: 2
    n_units_l0: 128
    n_units_l1: 128
    optimizer: Nadam
    weight_decay: 0.0904213229439284



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


  pruner=optuna.pruners.PatientPruner(optuna.pruners.MedianPruner(), patience=2),
[32m[I 2021-10-03 21:12:40,178][0m A new study created in RDB with name: A549_active_P_vs_inactive_P_FFNN_1_2[0m


EarlyStopping counter: 5 out of 5
Early stopping the training
AUPRC test score: 0.1021302076697669


>>> ITERATION N. 2



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-03 21:14:13,335][0m Trial 0 finished with value: 0.10290034932892078 and parameters: {'n_layers': 1, 'n_units_l0': 32, 'dropout_l0': 0.4, 'optimizer': 'RMSprop', 'lr': 0.0010950034534189157, 'weight_decay': 0.0743834442273973}. Best is trial 0 with value: 0.10290034932892078.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-03 21:16:35,337][0m Trial 1 finished with value: 0.10290218790218791 and parameters: {'n_layers': 1, 'n_units_l0': 256, 'dropout_l0': 0.2, 'optimizer': 'Nadam', 'lr': 0.00051869443856522, 'weight_decay': 0.013465528503967442}. Best is trial 1 with value: 0.10290218790218791.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-03 21:18:39,840][0m Trial 2 finished with value: 0.10290126861555438 and parameters: {'n_layers': 2, 'n_units_l0': 32, 'dropout_l0': 0.2, 'n_units_l1': 64, 'dropout_l1': 0.2, 'optimizer': 'Adam', 'lr': 8.144104963571114e-05, 'weight_decay': 0.013888429371149593}. Best is trial 1 with value: 0.10290218790218791.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training
Study statistics: 
  Number of finished trials:  3
  Number of pruned trials:  0
  Number of complete trials:  3
Best trial:
  Value:  0.10290218790218791
  Params: 
    dropout_l0: 0.2
    lr: 0.00051869443856522
    n_layers: 1
    n_units_l0: 256
    optimizer: Nadam
    weight_decay: 0.013465528503967442



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


  pruner=optuna.pruners.PatientPruner(optuna.pruners.MedianPruner(), patience=2),
[32m[I 2021-10-03 21:25:27,161][0m A new study created in RDB with name: A549_active_P_vs_inactive_P_FFNN_1_2_3[0m


EarlyStopping counter: 5 out of 5
Early stopping the training
AUPRC test score: 0.10388584533061541


>>> ITERATION N. 3



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-03 21:29:19,857][0m Trial 0 finished with value: 0.1051507630079059 and parameters: {'n_layers': 4, 'n_units_l0': 256, 'dropout_l0': 0.3, 'n_units_l1': 128, 'dropout_l1': 0.4, 'n_units_l2': 16, 'dropout_l2': 0.5, 'n_units_l3': 4, 'dropout_l3': 0.0, 'optimizer': 'Adam', 'lr': 0.0006051157262554248, 'weight_decay': 0.0009248311634644928}. Best is trial 0 with value: 0.1051507630079059.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-03 21:31:09,209][0m Trial 1 finished with value: 0.10515903658760806 and parameters: {'n_layers': 2, 'n_units_l0': 64, 'dropout_l0': 0.3, 'n_units_l1': 32, 'dropout_l1': 0.3, 'optimizer': 'RMSprop', 'lr': 0.0001720875485210396, 'weight_decay': 0.0008303011324629708}. Best is trial 1 with value: 0.10515903658760806.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-03 21:33:16,500][0m Trial 2 finished with value: 0.10515811730097449 and parameters: {'n_layers': 3, 'n_units_l0': 128, 'dropout_l0': 0.3, 'n_units_l1': 16, 'dropout_l1': 0.3, 'n_units_l2': 64, 'dropout_l2': 0.5, 'optimizer': 'Nadam', 'lr': 0.025002364130802812, 'weight_decay': 0.0087153058431164}. Best is trial 1 with value: 0.10515903658760806.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training
Study statistics: 
  Number of finished trials:  3
  Number of pruned trials:  0
  Number of complete trials:  3
Best trial:
  Value:  0.10515903658760806
  Params: 
    dropout_l0: 0.3
    dropout_l1: 0.3
    lr: 0.0001720875485210396
    n_layers: 2
    n_units_l0: 64
    n_units_l1: 32
    optimizer: RMSprop
    weight_decay: 0.0008303011324629708



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5
EarlyStopping counter: 5 out of 5
Early stopping the training
AUPRC test score: 0.10347917069087635



3-FOLD CROSS-VALIDATION AUPRC TEST SCORE: 0.10317


In [38]:
# Record this run's scores (kf_CV.scores_dict) under
# results_dict[cell_line][task][<model name>].
results_dict[cell_line][task][model.__name__] = kf_CV.scores_dict

In [39]:
# Checkpoint the accumulated results to disk after this run.
# The top level is copied into an OrderedDict before dumping — presumably because
# results_dict is a defaultdict with a lambda default factory, which pickle cannot
# serialise (NOTE(review): confirm against the cell that loads the file).
with open('results_dict.pickle', mode='wb') as fout:
    pickle.dump(OrderedDict(results_dict), fout)

---
### 2. CNN

In [None]:
# Model used by the cross-validation cell that follows.
model = CNN

In [None]:
# Run 3-fold cross-validation for the currently selected model on the current task.
kf_CV = Kfold_CV()

# The study name and both model paths share the "<cell line>_<task>_<model>" prefix.
kf_CV(
    build_dataloader_pipeline=pipe_data_load,
    num_epochs=100,
    n_folds=3,
    cell_line=cell_line,
    sequence=True,       # presumably selects the sequence input — confirm in Kfold_CV
    augmentation=False,
    model=model,
    device=device,
    task=task,
    sampler='TPE',
    study_name=f'{cell_line}_{task}_{model.__name__}',
    hp_model_path=f'{cell_line}_{task}_{model.__name__}_HP',
    test_model_path=f'{cell_line}_{task}_{model.__name__}_TEST',
)

In [None]:
# Store the cross-validation scores of this run, keyed by cell line, task and model name.
results_dict[cell_line][task][model.__name__] = kf_CV.scores_dict

In [None]:
# Persist the updated results; only the top level is converted to an OrderedDict.
with open('results_dict.pickle', mode='wb') as fout:
    pickle.dump(OrderedDict(results_dict), fout)

---
### 3. CNN-LSTM

In [None]:
# Model used by the cross-validation cell that follows.
model = CNN_LSTM

In [None]:
# Run 3-fold cross-validation for the currently selected model on the current task.
kf_CV = Kfold_CV()

# The study name and both model paths share the "<cell line>_<task>_<model>" prefix.
kf_CV(
    build_dataloader_pipeline=pipe_data_load,
    num_epochs=100,
    n_folds=3,
    cell_line=cell_line,
    sequence=True,       # presumably selects the sequence input — confirm in Kfold_CV
    augmentation=False,
    model=model,
    device=device,
    task=task,
    sampler='TPE',
    study_name=f'{cell_line}_{task}_{model.__name__}',
    hp_model_path=f'{cell_line}_{task}_{model.__name__}_HP',
    test_model_path=f'{cell_line}_{task}_{model.__name__}_TEST',
)

In [None]:
# Store the cross-validation scores of this run, keyed by cell line, task and model name.
results_dict[cell_line][task][model.__name__] = kf_CV.scores_dict

In [None]:
# Persist the updated results; only the top level is converted to an OrderedDict.
with open('results_dict.pickle', mode='wb') as fout:
    pickle.dump(OrderedDict(results_dict), fout)

---

## 3) ACTIVE ENHANCERS vs ACTIVE PROMOTERS

In [40]:
# Switch to the task at index 2 of TASKS; the bare name echoes it as the cell output.
task = TASKS[2]
task

'active_E_vs_active_P'

In [41]:
# Rebuild the data-loading pipeline for the current task, pointing it at '<task>.pickle'.
pipe_data_load = Build_DataLoader_Pipeline(path_name=f'{task}.pickle')

Data Preprocessing Done!


---
### 1. FFNN

In [42]:
# Model used by the cross-validation cell that follows.
model = FFNN

In [43]:
# Run 3-fold cross-validation for the currently selected model on the current task,
# without augmentation.
kf_CV = Kfold_CV()

# The study name and both model paths share the "<cell line>_<task>_<model>" prefix.
kf_CV(
    build_dataloader_pipeline=pipe_data_load,
    num_epochs=100,
    n_folds=3,
    cell_line=cell_line,
    sequence=False,      # presumably selects the non-sequence input — confirm in Kfold_CV
    augmentation=False,
    model=model,
    device=device,
    task=task,
    sampler='TPE',
    study_name=f'{cell_line}_{task}_{model.__name__}',
    hp_model_path=f'{cell_line}_{task}_{model.__name__}_HP',
    test_model_path=f'{cell_line}_{task}_{model.__name__}_TEST',
)

  pruner=optuna.pruners.PatientPruner(optuna.pruners.MedianPruner(), patience=2),
[32m[I 2021-10-03 21:38:47,993][0m A new study created in RDB with name: A549_active_E_vs_active_P_FFNN_1[0m


>>> ITERATION N. 1



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-03 21:39:05,357][0m Trial 0 finished with value: 0.36552976185680836 and parameters: {'n_layers': 1, 'n_units_l0': 128, 'dropout_l0': 0.0, 'optimizer': 'Adam', 'lr': 0.0006996739566574173, 'weight_decay': 0.046119154033212574}. Best is trial 0 with value: 0.36552976185680836.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-03 21:39:21,476][0m Trial 1 finished with value: 0.5417157519795053 and parameters: {'n_layers': 2, 'n_units_l0': 256, 'dropout_l0': 0.0, 'n_units_l1': 128, 'dropout_l1': 0.4, 'optimizer': 'RMSprop', 'lr': 4.6760255101041886e-05, 'weight_decay': 0.0004036125549452425}. Best is trial 1 with value: 0.5417157519795053.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-03 21:39:36,919][0m Trial 2 finished with value: 0.26309027777777777 and parameters: {'n_layers': 4, 'n_units_l0': 32, 'dropout_l0': 0.2, 'n_units_l1': 64, 'dropout_l1': 0.3, 'n_units_l2': 4, 'dropout_l2': 0.5, 'n_units_l3': 16, 'dropout_l3': 0.4, 'optimizer': 'Adam', 'lr': 0.0001690153057637175, 'weight_decay': 0.00013302066681916707}. Best is trial 1 with value: 0.5417157519795053.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training
Study statistics: 
  Number of finished trials:  3
  Number of pruned trials:  0
  Number of complete trials:  3
Best trial:
  Value:  0.5417157519795053
  Params: 
    dropout_l0: 0.0
    dropout_l1: 0.4
    lr: 4.6760255101041886e-05
    n_layers: 2
    n_units_l0: 256
    n_units_l1: 128
    optimizer: RMSprop
    weight_decay: 0.0004036125549452425



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


  pruner=optuna.pruners.PatientPruner(optuna.pruners.MedianPruner(), patience=2),
[32m[I 2021-10-03 21:39:57,848][0m A new study created in RDB with name: A549_active_E_vs_active_P_FFNN_1_2[0m


EarlyStopping counter: 5 out of 5
Early stopping the training
AUPRC test score: 0.5988917300774468


>>> ITERATION N. 2



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-03 21:40:11,280][0m Trial 0 finished with value: 0.7606372355067003 and parameters: {'n_layers': 3, 'n_units_l0': 64, 'dropout_l0': 0.3, 'n_units_l1': 128, 'dropout_l1': 0.4, 'n_units_l2': 64, 'dropout_l2': 0.0, 'optimizer': 'RMSprop', 'lr': 0.006163863500800374, 'weight_decay': 0.00027488527989385846}. Best is trial 0 with value: 0.7606372355067003.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-03 21:40:33,958][0m Trial 1 finished with value: 0.2586458333333333 and parameters: {'n_layers': 4, 'n_units_l0': 64, 'dropout_l0': 0.0, 'n_units_l1': 128, 'dropout_l1': 0.2, 'n_units_l2': 64, 'dropout_l2': 0.5, 'n_units_l3': 16, 'dropout_l3': 0.5, 'optimizer': 'Adam', 'lr': 1.1274069382322681e-05, 'weight_decay': 0.006589900253521952}. Best is trial 0 with value: 0.7606372355067003.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-03 21:40:54,016][0m Trial 2 finished with value: 0.760018620198637 and parameters: {'n_layers': 3, 'n_units_l0': 64, 'dropout_l0': 0.0, 'n_units_l1': 128, 'dropout_l1': 0.3, 'n_units_l2': 4, 'dropout_l2': 0.4, 'optimizer': 'Adam', 'lr': 0.011746005896074135, 'weight_decay': 0.0006013136667603924}. Best is trial 0 with value: 0.7606372355067003.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training
Study statistics: 
  Number of finished trials:  3
  Number of pruned trials:  0
  Number of complete trials:  3
Best trial:
  Value:  0.7606372355067003
  Params: 
    dropout_l0: 0.3
    dropout_l1: 0.4
    dropout_l2: 0.0
    lr: 0.006163863500800374
    n_layers: 3
    n_units_l0: 64
    n_units_l1: 128
    n_units_l2: 64
    optimizer: RMSprop
    weight_decay: 0.00027488527989385846



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


  pruner=optuna.pruners.PatientPruner(optuna.pruners.MedianPruner(), patience=2),
[32m[I 2021-10-03 21:41:17,661][0m A new study created in RDB with name: A549_active_E_vs_active_P_FFNN_1_2_3[0m


EarlyStopping counter: 5 out of 5
Early stopping the training
AUPRC test score: 0.7460297126756875


>>> ITERATION N. 3



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-03 21:41:39,380][0m Trial 0 finished with value: 0.7100919907634775 and parameters: {'n_layers': 1, 'n_units_l0': 32, 'dropout_l0': 0.3, 'optimizer': 'Nadam', 'lr': 0.05053567105552497, 'weight_decay': 0.0019794036220136565}. Best is trial 0 with value: 0.7100919907634775.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-03 21:41:52,527][0m Trial 1 finished with value: 0.7284028997325649 and parameters: {'n_layers': 2, 'n_units_l0': 64, 'dropout_l0': 0.3, 'n_units_l1': 32, 'dropout_l1': 0.2, 'optimizer': 'RMSprop', 'lr': 0.001436590193244946, 'weight_decay': 0.0010102974506077042}. Best is trial 1 with value: 0.7284028997325649.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-03 21:42:15,040][0m Trial 2 finished with value: 0.26145833333333335 and parameters: {'n_layers': 4, 'n_units_l0': 128, 'dropout_l0': 0.3, 'n_units_l1': 32, 'dropout_l1': 0.2, 'n_units_l2': 32, 'dropout_l2': 0.5, 'n_units_l3': 4, 'dropout_l3': 0.4, 'optimizer': 'Nadam', 'lr': 0.0001961276451235053, 'weight_decay': 0.05242255792747359}. Best is trial 1 with value: 0.7284028997325649.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training
Study statistics: 
  Number of finished trials:  3
  Number of pruned trials:  0
  Number of complete trials:  3
Best trial:
  Value:  0.7284028997325649
  Params: 
    dropout_l0: 0.3
    dropout_l1: 0.2
    lr: 0.001436590193244946
    n_layers: 2
    n_units_l0: 64
    n_units_l1: 32
    optimizer: RMSprop
    weight_decay: 0.0010102974506077042



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5
EarlyStopping counter: 5 out of 5
Early stopping the training
AUPRC test score: 0.7229671048568312



3-FOLD CROSS-VALIDATION AUPRC TEST SCORE: 0.6893


In [44]:
# Store the cross-validation scores of this run, keyed by cell line, task and model name.
results_dict[cell_line][task][model.__name__] = kf_CV.scores_dict

In [45]:
# Persist the updated results; only the top level is converted to an OrderedDict.
with open('results_dict.pickle', mode='wb') as fout:
    pickle.dump(OrderedDict(results_dict), fout)

---
### 2. CNN

In [None]:
# Model used by the cross-validation cell that follows.
model = CNN

In [None]:
# Run 3-fold cross-validation for the currently selected model on the current task.
kf_CV = Kfold_CV()

# The study name and both model paths share the "<cell line>_<task>_<model>" prefix.
kf_CV(
    build_dataloader_pipeline=pipe_data_load,
    num_epochs=100,
    n_folds=3,
    cell_line=cell_line,
    sequence=True,       # presumably selects the sequence input — confirm in Kfold_CV
    augmentation=False,
    model=model,
    device=device,
    task=task,
    sampler='TPE',
    study_name=f'{cell_line}_{task}_{model.__name__}',
    hp_model_path=f'{cell_line}_{task}_{model.__name__}_HP',
    test_model_path=f'{cell_line}_{task}_{model.__name__}_TEST',
)

In [None]:
# Store the cross-validation scores of this run, keyed by cell line, task and model name.
results_dict[cell_line][task][model.__name__] = kf_CV.scores_dict

In [None]:
# Persist the updated results; only the top level is converted to an OrderedDict.
with open('results_dict.pickle', mode='wb') as fout:
    pickle.dump(OrderedDict(results_dict), fout)

---
### 3. CNN-LSTM

In [None]:
# Model used by the cross-validation cell that follows.
model = CNN_LSTM

In [None]:
# Run 3-fold cross-validation for the currently selected model on the current task.
kf_CV = Kfold_CV()

# The study name and both model paths share the "<cell line>_<task>_<model>" prefix.
kf_CV(
    build_dataloader_pipeline=pipe_data_load,
    num_epochs=100,
    n_folds=3,
    cell_line=cell_line,
    sequence=True,       # presumably selects the sequence input — confirm in Kfold_CV
    augmentation=False,
    model=model,
    device=device,
    task=task,
    sampler='TPE',
    study_name=f'{cell_line}_{task}_{model.__name__}',
    hp_model_path=f'{cell_line}_{task}_{model.__name__}_HP',
    test_model_path=f'{cell_line}_{task}_{model.__name__}_TEST',
)

In [None]:
# Store the cross-validation scores of this run, keyed by cell line, task and model name.
results_dict[cell_line][task][model.__name__] = kf_CV.scores_dict

In [None]:
# Persist the updated results; only the top level is converted to an OrderedDict.
with open('results_dict.pickle', mode='wb') as fout:
    pickle.dump(OrderedDict(results_dict), fout)

---

## 4) INACTIVE ENHANCERS vs INACTIVE PROMOTERS

In [46]:
# Switch to the task at index 3 of TASKS; the bare name echoes it as the cell output.
task = TASKS[3]
task

'inactive_E_vs_inactive_P'

In [47]:
# Rebuild the data-loading pipeline for the current task, pointing it at '<task>.pickle'.
pipe_data_load = Build_DataLoader_Pipeline(path_name=f'{task}.pickle')

Data Preprocessing Done!


---
### 1. FFNN

In [48]:
# Model used by the cross-validation cell that follows.
model = FFNN

In [49]:
# Run 3-fold cross-validation for the currently selected model on the current task,
# without augmentation.
kf_CV = Kfold_CV()

# The study name and both model paths share the "<cell line>_<task>_<model>" prefix.
kf_CV(
    build_dataloader_pipeline=pipe_data_load,
    num_epochs=100,
    n_folds=3,
    cell_line=cell_line,
    sequence=False,      # presumably selects the non-sequence input — confirm in Kfold_CV
    augmentation=False,
    model=model,
    device=device,
    task=task,
    sampler='TPE',
    study_name=f'{cell_line}_{task}_{model.__name__}',
    hp_model_path=f'{cell_line}_{task}_{model.__name__}_HP',
    test_model_path=f'{cell_line}_{task}_{model.__name__}_TEST',
)

  pruner=optuna.pruners.PatientPruner(optuna.pruners.MedianPruner(), patience=2),
[32m[I 2021-10-03 21:42:35,400][0m A new study created in RDB with name: A549_inactive_E_vs_inactive_P_FFNN_1[0m


>>> ITERATION N. 1



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-03 21:45:02,171][0m Trial 0 finished with value: 0.5685744843966625 and parameters: {'n_layers': 3, 'n_units_l0': 128, 'dropout_l0': 0.4, 'n_units_l1': 128, 'dropout_l1': 0.4, 'n_units_l2': 16, 'dropout_l2': 0.0, 'optimizer': 'Nadam', 'lr': 0.0001864302573216639, 'weight_decay': 0.0009369566300514556}. Best is trial 0 with value: 0.5685744843966625.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-03 21:47:47,453][0m Trial 1 finished with value: 0.528999004845929 and parameters: {'n_layers': 2, 'n_units_l0': 128, 'dropout_l0': 0.4, 'n_units_l1': 16, 'dropout_l1': 0.4, 'optimizer': 'RMSprop', 'lr': 0.02846436670253958, 'weight_decay': 0.007421430571424544}. Best is trial 0 with value: 0.5685744843966625.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-03 21:50:10,319][0m Trial 2 finished with value: 0.5531803762777493 and parameters: {'n_layers': 3, 'n_units_l0': 64, 'dropout_l0': 0.3, 'n_units_l1': 64, 'dropout_l1': 0.2, 'n_units_l2': 32, 'dropout_l2': 0.0, 'optimizer': 'Adam', 'lr': 0.00947211744647189, 'weight_decay': 0.0042772582213979}. Best is trial 0 with value: 0.5685744843966625.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training
Study statistics: 
  Number of finished trials:  3
  Number of pruned trials:  0
  Number of complete trials:  3
Best trial:
  Value:  0.5685744843966625
  Params: 
    dropout_l0: 0.4
    dropout_l1: 0.4
    dropout_l2: 0.0
    lr: 0.0001864302573216639
    n_layers: 3
    n_units_l0: 128
    n_units_l1: 128
    n_units_l2: 16
    optimizer: Nadam
    weight_decay: 0.0009369566300514556



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


  pruner=optuna.pruners.PatientPruner(optuna.pruners.MedianPruner(), patience=2),
[32m[I 2021-10-03 21:53:53,329][0m A new study created in RDB with name: A549_inactive_E_vs_inactive_P_FFNN_1_2[0m


EarlyStopping counter: 5 out of 5
Early stopping the training
AUPRC test score: 0.5693598185852244


>>> ITERATION N. 2



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-03 21:57:04,912][0m Trial 0 finished with value: 0.3968927782134455 and parameters: {'n_layers': 4, 'n_units_l0': 32, 'dropout_l0': 0.0, 'n_units_l1': 128, 'dropout_l1': 0.0, 'n_units_l2': 64, 'dropout_l2': 0.5, 'n_units_l3': 4, 'dropout_l3': 0.5, 'optimizer': 'Adam', 'lr': 0.036693935985295904, 'weight_decay': 0.004520415981574784}. Best is trial 0 with value: 0.3968927782134455.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-03 22:01:01,646][0m Trial 1 finished with value: 0.39683200256647905 and parameters: {'n_layers': 4, 'n_units_l0': 32, 'dropout_l0': 0.3, 'n_units_l1': 64, 'dropout_l1': 0.3, 'n_units_l2': 4, 'dropout_l2': 0.0, 'n_units_l3': 16, 'dropout_l3': 0.5, 'optimizer': 'Adam', 'lr': 0.024797943644341264, 'weight_decay': 0.015201913105112283}. Best is trial 0 with value: 0.3968927782134455.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-03 22:04:32,590][0m Trial 2 finished with value: 0.5465227297540162 and parameters: {'n_layers': 1, 'n_units_l0': 32, 'dropout_l0': 0.3, 'optimizer': 'Nadam', 'lr': 0.009307438855487028, 'weight_decay': 0.0030004340093114116}. Best is trial 2 with value: 0.5465227297540162.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training
Study statistics: 
  Number of finished trials:  3
  Number of pruned trials:  0
  Number of complete trials:  3
Best trial:
  Value:  0.5465227297540162
  Params: 
    dropout_l0: 0.3
    lr: 0.009307438855487028
    n_layers: 1
    n_units_l0: 32
    optimizer: Nadam
    weight_decay: 0.0030004340093114116



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


  pruner=optuna.pruners.PatientPruner(optuna.pruners.MedianPruner(), patience=2),
[32m[I 2021-10-03 22:08:06,646][0m A new study created in RDB with name: A549_inactive_E_vs_inactive_P_FFNN_1_2_3[0m


EarlyStopping counter: 5 out of 5
Early stopping the training
AUPRC test score: 0.5606215857768828


>>> ITERATION N. 3



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-03 22:10:28,425][0m Trial 0 finished with value: 0.3979872430900069 and parameters: {'n_layers': 4, 'n_units_l0': 32, 'dropout_l0': 0.0, 'n_units_l1': 64, 'dropout_l1': 0.3, 'n_units_l2': 32, 'dropout_l2': 0.5, 'n_units_l3': 4, 'dropout_l3': 0.4, 'optimizer': 'RMSprop', 'lr': 0.0030784622388040266, 'weight_decay': 0.0016021830802125144}. Best is trial 0 with value: 0.3979872430900069.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-03 22:13:44,917][0m Trial 1 finished with value: 0.5716916430656522 and parameters: {'n_layers': 3, 'n_units_l0': 128, 'dropout_l0': 0.0, 'n_units_l1': 128, 'dropout_l1': 0.2, 'n_units_l2': 32, 'dropout_l2': 0.4, 'optimizer': 'Nadam', 'lr': 0.012186988149705986, 'weight_decay': 0.0001259617499905844}. Best is trial 1 with value: 0.5716916430656522.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-03 22:16:53,365][0m Trial 2 finished with value: 0.39798192771084334 and parameters: {'n_layers': 2, 'n_units_l0': 64, 'dropout_l0': 0.2, 'n_units_l1': 32, 'dropout_l1': 0.4, 'optimizer': 'RMSprop', 'lr': 0.0788630833159653, 'weight_decay': 0.0012726685208475366}. Best is trial 1 with value: 0.5716916430656522.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training
Study statistics: 
  Number of finished trials:  3
  Number of pruned trials:  0
  Number of complete trials:  3
Best trial:
  Value:  0.5716916430656522
  Params: 
    dropout_l0: 0.0
    dropout_l1: 0.2
    dropout_l2: 0.4
    lr: 0.012186988149705986
    n_layers: 3
    n_units_l0: 128
    n_units_l1: 128
    n_units_l2: 32
    optimizer: Nadam
    weight_decay: 0.0001259617499905844



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5
EarlyStopping counter: 5 out of 5
Early stopping the training
AUPRC test score: 0.5794611387894281



3-FOLD CROSS-VALIDATION AUPRC TEST SCORE: 0.56981


In [50]:
# Store the cross-validation scores of this run, keyed by cell line, task and model name.
results_dict[cell_line][task][model.__name__] = kf_CV.scores_dict

In [51]:
# Persist the updated results; only the top level is converted to an OrderedDict.
with open('results_dict.pickle', mode='wb') as fout:
    pickle.dump(OrderedDict(results_dict), fout)

---
### 3. CNN-LSTM

In [None]:
# Model used by the cross-validation cell that follows.
model = CNN_LSTM

In [None]:
# Run 3-fold cross-validation for the currently selected model on the current task.
kf_CV = Kfold_CV()

# The study name and both model paths share the "<cell line>_<task>_<model>" prefix.
kf_CV(
    build_dataloader_pipeline=pipe_data_load,
    num_epochs=100,
    n_folds=3,
    cell_line=cell_line,
    sequence=True,       # presumably selects the sequence input — confirm in Kfold_CV
    augmentation=False,
    model=model,
    device=device,
    task=task,
    sampler='TPE',
    study_name=f'{cell_line}_{task}_{model.__name__}',
    hp_model_path=f'{cell_line}_{task}_{model.__name__}_HP',
    test_model_path=f'{cell_line}_{task}_{model.__name__}_TEST',
)

In [None]:
# Store the cross-validation scores of this run, keyed by cell line, task and model name.
results_dict[cell_line][task][model.__name__] = kf_CV.scores_dict

In [None]:
# Persist the updated results; only the top level is converted to an OrderedDict.
with open('results_dict.pickle', mode='wb') as fout:
    pickle.dump(OrderedDict(results_dict), fout)

---
### 2. CNN

In [None]:
# Model used by the cross-validation cell that follows.
model = CNN

In [None]:
# Run 3-fold cross-validation for the currently selected model on the current task.
kf_CV = Kfold_CV()

# The study name and both model paths share the "<cell line>_<task>_<model>" prefix.
kf_CV(
    build_dataloader_pipeline=pipe_data_load,
    num_epochs=100,
    n_folds=3,
    cell_line=cell_line,
    sequence=True,       # presumably selects the sequence input — confirm in Kfold_CV
    augmentation=False,
    model=model,
    device=device,
    task=task,
    sampler='TPE',
    study_name=f'{cell_line}_{task}_{model.__name__}',
    hp_model_path=f'{cell_line}_{task}_{model.__name__}_HP',
    test_model_path=f'{cell_line}_{task}_{model.__name__}_TEST',
)

In [None]:
# Store the cross-validation scores of this run, keyed by cell line, task and model name.
results_dict[cell_line][task][model.__name__] = kf_CV.scores_dict

In [None]:
# Persist the updated results; only the top level is converted to an OrderedDict.
with open('results_dict.pickle', mode='wb') as fout:
    pickle.dump(OrderedDict(results_dict), fout)

---

## 5) ACTIVE ENHANCERS + ACTIVE PROMOTERS vs INACTIVE REST

In [7]:
# Switch to the task at index 4 of TASKS; the bare name echoes it as the cell output.
task = TASKS[4]
task

'active_EP_vs_inactive_rest'

In [8]:
# Rebuild the data-loading pipeline for the current task, pointing it at '<task>.pickle'.
pipe_data_load = Build_DataLoader_Pipeline(path_name=f'{task}.pickle')

Data Preprocessing Done!


---
### 1. FFNN

In [9]:
# Model used by the cross-validation cells that follow.
model = FFNN

In [10]:
# IMBALANCED task: run the cross-validation with augmentation enabled,
# using the 'smote' augmentation type.
type_augm_genfeatures = 'smote'

kf_CV = Kfold_CV()

# The augmentation type is appended to the study name and to both model paths,
# so this run's names differ from those of other augmentation types.
kf_CV(
    build_dataloader_pipeline=pipe_data_load,
    num_epochs=100,
    n_folds=3,
    cell_line=cell_line,
    sequence=False,      # presumably selects the non-sequence input — confirm in Kfold_CV
    augmentation=True,
    type_augm_genfeatures=type_augm_genfeatures,
    model=model,
    device=device,
    task=task,
    sampler='TPE',
    study_name=f'{cell_line}_{task}_{model.__name__}_{type_augm_genfeatures}',
    hp_model_path=f'{cell_line}_{task}_{model.__name__}_{type_augm_genfeatures}_HP',
    test_model_path=f'{cell_line}_{task}_{model.__name__}_{type_augm_genfeatures}_TEST',
)

>>> ITERATION N. 1



  pruner=optuna.pruners.PatientPruner(optuna.pruners.MedianPruner(), patience=2),
[32m[I 2021-10-04 19:33:47,911][0m Using an existing study with name 'A549_active_EP_vs_inactive_rest_FFNN_smote_1' instead of creating a new one.[0m


Study statistics: 
  Number of finished trials:  3
  Number of pruned trials:  0
  Number of complete trials:  3
Best trial:
  Value:  0.09660056710893344
  Params: 
    dropout_l0: 0.4
    lr: 0.00031455750547806395
    n_layers: 1
    n_units_l0: 32
    optimizer: Adam
    weight_decay: 0.00034027022870006317

AUPRC test score: 0.08902096738453834


>>> ITERATION N. 2



  pruner=optuna.pruners.PatientPruner(optuna.pruners.MedianPruner(), patience=2),
[32m[I 2021-10-04 19:33:50,738][0m Using an existing study with name 'A549_active_EP_vs_inactive_rest_FFNN_smote_1_2' instead of creating a new one.[0m


Study statistics: 
  Number of finished trials:  3
  Number of pruned trials:  0
  Number of complete trials:  3
Best trial:
  Value:  0.08639830508474577
  Params: 
    dropout_l0: 0.3
    lr: 0.0216429316710965
    n_layers: 1
    n_units_l0: 64
    optimizer: RMSprop
    weight_decay: 0.002263964060991934

AUPRC test score: 0.08438783846872092


>>> ITERATION N. 3



  pruner=optuna.pruners.PatientPruner(optuna.pruners.MedianPruner(), patience=2),
[32m[I 2021-10-04 19:33:53,010][0m Using an existing study with name 'A549_active_EP_vs_inactive_rest_FFNN_smote_1_2_3' instead of creating a new one.[0m


Study statistics: 
  Number of finished trials:  3
  Number of pruned trials:  0
  Number of complete trials:  3
Best trial:
  Value:  0.08605311355311358
  Params: 
    dropout_l0: 0.2
    dropout_l1: 0.3
    lr: 0.0026902025539661833
    n_layers: 2
    n_units_l0: 256
    n_units_l1: 32
    optimizer: RMSprop
    weight_decay: 0.0006167961367174577

AUPRC test score: 0.08370228410513138



3-FOLD CROSS-VALIDATION AUPRC TEST SCORE: 0.0857


In [56]:
# Store this run's scores; the key combines the model name with the augmentation type,
# which keeps runs with different augmentation types under separate keys.
results_dict[cell_line][task][f'{model.__name__}_{type_augm_genfeatures}'] = kf_CV.scores_dict

In [57]:
# Persist the updated results; only the top level is converted to an OrderedDict.
with open('results_dict.pickle', mode='wb') as fout:
    pickle.dump(OrderedDict(results_dict), fout)

In [58]:
# IMBALANCED task: run the cross-validation with augmentation enabled,
# using the 'double' augmentation type.
type_augm_genfeatures = 'double'

kf_CV = Kfold_CV()

# The augmentation type is appended to the study name and to both model paths,
# so this run's names differ from those of other augmentation types.
kf_CV(
    build_dataloader_pipeline=pipe_data_load,
    num_epochs=100,
    n_folds=3,
    cell_line=cell_line,
    sequence=False,      # presumably selects the non-sequence input — confirm in Kfold_CV
    augmentation=True,
    type_augm_genfeatures=type_augm_genfeatures,
    model=model,
    device=device,
    task=task,
    sampler='TPE',
    study_name=f'{cell_line}_{task}_{model.__name__}_{type_augm_genfeatures}',
    hp_model_path=f'{cell_line}_{task}_{model.__name__}_{type_augm_genfeatures}_HP',
    test_model_path=f'{cell_line}_{task}_{model.__name__}_{type_augm_genfeatures}_TEST',
)

  pruner=optuna.pruners.PatientPruner(optuna.pruners.MedianPruner(), patience=2),
[32m[I 2021-10-03 23:22:02,165][0m A new study created in RDB with name: A549_active_EP_vs_inactive_rest_FFNN_double_1[0m


>>> ITERATION N. 1



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-03 23:24:50,921][0m Trial 0 finished with value: 0.08397140994598624 and parameters: {'n_layers': 3, 'n_units_l0': 256, 'dropout_l0': 0.4, 'n_units_l1': 128, 'dropout_l1': 0.0, 'n_units_l2': 4, 'dropout_l2': 0.4, 'optimizer': 'Nadam', 'lr': 2.4550263011194814e-05, 'weight_decay': 0.014915414933087647}. Best is trial 0 with value: 0.08397140994598624.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-03 23:28:51,055][0m Trial 1 finished with value: 0.08429968336748002 and parameters: {'n_layers': 4, 'n_units_l0': 128, 'dropout_l0': 0.4, 'n_units_l1': 16, 'dropout_l1': 0.0, 'n_units_l2': 16, 'dropout_l2': 0.0, 'n_units_l3': 32, 'dropout_l3': 0.0, 'optimizer': 'RMSprop', 'lr': 0.030352826584127253, 'weight_decay': 0.020427152549148585}. Best is trial 1 with value: 0.08429968336748002.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

  recall = tps / tps[-1]


EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-03 23:31:34,279][0m Trial 2 finished with value: 0.08397140994598624 and parameters: {'n_layers': 3, 'n_units_l0': 256, 'dropout_l0': 0.2, 'n_units_l1': 32, 'dropout_l1': 0.4, 'n_units_l2': 16, 'dropout_l2': 0.0, 'optimizer': 'RMSprop', 'lr': 0.03549441387183631, 'weight_decay': 0.0031842037422979523}. Best is trial 1 with value: 0.08429968336748002.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training
Study statistics: 
  Number of finished trials:  3
  Number of pruned trials:  0
  Number of complete trials:  3
Best trial:
  Value:  0.08429968336748002
  Params: 
    dropout_l0: 0.4
    dropout_l1: 0.0
    dropout_l2: 0.0
    dropout_l3: 0.0
    lr: 0.030352826584127253
    n_layers: 4
    n_units_l0: 128
    n_units_l1: 16
    n_units_l2: 16
    n_units_l3: 32
    optimizer: RMSprop
    weight_decay: 0.020427152549148585



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


  pruner=optuna.pruners.PatientPruner(optuna.pruners.MedianPruner(), patience=2),
[32m[I 2021-10-03 23:38:20,718][0m A new study created in RDB with name: A549_active_EP_vs_inactive_rest_FFNN_double_1_2[0m


EarlyStopping counter: 5 out of 5
Early stopping the training
AUPRC test score: 0.08757440476190477


>>> ITERATION N. 2



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-03 23:43:37,175][0m Trial 0 finished with value: 0.08600437697895336 and parameters: {'n_layers': 1, 'n_units_l0': 128, 'dropout_l0': 0.4, 'optimizer': 'Adam', 'lr': 0.005450747063461496, 'weight_decay': 0.00016020551377362763}. Best is trial 0 with value: 0.08600437697895336.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

  recall = tps / tps[-1]


EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-03 23:46:48,456][0m Trial 1 finished with value: 0.08607003166325207 and parameters: {'n_layers': 4, 'n_units_l0': 256, 'dropout_l0': 0.0, 'n_units_l1': 64, 'dropout_l1': 0.2, 'n_units_l2': 64, 'dropout_l2': 0.0, 'n_units_l3': 16, 'dropout_l3': 0.5, 'optimizer': 'RMSprop', 'lr': 0.04922609711281729, 'weight_decay': 0.03566623327107896}. Best is trial 1 with value: 0.08607003166325207.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-03 23:49:33,868][0m Trial 2 finished with value: 0.08613568634755077 and parameters: {'n_layers': 4, 'n_units_l0': 256, 'dropout_l0': 0.2, 'n_units_l1': 64, 'dropout_l1': 0.4, 'n_units_l2': 16, 'dropout_l2': 0.0, 'n_units_l3': 32, 'dropout_l3': 0.5, 'optimizer': 'Adam', 'lr': 3.9346798606441724e-05, 'weight_decay': 0.06248241357560623}. Best is trial 2 with value: 0.08613568634755077.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training
Study statistics: 
  Number of finished trials:  3
  Number of pruned trials:  0
  Number of complete trials:  3
Best trial:
  Value:  0.08613568634755077
  Params: 
    dropout_l0: 0.2
    dropout_l1: 0.4
    dropout_l2: 0.0
    dropout_l3: 0.5
    lr: 3.9346798606441724e-05
    n_layers: 4
    n_units_l0: 256
    n_units_l1: 64
    n_units_l2: 16
    n_units_l3: 32
    optimizer: Adam
    weight_decay: 0.06248241357560623



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


  pruner=optuna.pruners.PatientPruner(optuna.pruners.MedianPruner(), patience=2),
[32m[I 2021-10-03 23:57:54,814][0m A new study created in RDB with name: A549_active_EP_vs_inactive_rest_FFNN_double_1_2_3[0m


EarlyStopping counter: 5 out of 5
Early stopping the training
AUPRC test score: 0.08438997821350762


>>> ITERATION N. 3



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-04 00:05:27,930][0m Trial 0 finished with value: 0.085989010989011 and parameters: {'n_layers': 3, 'n_units_l0': 64, 'dropout_l0': 0.4, 'n_units_l1': 128, 'dropout_l1': 0.2, 'n_units_l2': 16, 'dropout_l2': 0.0, 'optimizer': 'RMSprop', 'lr': 2.052742018444849e-05, 'weight_decay': 0.002992568520070303}. Best is trial 0 with value: 0.085989010989011.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-04 00:09:27,279][0m Trial 1 finished with value: 0.0858608058608059 and parameters: {'n_layers': 3, 'n_units_l0': 64, 'dropout_l0': 0.3, 'n_units_l1': 32, 'dropout_l1': 0.3, 'n_units_l2': 16, 'dropout_l2': 0.4, 'optimizer': 'Nadam', 'lr': 0.030335932107974684, 'weight_decay': 0.003986464153007519}. Best is trial 0 with value: 0.085989010989011.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5


  recall = tps / tps[-1]


EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-04 00:15:10,737][0m Trial 2 finished with value: 0.08566849816849818 and parameters: {'n_layers': 3, 'n_units_l0': 64, 'dropout_l0': 0.3, 'n_units_l1': 16, 'dropout_l1': 0.2, 'n_units_l2': 16, 'dropout_l2': 0.0, 'optimizer': 'Adam', 'lr': 1.1125573105606508e-05, 'weight_decay': 0.00013649557499575374}. Best is trial 0 with value: 0.085989010989011.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training
Study statistics: 
  Number of finished trials:  3
  Number of pruned trials:  0
  Number of complete trials:  3
Best trial:
  Value:  0.085989010989011
  Params: 
    dropout_l0: 0.4
    dropout_l1: 0.2
    dropout_l2: 0.0
    lr: 2.052742018444849e-05
    n_layers: 3
    n_units_l0: 64
    n_units_l1: 128
    n_units_l2: 16
    optimizer: RMSprop
    weight_decay: 0.002992568520070303



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5
EarlyStopping counter: 5 out of 5
Early stopping the training
AUPRC test score: 0.08369407071339179



3-FOLD CROSS-VALIDATION AUPRC TEST SCORE: 0.08522


In [59]:
# Record this run's scores in the shared results dictionary. The innermost key
# combines the model class name with the `type_augm_genfeatures` setting.
scores_key = f'{model.__name__}_{type_augm_genfeatures}'
results_dict[cell_line][task][scores_key] = kf_CV.scores_dict

In [60]:
# Persist the accumulated results to disk. The outer mapping is converted to an
# OrderedDict first: when loaded at the top of the notebook it is wrapped in a
# defaultdict with a lambda factory, and lambdas cannot be pickled.
with open('results_dict.pickle', 'wb') as results_file:
    pickle.dump(OrderedDict(results_dict), results_file)

In [None]:
# NOTE(review): this cell originally held the bare text `SA_optuna_tuning.db`,
# which Python evaluates as attribute access on an undefined name and therefore
# raises NameError on "Restart & Run All". It is kept as a comment only.
# Presumably it is (part of) the file name of the SQLite database backing the
# Optuna studies -- TODO confirm, or delete this cell.

---
### 2. CNN

In [None]:
# Select the CNN architecture for the cells that follow.
model = CNN

In [None]:
# IMBALANCED
# 3-fold cross-validation of the currently selected `model` on the current
# `task`, with the TPE sampler, sequence input enabled and augmentation on.
# The study name and both model paths share one prefix built from the
# cell line, the task and the model class name.
run_prefix = f'{cell_line}_{task}_{model.__name__}'

kf_CV = Kfold_CV()
kf_CV(
    build_dataloader_pipeline=pipe_data_load,
    num_epochs=100,
    n_folds=3,
    cell_line=cell_line,
    sequence=True,
    augmentation=True,
    model=model,
    device=device,
    task=task,
    sampler='TPE',
    study_name=run_prefix,
    hp_model_path=f'{run_prefix}_HP',
    test_model_path=f'{run_prefix}_TEST',
)

In [None]:
# Record this run's scores in the shared results dictionary, keyed by
# cell line, task and model class name.
model_key = model.__name__
results_dict[cell_line][task][model_key] = kf_CV.scores_dict

In [60]:
# Ad-hoc inspection: show the value stored under 'average_CV_AUPRC' for the
# FFNN entry of the 'active_P_vs_inactive_P' task on cell line A549.
ffnn_promoter_scores = results_dict['A549']['active_P_vs_inactive_P']['FFNN']
ffnn_promoter_scores['average_CV_AUPRC']

0.10317

In [None]:
# Persist the accumulated results to disk. The outer mapping is converted to an
# OrderedDict first: when loaded at the top of the notebook it is wrapped in a
# defaultdict with a lambda factory, and lambdas cannot be pickled.
with open('results_dict.pickle', 'wb') as results_file:
    pickle.dump(OrderedDict(results_dict), results_file)

---
### 3. CNN-LSTM

In [None]:
# Select the CNN-LSTM architecture for the cells that follow.
model = CNN_LSTM

In [None]:
# 3-fold cross-validation of the currently selected `model` on the current
# `task`, with the TPE sampler, sequence input enabled and augmentation on.
# The study name and both model paths share one prefix built from the
# cell line, the task and the model class name.
run_prefix = f'{cell_line}_{task}_{model.__name__}'

kf_CV = Kfold_CV()
kf_CV(
    build_dataloader_pipeline=pipe_data_load,
    num_epochs=100,
    n_folds=3,
    cell_line=cell_line,
    sequence=True,
    augmentation=True,
    model=model,
    device=device,
    task=task,
    sampler='TPE',
    study_name=run_prefix,
    hp_model_path=f'{run_prefix}_HP',
    test_model_path=f'{run_prefix}_TEST',
)

In [None]:
# Record this run's scores in the shared results dictionary, keyed by
# cell line, task and model class name.
model_key = model.__name__
results_dict[cell_line][task][model_key] = kf_CV.scores_dict

In [None]:
# Persist the accumulated results to disk. The outer mapping is converted to an
# OrderedDict first: when loaded at the top of the notebook it is wrapped in a
# defaultdict with a lambda factory, and lambdas cannot be pickled.
with open('results_dict.pickle', 'wb') as results_file:
    pickle.dump(OrderedDict(results_dict), results_file)