## BIOINFORMATICS THESIS: MULTIMODAL NEURAL NETWORK

# CELL LINE: K562

In [1]:
import pandas as pd 
import numpy as np
import os

import torch
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.optim as optim
from collections import defaultdict, OrderedDict
import pickle

import sqlite3
from sqlalchemy import create_engine

# Run on the GPU when PyTorch can see a CUDA device, otherwise on the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

In [2]:
# Create the SQLite database used as the storage backend for the Optuna studies.
#engine = create_engine('sqlite:///BIOINF_optuna_tuning.db')

In [3]:
from BIOINF_tesi.data_pipe import CELL_LINES, TASKS

In [4]:
# Select the cell line analysed in this notebook; the recorded output below
# shows that index 5 of CELL_LINES is 'K562'.
cell_line = CELL_LINES[5]
cell_line

'K562'

---

In [5]:
from BIOINF_tesi.data_pipe import Load_Create_Task
from BIOINF_tesi.data_pipe import Build_DataLoader_Pipeline

In [6]:
from BIOINF_tesi.models import FFNN, CNN, CNN_LSTM
from BIOINF_tesi.models.utils import fit, Param_Search, Kfold_CV

In [7]:
# Load the scores saved by earlier runs so that new results are added to them.
# The store auto-creates its first two levels on access; the innermost level
# is a plain dict.
# On a fresh checkout 'results_dict.pickle' does not exist yet: in that case
# start from an empty store instead of failing with FileNotFoundError.
results_dict = defaultdict(lambda: defaultdict(dict))
try:
    with open('results_dict.pickle', 'rb') as fin:
        # NOTE(review): pickle.load can execute arbitrary code -- only load a
        # results file that was produced by this project.
        results_dict.update(pickle.load(fin))
except FileNotFoundError:
    # Nothing saved yet; keep the empty store created above.
    pass

## 1) ACTIVE ENHANCERS vs INACTIVE ENHANCERS

In [8]:
# Select the first entry of TASKS; the recorded output below shows it is
# 'active_E_vs_inactive_E'.
task = TASKS[0]
task

'active_E_vs_inactive_E'

In [9]:
# Instantiate Build_DataLoader_Pipeline on the pickle file named after the
# current task ('<task>.pickle').
pipe_data_load = Build_DataLoader_Pipeline(path_name=f'{task}.pickle')

Data Preprocessing Done!


### 1. FFNN

In [10]:
# Model class handed to the cross-validation runs of this section; its
# __name__ is also used to build study names and result keys.
model=FFNN

In [11]:
# IMBALANCED task: rebalancing is switched on for this run.
# The augmentation tag is passed to the run and also embedded in the study
# name and in both model paths, so runs with different tags stay separate.
type_augm_genfeatures = 'smote'

kf_CV = Kfold_CV()

# NOTE(review): the recorded log shows study names growing as '_1', '_1_2',
# '_1_2_3' across iterations -- presumably a suffix appended inside Kfold_CV
# on top of `study_name`; verify this is intended.
kf_CV(
    build_dataloader_pipeline=pipe_data_load,
    num_epochs=100,
    n_folds=3,
    cell_line=cell_line,
    sequence=False,
    rebalancing=True,
    type_augm_genfeatures=type_augm_genfeatures,
    model=model,
    device=device,
    task=task,
    sampler='TPE',
    study_name=f'{cell_line}_{task}_{model.__name__}_{type_augm_genfeatures}',
    hp_model_path=f'{cell_line}_{task}_{model.__name__}_{type_augm_genfeatures}_HP',
    test_model_path=f'{cell_line}_{task}_{model.__name__}_{type_augm_genfeatures}_TEST',
)

>>> ITERATION N. 1



  pruner=optuna.pruners.PatientPruner(optuna.pruners.MedianPruner(), patience=2),
[32m[I 2021-10-11 22:31:20,315][0m A new study created in RDB with name: K562_active_E_vs_inactive_E_FFNN_smote_1[0m


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-11 22:37:24,109][0m Trial 0 finished with value: 0.0865316901408451 and parameters: {'n_layers': 3, 'n_units_l0': 128, 'dropout_l0': 0.3, 'n_units_l1': 16, 'dropout_l1': 0.4, 'n_units_l2': 32, 'dropout_l2': 0.5, 'optimizer': 'RMSprop', 'lr': 0.08989603541802824, 'weight_decay': 0.04817876843912466}. Best is trial 0 with value: 0.0865316901408451.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

	add(Number alpha, Tensor other)
Consider using one of the following signatures instead:
	add(Tensor other, *, Number alpha) (Triggered internally at  ../torch/csrc/utils/python_arg_parser.cpp:1025.)
  grad = grad.add(group['weight_decay'], p.data)


EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-11 22:40:59,492][0m Trial 1 finished with value: 0.13861021234374168 and parameters: {'n_layers': 1, 'n_units_l0': 64, 'dropout_l0': 0.2, 'optimizer': 'Nadam', 'lr': 4.496830135760936e-05, 'weight_decay': 0.07753309189387532}. Best is trial 1 with value: 0.13861021234374168.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-11 22:44:34,810][0m Trial 2 finished with value: 0.08668133802816902 and parameters: {'n_layers': 3, 'n_units_l0': 32, 'dropout_l0': 0.4, 'n_units_l1': 16, 'dropout_l1': 0.4, 'n_units_l2': 64, 'dropout_l2': 0.5, 'optimizer': 'Adam', 'lr': 1.6755223239579784e-05, 'weight_decay': 0.00950999289632912}. Best is trial 1 with value: 0.13861021234374168.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training
Study statistics: 
  Number of finished trials:  3
  Number of pruned trials:  0
  Number of complete trials:  3
Best trial:
  Value:  0.13861021234374168
  Params: 
    dropout_l0: 0.2
    lr: 4.496830135760936e-05
    n_layers: 1
    n_units_l0: 64
    optimizer: Nadam
    weight_decay: 0.07753309189387532





Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5
EarlyStopping counter: 5 out of 5
Early stopping the training
AUPRC test score: 0.13205896146857302


>>> ITERATION N. 2



  pruner=optuna.pruners.PatientPruner(optuna.pruners.MedianPruner(), patience=2),
[32m[I 2021-10-11 22:50:56,911][0m A new study created in RDB with name: K562_active_E_vs_inactive_E_FFNN_smote_1_2[0m


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-11 23:00:12,842][0m Trial 0 finished with value: 0.08748239436619722 and parameters: {'n_layers': 3, 'n_units_l0': 256, 'dropout_l0': 0.4, 'n_units_l1': 64, 'dropout_l1': 0.3, 'n_units_l2': 64, 'dropout_l2': 0.5, 'optimizer': 'RMSprop', 'lr': 0.017763418858336533, 'weight_decay': 0.0003227191369429108}. Best is trial 0 with value: 0.08748239436619722.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-11 23:07:27,264][0m Trial 1 finished with value: 0.08673415492957748 and parameters: {'n_layers': 3, 'n_units_l0': 128, 'dropout_l0': 0.0, 'n_units_l1': 32, 'dropout_l1': 0.0, 'n_units_l2': 32, 'dropout_l2': 0.4, 'optimizer': 'RMSprop', 'lr': 0.00029709186710367866, 'weight_decay': 0.023436626730391104}. Best is trial 0 with value: 0.08748239436619722.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-11 23:11:39,890][0m Trial 2 finished with value: 0.08703345070422533 and parameters: {'n_layers': 4, 'n_units_l0': 32, 'dropout_l0': 0.4, 'n_units_l1': 32, 'dropout_l1': 0.3, 'n_units_l2': 64, 'dropout_l2': 0.0, 'n_units_l3': 32, 'dropout_l3': 0.0, 'optimizer': 'Nadam', 'lr': 2.3824970982371686e-05, 'weight_decay': 0.013404445219448205}. Best is trial 0 with value: 0.08748239436619722.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training
Study statistics: 
  Number of finished trials:  3
  Number of pruned trials:  0
  Number of complete trials:  3
Best trial:
  Value:  0.08748239436619722
  Params: 
    dropout_l0: 0.4
    dropout_l1: 0.3
    dropout_l2: 0.5
    lr: 0.017763418858336533
    n_layers: 3
    n_units_l0: 256
    n_units_l1: 64
    n_units_l2: 64
    optimizer: RMSprop
    weight_decay: 0.0003227191369429108



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5
EarlyStopping counter: 5 out of 5
Early stopping the training
AUPRC test score: 0.17485585000170564


>>> ITERATION N. 3



  pruner=optuna.pruners.PatientPruner(optuna.pruners.MedianPruner(), patience=2),
[32m[I 2021-10-11 23:19:10,596][0m A new study created in RDB with name: K562_active_E_vs_inactive_E_FFNN_smote_1_2_3[0m


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-11 23:22:45,817][0m Trial 0 finished with value: 0.15460474106054087 and parameters: {'n_layers': 1, 'n_units_l0': 32, 'dropout_l0': 0.4, 'optimizer': 'RMSprop', 'lr': 8.084654116773548e-05, 'weight_decay': 0.027495774832892773}. Best is trial 0 with value: 0.15460474106054087.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-11 23:29:20,894][0m Trial 1 finished with value: 0.08808098591549302 and parameters: {'n_layers': 3, 'n_units_l0': 32, 'dropout_l0': 0.3, 'n_units_l1': 32, 'dropout_l1': 0.4, 'n_units_l2': 64, 'dropout_l2': 0.5, 'optimizer': 'Adam', 'lr': 0.0001445022161429528, 'weight_decay': 0.026142895571803902}. Best is trial 0 with value: 0.15460474106054087.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-11 23:32:57,701][0m Trial 2 finished with value: 0.15907638075008804 and parameters: {'n_layers': 3, 'n_units_l0': 64, 'dropout_l0': 0.2, 'n_units_l1': 64, 'dropout_l1': 0.4, 'n_units_l2': 32, 'dropout_l2': 0.0, 'optimizer': 'RMSprop', 'lr': 3.891108536290205e-05, 'weight_decay': 0.0009862765076491997}. Best is trial 2 with value: 0.15907638075008804.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training
Study statistics: 
  Number of finished trials:  3
  Number of pruned trials:  0
  Number of complete trials:  3
Best trial:
  Value:  0.15907638075008804
  Params: 
    dropout_l0: 0.2
    dropout_l1: 0.4
    dropout_l2: 0.0
    lr: 3.891108536290205e-05
    n_layers: 3
    n_units_l0: 64
    n_units_l1: 64
    n_units_l2: 32
    optimizer: RMSprop
    weight_decay: 0.0009862765076491997



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5
EarlyStopping counter: 5 out of 5
Early stopping the training
AUPRC test score: 0.16145775108191054



3-FOLD CROSS-VALIDATION AUPRC TEST SCORE: 0.15612


In [12]:
# File this run's scores_dict under cell line -> task ->
# '<model name>_<augmentation tag>'.
results_dict[cell_line][task][f'{model.__name__}_{type_augm_genfeatures}'] = kf_CV.scores_dict

In [13]:
# Persist the scores gathered so far without ever leaving a truncated file.
# Opening 'results_dict.pickle' in 'wb' mode empties it immediately, so an
# error or interrupt during pickling would destroy every previously saved
# result. Serialise first, write to a temporary file, then swap it in place
# with os.replace only once the write has succeeded.
# The store is copied into an OrderedDict before pickling (presumably because
# the lambda default factory of the top-level defaultdict cannot be pickled).
serialized_results = pickle.dumps(OrderedDict(results_dict))
with open('results_dict.pickle.tmp', 'wb') as fout:
    fout.write(serialized_results)
os.replace('results_dict.pickle.tmp', 'results_dict.pickle')

In [14]:
# IMBALANCED task: rebalancing is switched on for this run.
# Same set-up as the run tagged 'smote', but with the 'double' augmentation
# tag; the tag is embedded in the study name and in both model paths.
type_augm_genfeatures = 'double'

kf_CV = Kfold_CV()

kf_CV(
    build_dataloader_pipeline=pipe_data_load,
    num_epochs=100,
    n_folds=3,
    cell_line=cell_line,
    sequence=False,
    rebalancing=True,
    type_augm_genfeatures=type_augm_genfeatures,
    model=model,
    device=device,
    task=task,
    sampler='TPE',
    study_name=f'{cell_line}_{task}_{model.__name__}_{type_augm_genfeatures}',
    hp_model_path=f'{cell_line}_{task}_{model.__name__}_{type_augm_genfeatures}_HP',
    test_model_path=f'{cell_line}_{task}_{model.__name__}_{type_augm_genfeatures}_TEST',
)

>>> ITERATION N. 1



  pruner=optuna.pruners.PatientPruner(optuna.pruners.MedianPruner(), patience=2),
[32m[I 2021-10-11 23:38:27,541][0m A new study created in RDB with name: K562_active_E_vs_inactive_E_FFNN_double_1[0m


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-11 23:42:06,167][0m Trial 0 finished with value: 0.16197792747002746 and parameters: {'n_layers': 1, 'n_units_l0': 128, 'dropout_l0': 0.2, 'optimizer': 'RMSprop', 'lr': 0.00012028090535809403, 'weight_decay': 0.01620646565837296}. Best is trial 0 with value: 0.16197792747002746.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-11 23:47:40,466][0m Trial 1 finished with value: 0.08638204225352115 and parameters: {'n_layers': 1, 'n_units_l0': 256, 'dropout_l0': 0.3, 'optimizer': 'Nadam', 'lr': 0.07390293939387886, 'weight_decay': 0.06855349192833118}. Best is trial 0 with value: 0.16197792747002746.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-11 23:51:54,214][0m Trial 2 finished with value: 0.08638204225352117 and parameters: {'n_layers': 4, 'n_units_l0': 64, 'dropout_l0': 0.0, 'n_units_l1': 16, 'dropout_l1': 0.3, 'n_units_l2': 16, 'dropout_l2': 0.0, 'n_units_l3': 4, 'dropout_l3': 0.4, 'optimizer': 'RMSprop', 'lr': 0.0002358805710370325, 'weight_decay': 0.03679496827199816}. Best is trial 0 with value: 0.16197792747002746.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training
Study statistics: 
  Number of finished trials:  3
  Number of pruned trials:  0
  Number of complete trials:  3
Best trial:
  Value:  0.16197792747002746
  Params: 
    dropout_l0: 0.2
    lr: 0.00012028090535809403
    n_layers: 1
    n_units_l0: 128
    optimizer: RMSprop
    weight_decay: 0.01620646565837296



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


  pruner=optuna.pruners.PatientPruner(optuna.pruners.MedianPruner(), patience=2),
[32m[I 2021-10-11 23:57:25,172][0m A new study created in RDB with name: K562_active_E_vs_inactive_E_FFNN_double_1_2[0m


EarlyStopping counter: 5 out of 5
Early stopping the training
AUPRC test score: 0.16311524764265714


>>> ITERATION N. 2



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-12 00:01:04,682][0m Trial 0 finished with value: 0.1663983012542305 and parameters: {'n_layers': 3, 'n_units_l0': 128, 'dropout_l0': 0.2, 'n_units_l1': 128, 'dropout_l1': 0.2, 'n_units_l2': 4, 'dropout_l2': 0.5, 'optimizer': 'Adam', 'lr': 0.00043059104946678074, 'weight_decay': 0.00011321221302565242}. Best is trial 0 with value: 0.1663983012542305.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-12 00:07:19,600][0m Trial 1 finished with value: 0.1680795756805088 and parameters: {'n_layers': 4, 'n_units_l0': 256, 'dropout_l0': 0.2, 'n_units_l1': 128, 'dropout_l1': 0.0, 'n_units_l2': 16, 'dropout_l2': 0.0, 'n_units_l3': 16, 'dropout_l3': 0.4, 'optimizer': 'Adam', 'lr': 0.005828798772148541, 'weight_decay': 0.00043064515666016385}. Best is trial 1 with value: 0.1680795756805088.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-12 00:12:06,909][0m Trial 2 finished with value: 0.08688380281690142 and parameters: {'n_layers': 3, 'n_units_l0': 32, 'dropout_l0': 0.2, 'n_units_l1': 32, 'dropout_l1': 0.0, 'n_units_l2': 32, 'dropout_l2': 0.4, 'optimizer': 'Nadam', 'lr': 0.007702077721342053, 'weight_decay': 0.05204213575340732}. Best is trial 1 with value: 0.1680795756805088.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training
Study statistics: 
  Number of finished trials:  3
  Number of pruned trials:  0
  Number of complete trials:  3
Best trial:
  Value:  0.1680795756805088
  Params: 
    dropout_l0: 0.2
    dropout_l1: 0.0
    dropout_l2: 0.0
    dropout_l3: 0.4
    lr: 0.005828798772148541
    n_layers: 4
    n_units_l0: 256
    n_units_l1: 128
    n_units_l2: 16
    n_units_l3: 16
    optimizer: Adam
    weight_decay: 0.00043064515666016385



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5
EarlyStopping counter: 5 out of 5
Early stopping the training
AUPRC test score: 0.17203830655801022


>>> ITERATION N. 3



  pruner=optuna.pruners.PatientPruner(optuna.pruners.MedianPruner(), patience=2),
[32m[I 2021-10-12 00:19:38,947][0m A new study created in RDB with name: K562_active_E_vs_inactive_E_FFNN_double_1_2_3[0m


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-12 00:23:13,666][0m Trial 0 finished with value: 0.08808098591549297 and parameters: {'n_layers': 3, 'n_units_l0': 32, 'dropout_l0': 0.4, 'n_units_l1': 64, 'dropout_l1': 0.0, 'n_units_l2': 4, 'dropout_l2': 0.5, 'optimizer': 'RMSprop', 'lr': 0.03185534412695715, 'weight_decay': 0.050702870038405665}. Best is trial 0 with value: 0.08808098591549297.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-12 00:30:27,965][0m Trial 1 finished with value: 0.1828160480803533 and parameters: {'n_layers': 3, 'n_units_l0': 64, 'dropout_l0': 0.3, 'n_units_l1': 128, 'dropout_l1': 0.2, 'n_units_l2': 64, 'dropout_l2': 0.5, 'optimizer': 'RMSprop', 'lr': 0.01373981558615264, 'weight_decay': 0.0001511965897094776}. Best is trial 1 with value: 0.1828160480803533.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-12 00:34:03,569][0m Trial 2 finished with value: 0.1389375657777646 and parameters: {'n_layers': 2, 'n_units_l0': 64, 'dropout_l0': 0.3, 'n_units_l1': 16, 'dropout_l1': 0.4, 'optimizer': 'RMSprop', 'lr': 1.76863358673458e-05, 'weight_decay': 0.06750312396948567}. Best is trial 1 with value: 0.1828160480803533.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training
Study statistics: 
  Number of finished trials:  3
  Number of pruned trials:  0
  Number of complete trials:  3
Best trial:
  Value:  0.1828160480803533
  Params: 
    dropout_l0: 0.3
    dropout_l1: 0.2
    dropout_l2: 0.5
    lr: 0.01373981558615264
    n_layers: 3
    n_units_l0: 64
    n_units_l1: 128
    n_units_l2: 64
    optimizer: RMSprop
    weight_decay: 0.0001511965897094776



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5
EarlyStopping counter: 5 out of 5
Early stopping the training
AUPRC test score: 0.10636966493441681



3-FOLD CROSS-VALIDATION AUPRC TEST SCORE: 0.14717


In [15]:
# File this run's scores_dict under cell line -> task ->
# '<model name>_<augmentation tag>'.
results_dict[cell_line][task][f'{model.__name__}_{type_augm_genfeatures}'] = kf_CV.scores_dict

In [16]:
# Persist the scores gathered so far without ever leaving a truncated file.
# Opening 'results_dict.pickle' in 'wb' mode empties it immediately, so an
# error or interrupt during pickling would destroy every previously saved
# result. Serialise first, write to a temporary file, then swap it in place
# with os.replace only once the write has succeeded.
# The store is copied into an OrderedDict before pickling (presumably because
# the lambda default factory of the top-level defaultdict cannot be pickled).
serialized_results = pickle.dumps(OrderedDict(results_dict))
with open('results_dict.pickle.tmp', 'wb') as fout:
    fout.write(serialized_results)
os.replace('results_dict.pickle.tmp', 'results_dict.pickle')

---
### 2. CNN

In [10]:
# Model class handed to the cross-validation run of this section; its
# __name__ is also used to build the study name and the result key.
model=CNN

In [11]:
# IMBALANCED task: rebalancing is switched on for this run.
# This run sets sequence=True and passes no augmentation tag, so the study
# name and model paths are built from cell line, task and model name only.
kf_CV = Kfold_CV()

kf_CV(
    build_dataloader_pipeline=pipe_data_load,
    num_epochs=100,
    n_folds=3,
    cell_line=cell_line,
    sequence=True,
    rebalancing=True,
    model=model,
    device=device,
    task=task,
    sampler='TPE',
    study_name=f'{cell_line}_{task}_{model.__name__}',
    hp_model_path=f'{cell_line}_{task}_{model.__name__}_HP',
    test_model_path=f'{cell_line}_{task}_{model.__name__}_TEST',
)

  pruner=optuna.pruners.PatientPruner(optuna.pruners.MedianPruner(), patience=2),
[32m[I 2021-10-12 08:46:49,156][0m A new study created in RDB with name: K562_active_E_vs_inactive_E_CNN_1[0m


>>> ITERATION N. 1



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

  return torch.max_pool1d(input, kernel_size, stride, padding, dilation, ceil_mode)


EarlyStopping counter: 1 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-12 08:57:14,501][0m Trial 0 finished with value: 0.09461115092776 and parameters: {'n_layers': 1, 'out_channels_l0': 16, 'kernel_size_l0': 5, 'dropout_l0': 0.3, 'optimizer': 'RMSprop', 'lr': 6.467766709573852e-05, 'weight_decay': 0.0005031597842287323}. Best is trial 0 with value: 0.09461115092776.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

	add(Number alpha, Tensor other)
Consider using one of the following signatures instead:
	add(Tensor other, *, Number alpha) (Triggered internally at  ../torch/csrc/utils/python_arg_parser.cpp:1025.)
  grad = grad.add(group['weight_decay'], p.data)


EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-12 09:16:44,364][0m Trial 1 finished with value: 0.09152925291950005 and parameters: {'n_layers': 2, 'out_channels_l0': 64, 'kernel_size_l0': 15, 'dropout_l0': 0, 'out_channels_l1': 64, 'kernel_size_l1': 5, 'dropout_l1': 0.5, 'optimizer': 'Nadam', 'lr': 0.00024715805395912887, 'weight_decay': 0.08379154643253978}. Best is trial 0 with value: 0.09461115092776.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-12 09:22:49,621][0m Trial 2 finished with value: 0.09672807318831325 and parameters: {'n_layers': 2, 'out_channels_l0': 16, 'kernel_size_l0': 5, 'dropout_l0': 0.4, 'out_channels_l1': 32, 'kernel_size_l1': 5, 'dropout_l1': 0.5, 'optimizer': 'Nadam', 'lr': 5.860844990278421e-05, 'weight_decay': 0.0004471972911206199}. Best is trial 2 with value: 0.09672807318831325.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training
Study statistics: 
  Number of finished trials:  3
  Number of pruned trials:  0
  Number of complete trials:  3
Best trial:
  Value:  0.09672807318831325
  Params: 
    dropout_l0: 0.4
    dropout_l1: 0.5
    kernel_size_l0: 5
    kernel_size_l1: 5
    lr: 5.860844990278421e-05
    n_layers: 2
    optimizer: Nadam
    out_channels_l0: 16
    out_channels_l1: 32
    weight_decay: 0.0004471972911206199





Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


  pruner=optuna.pruners.PatientPruner(optuna.pruners.MedianPruner(), patience=2),
[32m[I 2021-10-12 09:35:19,715][0m A new study created in RDB with name: K562_active_E_vs_inactive_E_CNN_1_2[0m


EarlyStopping counter: 5 out of 5
Early stopping the training
AUPRC test score: 0.0938416621847121


>>> ITERATION N. 2



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-12 09:45:26,587][0m Trial 0 finished with value: 0.10409813880247991 and parameters: {'n_layers': 3, 'out_channels_l0': 32, 'kernel_size_l0': 5, 'dropout_l0': 0, 'out_channels_l1': 64, 'kernel_size_l1': 15, 'dropout_l1': 0.5, 'out_channels_l2': 64, 'kernel_size_l2': 11, 'dropout_l2': 0.5, 'optimizer': 'Adam', 'lr': 0.0008573663278416317, 'weight_decay': 0.0008457504910347888}. Best is trial 0 with value: 0.10409813880247991.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-12 09:50:47,064][0m Trial 1 finished with value: 0.09519923692054895 and parameters: {'n_layers': 1, 'out_channels_l0': 16, 'kernel_size_l0': 5, 'dropout_l0': 0.3, 'optimizer': 'Adam', 'lr': 0.0009121390789651904, 'weight_decay': 0.0023137766245504457}. Best is trial 0 with value: 0.10409813880247991.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-12 10:16:15,041][0m Trial 2 finished with value: 0.08850150228844167 and parameters: {'n_layers': 4, 'out_channels_l0': 64, 'kernel_size_l0': 15, 'dropout_l0': 0.2, 'out_channels_l1': 64, 'kernel_size_l1': 5, 'dropout_l1': 0.4, 'out_channels_l2': 128, 'kernel_size_l2': 5, 'dropout_l2': 0.4, 'out_channels_l3': 256, 'kernel_size_l3': 11, 'dropout_l3': 0.5, 'optimizer': 'Nadam', 'lr': 4.7412006140064835e-05, 'weight_decay': 0.022881007939455935}. Best is trial 0 with value: 0.10409813880247991.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training
Study statistics: 
  Number of finished trials:  3
  Number of pruned trials:  0
  Number of complete trials:  3
Best trial:
  Value:  0.10409813880247991
  Params: 
    dropout_l0: 0
    dropout_l1: 0.5
    dropout_l2: 0.5
    kernel_size_l0: 5
    kernel_size_l1: 15
    kernel_size_l2: 11
    lr: 0.0008573663278416317
    n_layers: 3
    optimizer: Adam
    out_channels_l0: 32
    out_channels_l1: 64
    out_channels_l2: 64
    weight_decay: 0.0008457504910347888



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


  pruner=optuna.pruners.PatientPruner(optuna.pruners.MedianPruner(), patience=2),
[32m[I 2021-10-12 10:31:20,608][0m A new study created in RDB with name: K562_active_E_vs_inactive_E_CNN_1_2_3[0m


EarlyStopping counter: 5 out of 5
Early stopping the training
AUPRC test score: 0.10700360791542411


>>> ITERATION N. 3



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-12 10:37:16,025][0m Trial 0 finished with value: 0.09885490499419419 and parameters: {'n_layers': 1, 'out_channels_l0': 16, 'kernel_size_l0': 5, 'dropout_l0': 0, 'optimizer': 'RMSprop', 'lr': 0.0004624916299522377, 'weight_decay': 0.03887450496714792}. Best is trial 0 with value: 0.09885490499419419.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-12 10:45:11,076][0m Trial 1 finished with value: 0.08793133802816903 and parameters: {'n_layers': 3, 'out_channels_l0': 16, 'kernel_size_l0': 11, 'dropout_l0': 0.4, 'out_channels_l1': 64, 'kernel_size_l1': 11, 'dropout_l1': 0, 'out_channels_l2': 64, 'kernel_size_l2': 11, 'dropout_l2': 0.4, 'optimizer': 'Adam', 'lr': 0.0007494512319745887, 'weight_decay': 0.02678649758793899}. Best is trial 0 with value: 0.09885490499419419.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-12 11:18:15,866][0m Trial 2 finished with value: 0.08808098591549295 and parameters: {'n_layers': 4, 'out_channels_l0': 64, 'kernel_size_l0': 15, 'dropout_l0': 0.3, 'out_channels_l1': 64, 'kernel_size_l1': 11, 'dropout_l1': 0.5, 'out_channels_l2': 96, 'kernel_size_l2': 5, 'dropout_l2': 0, 'out_channels_l3': 128, 'kernel_size_l3': 15, 'dropout_l3': 0.4, 'optimizer': 'Adam', 'lr': 0.0005607959826877003, 'weight_decay': 0.0007966659424304436}. Best is trial 0 with value: 0.09885490499419419.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training
Study statistics: 
  Number of finished trials:  3
  Number of pruned trials:  0
  Number of complete trials:  3
Best trial:
  Value:  0.09885490499419419
  Params: 
    dropout_l0: 0
    kernel_size_l0: 5
    lr: 0.0004624916299522377
    n_layers: 1
    optimizer: RMSprop
    out_channels_l0: 16
    weight_decay: 0.03887450496714792



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5
EarlyStopping counter: 5 out of 5
Early stopping the training
AUPRC test score: 0.08617179741807351



3-FOLD CROSS-VALIDATION AUPRC TEST SCORE: 0.09567


In [12]:
# File this run's scores_dict under cell line -> task -> model name.
results_dict[cell_line][task][model.__name__] = kf_CV.scores_dict

In [13]:
# Persist the scores gathered so far without ever leaving a truncated file.
# Opening 'results_dict.pickle' in 'wb' mode empties it immediately, so an
# error or interrupt during pickling would destroy every previously saved
# result. Serialise first, write to a temporary file, then swap it in place
# with os.replace only once the write has succeeded.
# The store is copied into an OrderedDict before pickling (presumably because
# the lambda default factory of the top-level defaultdict cannot be pickled).
serialized_results = pickle.dumps(OrderedDict(results_dict))
with open('results_dict.pickle.tmp', 'wb') as fout:
    fout.write(serialized_results)
os.replace('results_dict.pickle.tmp', 'results_dict.pickle')

---

## 2) ACTIVE PROMOTERS vs INACTIVE PROMOTERS

In [14]:
# Switch to the second entry of TASKS; the recorded output below shows it is
# 'active_P_vs_inactive_P'.
task = TASKS[1]
task

'active_P_vs_inactive_P'

In [15]:
# Re-instantiate Build_DataLoader_Pipeline on the pickle file named after the
# newly selected task ('<task>.pickle'), replacing the previous pipeline.
pipe_data_load = Build_DataLoader_Pipeline(path_name=f'{task}.pickle')

Data Preprocessing Done!


---

### 1. FFNN

In [19]:
# Model class handed to the cross-validation run of this section; its
# __name__ is also used to build the study name and model paths.
model=FFNN

In [20]:
# Cross-validation run for the current task with rebalancing switched off and
# no augmentation tag; the study name and model paths are built from cell
# line, task and model name only.
kf_CV = Kfold_CV()

kf_CV(
    build_dataloader_pipeline=pipe_data_load,
    num_epochs=100,
    n_folds=3,
    cell_line=cell_line,
    sequence=False,
    rebalancing=False,
    model=model,
    device=device,
    task=task,
    sampler='TPE',
    study_name=f'{cell_line}_{task}_{model.__name__}',
    hp_model_path=f'{cell_line}_{task}_{model.__name__}_HP',
    test_model_path=f'{cell_line}_{task}_{model.__name__}_TEST',
)

>>> ITERATION N. 1


  pruner=optuna.pruners.PatientPruner(optuna.pruners.MedianPruner(), patience=2),
[32m[I 2021-10-12 00:50:28,665][0m A new study created in RDB with name: K562_active_P_vs_inactive_P_FFNN_1[0m





Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-12 00:55:25,352][0m Trial 0 finished with value: 0.3462949775668635 and parameters: {'n_layers': 2, 'n_units_l0': 256, 'dropout_l0': 0.3, 'n_units_l1': 128, 'dropout_l1': 0.3, 'optimizer': 'Adam', 'lr': 0.0001639030020193242, 'weight_decay': 0.0001099651233531117}. Best is trial 0 with value: 0.3462949775668635.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-12 01:01:57,024][0m Trial 1 finished with value: 0.1254697554697555 and parameters: {'n_layers': 4, 'n_units_l0': 256, 'dropout_l0': 0.3, 'n_units_l1': 16, 'dropout_l1': 0.0, 'n_units_l2': 4, 'dropout_l2': 0.0, 'n_units_l3': 16, 'dropout_l3': 0.5, 'optimizer': 'RMSprop', 'lr': 0.017387480210972736, 'weight_decay': 0.0029814552751093685}. Best is trial 0 with value: 0.3462949775668635.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-12 01:09:06,846][0m Trial 2 finished with value: 0.32333481914941437 and parameters: {'n_layers': 2, 'n_units_l0': 64, 'dropout_l0': 0.2, 'n_units_l1': 64, 'dropout_l1': 0.3, 'optimizer': 'Adam', 'lr': 0.00435324883376702, 'weight_decay': 0.00029055960705563414}. Best is trial 0 with value: 0.3462949775668635.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training
Study statistics: 
  Number of finished trials:  3
  Number of pruned trials:  0
  Number of complete trials:  3
Best trial:
  Value:  0.3462949775668635
  Params: 
    dropout_l0: 0.3
    dropout_l1: 0.3
    lr: 0.0001639030020193242
    n_layers: 2
    n_units_l0: 256
    n_units_l1: 128
    optimizer: Adam
    weight_decay: 0.0001099651233531117



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5
EarlyStopping counter: 5 out of 5
Early stopping the training
AUPRC test score: 0.35297561973610686


>>> ITERATION N. 2


  pruner=optuna.pruners.PatientPruner(optuna.pruners.MedianPruner(), patience=2),
[32m[I 2021-10-12 01:17:50,675][0m A new study created in RDB with name: K562_active_P_vs_inactive_P_FFNN_1_2[0m





Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-12 01:25:01,184][0m Trial 0 finished with value: 0.3360337273344315 and parameters: {'n_layers': 3, 'n_units_l0': 64, 'dropout_l0': 0.4, 'n_units_l1': 32, 'dropout_l1': 0.3, 'n_units_l2': 64, 'dropout_l2': 0.0, 'optimizer': 'Adam', 'lr': 0.014232374414904938, 'weight_decay': 0.0018009860045993258}. Best is trial 0 with value: 0.3360337273344315.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-12 01:32:11,201][0m Trial 1 finished with value: 0.34025817815475007 and parameters: {'n_layers': 2, 'n_units_l0': 64, 'dropout_l0': 0.2, 'n_units_l1': 128, 'dropout_l1': 0.3, 'optimizer': 'Adam', 'lr': 0.0005213264668784428, 'weight_decay': 0.0007827073978775232}. Best is trial 1 with value: 0.34025817815475007.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-12 01:39:27,138][0m Trial 2 finished with value: 0.12561040632469206 and parameters: {'n_layers': 4, 'n_units_l0': 128, 'dropout_l0': 0.2, 'n_units_l1': 64, 'dropout_l1': 0.2, 'n_units_l2': 64, 'dropout_l2': 0.5, 'n_units_l3': 32, 'dropout_l3': 0.4, 'optimizer': 'Adam', 'lr': 1.7569736936572157e-05, 'weight_decay': 0.01855103686676687}. Best is trial 1 with value: 0.34025817815475007.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training
Study statistics: 
  Number of finished trials:  3
  Number of pruned trials:  0
  Number of complete trials:  3
Best trial:
  Value:  0.34025817815475007
  Params: 
    dropout_l0: 0.2
    dropout_l1: 0.3
    lr: 0.0005213264668784428
    n_layers: 2
    n_units_l0: 64
    n_units_l1: 128
    optimizer: Adam
    weight_decay: 0.0007827073978775232



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5
EarlyStopping counter: 5 out of 5
Early stopping the training
AUPRC test score: 0.3482127133882047


>>> ITERATION N. 3


  pruner=optuna.pruners.PatientPruner(optuna.pruners.MedianPruner(), patience=2),
[32m[I 2021-10-12 01:46:41,885][0m A new study created in RDB with name: K562_active_P_vs_inactive_P_FFNN_1_2_3[0m





Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-12 01:52:16,040][0m Trial 0 finished with value: 0.12867346938775512 and parameters: {'n_layers': 3, 'n_units_l0': 64, 'dropout_l0': 0.3, 'n_units_l1': 64, 'dropout_l1': 0.0, 'n_units_l2': 4, 'dropout_l2': 0.5, 'optimizer': 'Adam', 'lr': 5.7899104247396505e-05, 'weight_decay': 0.015207166958670226}. Best is trial 0 with value: 0.12867346938775512.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-12 02:00:21,110][0m Trial 1 finished with value: 0.12867255010112152 and parameters: {'n_layers': 4, 'n_units_l0': 128, 'dropout_l0': 0.0, 'n_units_l1': 128, 'dropout_l1': 0.0, 'n_units_l2': 4, 'dropout_l2': 0.5, 'n_units_l3': 16, 'dropout_l3': 0.5, 'optimizer': 'Adam', 'lr': 0.0032585985131537306, 'weight_decay': 0.0021988451786279345}. Best is trial 0 with value: 0.12867346938775512.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-12 02:05:56,691][0m Trial 2 finished with value: 0.1312596339837236 and parameters: {'n_layers': 3, 'n_units_l0': 64, 'dropout_l0': 0.3, 'n_units_l1': 128, 'dropout_l1': 0.2, 'n_units_l2': 16, 'dropout_l2': 0.0, 'optimizer': 'RMSprop', 'lr': 1.028921694274926e-05, 'weight_decay': 0.022453369914131507}. Best is trial 2 with value: 0.1312596339837236.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training
Study statistics: 
  Number of finished trials:  3
  Number of pruned trials:  0
  Number of complete trials:  3
Best trial:
  Value:  0.1312596339837236
  Params: 
    dropout_l0: 0.3
    dropout_l1: 0.2
    dropout_l2: 0.0
    lr: 1.028921694274926e-05
    n_layers: 3
    n_units_l0: 64
    n_units_l1: 128
    n_units_l2: 16
    optimizer: RMSprop
    weight_decay: 0.022453369914131507



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5
EarlyStopping counter: 5 out of 5
Early stopping the training
AUPRC test score: 0.16695421647225367



3-FOLD CROSS-VALIDATION AUPRC TEST SCORE: 0.28938


In [21]:
# Store the scores of the run above, keyed by cell line, task and model class name.
results_dict[cell_line][task][model.__name__] = kf_CV.scores_dict

In [22]:
# Persist the accumulated results to disk.
# results_dict is a defaultdict built with a lambda factory (see the load cell at
# the top of the notebook), which pickle cannot serialise, hence the OrderedDict copy.
# The dump goes to a temporary file that is then swapped in with os.replace, so a
# failure while pickling cannot leave a truncated 'results_dict.pickle' behind and
# wipe out the results stored by earlier runs.
results_path = 'results_dict.pickle'
tmp_results_path = f'{results_path}.tmp'
with open(tmp_results_path, 'wb') as fout:
    pickle.dump(OrderedDict(results_dict), fout)
os.replace(tmp_results_path, results_path)

---
### 2. CNN

In [16]:
# Model class used in this subsection; model.__name__ also labels the study,
# the saved-model paths and the results_dict entry in the cells below.
model=CNN

In [17]:
# Run Kfold_CV for the current model on the current task.
# The keyword arguments are gathered first so the run configuration can be read
# separately from the call that launches the long-running search.
kf_CV = Kfold_CV()

cv_kwargs = dict(
    build_dataloader_pipeline=pipe_data_load,
    num_epochs=100,
    n_folds=3,
    cell_line=cell_line,
    sequence=True,
    rebalancing=False,
    model=model,
    device=device,
    task=task,
    sampler='TPE',
    # study name and both model paths share the <cell_line>_<task>_<model> prefix
    study_name=f'{cell_line}_{task}_{model.__name__}',
    hp_model_path=f'{cell_line}_{task}_{model.__name__}_HP',
    test_model_path=f'{cell_line}_{task}_{model.__name__}_TEST',
)

kf_CV(**cv_kwargs)

  pruner=optuna.pruners.PatientPruner(optuna.pruners.MedianPruner(), patience=2),
[32m[I 2021-10-12 11:32:46,679][0m A new study created in RDB with name: K562_active_P_vs_inactive_P_CNN_1[0m


>>> ITERATION N. 1



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-12 11:55:09,819][0m Trial 0 finished with value: 0.1848284357517103 and parameters: {'n_layers': 1, 'out_channels_l0': 64, 'kernel_size_l0': 5, 'dropout_l0': 0.4, 'optimizer': 'RMSprop', 'lr': 0.005701566059025464, 'weight_decay': 0.00683843177528163}. Best is trial 0 with value: 0.1848284357517103.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-12 12:19:26,431][0m Trial 1 finished with value: 0.12560166532192557 and parameters: {'n_layers': 3, 'out_channels_l0': 32, 'kernel_size_l0': 5, 'dropout_l0': 0.2, 'out_channels_l1': 96, 'kernel_size_l1': 15, 'dropout_l1': 0.4, 'out_channels_l2': 64, 'kernel_size_l2': 15, 'dropout_l2': 0.4, 'optimizer': 'RMSprop', 'lr': 0.02613038706489536, 'weight_decay': 0.017318484962950877}. Best is trial 0 with value: 0.1848284357517103.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-12 12:28:04,829][0m Trial 2 finished with value: 0.23819194998630996 and parameters: {'n_layers': 2, 'out_channels_l0': 16, 'kernel_size_l0': 5, 'dropout_l0': 0.2, 'out_channels_l1': 32, 'kernel_size_l1': 5, 'dropout_l1': 0, 'optimizer': 'Nadam', 'lr': 0.0027022020537939145, 'weight_decay': 0.0030706631721653618}. Best is trial 2 with value: 0.23819194998630996.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training
Study statistics: 
  Number of finished trials:  3
  Number of pruned trials:  0
  Number of complete trials:  3
Best trial:
  Value:  0.23819194998630996
  Params: 
    dropout_l0: 0.2
    dropout_l1: 0
    kernel_size_l0: 5
    kernel_size_l1: 5
    lr: 0.0027022020537939145
    n_layers: 2
    optimizer: Nadam
    out_channels_l0: 16
    out_channels_l1: 32
    weight_decay: 0.0030706631721653618



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


  pruner=optuna.pruners.PatientPruner(optuna.pruners.MedianPruner(), patience=2),
[32m[I 2021-10-12 12:50:09,688][0m A new study created in RDB with name: K562_active_P_vs_inactive_P_CNN_1_2[0m


EarlyStopping counter: 5 out of 5
Early stopping the training
AUPRC test score: 0.2339605494032059


>>> ITERATION N. 2



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-12 13:36:41,463][0m Trial 0 finished with value: 0.16207297133442722 and parameters: {'n_layers': 1, 'out_channels_l0': 64, 'kernel_size_l0': 15, 'dropout_l0': 0.4, 'optimizer': 'RMSprop', 'lr': 0.018698013655896274, 'weight_decay': 0.007484818852498159}. Best is trial 0 with value: 0.16207297133442722.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-12 13:52:14,335][0m Trial 1 finished with value: 0.20396069255166918 and parameters: {'n_layers': 2, 'out_channels_l0': 16, 'kernel_size_l0': 11, 'dropout_l0': 0, 'out_channels_l1': 64, 'kernel_size_l1': 5, 'dropout_l1': 0, 'optimizer': 'Nadam', 'lr': 1.0069750847495368e-05, 'weight_decay': 0.015848392145378438}. Best is trial 1 with value: 0.20396069255166918.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-12 14:00:53,460][0m Trial 2 finished with value: 0.19775391171452614 and parameters: {'n_layers': 1, 'out_channels_l0': 16, 'kernel_size_l0': 15, 'dropout_l0': 0.2, 'optimizer': 'Nadam', 'lr': 2.402350721321623e-05, 'weight_decay': 0.09498476684536594}. Best is trial 1 with value: 0.20396069255166918.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training
Study statistics: 
  Number of finished trials:  3
  Number of pruned trials:  0
  Number of complete trials:  3
Best trial:
  Value:  0.20396069255166918
  Params: 
    dropout_l0: 0
    dropout_l1: 0
    kernel_size_l0: 11
    kernel_size_l1: 5
    lr: 1.0069750847495368e-05
    n_layers: 2
    optimizer: Nadam
    out_channels_l0: 16
    out_channels_l1: 64
    weight_decay: 0.015848392145378438



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


  pruner=optuna.pruners.PatientPruner(optuna.pruners.MedianPruner(), patience=2),
[32m[I 2021-10-12 14:21:07,298][0m A new study created in RDB with name: K562_active_P_vs_inactive_P_CNN_1_2_3[0m


EarlyStopping counter: 5 out of 5
Early stopping the training
AUPRC test score: 0.2021318476416643


>>> ITERATION N. 3



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-12 14:42:08,928][0m Trial 0 finished with value: 0.24844796603333708 and parameters: {'n_layers': 1, 'out_channels_l0': 64, 'kernel_size_l0': 11, 'dropout_l0': 0.3, 'optimizer': 'Adam', 'lr': 0.0031527649209093034, 'weight_decay': 0.00044860548918871013}. Best is trial 0 with value: 0.24844796603333708.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-12 15:26:09,587][0m Trial 1 finished with value: 0.1609748503092002 and parameters: {'n_layers': 3, 'out_channels_l0': 64, 'kernel_size_l0': 5, 'dropout_l0': 0.4, 'out_channels_l1': 96, 'kernel_size_l1': 11, 'dropout_l1': 0, 'out_channels_l2': 256, 'kernel_size_l2': 15, 'dropout_l2': 0, 'optimizer': 'Adam', 'lr': 0.004018627872268393, 'weight_decay': 0.010673404157697677}. Best is trial 0 with value: 0.24844796603333708.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-12 15:37:58,730][0m Trial 2 finished with value: 0.2394728006625121 and parameters: {'n_layers': 1, 'out_channels_l0': 32, 'kernel_size_l0': 15, 'dropout_l0': 0.4, 'optimizer': 'Adam', 'lr': 0.0033025339374387905, 'weight_decay': 0.0033168947572443184}. Best is trial 0 with value: 0.24844796603333708.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training
Study statistics: 
  Number of finished trials:  3
  Number of pruned trials:  0
  Number of complete trials:  3
Best trial:
  Value:  0.24844796603333708
  Params: 
    dropout_l0: 0.3
    kernel_size_l0: 11
    lr: 0.0031527649209093034
    n_layers: 1
    optimizer: Adam
    out_channels_l0: 64
    weight_decay: 0.00044860548918871013



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5
EarlyStopping counter: 5 out of 5
Early stopping the training
AUPRC test score: 0.24195604181140212



3-FOLD CROSS-VALIDATION AUPRC TEST SCORE: 0.22602


In [18]:
# Store the scores of the run above, keyed by cell line, task and model class name.
results_dict[cell_line][task][model.__name__] = kf_CV.scores_dict

In [19]:
# Persist the accumulated results to disk.
# results_dict is a defaultdict built with a lambda factory (see the load cell at
# the top of the notebook), which pickle cannot serialise, hence the OrderedDict copy.
# The dump goes to a temporary file that is then swapped in with os.replace, so a
# failure while pickling cannot leave a truncated 'results_dict.pickle' behind and
# wipe out the results stored by earlier runs.
results_path = 'results_dict.pickle'
tmp_results_path = f'{results_path}.tmp'
with open(tmp_results_path, 'wb') as fout:
    pickle.dump(OrderedDict(results_dict), fout)
os.replace(tmp_results_path, results_path)

---

## 3) ACTIVE ENHANCERS vs ACTIVE PROMOTERS

In [20]:
# Switch to the third entry of TASKS (index 2); the bare `task` echoes the selected name.
task = TASKS[2]
task

'active_E_vs_active_P'

In [21]:
# Rebuild the data-loading pipeline from the pickle file named after the newly selected task.
pipe_data_load = Build_DataLoader_Pipeline(path_name=f'{task}.pickle')

Data Preprocessing Done!


---
### 1. FFNN

In [25]:
# Model class used in this subsection; model.__name__ also labels the study,
# the saved-model paths and the results_dict entry in the cells below.
model=FFNN

In [26]:
# Run Kfold_CV for the current model on the current task.
# The keyword arguments are gathered first so the run configuration can be read
# separately from the call that launches the long-running search.
kf_CV = Kfold_CV()

cv_kwargs = dict(
    build_dataloader_pipeline=pipe_data_load,
    num_epochs=100,
    n_folds=3,
    cell_line=cell_line,
    sequence=False,
    rebalancing=False,
    model=model,
    device=device,
    task=task,
    sampler='TPE',
    # study name and both model paths share the <cell_line>_<task>_<model> prefix
    study_name=f'{cell_line}_{task}_{model.__name__}',
    hp_model_path=f'{cell_line}_{task}_{model.__name__}_HP',
    test_model_path=f'{cell_line}_{task}_{model.__name__}_TEST',
)

kf_CV(**cv_kwargs)

>>> ITERATION N. 1


  pruner=optuna.pruners.PatientPruner(optuna.pruners.MedianPruner(), patience=2),
[32m[I 2021-10-12 02:13:11,723][0m A new study created in RDB with name: K562_active_E_vs_active_P_FFNN_1[0m





Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-12 02:14:03,928][0m Trial 0 finished with value: 0.7936432060497448 and parameters: {'n_layers': 4, 'n_units_l0': 64, 'dropout_l0': 0.3, 'n_units_l1': 128, 'dropout_l1': 0.3, 'n_units_l2': 4, 'dropout_l2': 0.0, 'n_units_l3': 16, 'dropout_l3': 0.5, 'optimizer': 'Nadam', 'lr': 0.00017428893461443184, 'weight_decay': 0.0003182658734637611}. Best is trial 0 with value: 0.7936432060497448.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-12 02:14:56,072][0m Trial 1 finished with value: 0.7974051397589108 and parameters: {'n_layers': 1, 'n_units_l0': 128, 'dropout_l0': 0.2, 'optimizer': 'Adam', 'lr': 0.015548488251772245, 'weight_decay': 0.0006615660039962391}. Best is trial 1 with value: 0.7974051397589108.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-12 02:16:13,214][0m Trial 2 finished with value: 0.785662531710508 and parameters: {'n_layers': 1, 'n_units_l0': 64, 'dropout_l0': 0.0, 'optimizer': 'RMSprop', 'lr': 0.0021619533087973842, 'weight_decay': 0.00048288884385289034}. Best is trial 1 with value: 0.7974051397589108.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training
Study statistics: 
  Number of finished trials:  3
  Number of pruned trials:  0
  Number of complete trials:  3
Best trial:
  Value:  0.7974051397589108
  Params: 
    dropout_l0: 0.2
    lr: 0.015548488251772245
    n_layers: 1
    n_units_l0: 128
    optimizer: Adam
    weight_decay: 0.0006615660039962391



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


  pruner=optuna.pruners.PatientPruner(optuna.pruners.MedianPruner(), patience=2),
[32m[I 2021-10-12 02:17:31,538][0m A new study created in RDB with name: K562_active_E_vs_active_P_FFNN_1_2[0m


EarlyStopping counter: 5 out of 5
Early stopping the training
AUPRC test score: 0.7574873553695822


>>> ITERATION N. 2



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-12 02:18:23,120][0m Trial 0 finished with value: 0.8063492321521744 and parameters: {'n_layers': 4, 'n_units_l0': 32, 'dropout_l0': 0.0, 'n_units_l1': 32, 'dropout_l1': 0.0, 'n_units_l2': 32, 'dropout_l2': 0.0, 'n_units_l3': 16, 'dropout_l3': 0.0, 'optimizer': 'Adam', 'lr': 0.00024150535209240323, 'weight_decay': 0.0002291305142604275}. Best is trial 0 with value: 0.8063492321521744.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


  recall = tps / tps[-1]
[32m[I 2021-10-12 02:19:24,317][0m Trial 1 finished with value: 0.757195183581816 and parameters: {'n_layers': 4, 'n_units_l0': 128, 'dropout_l0': 0.0, 'n_units_l1': 64, 'dropout_l1': 0.3, 'n_units_l2': 32, 'dropout_l2': 0.0, 'n_units_l3': 16, 'dropout_l3': 0.0, 'optimizer': 'Adam', 'lr': 7.642900457011243e-05, 'weight_decay': 0.0013993337232987364}. Best is trial 0 with value: 0.8063492321521744.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-12 02:21:07,885][0m Trial 2 finished with value: 0.3038961038961039 and parameters: {'n_layers': 3, 'n_units_l0': 64, 'dropout_l0': 0.3, 'n_units_l1': 64, 'dropout_l1': 0.4, 'n_units_l2': 64, 'dropout_l2': 0.0, 'optimizer': 'Adam', 'lr': 0.07912853367581872, 'weight_decay': 0.057335928053079604}. Best is trial 0 with value: 0.8063492321521744.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training
Study statistics: 
  Number of finished trials:  3
  Number of pruned trials:  0
  Number of complete trials:  3
Best trial:
  Value:  0.8063492321521744
  Params: 
    dropout_l0: 0.0
    dropout_l1: 0.0
    dropout_l2: 0.0
    dropout_l3: 0.0
    lr: 0.00024150535209240323
    n_layers: 4
    n_units_l0: 32
    n_units_l1: 32
    n_units_l2: 32
    n_units_l3: 16
    optimizer: Adam
    weight_decay: 0.0002291305142604275



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


  pruner=optuna.pruners.PatientPruner(optuna.pruners.MedianPruner(), patience=2),
[32m[I 2021-10-12 02:22:25,634][0m A new study created in RDB with name: K562_active_E_vs_active_P_FFNN_1_2_3[0m


EarlyStopping counter: 5 out of 5
Early stopping the training
AUPRC test score: 0.8081377232693927


>>> ITERATION N. 3



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-12 02:24:01,052][0m Trial 0 finished with value: 0.31835497835497834 and parameters: {'n_layers': 4, 'n_units_l0': 32, 'dropout_l0': 0.2, 'n_units_l1': 128, 'dropout_l1': 0.3, 'n_units_l2': 4, 'dropout_l2': 0.0, 'n_units_l3': 32, 'dropout_l3': 0.4, 'optimizer': 'Nadam', 'lr': 0.002664670256257603, 'weight_decay': 0.020822751909707456}. Best is trial 0 with value: 0.31835497835497834.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-12 02:24:54,697][0m Trial 1 finished with value: 0.7582116810297906 and parameters: {'n_layers': 4, 'n_units_l0': 256, 'dropout_l0': 0.2, 'n_units_l1': 64, 'dropout_l1': 0.3, 'n_units_l2': 32, 'dropout_l2': 0.0, 'n_units_l3': 4, 'dropout_l3': 0.0, 'optimizer': 'Nadam', 'lr': 0.0017232667779949508, 'weight_decay': 0.00023304579785823218}. Best is trial 1 with value: 0.7582116810297906.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-12 02:25:45,982][0m Trial 2 finished with value: 0.7248912728868855 and parameters: {'n_layers': 2, 'n_units_l0': 32, 'dropout_l0': 0.3, 'n_units_l1': 16, 'dropout_l1': 0.3, 'optimizer': 'RMSprop', 'lr': 2.138747810080283e-05, 'weight_decay': 0.0005739681205396538}. Best is trial 1 with value: 0.7582116810297906.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training
Study statistics: 
  Number of finished trials:  3
  Number of pruned trials:  0
  Number of complete trials:  3
Best trial:
  Value:  0.7582116810297906
  Params: 
    dropout_l0: 0.2
    dropout_l1: 0.3
    dropout_l2: 0.0
    dropout_l3: 0.0
    lr: 0.0017232667779949508
    n_layers: 4
    n_units_l0: 256
    n_units_l1: 64
    n_units_l2: 32
    n_units_l3: 4
    optimizer: Nadam
    weight_decay: 0.00023304579785823218



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5
EarlyStopping counter: 5 out of 5
Early stopping the training
AUPRC test score: 0.8048812285939689



3-FOLD CROSS-VALIDATION AUPRC TEST SCORE: 0.79017


In [27]:
# Store the scores of the run above, keyed by cell line, task and model class name.
results_dict[cell_line][task][model.__name__] = kf_CV.scores_dict

In [28]:
# Persist the accumulated results to disk.
# results_dict is a defaultdict built with a lambda factory (see the load cell at
# the top of the notebook), which pickle cannot serialise, hence the OrderedDict copy.
# The dump goes to a temporary file that is then swapped in with os.replace, so a
# failure while pickling cannot leave a truncated 'results_dict.pickle' behind and
# wipe out the results stored by earlier runs.
results_path = 'results_dict.pickle'
tmp_results_path = f'{results_path}.tmp'
with open(tmp_results_path, 'wb') as fout:
    pickle.dump(OrderedDict(results_dict), fout)
os.replace(tmp_results_path, results_path)

---
### 2. CNN

In [22]:
# Model class used in this subsection; model.__name__ also labels the study
# and the saved-model paths passed to kf_CV in the next cell.
model=CNN

In [23]:
# Run Kfold_CV for the current model on the current task.
# The keyword arguments are gathered first so the run configuration can be read
# separately from the call that launches the long-running search.
kf_CV = Kfold_CV()

cv_kwargs = dict(
    build_dataloader_pipeline=pipe_data_load,
    num_epochs=100,
    n_folds=3,
    cell_line=cell_line,
    sequence=True,
    rebalancing=False,
    model=model,
    device=device,
    task=task,
    sampler='TPE',
    # study name and both model paths share the <cell_line>_<task>_<model> prefix
    study_name=f'{cell_line}_{task}_{model.__name__}',
    hp_model_path=f'{cell_line}_{task}_{model.__name__}_HP',
    test_model_path=f'{cell_line}_{task}_{model.__name__}_TEST',
)

kf_CV(**cv_kwargs)

  pruner=optuna.pruners.PatientPruner(optuna.pruners.MedianPruner(), patience=2),
[32m[I 2021-10-12 16:22:56,460][0m A new study created in RDB with name: K562_active_E_vs_active_P_CNN_1[0m


>>> ITERATION N. 1



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5


  recall = tps / tps[-1]


EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-12 16:31:48,785][0m Trial 0 finished with value: 0.30200413498732825 and parameters: {'n_layers': 4, 'out_channels_l0': 16, 'kernel_size_l0': 11, 'dropout_l0': 0.2, 'out_channels_l1': 32, 'kernel_size_l1': 5, 'dropout_l1': 0.4, 'out_channels_l2': 64, 'kernel_size_l2': 11, 'dropout_l2': 0.4, 'out_channels_l3': 512, 'kernel_size_l3': 15, 'dropout_l3': 0.5, 'optimizer': 'RMSprop', 'lr': 0.0013297914380566182, 'weight_decay': 0.0005664577681366895}. Best is trial 0 with value: 0.30200413498732825.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5


  recall = tps / tps[-1]


EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-12 16:38:56,428][0m Trial 1 finished with value: 0.5151761687115588 and parameters: {'n_layers': 3, 'out_channels_l0': 16, 'kernel_size_l0': 5, 'dropout_l0': 0.4, 'out_channels_l1': 64, 'kernel_size_l1': 5, 'dropout_l1': 0.4, 'out_channels_l2': 256, 'kernel_size_l2': 15, 'dropout_l2': 0, 'optimizer': 'RMSprop', 'lr': 0.021681536915752683, 'weight_decay': 0.0419591703040397}. Best is trial 1 with value: 0.5151761687115588.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-12 16:41:49,622][0m Trial 2 finished with value: 0.3154978354978355 and parameters: {'n_layers': 3, 'out_channels_l0': 32, 'kernel_size_l0': 11, 'dropout_l0': 0.4, 'out_channels_l1': 64, 'kernel_size_l1': 5, 'dropout_l1': 0.4, 'out_channels_l2': 64, 'kernel_size_l2': 15, 'dropout_l2': 0.5, 'optimizer': 'Nadam', 'lr': 0.0003058504633688657, 'weight_decay': 0.000187390102108067}. Best is trial 1 with value: 0.5151761687115588.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training
Study statistics: 
  Number of finished trials:  3
  Number of pruned trials:  0
  Number of complete trials:  3
Best trial:
  Value:  0.5151761687115588
  Params: 
    dropout_l0: 0.4
    dropout_l1: 0.4
    dropout_l2: 0
    kernel_size_l0: 5
    kernel_size_l1: 5
    kernel_size_l2: 15
    lr: 0.021681536915752683
    n_layers: 3
    optimizer: RMSprop
    out_channels_l0: 16
    out_channels_l1: 64
    out_channels_l2: 256
    weight_decay: 0.0419591703040397



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


  pruner=optuna.pruners.PatientPruner(optuna.pruners.MedianPruner(), patience=2),
[32m[I 2021-10-12 16:52:34,274][0m A new study created in RDB with name: K562_active_E_vs_active_P_CNN_1_2[0m


EarlyStopping counter: 5 out of 5
Early stopping the training
AUPRC test score: 0.3127419354838709


>>> ITERATION N. 2



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-12 16:59:01,273][0m Trial 0 finished with value: 0.6878345120783538 and parameters: {'n_layers': 4, 'out_channels_l0': 32, 'kernel_size_l0': 11, 'dropout_l0': 0, 'out_channels_l1': 32, 'kernel_size_l1': 15, 'dropout_l1': 0.5, 'out_channels_l2': 128, 'kernel_size_l2': 15, 'dropout_l2': 0.4, 'out_channels_l3': 512, 'kernel_size_l3': 11, 'dropout_l3': 0, 'optimizer': 'Adam', 'lr': 0.01664098239998411, 'weight_decay': 0.00011963814176239059}. Best is trial 0 with value: 0.6878345120783538.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-12 17:02:34,047][0m Trial 1 finished with value: 0.6557080001966378 and parameters: {'n_layers': 2, 'out_channels_l0': 32, 'kernel_size_l0': 11, 'dropout_l0': 0.2, 'out_channels_l1': 96, 'kernel_size_l1': 11, 'dropout_l1': 0.4, 'optimizer': 'Adam', 'lr': 0.01780650622790982, 'weight_decay': 0.0027665900556611543}. Best is trial 0 with value: 0.6878345120783538.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-12 17:05:01,520][0m Trial 2 finished with value: 0.7123528015642018 and parameters: {'n_layers': 2, 'out_channels_l0': 32, 'kernel_size_l0': 15, 'dropout_l0': 0.4, 'out_channels_l1': 64, 'kernel_size_l1': 5, 'dropout_l1': 0, 'optimizer': 'Adam', 'lr': 0.0005517211984944772, 'weight_decay': 0.027013291582296612}. Best is trial 2 with value: 0.7123528015642018.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training
Study statistics: 
  Number of finished trials:  3
  Number of pruned trials:  0
  Number of complete trials:  3
Best trial:
  Value:  0.7123528015642018
  Params: 
    dropout_l0: 0.4
    dropout_l1: 0
    kernel_size_l0: 15
    kernel_size_l1: 5
    lr: 0.0005517211984944772
    n_layers: 2
    optimizer: Adam
    out_channels_l0: 32
    out_channels_l1: 64
    weight_decay: 0.027013291582296612



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


  pruner=optuna.pruners.PatientPruner(optuna.pruners.MedianPruner(), patience=2),
[32m[I 2021-10-12 17:09:54,696][0m A new study created in RDB with name: K562_active_E_vs_active_P_CNN_1_2_3[0m


EarlyStopping counter: 5 out of 5
Early stopping the training
AUPRC test score: 0.695046125447867


>>> ITERATION N. 3



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-12 17:12:44,050][0m Trial 0 finished with value: 0.488622465586735 and parameters: {'n_layers': 1, 'out_channels_l0': 16, 'kernel_size_l0': 11, 'dropout_l0': 0.2, 'optimizer': 'RMSprop', 'lr': 0.004400577484320937, 'weight_decay': 0.0020984440776398256}. Best is trial 0 with value: 0.488622465586735.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-12 17:20:49,734][0m Trial 1 finished with value: 0.7512711775914562 and parameters: {'n_layers': 4, 'out_channels_l0': 16, 'kernel_size_l0': 5, 'dropout_l0': 0.2, 'out_channels_l1': 96, 'kernel_size_l1': 11, 'dropout_l1': 0.4, 'out_channels_l2': 128, 'kernel_size_l2': 5, 'dropout_l2': 0, 'out_channels_l3': 256, 'kernel_size_l3': 15, 'dropout_l3': 0, 'optimizer': 'Adam', 'lr': 0.004373386569316851, 'weight_decay': 0.0010154843912017033}. Best is trial 1 with value: 0.7512711775914562.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-12 17:27:15,373][0m Trial 2 finished with value: 0.5999128790016992 and parameters: {'n_layers': 4, 'out_channels_l0': 16, 'kernel_size_l0': 11, 'dropout_l0': 0, 'out_channels_l1': 96, 'kernel_size_l1': 5, 'dropout_l1': 0.5, 'out_channels_l2': 64, 'kernel_size_l2': 11, 'dropout_l2': 0.5, 'out_channels_l3': 512, 'kernel_size_l3': 11, 'dropout_l3': 0, 'optimizer': 'RMSprop', 'lr': 1.4969574802333853e-05, 'weight_decay': 0.00018815117490107107}. Best is trial 1 with value: 0.7512711775914562.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training
Study statistics: 
  Number of finished trials:  3
  Number of pruned trials:  0
  Number of complete trials:  3
Best trial:
  Value:  0.7512711775914562
  Params: 
    dropout_l0: 0.2
    dropout_l1: 0.4
    dropout_l2: 0
    dropout_l3: 0
    kernel_size_l0: 5
    kernel_size_l1: 11
    kernel_size_l2: 5
    kernel_size_l3: 15
    lr: 0.004373386569316851
    n_layers: 4
    optimizer: Adam
    out_channels_l0: 16
    out_channels_l1: 96
    out_channels_l2: 128
    out_channels_l3: 256
    weight_decay: 0.0010154843912017033



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5
EarlyStopping counter: 5 out of 5
Early stopping the training
AUPRC test score: 0.7295328074024247



3-FOLD CROSS-VALIDATION AUPRC TEST SCORE: 0.57911


In [24]:
# Record the scores collected by the cross-validation run above, keyed by
# cell line -> task -> model name (model.__name__).
results_dict[cell_line][task][model.__name__] = kf_CV.scores_dict

In [25]:
# Persist the accumulated scores to 'results_dict.pickle'. results_dict is
# rebuilt at load time as a defaultdict whose default factory is a lambda,
# which pickle cannot serialize, so the outer mapping is copied into an
# OrderedDict before dumping.
with open('results_dict.pickle', mode='wb') as results_file:
    plain_results = OrderedDict(results_dict)
    pickle.dump(plain_results, results_file)

---

## 4) INACTIVE ENHANCERS vs INACTIVE PROMOTERS

In [26]:
# Switch the current task to the entry at index 3 of TASKS; every following
# cell in this section reads `task`. The bare name on the last line echoes
# the selected task as the cell output.
task = TASKS[3]
task

'inactive_E_vs_inactive_P'

In [27]:
# Rebuild the data-loading pipeline for the current task; the pipeline is
# given '<task>.pickle' as its path_name.
pipe_data_load = Build_DataLoader_Pipeline(path_name=f'{task}.pickle')

Data Preprocessing Done!


---
### 1. FFNN

In [31]:
# Select FFNN for the cells below: it is passed to Kfold_CV as `model`, and
# its __name__ is used to build the study name and the output paths.
model=FFNN

In [32]:
# Run the 3-fold cross-validation for the currently selected model and task.
# The study name and both output paths share the '<cell_line>_<task>_<model>'
# prefix. sequence=False / rebalancing=False are forwarded unchanged;
# NOTE(review): their exact effect is defined inside Kfold_CV -- confirm there.
kf_CV = Kfold_CV()

kf_CV(
    build_dataloader_pipeline=pipe_data_load,
    model=model,
    task=task,
    cell_line=cell_line,
    device=device,
    sequence=False,
    rebalancing=False,
    num_epochs=100,
    n_folds=3,
    sampler='TPE',
    study_name=f'{cell_line}_{task}_{model.__name__}',
    hp_model_path=f'{cell_line}_{task}_{model.__name__}_HP',
    test_model_path=f'{cell_line}_{task}_{model.__name__}_TEST',
)

>>> ITERATION N. 1


  pruner=optuna.pruners.PatientPruner(optuna.pruners.MedianPruner(), patience=2),
[32m[I 2021-10-12 02:28:56,978][0m A new study created in RDB with name: K562_inactive_E_vs_inactive_P_FFNN_1[0m





Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-12 02:36:53,102][0m Trial 0 finished with value: 0.5707663050887488 and parameters: {'n_layers': 3, 'n_units_l0': 32, 'dropout_l0': 0.0, 'n_units_l1': 16, 'dropout_l1': 0.4, 'n_units_l2': 32, 'dropout_l2': 0.0, 'optimizer': 'Adam', 'lr': 0.00097510473377239, 'weight_decay': 0.010576309768188704}. Best is trial 0 with value: 0.5707663050887488.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-12 02:48:40,882][0m Trial 1 finished with value: 0.3970357772738725 and parameters: {'n_layers': 4, 'n_units_l0': 32, 'dropout_l0': 0.2, 'n_units_l1': 128, 'dropout_l1': 0.0, 'n_units_l2': 32, 'dropout_l2': 0.0, 'n_units_l3': 16, 'dropout_l3': 0.5, 'optimizer': 'Adam', 'lr': 0.00020939005311462996, 'weight_decay': 0.017980456975394306}. Best is trial 0 with value: 0.5707663050887488.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-12 03:02:46,288][0m Trial 2 finished with value: 0.3967504409171076 and parameters: {'n_layers': 2, 'n_units_l0': 64, 'dropout_l0': 0.4, 'n_units_l1': 64, 'dropout_l1': 0.4, 'optimizer': 'RMSprop', 'lr': 0.01513180590579158, 'weight_decay': 0.04132297074865867}. Best is trial 0 with value: 0.5707663050887488.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training
Study statistics: 
  Number of finished trials:  3
  Number of pruned trials:  0
  Number of complete trials:  3
Best trial:
  Value:  0.5707663050887488
  Params: 
    dropout_l0: 0.0
    dropout_l1: 0.4
    dropout_l2: 0.0
    lr: 0.00097510473377239
    n_layers: 3
    n_units_l0: 32
    n_units_l1: 16
    n_units_l2: 32
    optimizer: Adam
    weight_decay: 0.010576309768188704



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5
EarlyStopping counter: 5 out of 5
Early stopping the training
AUPRC test score: 0.5631192524318143


>>> ITERATION N. 2


  pruner=optuna.pruners.PatientPruner(optuna.pruners.MedianPruner(), patience=2),
[32m[I 2021-10-12 03:27:47,808][0m A new study created in RDB with name: K562_inactive_E_vs_inactive_P_FFNN_1_2[0m





Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-12 03:36:47,149][0m Trial 0 finished with value: 0.39570357772738696 and parameters: {'n_layers': 3, 'n_units_l0': 64, 'dropout_l0': 0.0, 'n_units_l1': 16, 'dropout_l1': 0.3, 'n_units_l2': 16, 'dropout_l2': 0.4, 'optimizer': 'Adam', 'lr': 0.014371303253262739, 'weight_decay': 0.057907740220523085}. Best is trial 0 with value: 0.39570357772738696.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-12 04:01:08,106][0m Trial 1 finished with value: 0.3953231292517008 and parameters: {'n_layers': 3, 'n_units_l0': 64, 'dropout_l0': 0.2, 'n_units_l1': 16, 'dropout_l1': 0.0, 'n_units_l2': 64, 'dropout_l2': 0.0, 'optimizer': 'RMSprop', 'lr': 1.732525560765033e-05, 'weight_decay': 0.05280863265102106}. Best is trial 0 with value: 0.39570357772738696.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-12 04:09:02,810][0m Trial 2 finished with value: 0.5673555278823246 and parameters: {'n_layers': 3, 'n_units_l0': 256, 'dropout_l0': 0.4, 'n_units_l1': 32, 'dropout_l1': 0.0, 'n_units_l2': 64, 'dropout_l2': 0.0, 'optimizer': 'Nadam', 'lr': 8.511910865692941e-05, 'weight_decay': 0.009428628956688064}. Best is trial 2 with value: 0.5673555278823246.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training
Study statistics: 
  Number of finished trials:  3
  Number of pruned trials:  0
  Number of complete trials:  3
Best trial:
  Value:  0.5673555278823246
  Params: 
    dropout_l0: 0.4
    dropout_l1: 0.0
    dropout_l2: 0.0
    lr: 8.511910865692941e-05
    n_layers: 3
    n_units_l0: 256
    n_units_l1: 32
    n_units_l2: 64
    optimizer: Nadam
    weight_decay: 0.009428628956688064



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5
EarlyStopping counter: 5 out of 5
Early stopping the training
AUPRC test score: 0.5712788601465019


>>> ITERATION N. 3


  pruner=optuna.pruners.PatientPruner(optuna.pruners.MedianPruner(), patience=2),
[32m[I 2021-10-12 04:20:58,735][0m A new study created in RDB with name: K562_inactive_E_vs_inactive_P_FFNN_1_2_3[0m





Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-12 04:28:52,077][0m Trial 0 finished with value: 0.5659568773465478 and parameters: {'n_layers': 2, 'n_units_l0': 256, 'dropout_l0': 0.2, 'n_units_l1': 32, 'dropout_l1': 0.3, 'optimizer': 'Nadam', 'lr': 0.0001670063847371949, 'weight_decay': 0.004064442389538894}. Best is trial 0 with value: 0.5659568773465478.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-12 04:36:35,402][0m Trial 1 finished with value: 0.5552775897140471 and parameters: {'n_layers': 2, 'n_units_l0': 128, 'dropout_l0': 0.3, 'n_units_l1': 16, 'dropout_l1': 0.3, 'optimizer': 'Adam', 'lr': 0.0007618743998371818, 'weight_decay': 0.014302529968626573}. Best is trial 0 with value: 0.5659568773465478.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-12 04:55:58,045][0m Trial 2 finished with value: 0.3962824389014866 and parameters: {'n_layers': 4, 'n_units_l0': 128, 'dropout_l0': 0.3, 'n_units_l1': 32, 'dropout_l1': 0.4, 'n_units_l2': 4, 'dropout_l2': 0.0, 'n_units_l3': 4, 'dropout_l3': 0.4, 'optimizer': 'Adam', 'lr': 0.0028724165311694765, 'weight_decay': 0.0036001128094182147}. Best is trial 0 with value: 0.5659568773465478.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training
Study statistics: 
  Number of finished trials:  3
  Number of pruned trials:  0
  Number of complete trials:  3
Best trial:
  Value:  0.5659568773465478
  Params: 
    dropout_l0: 0.2
    dropout_l1: 0.3
    lr: 0.0001670063847371949
    n_layers: 2
    n_units_l0: 256
    n_units_l1: 32
    optimizer: Nadam
    weight_decay: 0.004064442389538894



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5
EarlyStopping counter: 5 out of 5
Early stopping the training
AUPRC test score: 0.5766967412737221



3-FOLD CROSS-VALIDATION AUPRC TEST SCORE: 0.57036


In [33]:
# Record the scores collected by the cross-validation run above, keyed by
# cell line -> task -> model name (model.__name__).
results_dict[cell_line][task][model.__name__] = kf_CV.scores_dict

In [34]:
# Persist the accumulated scores to 'results_dict.pickle'. results_dict is
# rebuilt at load time as a defaultdict whose default factory is a lambda,
# which pickle cannot serialize, so the outer mapping is copied into an
# OrderedDict before dumping.
with open('results_dict.pickle', mode='wb') as results_file:
    plain_results = OrderedDict(results_dict)
    pickle.dump(plain_results, results_file)

---
### 2. CNN

In [28]:
# Select CNN for the cells below: it is passed to Kfold_CV as `model`, and
# its __name__ is used to build the study name and the output paths.
model=CNN

In [29]:
# Run the 3-fold cross-validation for the currently selected model and task.
# The study name and both output paths share the '<cell_line>_<task>_<model>'
# prefix. This run passes sequence=True (with rebalancing=False);
# NOTE(review): the exact effect of both flags is defined inside Kfold_CV --
# confirm there.
kf_CV = Kfold_CV()

kf_CV(
    build_dataloader_pipeline=pipe_data_load,
    model=model,
    task=task,
    cell_line=cell_line,
    device=device,
    sequence=True,
    rebalancing=False,
    num_epochs=100,
    n_folds=3,
    sampler='TPE',
    study_name=f'{cell_line}_{task}_{model.__name__}',
    hp_model_path=f'{cell_line}_{task}_{model.__name__}_HP',
    test_model_path=f'{cell_line}_{task}_{model.__name__}_TEST',
)

  pruner=optuna.pruners.PatientPruner(optuna.pruners.MedianPruner(), patience=2),
[32m[I 2021-10-12 17:35:27,529][0m A new study created in RDB with name: K562_inactive_E_vs_inactive_P_CNN_1[0m


>>> ITERATION N. 1



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-12 18:06:11,730][0m Trial 0 finished with value: 0.5316655380596873 and parameters: {'n_layers': 3, 'out_channels_l0': 32, 'kernel_size_l0': 15, 'dropout_l0': 0, 'out_channels_l1': 64, 'kernel_size_l1': 15, 'dropout_l1': 0, 'out_channels_l2': 96, 'kernel_size_l2': 15, 'dropout_l2': 0.4, 'optimizer': 'Nadam', 'lr': 0.014714848469556502, 'weight_decay': 0.02259534978909389}. Best is trial 0 with value: 0.5316655380596873.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-12 18:29:46,088][0m Trial 1 finished with value: 0.5581483148173945 and parameters: {'n_layers': 3, 'out_channels_l0': 32, 'kernel_size_l0': 15, 'dropout_l0': 0.2, 'out_channels_l1': 32, 'kernel_size_l1': 5, 'dropout_l1': 0.4, 'out_channels_l2': 96, 'kernel_size_l2': 11, 'dropout_l2': 0, 'optimizer': 'RMSprop', 'lr': 0.00015232663328485792, 'weight_decay': 0.013337937257722015}. Best is trial 1 with value: 0.5581483148173945.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-12 20:53:35,921][0m Trial 2 finished with value: 0.5399090771652603 and parameters: {'n_layers': 4, 'out_channels_l0': 64, 'kernel_size_l0': 5, 'dropout_l0': 0.2, 'out_channels_l1': 64, 'kernel_size_l1': 15, 'dropout_l1': 0.5, 'out_channels_l2': 256, 'kernel_size_l2': 5, 'dropout_l2': 0.4, 'out_channels_l3': 512, 'kernel_size_l3': 5, 'dropout_l3': 0, 'optimizer': 'Adam', 'lr': 0.03432583203320131, 'weight_decay': 0.001451665287089718}. Best is trial 1 with value: 0.5581483148173945.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training
Study statistics: 
  Number of finished trials:  3
  Number of pruned trials:  0
  Number of complete trials:  3
Best trial:
  Value:  0.5581483148173945
  Params: 
    dropout_l0: 0.2
    dropout_l1: 0.4
    dropout_l2: 0
    kernel_size_l0: 15
    kernel_size_l1: 5
    kernel_size_l2: 11
    lr: 0.00015232663328485792
    n_layers: 3
    optimizer: RMSprop
    out_channels_l0: 32
    out_channels_l1: 32
    out_channels_l2: 96
    weight_decay: 0.013337937257722015



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


  pruner=optuna.pruners.PatientPruner(optuna.pruners.MedianPruner(), patience=2),
[32m[I 2021-10-12 21:20:51,521][0m A new study created in RDB with name: K562_inactive_E_vs_inactive_P_CNN_1_2[0m


EarlyStopping counter: 5 out of 5
Early stopping the training
AUPRC test score: 0.568979032879536


>>> ITERATION N. 2



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-12 21:32:14,862][0m Trial 0 finished with value: 0.5552052128974312 and parameters: {'n_layers': 1, 'out_channels_l0': 16, 'kernel_size_l0': 5, 'dropout_l0': 0.4, 'optimizer': 'RMSprop', 'lr': 1.6000155652018827e-05, 'weight_decay': 0.006639313521203149}. Best is trial 0 with value: 0.5552052128974312.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-12 22:28:40,781][0m Trial 1 finished with value: 0.5216175390257851 and parameters: {'n_layers': 2, 'out_channels_l0': 16, 'kernel_size_l0': 15, 'dropout_l0': 0, 'out_channels_l1': 64, 'kernel_size_l1': 15, 'dropout_l1': 0, 'optimizer': 'RMSprop', 'lr': 0.02006244352458958, 'weight_decay': 0.001359432936760038}. Best is trial 0 with value: 0.5552052128974312.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-12 23:46:26,578][0m Trial 2 finished with value: 0.5491915296155487 and parameters: {'n_layers': 4, 'out_channels_l0': 64, 'kernel_size_l0': 5, 'dropout_l0': 0.3, 'out_channels_l1': 32, 'kernel_size_l1': 11, 'dropout_l1': 0, 'out_channels_l2': 96, 'kernel_size_l2': 15, 'dropout_l2': 0, 'out_channels_l3': 256, 'kernel_size_l3': 11, 'dropout_l3': 0.5, 'optimizer': 'RMSprop', 'lr': 0.006594046857840307, 'weight_decay': 0.0039030882533919145}. Best is trial 0 with value: 0.5552052128974312.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training
Study statistics: 
  Number of finished trials:  3
  Number of pruned trials:  0
  Number of complete trials:  3
Best trial:
  Value:  0.5552052128974312
  Params: 
    dropout_l0: 0.4
    kernel_size_l0: 5
    lr: 1.6000155652018827e-05
    n_layers: 1
    optimizer: RMSprop
    out_channels_l0: 16
    weight_decay: 0.006639313521203149



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


  pruner=optuna.pruners.PatientPruner(optuna.pruners.MedianPruner(), patience=2),
[32m[I 2021-10-13 00:02:52,627][0m A new study created in RDB with name: K562_inactive_E_vs_inactive_P_CNN_1_2_3[0m


EarlyStopping counter: 5 out of 5
Early stopping the training
AUPRC test score: 0.5650134383437307


>>> ITERATION N. 3



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-13 01:00:11,285][0m Trial 0 finished with value: 0.5361811371148619 and parameters: {'n_layers': 2, 'out_channels_l0': 64, 'kernel_size_l0': 15, 'dropout_l0': 0.4, 'out_channels_l1': 96, 'kernel_size_l1': 15, 'dropout_l1': 0.5, 'optimizer': 'Nadam', 'lr': 0.005283263348660313, 'weight_decay': 0.00104995791196492}. Best is trial 0 with value: 0.5361811371148619.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-13 02:01:53,236][0m Trial 1 finished with value: 0.5728102802030217 and parameters: {'n_layers': 4, 'out_channels_l0': 64, 'kernel_size_l0': 5, 'dropout_l0': 0.2, 'out_channels_l1': 32, 'kernel_size_l1': 5, 'dropout_l1': 0.5, 'out_channels_l2': 64, 'kernel_size_l2': 11, 'dropout_l2': 0.4, 'out_channels_l3': 512, 'kernel_size_l3': 15, 'dropout_l3': 0.5, 'optimizer': 'Nadam', 'lr': 0.00016607975331972227, 'weight_decay': 0.008277611566868029}. Best is trial 1 with value: 0.5728102802030217.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-13 02:18:29,388][0m Trial 2 finished with value: 0.5613009655669942 and parameters: {'n_layers': 1, 'out_channels_l0': 32, 'kernel_size_l0': 15, 'dropout_l0': 0.2, 'optimizer': 'RMSprop', 'lr': 0.0006138929936005074, 'weight_decay': 0.0011407636912847225}. Best is trial 1 with value: 0.5728102802030217.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training
Study statistics: 
  Number of finished trials:  3
  Number of pruned trials:  0
  Number of complete trials:  3
Best trial:
  Value:  0.5728102802030217
  Params: 
    dropout_l0: 0.2
    dropout_l1: 0.5
    dropout_l2: 0.4
    dropout_l3: 0.5
    kernel_size_l0: 5
    kernel_size_l1: 5
    kernel_size_l2: 11
    kernel_size_l3: 15
    lr: 0.00016607975331972227
    n_layers: 4
    optimizer: Nadam
    out_channels_l0: 64
    out_channels_l1: 32
    out_channels_l2: 64
    out_channels_l3: 512
    weight_decay: 0.008277611566868029



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5
EarlyStopping counter: 5 out of 5
Early stopping the training
AUPRC test score: 0.4631413520363631



3-FOLD CROSS-VALIDATION AUPRC TEST SCORE: 0.53238


In [30]:
# Record the scores collected by the cross-validation run above, keyed by
# cell line -> task -> model name (model.__name__).
results_dict[cell_line][task][model.__name__] = kf_CV.scores_dict

In [31]:
# Persist the accumulated scores to 'results_dict.pickle'. results_dict is
# rebuilt at load time as a defaultdict whose default factory is a lambda,
# which pickle cannot serialize, so the outer mapping is copied into an
# OrderedDict before dumping.
with open('results_dict.pickle', mode='wb') as results_file:
    plain_results = OrderedDict(results_dict)
    pickle.dump(plain_results, results_file)

---

## 5) ACTIVE ENHANCERS + ACTIVE PROMOTERS vs INACTIVE REST

In [32]:
# Switch the current task to the entry at index 4 of TASKS; every following
# cell in this section reads `task`. The bare name on the last line echoes
# the selected task as the cell output.
task = TASKS[4]
task

'active_EP_vs_inactive_rest'

In [33]:
# Rebuild the data-loading pipeline for the current task; the pipeline is
# given '<task>.pickle' as its path_name.
pipe_data_load = Build_DataLoader_Pipeline(path_name=f'{task}.pickle')

Data Preprocessing Done!


---
### 1. FFNN

In [37]:
# Bind the model explicitly for this section. The section reports FFNN results
# (the recorded study name is '..._FFNN_1'), but the closest preceding
# assignment in document order is `model=CNN`. Without this line a fresh
# top-to-bottom run would cross-validate CNN with sequence=False and store the
# study, checkpoints and scores under the CNN name.
model = FFNN

# Run the 3-fold cross-validation for FFNN on the current task. The study name
# and both output paths share the '<cell_line>_<task>_<model>' prefix.
# NOTE(review): the exact effect of sequence/rebalancing is defined inside
# Kfold_CV -- confirm there.
kf_CV = Kfold_CV()

kf_CV(
    build_dataloader_pipeline=pipe_data_load,
    num_epochs=100,
    n_folds=3,
    cell_line=cell_line,
    sequence=False,
    rebalancing=False,
    model=model,
    device=device,
    task=task,
    sampler='TPE',
    study_name=f'{cell_line}_{task}_{model.__name__}',
    hp_model_path=f'{cell_line}_{task}_{model.__name__}_HP',
    test_model_path=f'{cell_line}_{task}_{model.__name__}_TEST',
)

>>> ITERATION N. 1


  pruner=optuna.pruners.PatientPruner(optuna.pruners.MedianPruner(), patience=2),
[32m[I 2021-10-12 05:17:29,172][0m A new study created in RDB with name: K562_active_EP_vs_inactive_rest_FFNN_1[0m





Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-12 05:31:10,522][0m Trial 0 finished with value: 0.24629418270062958 and parameters: {'n_layers': 1, 'n_units_l0': 128, 'dropout_l0': 0.3, 'optimizer': 'Nadam', 'lr': 0.01864885474861787, 'weight_decay': 0.015954392777821874}. Best is trial 0 with value: 0.24629418270062958.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-12 05:39:28,516][0m Trial 1 finished with value: 0.2652954184856795 and parameters: {'n_layers': 1, 'n_units_l0': 256, 'dropout_l0': 0.4, 'optimizer': 'Adam', 'lr': 4.253497886484119e-05, 'weight_decay': 0.00033901789143189737}. Best is trial 1 with value: 0.2652954184856795.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-12 05:51:45,831][0m Trial 2 finished with value: 0.10739942261128703 and parameters: {'n_layers': 3, 'n_units_l0': 64, 'dropout_l0': 0.3, 'n_units_l1': 64, 'dropout_l1': 0.0, 'n_units_l2': 64, 'dropout_l2': 0.5, 'optimizer': 'Nadam', 'lr': 0.05248069233119157, 'weight_decay': 0.0005165472914786603}. Best is trial 1 with value: 0.2652954184856795.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training
Study statistics: 
  Number of finished trials:  3
  Number of pruned trials:  0
  Number of complete trials:  3
Best trial:
  Value:  0.2652954184856795
  Params: 
    dropout_l0: 0.4
    lr: 4.253497886484119e-05
    n_layers: 1
    n_units_l0: 256
    optimizer: Adam
    weight_decay: 0.00033901789143189737



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5
EarlyStopping counter: 5 out of 5
Early stopping the training
AUPRC test score: 0.27454510526472703


>>> ITERATION N. 2


  pruner=optuna.pruners.PatientPruner(optuna.pruners.MedianPruner(), patience=2),
[32m[I 2021-10-12 06:04:17,766][0m A new study created in RDB with name: K562_active_EP_vs_inactive_rest_FFNN_1_2[0m





Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-12 06:12:27,756][0m Trial 0 finished with value: 0.27193931885793465 and parameters: {'n_layers': 1, 'n_units_l0': 128, 'dropout_l0': 0.2, 'optimizer': 'Adam', 'lr': 0.010103325712545628, 'weight_decay': 0.05491631173869788}. Best is trial 0 with value: 0.27193931885793465.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-12 06:30:18,425][0m Trial 1 finished with value: 0.1128282734214938 and parameters: {'n_layers': 3, 'n_units_l0': 128, 'dropout_l0': 0.3, 'n_units_l1': 128, 'dropout_l1': 0.3, 'n_units_l2': 4, 'dropout_l2': 0.4, 'optimizer': 'Adam', 'lr': 0.0024049045266201627, 'weight_decay': 0.024014215230013975}. Best is trial 0 with value: 0.27193931885793465.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-12 06:42:55,943][0m Trial 2 finished with value: 0.2711267896088214 and parameters: {'n_layers': 2, 'n_units_l0': 256, 'dropout_l0': 0.2, 'n_units_l1': 128, 'dropout_l1': 0.2, 'optimizer': 'Adam', 'lr': 0.001081201846332641, 'weight_decay': 0.0002869961419393211}. Best is trial 0 with value: 0.27193931885793465.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training
Study statistics: 
  Number of finished trials:  3
  Number of pruned trials:  0
  Number of complete trials:  3
Best trial:
  Value:  0.27193931885793465
  Params: 
    dropout_l0: 0.2
    lr: 0.010103325712545628
    n_layers: 1
    n_units_l0: 128
    optimizer: Adam
    weight_decay: 0.05491631173869788



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5
EarlyStopping counter: 5 out of 5
Early stopping the training
AUPRC test score: 0.25636274871719533


>>> ITERATION N. 3


  pruner=optuna.pruners.PatientPruner(optuna.pruners.MedianPruner(), patience=2),
[32m[I 2021-10-12 07:01:24,926][0m A new study created in RDB with name: K562_active_EP_vs_inactive_rest_FFNN_1_2_3[0m





Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-12 07:13:35,647][0m Trial 0 finished with value: 0.11155677655677661 and parameters: {'n_layers': 3, 'n_units_l0': 32, 'dropout_l0': 0.3, 'n_units_l1': 64, 'dropout_l1': 0.4, 'n_units_l2': 4, 'dropout_l2': 0.0, 'optimizer': 'Nadam', 'lr': 2.9509747950440093e-05, 'weight_decay': 0.07785946014856363}. Best is trial 0 with value: 0.11155677655677661.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-12 07:29:47,008][0m Trial 1 finished with value: 0.23629712315549586 and parameters: {'n_layers': 1, 'n_units_l0': 64, 'dropout_l0': 0.3, 'optimizer': 'Adam', 'lr': 0.04779724877395587, 'weight_decay': 0.0004217457209240847}. Best is trial 1 with value: 0.23629712315549586.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-12 07:46:00,367][0m Trial 2 finished with value: 0.11162087912087906 and parameters: {'n_layers': 3, 'n_units_l0': 32, 'dropout_l0': 0.2, 'n_units_l1': 16, 'dropout_l1': 0.0, 'n_units_l2': 64, 'dropout_l2': 0.5, 'optimizer': 'Adam', 'lr': 0.01963204881164063, 'weight_decay': 0.023255687400315184}. Best is trial 1 with value: 0.23629712315549586.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training
Study statistics: 
  Number of finished trials:  3
  Number of pruned trials:  0
  Number of complete trials:  3
Best trial:
  Value:  0.23629712315549586
  Params: 
    dropout_l0: 0.3
    lr: 0.04779724877395587
    n_layers: 1
    n_units_l0: 64
    optimizer: Adam
    weight_decay: 0.0004217457209240847



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5
EarlyStopping counter: 5 out of 5
Early stopping the training
AUPRC test score: 0.23775325649207518



3-FOLD CROSS-VALIDATION AUPRC TEST SCORE: 0.25622


In [38]:
# Store the scores collected by this cross-validation run in the shared results
# dictionary, keyed by cell line, then task, then the model class name.
results_dict[cell_line][task][model.__name__] = kf_CV.scores_dict

In [39]:
# Persist the accumulated results without putting the existing file at risk.
# Serialise BEFORE touching the disk: opening the target with 'wb' truncates it
# immediately, so a failure inside pickle.dump would wipe every score stored by
# earlier runs. OrderedDict(...) drops the outer defaultdict wrapper, whose
# lambda default factory cannot be pickled.
results_payload = pickle.dumps(OrderedDict(results_dict))

# Write to a sibling temporary file first, then swap it into place so that
# 'results_dict.pickle' is only replaced by a fully written file.
with open('results_dict.pickle.tmp', 'wb') as fout:
    fout.write(results_payload)
os.replace('results_dict.pickle.tmp', 'results_dict.pickle')

---
### 2. CNN

In [34]:
# Select the CNN class as the architecture used by the following cells.
model = CNN

In [None]:
# Fresh cross-validation driver for the currently selected model.
kf_CV = Kfold_CV()

# 3 folds, up to 100 epochs each, sequence input enabled and rebalancing off.
# The study name and both checkpoint paths share the same
# '<cell_line>_<task>_<model name>' prefix.
kf_CV(
    build_dataloader_pipeline=pipe_data_load,
    num_epochs=100,
    n_folds=3,
    cell_line=cell_line,
    sequence=True,
    rebalancing=False,
    model=model,
    device=device,
    task=task,
    sampler='TPE',
    study_name=f'{cell_line}_{task}_{model.__name__}',
    hp_model_path=f'{cell_line}_{task}_{model.__name__}_HP',
    test_model_path=f'{cell_line}_{task}_{model.__name__}_TEST',
)

  pruner=optuna.pruners.PatientPruner(optuna.pruners.MedianPruner(), patience=2),
[32m[I 2021-10-13 03:19:13,321][0m A new study created in RDB with name: K562_active_EP_vs_inactive_rest_CNN_1[0m


>>> ITERATION N. 1



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-13 03:43:28,601][0m Trial 0 finished with value: 0.16530431024701364 and parameters: {'n_layers': 3, 'out_channels_l0': 64, 'kernel_size_l0': 5, 'dropout_l0': 0.2, 'out_channels_l1': 32, 'kernel_size_l1': 15, 'dropout_l1': 0.4, 'out_channels_l2': 64, 'kernel_size_l2': 15, 'dropout_l2': 0.5, 'optimizer': 'Nadam', 'lr': 0.00018100872559011593, 'weight_decay': 0.00038863492714039436}. Best is trial 0 with value: 0.16530431024701364.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-13 05:29:27,223][0m Trial 1 finished with value: 0.10733376792698833 and parameters: {'n_layers': 4, 'out_channels_l0': 64, 'kernel_size_l0': 15, 'dropout_l0': 0.4, 'out_channels_l1': 96, 'kernel_size_l1': 11, 'dropout_l1': 0.5, 'out_channels_l2': 128, 'kernel_size_l2': 5, 'dropout_l2': 0, 'out_channels_l3': 512, 'kernel_size_l3': 15, 'dropout_l3': 0, 'optimizer': 'Nadam', 'lr': 0.04200708705636999, 'weight_decay': 0.000278499612017659}. Best is trial 0 with value: 0.16530431024701364.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-13 06:07:16,656][0m Trial 2 finished with value: 0.17132706521457908 and parameters: {'n_layers': 1, 'out_channels_l0': 32, 'kernel_size_l0': 5, 'dropout_l0': 0.4, 'optimizer': 'Adam', 'lr': 0.0005296136108290213, 'weight_decay': 0.07164263550489237}. Best is trial 2 with value: 0.17132706521457908.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training
Study statistics: 
  Number of finished trials:  3
  Number of pruned trials:  0
  Number of complete trials:  3
Best trial:
  Value:  0.17132706521457908
  Params: 
    dropout_l0: 0.4
    kernel_size_l0: 5
    lr: 0.0005296136108290213
    n_layers: 1
    optimizer: Adam
    out_channels_l0: 32
    weight_decay: 0.07164263550489237



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


  pruner=optuna.pruners.PatientPruner(optuna.pruners.MedianPruner(), patience=2),
[32m[I 2021-10-13 06:58:29,065][0m A new study created in RDB with name: K562_active_EP_vs_inactive_rest_CNN_1_2[0m


EarlyStopping counter: 5 out of 5
Early stopping the training
AUPRC test score: 0.11259920634920634


>>> ITERATION N. 2



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-13 07:23:56,412][0m Trial 0 finished with value: 0.21359955577802422 and parameters: {'n_layers': 3, 'out_channels_l0': 32, 'kernel_size_l0': 11, 'dropout_l0': 0, 'out_channels_l1': 96, 'kernel_size_l1': 5, 'dropout_l1': 0, 'out_channels_l2': 64, 'kernel_size_l2': 15, 'dropout_l2': 0.5, 'optimizer': 'Nadam', 'lr': 0.0006152383646310618, 'weight_decay': 0.0013348267490916406}. Best is trial 0 with value: 0.21359955577802422.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-13 08:29:10,640][0m Trial 1 finished with value: 0.11289392810579252 and parameters: {'n_layers': 3, 'out_channels_l0': 32, 'kernel_size_l0': 5, 'dropout_l0': 0.4, 'out_channels_l1': 96, 'kernel_size_l1': 11, 'dropout_l1': 0, 'out_channels_l2': 96, 'kernel_size_l2': 5, 'dropout_l2': 0.5, 'optimizer': 'RMSprop', 'lr': 0.008304608286272913, 'weight_decay': 0.013919619779953618}. Best is trial 0 with value: 0.21359955577802422.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-10-13 09:00:21,566][0m Trial 2 finished with value: 0.14548470335182118 and parameters: {'n_layers': 4, 'out_channels_l0': 32, 'kernel_size_l0': 5, 'dropout_l0': 0.2, 'out_channels_l1': 32, 'kernel_size_l1': 11, 'dropout_l1': 0, 'out_channels_l2': 128, 'kernel_size_l2': 5, 'dropout_l2': 0.5, 'out_channels_l3': 128, 'kernel_size_l3': 15, 'dropout_l3': 0, 'optimizer': 'Nadam', 'lr': 0.00011172519751208714, 'weight_decay': 0.06475810004060047}. Best is trial 0 with value: 0.21359955577802422.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training
Study statistics: 
  Number of finished trials:  3
  Number of pruned trials:  0
  Number of complete trials:  3
Best trial:
  Value:  0.21359955577802422
  Params: 
    dropout_l0: 0
    dropout_l1: 0
    dropout_l2: 0.5
    kernel_size_l0: 11
    kernel_size_l1: 5
    kernel_size_l2: 15
    lr: 0.0006152383646310618
    n_layers: 3
    optimizer: Nadam
    out_channels_l0: 32
    out_channels_l1: 96
    out_channels_l2: 64
    weight_decay: 0.0013348267490916406



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


  pruner=optuna.pruners.PatientPruner(optuna.pruners.MedianPruner(), patience=2),
[32m[I 2021-10-13 10:15:19,008][0m A new study created in RDB with name: K562_active_EP_vs_inactive_rest_CNN_1_2_3[0m


EarlyStopping counter: 5 out of 5
Early stopping the training
AUPRC test score: 0.14636821543639758


>>> ITERATION N. 3



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5


In [None]:
# Store the scores collected by this cross-validation run in the shared results
# dictionary, keyed by cell line, then task, then the model class name.
# NOTE(review): the captured output for this run stops partway through
# iteration 3 — confirm the run finished before relying on these scores.
results_dict[cell_line][task][model.__name__] = kf_CV.scores_dict

In [None]:
# Persist the accumulated results without putting the existing file at risk.
# Serialise BEFORE touching the disk: opening the target with 'wb' truncates it
# immediately, so a failure inside pickle.dump would wipe every score stored by
# earlier runs. OrderedDict(...) drops the outer defaultdict wrapper, whose
# lambda default factory cannot be pickled.
results_payload = pickle.dumps(OrderedDict(results_dict))

# Write to a sibling temporary file first, then swap it into place so that
# 'results_dict.pickle' is only replaced by a fully written file.
with open('results_dict.pickle.tmp', 'wb') as fout:
    fout.write(results_payload)
os.replace('results_dict.pickle.tmp', 'results_dict.pickle')