## BIOINFORMATICS THESIS: MULTIMODAL NEURAL NETWORK

# CELL LINE: GM12878

In [1]:
import pandas as pd 
import numpy as np
import os

import torch
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.optim as optim
from collections import defaultdict, OrderedDict
import pickle

import sqlite3

from sqlalchemy import create_engine

# Prefer the GPU whenever PyTorch can see one; otherwise run on the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

In [2]:
# Create a database to store the Optuna studies (SQLite backend).
#engine = create_engine('sqlite:///BIOINF_optuna_tuning.db')

In [3]:
from BIOINF_tesi.data_pipe import CELL_LINES, TASKS

In [4]:
# Pick the cell line analysed in this notebook (index 1 of CELL_LINES);
# the bare expression echoes the chosen name as the cell output.
cell_line = CELL_LINES[1]
cell_line

'GM12878'

---

In [5]:
from BIOINF_tesi.data_pipe import Load_Create_Task
from BIOINF_tesi.data_pipe import Build_DataLoader_Pipeline

In [6]:
from BIOINF_tesi.models import FFNN, CNN, CNN_LSTM
from BIOINF_tesi.models.utils import fit, Param_Search, Kfold_CV

In [7]:
# Nested score store: results_dict[cell_line][task][model_name] -> scores.
# Missing levels are created on first access by the default factories.
results_dict = defaultdict(lambda: defaultdict(dict))

# Merge in the scores saved by earlier sessions, if any. On a fresh checkout
# the file does not exist yet; in that case keep the empty mapping instead of
# aborting the notebook with FileNotFoundError.
# NOTE: pickle.load can execute arbitrary code - only load a results file
# that this project produced.
try:
    with open('results_dict.pickle', 'rb') as fin:
        results_dict.update(pickle.load(fin))
except FileNotFoundError:
    pass

## 1) ACTIVE ENHANCERS vs INACTIVE ENHANCERS

In [24]:
# Select task index 0 from TASKS; the bare expression echoes its name.
task = TASKS[0]
task

'active_E_vs_inactive_E'

In [25]:
# Build the data-loading pipeline for the current task, pointing it at '<task>.pickle'.
pipe_data_load = Build_DataLoader_Pipeline(path_name=f'{task}.pickle')

Data Preprocessing Done!




### 1. FFNN

In [None]:
# Use the FFNN class (from BIOINF_tesi.models) for the cells of this subsection.
model=FFNN

In [None]:
# Cross-validate the selected model on the current task and cell line.
# The study name and both checkpoint paths are keyed on
# '<cell_line>_<task>_<model class name>'.
# NOTE(review): sequence=True is the same value passed for the CNN runs -
# confirm that FFNN is meant to consume the sequence input.
kf_CV = Kfold_CV()

kf_CV(build_dataloader_pipeline = pipe_data_load,
                num_epochs = 100,
                n_folds=3,
                cell_line=cell_line,
                task=task,
                sequence=True,
                augmentation=False,
                model = model,
                device = device,
                sampler = 'TPE',
                study_name = f'{cell_line}_{task}_{model.__name__}',
                hp_model_path = f'{cell_line}_{task}_{model.__name__}_HP.pt',
                test_model_path = f'{cell_line}_{task}_{model.__name__}_TEST.pt')

In [None]:
# Record this run's scores under [cell line][task][model class name].
results_dict[cell_line][task][model.__name__] = kf_CV.scores_dict

In [None]:
# Persist the accumulated results. Converting to OrderedDict drops the lambda
# default factory set when results_dict was loaded; a defaultdict with a
# lambda factory cannot be pickled.
with open ('results_dict.pickle', 'wb') as fout:
    pickle.dump(OrderedDict(results_dict), fout)

---
### 2. CNN

In [26]:
# Use the CNN class (from BIOINF_tesi.models) for the cells of this subsection.
model=CNN

In [27]:
# Cross-validate the selected model on the current task and cell line.
# The study name and both checkpoint paths are keyed on
# '<cell_line>_<task>_<model class name>'.
# NOTE(review): the recorded log suggests each of the 3 folds runs an Optuna
# search and then reports an AUPRC test score - confirm in Kfold_CV.
kf_CV = Kfold_CV()

kf_CV(build_dataloader_pipeline = pipe_data_load,
                num_epochs = 100,
                n_folds=3,
                cell_line=cell_line,
                task=task,
                sequence=True,
                augmentation=False,
                model = model,
                device = device,
                sampler = 'TPE',
                study_name = f'{cell_line}_{task}_{model.__name__}',
                hp_model_path = f'{cell_line}_{task}_{model.__name__}_HP.pt',
                test_model_path = f'{cell_line}_{task}_{model.__name__}_TEST.pt')

  pruner=optuna.pruners.PatientPruner(optuna.pruners.MedianPruner(), patience=2),
[32m[I 2021-09-26 09:25:31,390][0m Using an existing study with name 'GM12878_active_E_vs_inactive_E_CNN_1' instead of creating a new one.[0m


>>> ITERATION N. 1





Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-09-26 09:29:38,355][0m Trial 1 finished with value: 0.18154049295774646 and parameters: {'n_layers': 4, 'out_channels_l0': 16, 'kernel_size_l0': 11, 'dropout_l0': 0, 'out_channels_l1': 32, 'kernel_size_l1': 15, 'dropout_l1': 0.5, 'out_channels_l2': 128, 'kernel_size_l2': 11, 'dropout_l2': 0, 'out_channels_l3': 128, 'kernel_size_l3': 11, 'dropout_l3': 0.5, 'optimizer': 'Adam', 'lr': 0.003620538250625066, 'weight_decay': 0.0006352113063565966}. Best is trial 1 with value: 0.18154049295774646.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-09-26 09:34:22,621][0m Trial 2 finished with value: 0.1807922535211267 and parameters: {'n_layers': 1, 'out_channels_l0': 16, 'kernel_size_l0': 5, 'dropout_l0': 0.3, 'optimizer': 'Nadam', 'lr': 9.516489444855221e-05, 'weight_decay': 0.018836333255144593}. Best is trial 1 with value: 0.18154049295774646.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-09-26 09:40:00,216][0m Trial 3 finished with value: 0.20204364332168 and parameters: {'n_layers': 3, 'out_channels_l0': 32, 'kernel_size_l0': 5, 'dropout_l0': 0.4, 'out_channels_l1': 96, 'kernel_size_l1': 11, 'dropout_l1': 0.4, 'out_channels_l2': 96, 'kernel_size_l2': 15, 'dropout_l2': 0.4, 'optimizer': 'RMSprop', 'lr': 0.0034858584539264868, 'weight_decay': 0.0017502263520960584}. Best is trial 3 with value: 0.20204364332168.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training
Study statistics: 
  Number of finished trials:  4
  Number of pruned trials:  0
  Number of complete trials:  3
Best trial:
  Value:  0.20204364332168
  Params: 
    dropout_l0: 0.4
    dropout_l1: 0.4
    dropout_l2: 0.4
    kernel_size_l0: 5
    kernel_size_l1: 11
    kernel_size_l2: 15
    lr: 0.0034858584539264868
    n_layers: 3
    optimizer: RMSprop
    out_channels_l0: 32
    out_channels_l1: 96
    out_channels_l2: 96
    weight_decay: 0.0017502263520960584



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


  pruner=optuna.pruners.PatientPruner(optuna.pruners.MedianPruner(), patience=2),
[32m[I 2021-09-26 09:53:25,059][0m A new study created in RDB with name: GM12878_active_E_vs_inactive_E_CNN_1_2[0m


EarlyStopping counter: 5 out of 5
Early stopping the training
AUPRC test score: 0.18416087388282026


>>> ITERATION N. 2



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-09-26 10:01:11,028][0m Trial 0 finished with value: 0.17877640845070422 and parameters: {'n_layers': 2, 'out_channels_l0': 32, 'kernel_size_l0': 5, 'dropout_l0': 0.4, 'out_channels_l1': 96, 'kernel_size_l1': 15, 'dropout_l1': 0, 'optimizer': 'RMSprop', 'lr': 0.005641195635703693, 'weight_decay': 0.003109228035872607}. Best is trial 0 with value: 0.17877640845070422.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-09-26 10:05:15,596][0m Trial 1 finished with value: 0.1787764084507042 and parameters: {'n_layers': 1, 'out_channels_l0': 16, 'kernel_size_l0': 5, 'dropout_l0': 0.4, 'optimizer': 'RMSprop', 'lr': 0.012340287330937765, 'weight_decay': 0.0009651277759836191}. Best is trial 0 with value: 0.17877640845070422.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-09-26 10:11:12,806][0m Trial 2 finished with value: 0.1783274647887324 and parameters: {'n_layers': 3, 'out_channels_l0': 64, 'kernel_size_l0': 11, 'dropout_l0': 0.4, 'out_channels_l1': 32, 'kernel_size_l1': 11, 'dropout_l1': 0.4, 'out_channels_l2': 96, 'kernel_size_l2': 15, 'dropout_l2': 0.5, 'optimizer': 'Adam', 'lr': 0.0008430106761741273, 'weight_decay': 0.011086392771422218}. Best is trial 0 with value: 0.17877640845070422.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training
Study statistics: 
  Number of finished trials:  3
  Number of pruned trials:  0
  Number of complete trials:  3
Best trial:
  Value:  0.17877640845070422
  Params: 
    dropout_l0: 0.4
    dropout_l1: 0
    kernel_size_l0: 5
    kernel_size_l1: 15
    lr: 0.005641195635703693
    n_layers: 2
    optimizer: RMSprop
    out_channels_l0: 32
    out_channels_l1: 96
    weight_decay: 0.003109228035872607



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


  pruner=optuna.pruners.PatientPruner(optuna.pruners.MedianPruner(), patience=2),
[32m[I 2021-09-26 10:28:13,457][0m A new study created in RDB with name: GM12878_active_E_vs_inactive_E_CNN_1_2_3[0m


EarlyStopping counter: 5 out of 5
Early stopping the training
AUPRC test score: 0.18322989076464746


>>> ITERATION N. 3



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-09-26 10:33:17,596][0m Trial 0 finished with value: 0.19066207617437206 and parameters: {'n_layers': 3, 'out_channels_l0': 64, 'kernel_size_l0': 15, 'dropout_l0': 0.3, 'out_channels_l1': 96, 'kernel_size_l1': 5, 'dropout_l1': 0.4, 'out_channels_l2': 256, 'kernel_size_l2': 5, 'dropout_l2': 0.5, 'optimizer': 'Nadam', 'lr': 3.8209286858162034e-05, 'weight_decay': 0.0028556940507315793}. Best is trial 0 with value: 0.19066207617437206.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-09-26 10:38:34,216][0m Trial 1 finished with value: 0.17870332189947935 and parameters: {'n_layers': 1, 'out_channels_l0': 16, 'kernel_size_l0': 15, 'dropout_l0': 0, 'optimizer': 'Adam', 'lr': 6.013431346312665e-05, 'weight_decay': 0.008481235440714796}. Best is trial 0 with value: 0.19066207617437206.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-09-26 10:44:25,298][0m Trial 2 finished with value: 0.1784330985915493 and parameters: {'n_layers': 3, 'out_channels_l0': 16, 'kernel_size_l0': 11, 'dropout_l0': 0.2, 'out_channels_l1': 32, 'kernel_size_l1': 11, 'dropout_l1': 0.5, 'out_channels_l2': 128, 'kernel_size_l2': 11, 'dropout_l2': 0.5, 'optimizer': 'Adam', 'lr': 0.0007348912318582014, 'weight_decay': 0.008735226186127099}. Best is trial 0 with value: 0.19066207617437206.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training
Study statistics: 
  Number of finished trials:  3
  Number of pruned trials:  0
  Number of complete trials:  3
Best trial:
  Value:  0.19066207617437206
  Params: 
    dropout_l0: 0.3
    dropout_l1: 0.4
    dropout_l2: 0.5
    kernel_size_l0: 15
    kernel_size_l1: 5
    kernel_size_l2: 5
    lr: 3.8209286858162034e-05
    n_layers: 3
    optimizer: Nadam
    out_channels_l0: 64
    out_channels_l1: 96
    out_channels_l2: 256
    weight_decay: 0.0028556940507315793



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5
EarlyStopping counter: 5 out of 5
Early stopping the training
AUPRC test score: 0.19850912851954647



3-FOLD CROSS-VALIDATION AUPRC TEST SCORE: 0.18863


In [28]:
# Record this run's scores under [cell line][task][model class name].
results_dict[cell_line][task][model.__name__] = kf_CV.scores_dict

In [29]:
# Persist the accumulated results. Converting to OrderedDict drops the lambda
# default factory set when results_dict was loaded; a defaultdict with a
# lambda factory cannot be pickled.
with open ('results_dict.pickle', 'wb') as fout:
    pickle.dump(OrderedDict(results_dict), fout)

---
### 3. CNN-LSTM

In [None]:
# Use the CNN_LSTM class (from BIOINF_tesi.models) for the cells of this subsection.
model=CNN_LSTM

In [None]:
# Cross-validate the selected model on the current task and cell line.
# The study name and both checkpoint paths are keyed on
# '<cell_line>_<task>_<model class name>'.
# NOTE(review): this cell has no recorded output (In [None]) - verify it
# runs with these arguments before relying on its results.
kf_CV = Kfold_CV()

kf_CV(build_dataloader_pipeline = pipe_data_load,
                num_epochs = 100,
                n_folds=3,
                cell_line=cell_line,
                task=task,
                sequence=True,
                augmentation=False,
                model = model,
                device = device,
                sampler = 'TPE',
                study_name = f'{cell_line}_{task}_{model.__name__}',
                hp_model_path = f'{cell_line}_{task}_{model.__name__}_HP.pt',
                test_model_path = f'{cell_line}_{task}_{model.__name__}_TEST.pt')

In [None]:
# Record this run's scores under [cell line][task][model class name].
results_dict[cell_line][task][model.__name__] = kf_CV.scores_dict

In [None]:
# Persist the accumulated results. Converting to OrderedDict drops the lambda
# default factory set when results_dict was loaded; a defaultdict with a
# lambda factory cannot be pickled.
with open ('results_dict.pickle', 'wb') as fout:
    pickle.dump(OrderedDict(results_dict), fout)

---

## 2) ACTIVE PROMOTERS vs INACTIVE PROMOTERS

In [8]:
# Select task index 1 from TASKS; the bare expression echoes its name.
task = TASKS[1]
task

'active_P_vs_inactive_P'

In [9]:
# Build the data-loading pipeline for the current task, pointing it at '<task>.pickle'.
pipe_data_load = Build_DataLoader_Pipeline(path_name=f'{task}.pickle')

Data Preprocessing Done!




---

### 1. FFNN

In [None]:
# Use the FFNN class (from BIOINF_tesi.models) for the cells of this subsection.
model=FFNN

In [None]:
# Cross-validate the selected model on the current task and cell line.
# The study name and both checkpoint paths are keyed on
# '<cell_line>_<task>_<model class name>'.
# NOTE(review): sequence=True is the same value passed for the CNN runs -
# confirm that FFNN is meant to consume the sequence input.
kf_CV = Kfold_CV()

kf_CV(build_dataloader_pipeline = pipe_data_load,
                num_epochs = 100,
                n_folds=3,
                cell_line=cell_line,
                task=task,
                sequence=True,
                augmentation=False,
                model = model,
                device = device,
                sampler = 'TPE',
                study_name = f'{cell_line}_{task}_{model.__name__}',
                hp_model_path = f'{cell_line}_{task}_{model.__name__}_HP.pt',
                test_model_path = f'{cell_line}_{task}_{model.__name__}_TEST.pt')

In [None]:
# Record this run's scores under [cell line][task][model class name].
results_dict[cell_line][task][model.__name__] = kf_CV.scores_dict

In [None]:
# Persist the accumulated results. Converting to OrderedDict drops the lambda
# default factory set when results_dict was loaded; a defaultdict with a
# lambda factory cannot be pickled.
with open ('results_dict.pickle', 'wb') as fout:
    pickle.dump(OrderedDict(results_dict), fout)

---
### 2. CNN

In [10]:
# Use the CNN class (from BIOINF_tesi.models) for the cells of this subsection.
model=CNN

In [11]:
# Cross-validate the selected model on the current task and cell line.
# The study name and both checkpoint paths are keyed on
# '<cell_line>_<task>_<model class name>'.
# NOTE(review): the recorded log shows existing Optuna studies being reused
# for this run - confirm how Kfold_CV treats trials from earlier sessions.
kf_CV = Kfold_CV()

kf_CV(build_dataloader_pipeline = pipe_data_load,
                num_epochs = 100,
                n_folds=3,
                cell_line=cell_line,
                task=task,
                sequence=True,
                augmentation=False,
                model = model,
                device = device,
                sampler = 'TPE',
                study_name = f'{cell_line}_{task}_{model.__name__}',
                hp_model_path = f'{cell_line}_{task}_{model.__name__}_HP.pt',
                test_model_path = f'{cell_line}_{task}_{model.__name__}_TEST.pt')

>>> ITERATION N. 1



  pruner=optuna.pruners.PatientPruner(optuna.pruners.MedianPruner(), patience=2),
[32m[I 2021-09-26 12:34:13,128][0m Using an existing study with name 'GM12878_active_P_vs_inactive_P_CNN_1' instead of creating a new one.[0m


Study statistics: 
  Number of finished trials:  3
  Number of pruned trials:  0
  Number of complete trials:  3
Best trial:
  Value:  0.23328689851295875
  Params: 
    dropout_l0: 0
    dropout_l1: 0
    kernel_size_l0: 5
    kernel_size_l1: 15
    lr: 0.0003918369167353946
    n_layers: 2
    optimizer: Adam
    out_channels_l0: 16
    out_channels_l1: 96
    weight_decay: 0.0024062105922524253



  pruner=optuna.pruners.PatientPruner(optuna.pruners.MedianPruner(), patience=2),
[32m[I 2021-09-26 12:34:16,190][0m Using an existing study with name 'GM12878_active_P_vs_inactive_P_CNN_1_2' instead of creating a new one.[0m


AUPRC test score: 0.19516851187817139


>>> ITERATION N. 2

Study statistics: 
  Number of finished trials:  3
  Number of pruned trials:  0
  Number of complete trials:  3
Best trial:
  Value:  0.1425107994776207
  Params: 
    dropout_l0: 0.4
    kernel_size_l0: 11
    lr: 0.03934232737694134
    n_layers: 1
    optimizer: RMSprop
    out_channels_l0: 64
    weight_decay: 0.0681414364346004



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

  return torch.max_pool1d(input, kernel_size, stride, padding, dilation, ceil_mode)


EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


  pruner=optuna.pruners.PatientPruner(optuna.pruners.MedianPruner(), patience=2),
[32m[I 2021-09-26 12:44:20,923][0m Using an existing study with name 'GM12878_active_P_vs_inactive_P_CNN_1_2_3' instead of creating a new one.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training
AUPRC test score: 0.14229137469741376


>>> ITERATION N. 3



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

	add(Number alpha, Tensor other)
Consider using one of the following signatures instead:
	add(Tensor other, *, Number alpha) (Triggered internally at  /opt/conda/conda-bld/pytorch_1623448265233/work/torch/csrc/utils/python_arg_parser.cpp:1025.)
  grad = grad.add(group['weight_decay'], p.data)


EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-09-26 12:52:13,130][0m Trial 3 finished with value: 0.2184558445735339 and parameters: {'n_layers': 2, 'out_channels_l0': 64, 'kernel_size_l0': 15, 'dropout_l0': 0.3, 'out_channels_l1': 96, 'kernel_size_l1': 11, 'dropout_l1': 0, 'optimizer': 'Nadam', 'lr': 6.786700880251181e-05, 'weight_decay': 0.0028624813824748765}. Best is trial 3 with value: 0.2184558445735339.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-09-26 12:58:44,237][0m Trial 4 finished with value: 0.14231222218617176 and parameters: {'n_layers': 1, 'out_channels_l0': 64, 'kernel_size_l0': 15, 'dropout_l0': 0.2, 'optimizer': 'RMSprop', 'lr': 1.0279799897007344e-05, 'weight_decay': 0.05606120672463917}. Best is trial 3 with value: 0.2184558445735339.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training
Study statistics: 
  Number of finished trials:  5
  Number of pruned trials:  0
  Number of complete trials:  3
Best trial:
  Value:  0.2184558445735339
  Params: 
    dropout_l0: 0.3
    dropout_l1: 0
    kernel_size_l0: 15
    kernel_size_l1: 11
    lr: 6.786700880251181e-05
    n_layers: 2
    optimizer: Nadam
    out_channels_l0: 64
    out_channels_l1: 96
    weight_decay: 0.0028624813824748765



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5
EarlyStopping counter: 5 out of 5
Early stopping the training
AUPRC test score: 0.1770272497556328



3-FOLD CROSS-VALIDATION AUPRC TEST SCORE: 0.1715


In [12]:
# Record this run's scores under [cell line][task][model class name].
results_dict[cell_line][task][model.__name__] = kf_CV.scores_dict

In [13]:
# Persist the accumulated results. Converting to OrderedDict drops the lambda
# default factory set when results_dict was loaded; a defaultdict with a
# lambda factory cannot be pickled.
with open ('results_dict.pickle', 'wb') as fout:
    pickle.dump(OrderedDict(results_dict), fout)

---
### 3. CNN-LSTM

In [None]:
# Use the CNN_LSTM class (from BIOINF_tesi.models) for the cells of this subsection.
model=CNN_LSTM

In [None]:
# Cross-validate the selected model on the current task and cell line.
# The study name and both checkpoint paths are keyed on
# '<cell_line>_<task>_<model class name>'.
# NOTE(review): this cell has no recorded output (In [None]) - verify it
# runs with these arguments before relying on its results.
kf_CV = Kfold_CV()

kf_CV(build_dataloader_pipeline = pipe_data_load,
                num_epochs = 100,
                n_folds=3,
                cell_line=cell_line,
                task=task,
                sequence=True,
                augmentation=False,
                model = model,
                device = device,
                sampler = 'TPE',
                study_name = f'{cell_line}_{task}_{model.__name__}',
                hp_model_path = f'{cell_line}_{task}_{model.__name__}_HP.pt',
                test_model_path = f'{cell_line}_{task}_{model.__name__}_TEST.pt')

In [None]:
# Record this run's scores under [cell line][task][model class name].
results_dict[cell_line][task][model.__name__] = kf_CV.scores_dict

In [None]:
# Persist the accumulated results. Converting to OrderedDict drops the lambda
# default factory set when results_dict was loaded; a defaultdict with a
# lambda factory cannot be pickled.
with open ('results_dict.pickle', 'wb') as fout:
    pickle.dump(OrderedDict(results_dict), fout)

---

## 3) ACTIVE ENHANCERS vs ACTIVE PROMOTERS

In [14]:
# Select task index 2 from TASKS; the bare expression echoes its name.
task = TASKS[2]
task

'active_E_vs_active_P'

In [15]:
# Build the data-loading pipeline for the current task, pointing it at '<task>.pickle'.
pipe_data_load = Build_DataLoader_Pipeline(path_name=f'{task}.pickle')

Data Preprocessing Done!




---
### 1. FFNN

In [None]:
# Use the FFNN class (from BIOINF_tesi.models) for the cells of this subsection.
model=FFNN

In [None]:
# Cross-validate the selected model on the current task and cell line.
# The study name and both checkpoint paths are keyed on
# '<cell_line>_<task>_<model class name>'.
# NOTE(review): sequence=True is the same value passed for the CNN runs -
# confirm that FFNN is meant to consume the sequence input.
kf_CV = Kfold_CV()

kf_CV(build_dataloader_pipeline = pipe_data_load,
                num_epochs = 100,
                n_folds=3,
                cell_line=cell_line,
                task=task,
                sequence=True,
                augmentation=False,
                model = model,
                device = device,
                sampler = 'TPE',
                study_name = f'{cell_line}_{task}_{model.__name__}',
                hp_model_path = f'{cell_line}_{task}_{model.__name__}_HP.pt',
                test_model_path = f'{cell_line}_{task}_{model.__name__}_TEST.pt')

In [None]:
# Record this run's scores under [cell line][task][model class name].
results_dict[cell_line][task][model.__name__] = kf_CV.scores_dict

In [None]:
# Persist the accumulated results. Converting to OrderedDict drops the lambda
# default factory set when results_dict was loaded; a defaultdict with a
# lambda factory cannot be pickled.
with open ('results_dict.pickle', 'wb') as fout:
    pickle.dump(OrderedDict(results_dict), fout)

---
### 2. CNN

In [16]:
# Use the CNN class (from BIOINF_tesi.models) for the cells of this subsection.
model=CNN

In [17]:
# Cross-validate the selected model on the current task and cell line.
# The study name and both checkpoint paths are keyed on
# '<cell_line>_<task>_<model class name>'.
# NOTE(review): the recorded log suggests each of the 3 folds runs an Optuna
# search and then reports an AUPRC test score - confirm in Kfold_CV.
kf_CV = Kfold_CV()

kf_CV(build_dataloader_pipeline = pipe_data_load,
                num_epochs = 100,
                n_folds=3,
                cell_line=cell_line,
                task=task,
                sequence=True,
                augmentation=False,
                model = model,
                device = device,
                sampler = 'TPE',
                study_name = f'{cell_line}_{task}_{model.__name__}',
                hp_model_path = f'{cell_line}_{task}_{model.__name__}_HP.pt',
                test_model_path = f'{cell_line}_{task}_{model.__name__}_TEST.pt')

  pruner=optuna.pruners.PatientPruner(optuna.pruners.MedianPruner(), patience=2),
[32m[I 2021-09-26 13:10:37,197][0m A new study created in RDB with name: GM12878_active_E_vs_active_P_CNN_1[0m


>>> ITERATION N. 1



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-09-26 13:12:19,297][0m Trial 0 finished with value: 0.8016463167087442 and parameters: {'n_layers': 2, 'out_channels_l0': 64, 'kernel_size_l0': 5, 'dropout_l0': 0.3, 'out_channels_l1': 96, 'kernel_size_l1': 5, 'dropout_l1': 0.4, 'optimizer': 'RMSprop', 'lr': 8.284013934538835e-05, 'weight_decay': 0.026601102251335884}. Best is trial 0 with value: 0.8016463167087442.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-09-26 13:14:42,373][0m Trial 1 finished with value: 0.7830679324884406 and parameters: {'n_layers': 1, 'out_channels_l0': 16, 'kernel_size_l0': 11, 'dropout_l0': 0, 'optimizer': 'Nadam', 'lr': 4.027275907902912e-05, 'weight_decay': 0.018361898091728432}. Best is trial 0 with value: 0.8016463167087442.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-09-26 13:16:15,667][0m Trial 2 finished with value: 0.8021072999764748 and parameters: {'n_layers': 2, 'out_channels_l0': 16, 'kernel_size_l0': 5, 'dropout_l0': 0, 'out_channels_l1': 64, 'kernel_size_l1': 5, 'dropout_l1': 0.5, 'optimizer': 'Adam', 'lr': 0.000293837976787035, 'weight_decay': 0.0007523458608222387}. Best is trial 2 with value: 0.8021072999764748.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training
Study statistics: 
  Number of finished trials:  3
  Number of pruned trials:  0
  Number of complete trials:  3
Best trial:
  Value:  0.8021072999764748
  Params: 
    dropout_l0: 0
    dropout_l1: 0.5
    kernel_size_l0: 5
    kernel_size_l1: 5
    lr: 0.000293837976787035
    n_layers: 2
    optimizer: Adam
    out_channels_l0: 16
    out_channels_l1: 64
    weight_decay: 0.0007523458608222387



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


  pruner=optuna.pruners.PatientPruner(optuna.pruners.MedianPruner(), patience=2),
[32m[I 2021-09-26 13:18:39,751][0m A new study created in RDB with name: GM12878_active_E_vs_active_P_CNN_1_2[0m


EarlyStopping counter: 5 out of 5
Early stopping the training
AUPRC test score: 0.7847216146345877


>>> ITERATION N. 2



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-09-26 13:22:28,505][0m Trial 0 finished with value: 0.5188679451215653 and parameters: {'n_layers': 3, 'out_channels_l0': 64, 'kernel_size_l0': 15, 'dropout_l0': 0, 'out_channels_l1': 32, 'kernel_size_l1': 15, 'dropout_l1': 0, 'out_channels_l2': 128, 'kernel_size_l2': 5, 'dropout_l2': 0.4, 'optimizer': 'Adam', 'lr': 0.00019572063912358517, 'weight_decay': 0.018564376938151054}. Best is trial 0 with value: 0.5188679451215653.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-09-26 13:24:02,269][0m Trial 1 finished with value: 0.7814930797904298 and parameters: {'n_layers': 1, 'out_channels_l0': 32, 'kernel_size_l0': 15, 'dropout_l0': 0, 'optimizer': 'RMSprop', 'lr': 0.00025289294062975416, 'weight_decay': 0.00032769562683364236}. Best is trial 1 with value: 0.7814930797904298.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-09-26 13:25:29,496][0m Trial 2 finished with value: 0.7872208280727409 and parameters: {'n_layers': 2, 'out_channels_l0': 16, 'kernel_size_l0': 5, 'dropout_l0': 0, 'out_channels_l1': 32, 'kernel_size_l1': 11, 'dropout_l1': 0.5, 'optimizer': 'Adam', 'lr': 4.388791534364639e-05, 'weight_decay': 0.00043916925463086446}. Best is trial 2 with value: 0.7872208280727409.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training
Study statistics: 
  Number of finished trials:  3
  Number of pruned trials:  0
  Number of complete trials:  3
Best trial:
  Value:  0.7872208280727409
  Params: 
    dropout_l0: 0
    dropout_l1: 0.5
    kernel_size_l0: 5
    kernel_size_l1: 11
    lr: 4.388791534364639e-05
    n_layers: 2
    optimizer: Adam
    out_channels_l0: 16
    out_channels_l1: 32
    weight_decay: 0.00043916925463086446



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


  pruner=optuna.pruners.PatientPruner(optuna.pruners.MedianPruner(), patience=2),
[32m[I 2021-09-26 13:27:43,310][0m A new study created in RDB with name: GM12878_active_E_vs_active_P_CNN_1_2_3[0m


EarlyStopping counter: 5 out of 5
Early stopping the training
AUPRC test score: 0.7755393699714217


>>> ITERATION N. 3



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-09-26 13:29:21,415][0m Trial 0 finished with value: 0.765341780854677 and parameters: {'n_layers': 1, 'out_channels_l0': 64, 'kernel_size_l0': 5, 'dropout_l0': 0.4, 'optimizer': 'Adam', 'lr': 0.04079433085974168, 'weight_decay': 0.0033503542179001058}. Best is trial 0 with value: 0.765341780854677.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-09-26 13:31:13,796][0m Trial 1 finished with value: 0.7883792650335985 and parameters: {'n_layers': 2, 'out_channels_l0': 16, 'kernel_size_l0': 11, 'dropout_l0': 0, 'out_channels_l1': 96, 'kernel_size_l1': 5, 'dropout_l1': 0.4, 'optimizer': 'Nadam', 'lr': 0.0013156047373445202, 'weight_decay': 0.0004521556064895734}. Best is trial 1 with value: 0.7883792650335985.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-09-26 13:32:55,790][0m Trial 2 finished with value: 0.6688784661538385 and parameters: {'n_layers': 4, 'out_channels_l0': 16, 'kernel_size_l0': 5, 'dropout_l0': 0, 'out_channels_l1': 32, 'kernel_size_l1': 11, 'dropout_l1': 0.4, 'out_channels_l2': 96, 'kernel_size_l2': 11, 'dropout_l2': 0.4, 'out_channels_l3': 256, 'kernel_size_l3': 11, 'dropout_l3': 0.4, 'optimizer': 'Nadam', 'lr': 0.03836269143385874, 'weight_decay': 0.03848120841299475}. Best is trial 1 with value: 0.7883792650335985.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training
Study statistics: 
  Number of finished trials:  3
  Number of pruned trials:  0
  Number of complete trials:  3
Best trial:
  Value:  0.7883792650335985
  Params: 
    dropout_l0: 0
    dropout_l1: 0.4
    kernel_size_l0: 11
    kernel_size_l1: 5
    lr: 0.0013156047373445202
    n_layers: 2
    optimizer: Nadam
    out_channels_l0: 16
    out_channels_l1: 96
    weight_decay: 0.0004521556064895734



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5
EarlyStopping counter: 5 out of 5
Early stopping the training
AUPRC test score: 0.8010480755346968



3-FOLD CROSS-VALIDATION AUPRC TEST SCORE: 0.7871


In [18]:
# Record this run's scores under [cell line][task][model class name].
results_dict[cell_line][task][model.__name__] = kf_CV.scores_dict

In [19]:
# Persist the accumulated results. Converting to OrderedDict drops the lambda
# default factory set when results_dict was loaded; a defaultdict with a
# lambda factory cannot be pickled.
with open ('results_dict.pickle', 'wb') as fout:
    pickle.dump(OrderedDict(results_dict), fout)

---
### 3. CNN-LSTM

In [None]:
# Use the CNN_LSTM class (from BIOINF_tesi.models) for the cells of this subsection.
model=CNN_LSTM

In [None]:
# Cross-validate the selected model on the current task and cell line.
# The study name and both checkpoint paths are keyed on
# '<cell_line>_<task>_<model class name>'.
# NOTE(review): this cell has no recorded output (In [None]) - verify it
# runs with these arguments before relying on its results.
kf_CV = Kfold_CV()

kf_CV(build_dataloader_pipeline = pipe_data_load,
                num_epochs = 100,
                n_folds=3,
                cell_line=cell_line,
                task=task,
                sequence=True,
                augmentation=False,
                model = model,
                device = device,
                sampler = 'TPE',
                study_name = f'{cell_line}_{task}_{model.__name__}',
                hp_model_path = f'{cell_line}_{task}_{model.__name__}_HP.pt',
                test_model_path = f'{cell_line}_{task}_{model.__name__}_TEST.pt')

In [None]:
# Record this run's scores under [cell line][task][model class name].
results_dict[cell_line][task][model.__name__] = kf_CV.scores_dict

In [None]:
# Persist the accumulated results. Converting to OrderedDict drops the lambda
# default factory set when results_dict was loaded; a defaultdict with a
# lambda factory cannot be pickled.
with open ('results_dict.pickle', 'wb') as fout:
    pickle.dump(OrderedDict(results_dict), fout)

---

## 4) INACTIVE ENHANCERS vs INACTIVE PROMOTERS

In [20]:
# Select task index 3 from TASKS; the bare expression echoes its name.
task = TASKS[3]
task

'inactive_E_vs_inactive_P'

In [21]:
# Build the data-loading pipeline for the current task, pointing it at '<task>.pickle'.
pipe_data_load = Build_DataLoader_Pipeline(path_name=f'{task}.pickle')

Data Preprocessing Done!




---
### 1. FFNN

In [None]:
# Use the FFNN class (from BIOINF_tesi.models) for the cells of this subsection.
model=FFNN

In [None]:
# Cross-validate the selected model on the current task and cell line.
# The study name and both checkpoint paths are keyed on
# '<cell_line>_<task>_<model class name>'.
# NOTE(review): sequence=True is the same value passed for the CNN runs -
# confirm that FFNN is meant to consume the sequence input.
kf_CV = Kfold_CV()

kf_CV(build_dataloader_pipeline = pipe_data_load,
                num_epochs = 100,
                n_folds=3,
                cell_line=cell_line,
                task=task,
                sequence=True,
                augmentation=False,
                model = model,
                device = device,
                sampler = 'TPE',
                study_name = f'{cell_line}_{task}_{model.__name__}',
                hp_model_path = f'{cell_line}_{task}_{model.__name__}_HP.pt',
                test_model_path = f'{cell_line}_{task}_{model.__name__}_TEST.pt')

In [None]:
# File the scores gathered by the Kfold_CV run above under
# cell line -> task -> model class name.
cv_scores = kf_CV.scores_dict
results_dict[cell_line][task][model.__name__] = cv_scores

In [None]:
# Checkpoint the accumulated scores to disk (the file is overwritten).
# The outer defaultdict is built with a lambda default factory in the loading
# cell, which pickle cannot serialise, so it is converted to an OrderedDict.
with open('results_dict.pickle', mode='wb') as results_file:
    pickle.dump(OrderedDict(results_dict), results_file)

---
### 2. CNN

In [22]:
# Select the architecture for this subsection; the next cells read `model`
# when launching the cross-validation and when storing its scores.
model = CNN

In [23]:
# Launch a fresh Kfold_CV run for the currently selected `model` on the
# current `task`: n_folds=3, num_epochs=100, sampler='TPE'. The study name and
# both model paths are keyed by cell line, task and model class name.
# NOTE(review): the meaning of sequence=True / augmentation=False is defined in
# BIOINF_tesi.models.utils - confirm there.
kf_CV = Kfold_CV()

kf_CV(
    build_dataloader_pipeline=pipe_data_load,
    num_epochs=100,
    n_folds=3,
    cell_line=cell_line,
    task=task,
    sequence=True,
    augmentation=False,
    model=model,
    device=device,
    sampler='TPE',
    study_name=f'{cell_line}_{task}_{model.__name__}',
    hp_model_path=f'{cell_line}_{task}_{model.__name__}_HP.pt',
    test_model_path=f'{cell_line}_{task}_{model.__name__}_TEST.pt',
)

  pruner=optuna.pruners.PatientPruner(optuna.pruners.MedianPruner(), patience=2),
[32m[I 2021-09-26 13:35:49,377][0m A new study created in RDB with name: GM12878_inactive_E_vs_inactive_P_CNN_1[0m


>>> ITERATION N. 1



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-09-26 14:04:49,339][0m Trial 0 finished with value: 0.37539017627252913 and parameters: {'n_layers': 3, 'out_channels_l0': 16, 'kernel_size_l0': 5, 'dropout_l0': 0, 'out_channels_l1': 96, 'kernel_size_l1': 5, 'dropout_l1': 0.5, 'out_channels_l2': 256, 'kernel_size_l2': 11, 'dropout_l2': 0.4, 'optimizer': 'Adam', 'lr': 0.016896784898099627, 'weight_decay': 0.00021530557262608728}. Best is trial 0 with value: 0.37539017627252913.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-09-26 14:24:02,470][0m Trial 1 finished with value: 0.5708898764062255 and parameters: {'n_layers': 2, 'out_channels_l0': 64, 'kernel_size_l0': 15, 'dropout_l0': 0, 'out_channels_l1': 96, 'kernel_size_l1': 15, 'dropout_l1': 0.5, 'optimizer': 'Nadam', 'lr': 0.0007074639609222145, 'weight_decay': 0.002380135119353916}. Best is trial 1 with value: 0.5708898764062255.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-09-26 14:46:36,459][0m Trial 2 finished with value: 0.5142394687728905 and parameters: {'n_layers': 3, 'out_channels_l0': 64, 'kernel_size_l0': 5, 'dropout_l0': 0.2, 'out_channels_l1': 96, 'kernel_size_l1': 15, 'dropout_l1': 0.5, 'out_channels_l2': 64, 'kernel_size_l2': 15, 'dropout_l2': 0.5, 'optimizer': 'Adam', 'lr': 0.015671867619450233, 'weight_decay': 0.0002536250410265197}. Best is trial 1 with value: 0.5708898764062255.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training
Study statistics: 
  Number of finished trials:  3
  Number of pruned trials:  0
  Number of complete trials:  3
Best trial:
  Value:  0.5708898764062255
  Params: 
    dropout_l0: 0
    dropout_l1: 0.5
    kernel_size_l0: 15
    kernel_size_l1: 15
    lr: 0.0007074639609222145
    n_layers: 2
    optimizer: Nadam
    out_channels_l0: 64
    out_channels_l1: 96
    weight_decay: 0.002380135119353916



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


  pruner=optuna.pruners.PatientPruner(optuna.pruners.MedianPruner(), patience=2),
[32m[I 2021-09-26 15:03:55,333][0m A new study created in RDB with name: GM12878_inactive_E_vs_inactive_P_CNN_1_2[0m


EarlyStopping counter: 5 out of 5
Early stopping the training
AUPRC test score: 0.5750354350343014


>>> ITERATION N. 2



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-09-26 15:23:01,151][0m Trial 0 finished with value: 0.5625082728866406 and parameters: {'n_layers': 2, 'out_channels_l0': 64, 'kernel_size_l0': 11, 'dropout_l0': 0.3, 'out_channels_l1': 64, 'kernel_size_l1': 15, 'dropout_l1': 0, 'optimizer': 'Adam', 'lr': 0.001062526989212738, 'weight_decay': 0.0026120315931074706}. Best is trial 0 with value: 0.5625082728866406.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-09-26 15:53:13,802][0m Trial 1 finished with value: 0.5003336707214805 and parameters: {'n_layers': 4, 'out_channels_l0': 32, 'kernel_size_l0': 11, 'dropout_l0': 0.2, 'out_channels_l1': 96, 'kernel_size_l1': 11, 'dropout_l1': 0.4, 'out_channels_l2': 64, 'kernel_size_l2': 11, 'dropout_l2': 0.5, 'out_channels_l3': 256, 'kernel_size_l3': 11, 'dropout_l3': 0.5, 'optimizer': 'Nadam', 'lr': 0.0065713330995416175, 'weight_decay': 0.00402359933560874}. Best is trial 0 with value: 0.5625082728866406.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-09-26 16:01:24,107][0m Trial 2 finished with value: 0.374213705684294 and parameters: {'n_layers': 1, 'out_channels_l0': 32, 'kernel_size_l0': 11, 'dropout_l0': 0.3, 'optimizer': 'Nadam', 'lr': 0.019546808397553045, 'weight_decay': 0.0334997432787113}. Best is trial 0 with value: 0.5625082728866406.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training
Study statistics: 
  Number of finished trials:  3
  Number of pruned trials:  0
  Number of complete trials:  3
Best trial:
  Value:  0.5625082728866406
  Params: 
    dropout_l0: 0.3
    dropout_l1: 0
    kernel_size_l0: 11
    kernel_size_l1: 15
    lr: 0.001062526989212738
    n_layers: 2
    optimizer: Adam
    out_channels_l0: 64
    out_channels_l1: 64
    weight_decay: 0.0026120315931074706



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


  pruner=optuna.pruners.PatientPruner(optuna.pruners.MedianPruner(), patience=2),
[32m[I 2021-09-26 16:17:03,308][0m A new study created in RDB with name: GM12878_inactive_E_vs_inactive_P_CNN_1_2_3[0m


EarlyStopping counter: 5 out of 5
Early stopping the training
AUPRC test score: 0.5817020531118744


>>> ITERATION N. 3



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-09-26 16:26:02,332][0m Trial 0 finished with value: 0.5253359181322571 and parameters: {'n_layers': 1, 'out_channels_l0': 64, 'kernel_size_l0': 11, 'dropout_l0': 0.4, 'optimizer': 'Nadam', 'lr': 0.0005703235107371743, 'weight_decay': 0.07418081825077961}. Best is trial 0 with value: 0.5253359181322571.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-09-26 16:34:24,572][0m Trial 1 finished with value: 0.3741879580114875 and parameters: {'n_layers': 3, 'out_channels_l0': 64, 'kernel_size_l0': 15, 'dropout_l0': 0, 'out_channels_l1': 32, 'kernel_size_l1': 5, 'dropout_l1': 0.4, 'out_channels_l2': 64, 'kernel_size_l2': 5, 'dropout_l2': 0, 'optimizer': 'RMSprop', 'lr': 0.05819507028346397, 'weight_decay': 0.0017153912065525987}. Best is trial 0 with value: 0.5253359181322571.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-09-26 16:43:20,931][0m Trial 2 finished with value: 0.5610394010746764 and parameters: {'n_layers': 1, 'out_channels_l0': 64, 'kernel_size_l0': 11, 'dropout_l0': 0.3, 'optimizer': 'Adam', 'lr': 0.00028358098986135057, 'weight_decay': 0.003479446096484148}. Best is trial 2 with value: 0.5610394010746764.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training
Study statistics: 
  Number of finished trials:  3
  Number of pruned trials:  0
  Number of complete trials:  3
Best trial:
  Value:  0.5610394010746764
  Params: 
    dropout_l0: 0.3
    kernel_size_l0: 11
    lr: 0.00028358098986135057
    n_layers: 1
    optimizer: Adam
    out_channels_l0: 64
    weight_decay: 0.003479446096484148



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5
EarlyStopping counter: 5 out of 5
Early stopping the training
AUPRC test score: 0.5621335853588748



3-FOLD CROSS-VALIDATION AUPRC TEST SCORE: 0.57296


In [24]:
# File the scores gathered by the Kfold_CV run above under
# cell line -> task -> model class name.
cv_scores = kf_CV.scores_dict
results_dict[cell_line][task][model.__name__] = cv_scores

In [25]:
# Checkpoint the accumulated scores to disk (the file is overwritten).
# The outer defaultdict is built with a lambda default factory in the loading
# cell, which pickle cannot serialise, so it is converted to an OrderedDict.
with open('results_dict.pickle', mode='wb') as results_file:
    pickle.dump(OrderedDict(results_dict), results_file)

---
### 3. CNN-LSTM

In [None]:
# Select the architecture for this subsection; the next cells read `model`
# when launching the cross-validation and when storing its scores.
model = CNN_LSTM

In [None]:
# Launch a fresh Kfold_CV run for the currently selected `model` on the
# current `task`: n_folds=3, num_epochs=100, sampler='TPE'. The study name and
# both model paths are keyed by cell line, task and model class name.
# NOTE(review): the meaning of sequence=True / augmentation=False is defined in
# BIOINF_tesi.models.utils - confirm there.
kf_CV = Kfold_CV()

kf_CV(
    build_dataloader_pipeline=pipe_data_load,
    num_epochs=100,
    n_folds=3,
    cell_line=cell_line,
    task=task,
    sequence=True,
    augmentation=False,
    model=model,
    device=device,
    sampler='TPE',
    study_name=f'{cell_line}_{task}_{model.__name__}',
    hp_model_path=f'{cell_line}_{task}_{model.__name__}_HP.pt',
    test_model_path=f'{cell_line}_{task}_{model.__name__}_TEST.pt',
)

In [None]:
# File the scores gathered by the Kfold_CV run above under
# cell line -> task -> model class name.
cv_scores = kf_CV.scores_dict
results_dict[cell_line][task][model.__name__] = cv_scores

In [None]:
# Checkpoint the accumulated scores to disk (the file is overwritten).
# The outer defaultdict is built with a lambda default factory in the loading
# cell, which pickle cannot serialise, so it is converted to an OrderedDict.
with open('results_dict.pickle', mode='wb') as results_file:
    pickle.dump(OrderedDict(results_dict), results_file)

---

## 5) ACTIVE ENHANCERS + ACTIVE PROMOTERS vs INACTIVE REST

In [26]:
# Switch to the fifth entry of TASKS; the bare `task` on the last line makes
# the notebook display the selected task name.
task = TASKS[4]
task

'active_EP_vs_inactive_rest'

In [27]:
# Rebind the data-loading pipeline to the pickle file named after the current
# task, replacing the pipeline object of the previous task.
# NOTE(review): path_name presumably points at the task's prepared dataset -
# confirm in BIOINF_tesi.data_pipe.
pipe_data_load = Build_DataLoader_Pipeline(path_name=f'{task}.pickle')

Data Preprocessing Done!




---
### 1. FFNN

In [None]:
# This subsection is the FFNN run, but it has no cell that selects the model.
# Without the assignment below, `model` keeps the value left by the previous
# subsection, so a top-to-bottom run would train that architecture and store
# its study, checkpoints and scores under its name instead of FFNN's.
model = FFNN

# Launch a fresh Kfold_CV run for `model` on the current `task`:
# n_folds=3, num_epochs=100, sampler='TPE'. The study name and both model
# paths are keyed by cell line, task and model class name.
# NOTE(review): the meaning of sequence=True / augmentation=False is defined in
# BIOINF_tesi.models.utils - confirm there.
kf_CV = Kfold_CV()

kf_CV(
    build_dataloader_pipeline=pipe_data_load,
    num_epochs=100,
    n_folds=3,
    cell_line=cell_line,
    task=task,
    sequence=True,
    augmentation=False,
    model=model,
    device=device,
    sampler='TPE',
    study_name=f'{cell_line}_{task}_{model.__name__}',
    hp_model_path=f'{cell_line}_{task}_{model.__name__}_HP.pt',
    test_model_path=f'{cell_line}_{task}_{model.__name__}_TEST.pt',
)

In [None]:
# File the scores gathered by the Kfold_CV run above under
# cell line -> task -> model class name.
cv_scores = kf_CV.scores_dict
results_dict[cell_line][task][model.__name__] = cv_scores

In [None]:
# Checkpoint the accumulated scores to disk (the file is overwritten).
# The outer defaultdict is built with a lambda default factory in the loading
# cell, which pickle cannot serialise, so it is converted to an OrderedDict.
with open('results_dict.pickle', mode='wb') as results_file:
    pickle.dump(OrderedDict(results_dict), results_file)

---
### 2. CNN

In [28]:
# Select the architecture for this subsection; the next cells read `model`
# when launching the cross-validation and when storing its scores.
model = CNN

In [29]:
# Launch a fresh Kfold_CV run for the currently selected `model` on the
# current `task`: n_folds=3, num_epochs=100, sampler='TPE'. The study name and
# both model paths are keyed by cell line, task and model class name.
# NOTE(review): the meaning of sequence=True / augmentation=False is defined in
# BIOINF_tesi.models.utils - confirm there.
kf_CV = Kfold_CV()

kf_CV(
    build_dataloader_pipeline=pipe_data_load,
    num_epochs=100,
    n_folds=3,
    cell_line=cell_line,
    task=task,
    sequence=True,
    augmentation=False,
    model=model,
    device=device,
    sampler='TPE',
    study_name=f'{cell_line}_{task}_{model.__name__}',
    hp_model_path=f'{cell_line}_{task}_{model.__name__}_HP.pt',
    test_model_path=f'{cell_line}_{task}_{model.__name__}_TEST.pt',
)

  pruner=optuna.pruners.PatientPruner(optuna.pruners.MedianPruner(), patience=2),
[32m[I 2021-09-26 16:56:54,146][0m A new study created in RDB with name: GM12878_active_EP_vs_inactive_rest_CNN_1[0m


>>> ITERATION N. 1



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-09-26 17:12:46,873][0m Trial 0 finished with value: 0.15401424846340098 and parameters: {'n_layers': 4, 'out_channels_l0': 16, 'kernel_size_l0': 5, 'dropout_l0': 0.4, 'out_channels_l1': 32, 'kernel_size_l1': 5, 'dropout_l1': 0, 'out_channels_l2': 64, 'kernel_size_l2': 15, 'dropout_l2': 0, 'out_channels_l3': 512, 'kernel_size_l3': 11, 'dropout_l3': 0, 'optimizer': 'RMSprop', 'lr': 0.07614649264012976, 'weight_decay': 0.016770687837481448}. Best is trial 0 with value: 0.15401424846340098.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-09-26 17:23:33,116][0m Trial 1 finished with value: 0.15407990314769976 and parameters: {'n_layers': 1, 'out_channels_l0': 64, 'kernel_size_l0': 15, 'dropout_l0': 0.4, 'optimizer': 'Adam', 'lr': 0.06132636725602765, 'weight_decay': 0.0006738092403381138}. Best is trial 1 with value: 0.15407990314769976.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-09-26 18:10:04,010][0m Trial 2 finished with value: 0.1541455578319986 and parameters: {'n_layers': 4, 'out_channels_l0': 64, 'kernel_size_l0': 15, 'dropout_l0': 0.3, 'out_channels_l1': 32, 'kernel_size_l1': 15, 'dropout_l1': 0, 'out_channels_l2': 64, 'kernel_size_l2': 5, 'dropout_l2': 0.5, 'out_channels_l3': 256, 'kernel_size_l3': 15, 'dropout_l3': 0.4, 'optimizer': 'Adam', 'lr': 0.0009015763766101864, 'weight_decay': 0.00782725696761916}. Best is trial 2 with value: 0.1541455578319986.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training
Study statistics: 
  Number of finished trials:  3
  Number of pruned trials:  0
  Number of complete trials:  3
Best trial:
  Value:  0.1541455578319986
  Params: 
    dropout_l0: 0.3
    dropout_l1: 0
    dropout_l2: 0.5
    dropout_l3: 0.4
    kernel_size_l0: 15
    kernel_size_l1: 15
    kernel_size_l2: 5
    kernel_size_l3: 15
    lr: 0.0009015763766101864
    n_layers: 4
    optimizer: Adam
    out_channels_l0: 64
    out_channels_l1: 32
    out_channels_l2: 64
    out_channels_l3: 256
    weight_decay: 0.00782725696761916



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


  pruner=optuna.pruners.PatientPruner(optuna.pruners.MedianPruner(), patience=2),
[32m[I 2021-09-26 19:08:45,968][0m A new study created in RDB with name: GM12878_active_EP_vs_inactive_rest_CNN_1_2[0m


EarlyStopping counter: 5 out of 5
Early stopping the training
AUPRC test score: 0.15991752256458144


>>> ITERATION N. 2



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-09-26 19:26:15,084][0m Trial 0 finished with value: 0.17794942998099741 and parameters: {'n_layers': 4, 'out_channels_l0': 32, 'kernel_size_l0': 11, 'dropout_l0': 0, 'out_channels_l1': 96, 'kernel_size_l1': 11, 'dropout_l1': 0.5, 'out_channels_l2': 64, 'kernel_size_l2': 15, 'dropout_l2': 0.5, 'out_channels_l3': 512, 'kernel_size_l3': 11, 'dropout_l3': 0.5, 'optimizer': 'Nadam', 'lr': 1.095411157762299e-05, 'weight_decay': 0.0018879711289278907}. Best is trial 0 with value: 0.17794942998099741.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-09-26 19:44:52,309][0m Trial 1 finished with value: 0.15683224013732494 and parameters: {'n_layers': 3, 'out_channels_l0': 16, 'kernel_size_l0': 15, 'dropout_l0': 0.3, 'out_channels_l1': 32, 'kernel_size_l1': 15, 'dropout_l1': 0.4, 'out_channels_l2': 96, 'kernel_size_l2': 15, 'dropout_l2': 0.5, 'optimizer': 'RMSprop', 'lr': 0.00030836517744387335, 'weight_decay': 0.00934963787730285}. Best is trial 0 with value: 0.17794942998099741.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-09-26 19:59:53,234][0m Trial 2 finished with value: 0.20925968035519338 and parameters: {'n_layers': 3, 'out_channels_l0': 32, 'kernel_size_l0': 11, 'dropout_l0': 0.4, 'out_channels_l1': 32, 'kernel_size_l1': 15, 'dropout_l1': 0.5, 'out_channels_l2': 256, 'kernel_size_l2': 5, 'dropout_l2': 0.5, 'optimizer': 'Adam', 'lr': 0.0006293456331054727, 'weight_decay': 0.00015247203109346937}. Best is trial 2 with value: 0.20925968035519338.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training
Study statistics: 
  Number of finished trials:  3
  Number of pruned trials:  0
  Number of complete trials:  3
Best trial:
  Value:  0.20925968035519338
  Params: 
    dropout_l0: 0.4
    dropout_l1: 0.5
    dropout_l2: 0.5
    kernel_size_l0: 11
    kernel_size_l1: 15
    kernel_size_l2: 5
    lr: 0.0006293456331054727
    n_layers: 3
    optimizer: Adam
    out_channels_l0: 32
    out_channels_l1: 32
    out_channels_l2: 256
    weight_decay: 0.00015247203109346937



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


  pruner=optuna.pruners.PatientPruner(optuna.pruners.MedianPruner(), patience=2),
[32m[I 2021-09-26 20:16:50,708][0m A new study created in RDB with name: GM12878_active_EP_vs_inactive_rest_CNN_1_2_3[0m


EarlyStopping counter: 5 out of 5
Early stopping the training
AUPRC test score: 0.21303772802323795


>>> ITERATION N. 3



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-09-26 20:26:42,325][0m Trial 0 finished with value: 0.1589260408778612 and parameters: {'n_layers': 1, 'out_channels_l0': 32, 'kernel_size_l0': 15, 'dropout_l0': 0, 'optimizer': 'Adam', 'lr': 4.602828429268402e-05, 'weight_decay': 0.004852194149624126}. Best is trial 0 with value: 0.1589260408778612.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-09-26 20:46:57,365][0m Trial 1 finished with value: 0.15669413919413921 and parameters: {'n_layers': 3, 'out_channels_l0': 32, 'kernel_size_l0': 5, 'dropout_l0': 0.2, 'out_channels_l1': 32, 'kernel_size_l1': 15, 'dropout_l1': 0, 'out_channels_l2': 96, 'kernel_size_l2': 5, 'dropout_l2': 0.5, 'optimizer': 'RMSprop', 'lr': 0.00883177488375565, 'weight_decay': 0.008498226884993496}. Best is trial 0 with value: 0.1589260408778612.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training


Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5


[32m[I 2021-09-26 21:08:03,224][0m Trial 2 finished with value: 0.15663003663003663 and parameters: {'n_layers': 1, 'out_channels_l0': 64, 'kernel_size_l0': 15, 'dropout_l0': 0, 'optimizer': 'RMSprop', 'lr': 0.034224488538173614, 'weight_decay': 0.0005992973379482369}. Best is trial 0 with value: 0.1589260408778612.[0m


EarlyStopping counter: 5 out of 5
Early stopping the training
Study statistics: 
  Number of finished trials:  3
  Number of pruned trials:  0
  Number of complete trials:  3
Best trial:
  Value:  0.1589260408778612
  Params: 
    dropout_l0: 0
    kernel_size_l0: 15
    lr: 4.602828429268402e-05
    n_layers: 1
    optimizer: Adam
    out_channels_l0: 32
    weight_decay: 0.004852194149624126



Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 1 out of 5
EarlyStopping counter: 2 out of 5
EarlyStopping counter: 3 out of 5
EarlyStopping counter: 4 out of 5
EarlyStopping counter: 5 out of 5
Early stopping the training
AUPRC test score: 0.17553326322046425



3-FOLD CROSS-VALIDATION AUPRC TEST SCORE: 0.18283


In [30]:
# File the scores gathered by the Kfold_CV run above under
# cell line -> task -> model class name.
cv_scores = kf_CV.scores_dict
results_dict[cell_line][task][model.__name__] = cv_scores

In [31]:
# Checkpoint the accumulated scores to disk (the file is overwritten).
# The outer defaultdict is built with a lambda default factory in the loading
# cell, which pickle cannot serialise, so it is converted to an OrderedDict.
with open('results_dict.pickle', mode='wb') as results_file:
    pickle.dump(OrderedDict(results_dict), results_file)

---
### 3. CNN-LSTM

In [None]:
# Select the architecture for this subsection; the next cells read `model`
# when launching the cross-validation and when storing its scores.
model = CNN_LSTM

In [None]:
# Launch a fresh Kfold_CV run for the currently selected `model` on the
# current `task`: n_folds=3, num_epochs=100, sampler='TPE'. The study name and
# both model paths are keyed by cell line, task and model class name.
# NOTE(review): the meaning of sequence=True / augmentation=False is defined in
# BIOINF_tesi.models.utils - confirm there.
kf_CV = Kfold_CV()

kf_CV(
    build_dataloader_pipeline=pipe_data_load,
    num_epochs=100,
    n_folds=3,
    cell_line=cell_line,
    task=task,
    sequence=True,
    augmentation=False,
    model=model,
    device=device,
    sampler='TPE',
    study_name=f'{cell_line}_{task}_{model.__name__}',
    hp_model_path=f'{cell_line}_{task}_{model.__name__}_HP.pt',
    test_model_path=f'{cell_line}_{task}_{model.__name__}_TEST.pt',
)

In [None]:
# File the scores gathered by the Kfold_CV run above under
# cell line -> task -> model class name.
cv_scores = kf_CV.scores_dict
results_dict[cell_line][task][model.__name__] = cv_scores

In [None]:
# Checkpoint the accumulated scores to disk (the file is overwritten).
# The outer defaultdict is built with a lambda default factory in the loading
# cell, which pickle cannot serialise, so it is converted to an OrderedDict.
with open('results_dict.pickle', mode='wb') as results_file:
    pickle.dump(OrderedDict(results_dict), results_file)

---