## BIOINFORMATICS THESIS: MULTIMODAL NEURAL NETWORK

# CELL LINE: MCF7

In [1]:
import pandas as pd 
import numpy as np
import os

import torch
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.optim as optim
from collections import defaultdict, OrderedDict
import pickle

import sqlite3
from sqlalchemy import create_engine

# Train on the GPU when CUDA is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

In [2]:
# Create a database to store Optuna studies (SQLite backend).
#engine = create_engine('sqlite:///BIOINF_optuna_tuning.db')

In [3]:
from BIOINF_tesi.data_pipe import CELL_LINES, TASKS

In [4]:
# Pick the cell line analysed in this notebook by position in CELL_LINES;
# index 6 resolves to 'MCF7' (see the displayed value below).
# NOTE(review): a positional lookup silently selects a different cell line if
# CELL_LINES is ever reordered — confirm the displayed value after each run.
cell_line = CELL_LINES[6]
cell_line

'MCF7'

---

In [5]:
from BIOINF_tesi.data_pipe import Load_Create_Task
from BIOINF_tesi.data_pipe import Build_DataLoader_Pipeline

In [6]:
from BIOINF_tesi.models import FFNN, CNN, CNN_LSTM
from BIOINF_tesi.models.utils import fit, Param_Search, Kfold_CV

In [7]:
# Load the results accumulated by earlier runs, or start from an empty
# structure when 'results_dict.pickle' does not exist yet (first run on a
# fresh checkout: the file is only created by the save cells further down).
# Layout used by this notebook: results_dict[cell_line][task][model_key].
# NOTE: pickle.load can execute arbitrary code; only load a results file that
# was written by this notebook.
try:
    with open('results_dict.pickle', 'rb') as fin:
        saved_results = pickle.load(fin)
except FileNotFoundError:
    saved_results = {}

# Restore the auto-creating behaviour at both nesting levels so that
# results_dict[cell_line][task][key] = ... works for unseen cell lines and
# tasks, even if the stored inner mappings are plain dicts.
results_dict = defaultdict(
    lambda: defaultdict(dict),
    {name: defaultdict(dict, per_task) for name, per_task in saved_results.items()},
)

## 1) ACTIVE ENHANCERS vs INACTIVE ENHANCERS

In [7]:
# First task: TASKS[0] resolves to 'active_E_vs_inactive_E' (displayed below).
# Every later cell in this section reads `task`, so run this cell first.
task = TASKS[0]
task

'active_E_vs_inactive_E'

In [105]:
# Build the data-loading pipeline from the pickle file named after the current
# task (f'{task}.pickle'); the call prints "Data Preprocessing Done!" when it finishes.
pipe_data_load = Build_DataLoader_Pipeline(path_name=f'{task}.pickle')

Data Preprocessing Done!


### 1. FFNN

In [8]:
# Model evaluated in this subsection. The FFNN class itself (not an instance)
# is stored: it is handed to Kfold_CV and its __name__ labels studies and results.
model=FFNN

In [11]:
# IMBALANCED
# 3-fold cross-validation with a hyper-parameter search for the current
# `model` on the current `task`; per-fold results are printed below.
# The augmentation tag is also appended to the study name and model path.
type_augm_genfeatures = 'smote'

kf_CV = Kfold_CV()

kf_CV(
    # data and task
    build_dataloader_pipeline=pipe_data_load,
    cell_line=cell_line,
    task=task,
    sequence=False,
    type_augm_genfeatures=type_augm_genfeatures,
    # model and training
    model=model,
    device=device,
    num_epochs=100,
    n_folds=3,
    # search and bookkeeping
    sampler='TPE',
    study_name=f'{cell_line}_{task}_{model.__name__}_{type_augm_genfeatures}',
    test_model_path=f'{cell_line}_{task}_{model.__name__}_{type_augm_genfeatures}_TEST',
)

>>> ITERATION N. 1



  pruner=optuna.pruners.PatientPruner(optuna.pruners.MedianPruner(), patience=2),
[32m[I 2021-11-04 07:13:55,806][0m Using an existing study with name 'MCF7_active_E_vs_inactive_E_FFNN_smote_1' instead of creating a new one.[0m


Study statistics: 
  Number of finished trials:  3
  Number of pruned trials:  0
  Number of complete trials:  3
Best trial:
  Value:  0.15105952227404468
  Params: 
    dropout_l0: 0.2
    dropout_l1: 0.2
    lr: 0.0002261826417803801
    n_layers: 2
    n_units_l0: 64
    n_units_l1: 128
    optimizer: Adam
    weight_decay: 0.0011446014499368682

AUPRC test score: 0.15548495483851843


>>> ITERATION N. 2



  pruner=optuna.pruners.PatientPruner(optuna.pruners.MedianPruner(), patience=2),
[32m[I 2021-11-04 07:13:56,842][0m Using an existing study with name 'MCF7_active_E_vs_inactive_E_FFNN_smote_1_2' instead of creating a new one.[0m


Study statistics: 
  Number of finished trials:  3
  Number of pruned trials:  0
  Number of complete trials:  3
Best trial:
  Value:  0.1544891709064542
  Params: 
    dropout_l0: 0.3
    dropout_l1: 0.4
    lr: 0.0013756301518166312
    n_layers: 2
    n_units_l0: 32
    n_units_l1: 16
    optimizer: Nadam
    weight_decay: 0.0008685118385636113

AUPRC test score: 0.1456963533178596


>>> ITERATION N. 3



  pruner=optuna.pruners.PatientPruner(optuna.pruners.MedianPruner(), patience=2),
[32m[I 2021-11-04 07:13:57,598][0m Using an existing study with name 'MCF7_active_E_vs_inactive_E_FFNN_smote_1_2_3' instead of creating a new one.[0m


Study statistics: 
  Number of finished trials:  3
  Number of pruned trials:  0
  Number of complete trials:  3
Best trial:
  Value:  0.15029712214440238
  Params: 
    dropout_l0: 0.2
    dropout_l1: 0.0
    dropout_l2: 0.4
    dropout_l3: 0.0
    lr: 9.359342633984355e-05
    n_layers: 4
    n_units_l0: 32
    n_units_l1: 128
    n_units_l2: 64
    n_units_l3: 32
    optimizer: Nadam
    weight_decay: 0.0016848748756725018

AUPRC test score: 0.15152583438855838



3-FOLD CROSS-VALIDATION AUPRC TEST SCORE: 0.1509


In [12]:
# Store this run's scores (kf_CV.scores_dict) under
# cell line -> task -> '<model name>_<augmentation>'.
# NOTE(review): relies on `model`, `type_augm_genfeatures` and `kf_CV` still being
# the ones from the run directly above; re-run that cell if kernel state may be stale.
results_dict[cell_line][task][f'{model.__name__}_{type_augm_genfeatures}'] = kf_CV.scores_dict

In [13]:
# Save the accumulated results to disk. The outer level is converted to an
# OrderedDict before pickling, presumably because the lambda default factory
# of the outer defaultdict cannot be pickled.
with open('results_dict.pickle', 'wb') as results_file:
    pickle.dump(OrderedDict(results_dict), results_file)

In [14]:
# IMBALANCED
# Same 3-fold cross-validation and hyper-parameter search as the previous run,
# this time with the 'double' augmentation tag (also used in the study name
# and the saved model path).
type_augm_genfeatures = 'double'

kf_CV = Kfold_CV()

kf_CV(
    # data and task
    build_dataloader_pipeline=pipe_data_load,
    cell_line=cell_line,
    task=task,
    sequence=False,
    type_augm_genfeatures=type_augm_genfeatures,
    # model and training
    model=model,
    device=device,
    num_epochs=100,
    n_folds=3,
    # search and bookkeeping
    sampler='TPE',
    study_name=f'{cell_line}_{task}_{model.__name__}_{type_augm_genfeatures}',
    test_model_path=f'{cell_line}_{task}_{model.__name__}_{type_augm_genfeatures}_TEST',
)

>>> ITERATION N. 1



  pruner=optuna.pruners.PatientPruner(optuna.pruners.MedianPruner(), patience=2),
[32m[I 2021-11-04 07:13:58,318][0m Using an existing study with name 'MCF7_active_E_vs_inactive_E_FFNN_double_1' instead of creating a new one.[0m


Study statistics: 
  Number of finished trials:  3
  Number of pruned trials:  0
  Number of complete trials:  3
Best trial:


  pruner=optuna.pruners.PatientPruner(optuna.pruners.MedianPruner(), patience=2),
[32m[I 2021-11-04 07:13:58,572][0m Using an existing study with name 'MCF7_active_E_vs_inactive_E_FFNN_double_1_2' instead of creating a new one.[0m


  Value:  0.1576736581867642
  Params: 
    dropout_l0: 0.0
    dropout_l1: 0.2
    lr: 0.0038018402646063957
    n_layers: 2
    n_units_l0: 256
    n_units_l1: 16
    optimizer: RMSprop
    weight_decay: 0.00046009552972165217

AUPRC test score: 0.14471923088839636


>>> ITERATION N. 2

Study statistics: 
  Number of finished trials:  3
  Number of pruned trials:  0
  Number of complete trials:  3
Best trial:
  Value:  0.16339572413443337
  Params: 
    dropout_l0: 0.0
    dropout_l1: 0.4
    lr: 8.281068161130383e-05
    n_layers: 2
    n_units_l0: 128
    n_units_l1: 64
    optimizer: RMSprop
    weight_decay: 0.001985406384546921

AUPRC test score: 0.14790680956119268


>>> ITERATION N. 3



  pruner=optuna.pruners.PatientPruner(optuna.pruners.MedianPruner(), patience=2),
[32m[I 2021-11-04 07:13:58,921][0m Using an existing study with name 'MCF7_active_E_vs_inactive_E_FFNN_double_1_2_3' instead of creating a new one.[0m


Study statistics: 
  Number of finished trials:  3
  Number of pruned trials:  0
  Number of complete trials:  3
Best trial:
  Value:  0.15280886350443357
  Params: 
    dropout_l0: 0.3
    lr: 0.00012002179769812661
    n_layers: 1
    n_units_l0: 32
    optimizer: Adam
    weight_decay: 0.0001981619055043867

AUPRC test score: 0.15541055178000965



3-FOLD CROSS-VALIDATION AUPRC TEST SCORE: 0.14935


In [15]:
# Store this run's scores (kf_CV.scores_dict) under
# cell line -> task -> '<model name>_<augmentation>'.
# NOTE(review): relies on `model`, `type_augm_genfeatures` and `kf_CV` still being
# the ones from the run directly above; re-run that cell if kernel state may be stale.
results_dict[cell_line][task][f'{model.__name__}_{type_augm_genfeatures}'] = kf_CV.scores_dict

In [16]:
# Save the accumulated results to disk. The outer level is converted to an
# OrderedDict before pickling, presumably because the lambda default factory
# of the outer defaultdict cannot be pickled.
with open('results_dict.pickle', 'wb') as results_file:
    pickle.dump(OrderedDict(results_dict), results_file)

---
### 2. CNN

In [36]:
# Switch the model under evaluation to the CNN class (passed uninstantiated to
# Kfold_CV; its __name__ labels the study and the results entry).
model = CNN

In [11]:
# IMBALANCED
# 3-fold cross-validation with a hyper-parameter search for the current
# `model` on the current `task`; per-fold results are printed below.
kf_CV = Kfold_CV()

kf_CV(
    # data and task
    build_dataloader_pipeline=pipe_data_load,
    cell_line=cell_line,
    task=task,
    sequence=True,  # presumably selects the sequence input for the CNN — TODO confirm
    # model and training
    model=model,
    device=device,
    num_epochs=100,
    n_folds=3,
    # search and bookkeeping
    sampler='TPE',
    study_name=f'{cell_line}_{task}_{model.__name__}',
    test_model_path=f'{cell_line}_{task}_{model.__name__}_TEST',
)

>>> ITERATION N. 1



  pruner=optuna.pruners.PatientPruner(optuna.pruners.MedianPruner(), patience=2),
[32m[I 2021-10-29 09:46:24,229][0m Using an existing study with name 'MCF7_active_E_vs_inactive_E_CNN_1' instead of creating a new one.[0m


Study statistics: 
  Number of finished trials:  3
  Number of pruned trials:  0
  Number of complete trials:  3
Best trial:
  Value:  0.08547794118063562
  Params: 
    dropout_l0: 0
    dropout_l1: 0.4
    dropout_l2: 0
    dropout_l3: 0.5
    kernel_size_l0: 15
    kernel_size_l1: 15
    kernel_size_l2: 15
    kernel_size_l3: 11
    lr: 0.012744302809199555
    n_layers: 4
    optimizer: Adam
    out_channels_l0: 16
    out_channels_l1: 32
    out_channels_l2: 64
    out_channels_l3: 512
    weight_decay: 0.006130627132113038

AUPRC test score: 0.08033931129889728


>>> ITERATION N. 2



  pruner=optuna.pruners.PatientPruner(optuna.pruners.MedianPruner(), patience=2),
[32m[I 2021-10-29 09:46:28,833][0m Using an existing study with name 'MCF7_active_E_vs_inactive_E_CNN_1_2' instead of creating a new one.[0m


Study statistics: 
  Number of finished trials:  3
  Number of pruned trials:  0
  Number of complete trials:  3
Best trial:
  Value:  0.09607757256404395
  Params: 
    dropout_l0: 0.2
    kernel_size_l0: 11
    lr: 2.3874988795464207e-05
    n_layers: 1
    optimizer: RMSprop
    out_channels_l0: 64
    weight_decay: 0.001098554791895839

AUPRC test score: 0.09077200522516284




  pruner=optuna.pruners.PatientPruner(optuna.pruners.MedianPruner(), patience=2),
[32m[I 2021-10-29 09:46:30,898][0m Using an existing study with name 'MCF7_active_E_vs_inactive_E_CNN_1_2_3' instead of creating a new one.[0m


>>> ITERATION N. 3

Study statistics: 
  Number of finished trials:  3
  Number of pruned trials:  0
  Number of complete trials:  3
Best trial:
  Value:  0.0987738779836167
  Params: 
    dropout_l0: 0.2
    dropout_l1: 0.5
    kernel_size_l0: 5
    kernel_size_l1: 5
    lr: 0.00039748878628171306
    n_layers: 2
    optimizer: RMSprop
    out_channels_l0: 16
    out_channels_l1: 32
    weight_decay: 0.0005269354997232535

AUPRC test score: 0.09213097601349199



3-FOLD CROSS-VALIDATION AUPRC TEST SCORE: 0.08775


In [99]:
# Store this run's scores (kf_CV.scores_dict) under cell line -> task -> model name.
# NOTE(review): relies on `model` and `kf_CV` still being the ones from the run
# directly above; re-run that cell if kernel state may be stale.
results_dict[cell_line][task][model.__name__] = kf_CV.scores_dict

In [100]:
# Save the accumulated results to disk. The outer level is converted to an
# OrderedDict before pickling, presumably because the lambda default factory
# of the outer defaultdict cannot be pickled.
with open('results_dict.pickle', 'wb') as results_file:
    pickle.dump(OrderedDict(results_dict), results_file)

---

## 2) ACTIVE PROMOTERS vs INACTIVE PROMOTERS

In [40]:
# Second task: TASKS[1] resolves to 'active_P_vs_inactive_P' (displayed below).
# Every later cell in this section reads `task`, so run this cell first.
task = TASKS[1]
task

'active_P_vs_inactive_P'

In [14]:
# Rebuild the data-loading pipeline for the current task from f'{task}.pickle';
# the call prints "Data Preprocessing Done!" when it finishes.
pipe_data_load = Build_DataLoader_Pipeline(path_name=f'{task}.pickle')

Data Preprocessing Done!


---

### 1. FFNN

In [15]:
# Model evaluated in this subsection: the FFNN class (passed uninstantiated to
# Kfold_CV; its __name__ labels the study and the results entry).
model=FFNN

In [20]:
# 3-fold cross-validation with a hyper-parameter search for the current
# `model` on the current `task`; per-fold results are printed below.
kf_CV = Kfold_CV()

kf_CV(
    # data and task
    build_dataloader_pipeline=pipe_data_load,
    cell_line=cell_line,
    task=task,
    sequence=False,
    # model and training
    model=model,
    device=device,
    num_epochs=100,
    n_folds=3,
    # search and bookkeeping
    sampler='TPE',
    study_name=f'{cell_line}_{task}_{model.__name__}',
    test_model_path=f'{cell_line}_{task}_{model.__name__}_TEST',
)

  pruner=optuna.pruners.PatientPruner(optuna.pruners.MedianPruner(), patience=2),
[32m[I 2021-11-04 07:14:04,885][0m Using an existing study with name 'MCF7_active_P_vs_inactive_P_FFNN_1' instead of creating a new one.[0m


>>> ITERATION N. 1



  pruner=optuna.pruners.PatientPruner(optuna.pruners.MedianPruner(), patience=2),
[32m[I 2021-11-04 07:14:05,328][0m Using an existing study with name 'MCF7_active_P_vs_inactive_P_FFNN_1_2' instead of creating a new one.[0m


Study statistics: 
  Number of finished trials:  3
  Number of pruned trials:  0
  Number of complete trials:  3
Best trial:
  Value:  0.315432743699675
  Params: 
    dropout_l0: 0.2
    lr: 0.0004290449378091921
    n_layers: 1
    n_units_l0: 128
    optimizer: RMSprop
    weight_decay: 0.000217485962025295

AUPRC test score: 0.3212731386877567


>>> ITERATION N. 2



  pruner=optuna.pruners.PatientPruner(optuna.pruners.MedianPruner(), patience=2),
[32m[I 2021-11-04 07:14:05,569][0m Using an existing study with name 'MCF7_active_P_vs_inactive_P_FFNN_1_2_3' instead of creating a new one.[0m


Study statistics: 
  Number of finished trials:  3
  Number of pruned trials:  0
  Number of complete trials:  3
Best trial:
  Value:  0.31686605717601846
  Params: 
    dropout_l0: 0.4
    dropout_l1: 0.4
    lr: 0.00039297884489198346
    n_layers: 2
    n_units_l0: 32
    n_units_l1: 32
    optimizer: Nadam
    weight_decay: 0.0065561654406609444

AUPRC test score: 0.3046814267664064


>>> ITERATION N. 3

Study statistics: 
  Number of finished trials:  3
  Number of pruned trials:  0
  Number of complete trials:  3
Best trial:
  Value:  0.32157386558192863
  Params: 
    dropout_l0: 0.0
    dropout_l1: 0.0
    dropout_l2: 0.5
    lr: 0.00025515577544799907
    n_layers: 3
    n_units_l0: 64
    n_units_l1: 128
    n_units_l2: 64
    optimizer: Adam
    weight_decay: 0.0002717294449781887

AUPRC test score: 0.3345455609308332



3-FOLD CROSS-VALIDATION AUPRC TEST SCORE: 0.32017


In [21]:
# Store this run's scores (kf_CV.scores_dict) under cell line -> task -> model name.
# NOTE(review): relies on `model` and `kf_CV` still being the ones from the run
# directly above; re-run that cell if kernel state may be stale.
results_dict[cell_line][task][model.__name__] = kf_CV.scores_dict

In [23]:
# Save the accumulated results to disk. The outer level is converted to an
# OrderedDict before pickling, presumably because the lambda default factory
# of the outer defaultdict cannot be pickled.
with open('results_dict.pickle', 'wb') as results_file:
    pickle.dump(OrderedDict(results_dict), results_file)

---
### 2. CNN

In [42]:
# Switch the model under evaluation to the CNN class (passed uninstantiated to
# Kfold_CV; its __name__ labels the study and the results entry).
model=CNN

In [17]:
# 3-fold cross-validation with a hyper-parameter search for the current
# `model` on the current `task`; per-fold results are printed below.
kf_CV = Kfold_CV()

kf_CV(
    # data and task
    build_dataloader_pipeline=pipe_data_load,
    cell_line=cell_line,
    task=task,
    sequence=True,  # presumably selects the sequence input for the CNN — TODO confirm
    # model and training
    model=model,
    device=device,
    num_epochs=100,
    n_folds=3,
    # search and bookkeeping
    sampler='TPE',
    study_name=f'{cell_line}_{task}_{model.__name__}',
    test_model_path=f'{cell_line}_{task}_{model.__name__}_TEST',
)

  pruner=optuna.pruners.PatientPruner(optuna.pruners.MedianPruner(), patience=2),
[32m[I 2021-10-29 09:46:38,905][0m Using an existing study with name 'MCF7_active_P_vs_inactive_P_CNN_1' instead of creating a new one.[0m


>>> ITERATION N. 1

Study statistics: 
  Number of finished trials:  4
  Number of pruned trials:  0
  Number of complete trials:  3
Best trial:
  Value:  0.16209697188152564
  Params: 
    dropout_l0: 0.2
    dropout_l1: 0.4
    dropout_l2: 0.5
    dropout_l3: 0.4
    kernel_size_l0: 5
    kernel_size_l1: 11
    kernel_size_l2: 15
    kernel_size_l3: 5
    lr: 0.00011120872312599924
    n_layers: 4
    optimizer: Adam
    out_channels_l0: 64
    out_channels_l1: 32
    out_channels_l2: 128
    out_channels_l3: 256
    weight_decay: 0.08402904807729342

AUPRC test score: 0.1999078454249038


>>> ITERATION N. 2



  pruner=optuna.pruners.PatientPruner(optuna.pruners.MedianPruner(), patience=2),
[32m[I 2021-10-29 09:46:39,925][0m Using an existing study with name 'MCF7_active_P_vs_inactive_P_CNN_1_2' instead of creating a new one.[0m


Study statistics: 
  Number of finished trials:  3
  Number of pruned trials:  0
  Number of complete trials:  3
Best trial:
  Value:  0.2285222689301424
  Params: 
    dropout_l0: 0.3
    kernel_size_l0: 15
    lr: 0.0002798100506003284
    n_layers: 1
    optimizer: Nadam
    out_channels_l0: 16
    weight_decay: 0.08276571915439879

AUPRC test score: 0.2181018491779142


>>> ITERATION N. 3



  pruner=optuna.pruners.PatientPruner(optuna.pruners.MedianPruner(), patience=2),
[32m[I 2021-10-29 09:46:41,100][0m Using an existing study with name 'MCF7_active_P_vs_inactive_P_CNN_1_2_3' instead of creating a new one.[0m


Study statistics: 
  Number of finished trials:  3
  Number of pruned trials:  0
  Number of complete trials:  3
Best trial:
  Value:  0.19553435707255523
  Params: 
    dropout_l0: 0.2
    dropout_l1: 0.5
    dropout_l2: 0.4
    dropout_l3: 0.5
    kernel_size_l0: 11
    kernel_size_l1: 5
    kernel_size_l2: 15
    kernel_size_l3: 15
    lr: 1.3056131183736454e-05
    n_layers: 4
    optimizer: Nadam
    out_channels_l0: 64
    out_channels_l1: 32
    out_channels_l2: 256
    out_channels_l3: 256
    weight_decay: 0.0006315304505451421

AUPRC test score: 0.18021177705434768



3-FOLD CROSS-VALIDATION AUPRC TEST SCORE: 0.19941


In [109]:
# Store this run's scores (kf_CV.scores_dict) under cell line -> task -> model name.
# NOTE(review): relies on `model` and `kf_CV` still being the ones from the run
# directly above; re-run that cell if kernel state may be stale.
results_dict[cell_line][task][model.__name__] = kf_CV.scores_dict

In [110]:
# Save the accumulated results to disk. The outer level is converted to an
# OrderedDict before pickling, presumably because the lambda default factory
# of the outer defaultdict cannot be pickled.
with open('results_dict.pickle', 'wb') as results_file:
    pickle.dump(OrderedDict(results_dict), results_file)

---

## 3) ACTIVE ENHANCERS vs ACTIVE PROMOTERS

In [48]:
# Third task: TASKS[2] resolves to 'active_E_vs_active_P' (displayed below).
# Every later cell in this section reads `task`, so run this cell first.
task = TASKS[2]
task

'active_E_vs_active_P'

In [23]:
# Rebuild the data-loading pipeline for the current task from f'{task}.pickle';
# the call prints "Data Preprocessing Done!" when it finishes.
pipe_data_load = Build_DataLoader_Pipeline(path_name=f'{task}.pickle')

Data Preprocessing Done!


---
### 1. FFNN

In [20]:
# Model evaluated in this subsection: the FFNN class (passed uninstantiated to
# Kfold_CV; its __name__ labels the study and the results entry).
model=FFNN

In [25]:
# 3-fold cross-validation with a hyper-parameter search for the current
# `model` on the current `task`; per-fold results are printed below.
kf_CV = Kfold_CV()

kf_CV(
    # data and task
    build_dataloader_pipeline=pipe_data_load,
    cell_line=cell_line,
    task=task,
    sequence=False,
    # model and training
    model=model,
    device=device,
    num_epochs=100,
    n_folds=3,
    # search and bookkeeping
    sampler='TPE',
    study_name=f'{cell_line}_{task}_{model.__name__}',
    test_model_path=f'{cell_line}_{task}_{model.__name__}_TEST',
)

  pruner=optuna.pruners.PatientPruner(optuna.pruners.MedianPruner(), patience=2),
[32m[I 2021-11-04 07:14:09,932][0m Using an existing study with name 'MCF7_active_E_vs_active_P_FFNN_1' instead of creating a new one.[0m
  pruner=optuna.pruners.PatientPruner(optuna.pruners.MedianPruner(), patience=2),
[32m[I 2021-11-04 07:14:10,062][0m Using an existing study with name 'MCF7_active_E_vs_active_P_FFNN_1_2' instead of creating a new one.[0m


>>> ITERATION N. 1

Study statistics: 
  Number of finished trials:  3
  Number of pruned trials:  0
  Number of complete trials:  3
Best trial:
  Value:  0.7567624829305591
  Params: 
    dropout_l0: 0.4
    dropout_l1: 0.2
    dropout_l2: 0.0
    dropout_l3: 0.0
    lr: 0.001398230302699925
    n_layers: 4
    n_units_l0: 32
    n_units_l1: 32
    n_units_l2: 64
    n_units_l3: 4
    optimizer: Nadam
    weight_decay: 0.0005226188422789123

AUPRC test score: 0.7862916163780955


>>> ITERATION N. 2



  pruner=optuna.pruners.PatientPruner(optuna.pruners.MedianPruner(), patience=2),
[32m[I 2021-11-04 07:14:10,228][0m Using an existing study with name 'MCF7_active_E_vs_active_P_FFNN_1_2_3' instead of creating a new one.[0m


Study statistics: 
  Number of finished trials:  3
  Number of pruned trials:  0
  Number of complete trials:  3
Best trial:
  Value:  0.7830887316062349
  Params: 
    dropout_l0: 0.0
    lr: 0.0006917552546116357
    n_layers: 1
    n_units_l0: 128
    optimizer: Adam
    weight_decay: 0.0017075627522820118

AUPRC test score: 0.7505347381623895


>>> ITERATION N. 3

Study statistics: 
  Number of finished trials:  3
  Number of pruned trials:  0
  Number of complete trials:  3
Best trial:
  Value:  0.7981090907946037
  Params: 
    dropout_l0: 0.0
    dropout_l1: 0.3
    dropout_l2: 0.5
    lr: 0.0023043744351189128
    n_layers: 3
    n_units_l0: 64
    n_units_l1: 128
    n_units_l2: 16
    optimizer: Adam
    weight_decay: 0.0007433989198116716

AUPRC test score: 0.7717132937628074



3-FOLD CROSS-VALIDATION AUPRC TEST SCORE: 0.76951


In [26]:
# Store this run's scores (kf_CV.scores_dict) under cell line -> task -> model name.
# NOTE(review): relies on `model` and `kf_CV` still being the ones from the run
# directly above; re-run that cell if kernel state may be stale.
results_dict[cell_line][task][model.__name__] = kf_CV.scores_dict

In [27]:
# Save the accumulated results to disk. The outer level is converted to an
# OrderedDict before pickling, presumably because the lambda default factory
# of the outer defaultdict cannot be pickled.
with open('results_dict.pickle', 'wb') as results_file:
    pickle.dump(OrderedDict(results_dict), results_file)

---
### 2. CNN

In [49]:
# Switch the model under evaluation to the CNN class (passed uninstantiated to
# Kfold_CV; its __name__ labels the study and the results entry).
model=CNN

In [23]:
# 3-fold cross-validation with a hyper-parameter search for the current
# `model` on the current `task`; per-fold results are printed below.
kf_CV = Kfold_CV()

kf_CV(
    # data and task
    build_dataloader_pipeline=pipe_data_load,
    cell_line=cell_line,
    task=task,
    sequence=True,  # presumably selects the sequence input for the CNN — TODO confirm
    # model and training
    model=model,
    device=device,
    num_epochs=100,
    n_folds=3,
    # search and bookkeeping
    sampler='TPE',
    study_name=f'{cell_line}_{task}_{model.__name__}',
    test_model_path=f'{cell_line}_{task}_{model.__name__}_TEST',
)

  pruner=optuna.pruners.PatientPruner(optuna.pruners.MedianPruner(), patience=2),
[32m[I 2021-10-29 09:46:43,084][0m Using an existing study with name 'MCF7_active_E_vs_active_P_CNN_1' instead of creating a new one.[0m


>>> ITERATION N. 1

Study statistics: 
  Number of finished trials:  3
  Number of pruned trials:  0
  Number of complete trials:  3
Best trial:
  Value:  0.6303823052224203
  Params: 
    dropout_l0: 0.3
    kernel_size_l0: 5
    lr: 9.178421110218887e-05
    n_layers: 1
    optimizer: Nadam
    out_channels_l0: 64
    weight_decay: 0.0029891332413908567

AUPRC test score: 0.6053509263068434




  pruner=optuna.pruners.PatientPruner(optuna.pruners.MedianPruner(), patience=2),
[32m[I 2021-10-29 09:46:44,583][0m Using an existing study with name 'MCF7_active_E_vs_active_P_CNN_1_2' instead of creating a new one.[0m


>>> ITERATION N. 2

Study statistics: 
  Number of finished trials:  3
  Number of pruned trials:  0
  Number of complete trials:  3
Best trial:
  Value:  0.6763754599327442
  Params: 
    dropout_l0: 0
    dropout_l1: 0
    kernel_size_l0: 11
    kernel_size_l1: 5
    lr: 0.0006873910986899672
    n_layers: 2
    optimizer: Nadam
    out_channels_l0: 64
    out_channels_l1: 32
    weight_decay: 0.08645466710084093



  pruner=optuna.pruners.PatientPruner(optuna.pruners.MedianPruner(), patience=2),
[32m[I 2021-10-29 09:46:45,139][0m Using an existing study with name 'MCF7_active_E_vs_active_P_CNN_1_2_3' instead of creating a new one.[0m


AUPRC test score: 0.40298373742753213


>>> ITERATION N. 3

Study statistics: 
  Number of finished trials:  3
  Number of pruned trials:  0
  Number of complete trials:  3
Best trial:
  Value:  0.6718818158809702
  Params: 
    dropout_l0: 0.2
    kernel_size_l0: 11
    lr: 1.9954821895235276e-05
    n_layers: 1
    optimizer: Adam
    out_channels_l0: 64
    weight_decay: 0.0001461144596431985

AUPRC test score: 0.6465923813322103



3-FOLD CROSS-VALIDATION AUPRC TEST SCORE: 0.55164


In [119]:
# Store this run's scores (kf_CV.scores_dict) under cell line -> task -> model name.
# NOTE(review): relies on `model` and `kf_CV` still being the ones from the run
# directly above; re-run that cell if kernel state may be stale.
results_dict[cell_line][task][model.__name__] = kf_CV.scores_dict

In [120]:
# Save the accumulated results to disk. The outer level is converted to an
# OrderedDict before pickling, presumably because the lambda default factory
# of the outer defaultdict cannot be pickled.
with open('results_dict.pickle', 'wb') as results_file:
    pickle.dump(OrderedDict(results_dict), results_file)

---

## 4) INACTIVE ENHANCERS vs INACTIVE PROMOTERS

In [53]:
# Fourth task: TASKS[3] resolves to 'inactive_E_vs_inactive_P' (displayed below).
# Every later cell in this section reads `task`, so run this cell first.
task = TASKS[3]
task

'inactive_E_vs_inactive_P'

In [29]:
# Rebuild the data-loading pipeline for the current task from f'{task}.pickle';
# the call prints "Data Preprocessing Done!" when it finishes.
pipe_data_load = Build_DataLoader_Pipeline(path_name=f'{task}.pickle')

Data Preprocessing Done!


---
### 1. FFNN

In [25]:
# Model evaluated in this subsection: the FFNN class (passed uninstantiated to
# Kfold_CV; its __name__ labels the study and the results entry).
model=FFNN

In [31]:
# 3-fold cross-validation with a hyper-parameter search for the current
# `model` on the current `task`; per-fold results are printed below.
kf_CV = Kfold_CV()

kf_CV(
    # data and task
    build_dataloader_pipeline=pipe_data_load,
    cell_line=cell_line,
    task=task,
    sequence=False,
    # model and training
    model=model,
    device=device,
    num_epochs=100,
    n_folds=3,
    # search and bookkeeping
    sampler='TPE',
    study_name=f'{cell_line}_{task}_{model.__name__}',
    test_model_path=f'{cell_line}_{task}_{model.__name__}_TEST',
)

>>> ITERATION N. 1


  pruner=optuna.pruners.PatientPruner(optuna.pruners.MedianPruner(), patience=2),
[32m[I 2021-11-04 07:14:17,070][0m Using an existing study with name 'MCF7_inactive_E_vs_inactive_P_FFNN_1' instead of creating a new one.[0m



Study statistics: 
  Number of finished trials:  3
  Number of pruned trials:  0
  Number of complete trials:  3
Best trial:
  Value:  0.5721723358592362
  Params: 
    dropout_l0: 0.0
    dropout_l1: 0.2
    dropout_l2: 0.4
    dropout_l3: 0.4
    lr: 0.0009708068727668427
    n_layers: 4
    n_units_l0: 256
    n_units_l1: 32
    n_units_l2: 64
    n_units_l3: 4
    optimizer: Nadam
    weight_decay: 0.0007470018175682338

AUPRC test score: 0.57666465612355


>>> ITERATION N. 2


  pruner=optuna.pruners.PatientPruner(optuna.pruners.MedianPruner(), patience=2),
[32m[I 2021-11-04 07:14:17,686][0m Using an existing study with name 'MCF7_inactive_E_vs_inactive_P_FFNN_1_2' instead of creating a new one.[0m



Study statistics: 
  Number of finished trials:  3
  Number of pruned trials:  0
  Number of complete trials:  3
Best trial:
  Value:  0.5629222099626057
  Params: 
    dropout_l0: 0.2
    dropout_l1: 0.4
    lr: 0.00043078811554049973
    n_layers: 2
    n_units_l0: 256
    n_units_l1: 32
    optimizer: RMSprop
    weight_decay: 0.005445604131968907

AUPRC test score: 0.5576872159629325


>>> ITERATION N. 3



  pruner=optuna.pruners.PatientPruner(optuna.pruners.MedianPruner(), patience=2),
[32m[I 2021-11-04 07:14:18,043][0m Using an existing study with name 'MCF7_inactive_E_vs_inactive_P_FFNN_1_2_3' instead of creating a new one.[0m


Study statistics: 
  Number of finished trials:  3
  Number of pruned trials:  0
  Number of complete trials:  3
Best trial:
  Value:  0.5635291832459379
  Params: 
    dropout_l0: 0.3
    dropout_l1: 0.3
    dropout_l2: 0.4
    dropout_l3: 0.5
    lr: 0.0006904298035088993
    n_layers: 4
    n_units_l0: 64
    n_units_l1: 16
    n_units_l2: 64
    n_units_l3: 32
    optimizer: Nadam
    weight_decay: 0.002979158513574353

AUPRC test score: 0.569435693660712



3-FOLD CROSS-VALIDATION AUPRC TEST SCORE: 0.56793


In [32]:
# Store this run's scores (kf_CV.scores_dict) under cell line -> task -> model name.
# NOTE(review): relies on `model` and `kf_CV` still being the ones from the run
# directly above; re-run that cell if kernel state may be stale.
results_dict[cell_line][task][model.__name__] = kf_CV.scores_dict

In [33]:
# Save the accumulated results to disk. The outer level is converted to an
# OrderedDict before pickling, presumably because the lambda default factory
# of the outer defaultdict cannot be pickled.
with open('results_dict.pickle', 'wb') as results_file:
    pickle.dump(OrderedDict(results_dict), results_file)

---
### 2. CNN

In [54]:
# Switch the model under evaluation to the CNN class (passed uninstantiated to
# Kfold_CV; its __name__ labels the study and the results entry).
model=CNN

In [29]:
# 3-fold cross-validation with a hyper-parameter search for the current
# `model` on the current `task`; per-fold results are printed below.
kf_CV = Kfold_CV()

kf_CV(
    # data and task
    build_dataloader_pipeline=pipe_data_load,
    cell_line=cell_line,
    task=task,
    sequence=True,  # presumably selects the sequence input for the CNN — TODO confirm
    # model and training
    model=model,
    device=device,
    num_epochs=100,
    n_folds=3,
    # search and bookkeeping
    sampler='TPE',
    study_name=f'{cell_line}_{task}_{model.__name__}',
    test_model_path=f'{cell_line}_{task}_{model.__name__}_TEST',
)

  pruner=optuna.pruners.PatientPruner(optuna.pruners.MedianPruner(), patience=2),
[32m[I 2021-10-29 09:46:52,671][0m Using an existing study with name 'MCF7_inactive_E_vs_inactive_P_CNN_1' instead of creating a new one.[0m


>>> ITERATION N. 1

Study statistics: 
  Number of finished trials:  3
  Number of pruned trials:  0
  Number of complete trials:  3
Best trial:
  Value:  0.5584309447922419
  Params: 
    dropout_l0: 0
    dropout_l1: 0
    kernel_size_l0: 15
    kernel_size_l1: 11
    lr: 0.0007424717932043451
    n_layers: 2
    optimizer: RMSprop
    out_channels_l0: 32
    out_channels_l1: 64
    weight_decay: 0.0003225631874582084

AUPRC test score: 0.556859593016824


>>> ITERATION N. 2


  pruner=optuna.pruners.PatientPruner(optuna.pruners.MedianPruner(), patience=2),
[32m[I 2021-10-29 09:46:53,793][0m Using an existing study with name 'MCF7_inactive_E_vs_inactive_P_CNN_1_2' instead of creating a new one.[0m



Study statistics: 
  Number of finished trials:  3
  Number of pruned trials:  0
  Number of complete trials:  3
Best trial:
  Value:  0.5637142250936519
  Params: 
    dropout_l0: 0.3
    dropout_l1: 0
    kernel_size_l0: 15
    kernel_size_l1: 5
    lr: 0.009490858966933198
    n_layers: 2
    optimizer: Nadam
    out_channels_l0: 32
    out_channels_l1: 96
    weight_decay: 0.005776782415111461



  pruner=optuna.pruners.PatientPruner(optuna.pruners.MedianPruner(), patience=2),
[32m[I 2021-10-29 09:46:54,808][0m Using an existing study with name 'MCF7_inactive_E_vs_inactive_P_CNN_1_2_3' instead of creating a new one.[0m


AUPRC test score: 0.398772589014135


>>> ITERATION N. 3

Study statistics: 
  Number of finished trials:  3
  Number of pruned trials:  0
  Number of complete trials:  3
Best trial:
  Value:  0.49009306632094735
  Params: 
    dropout_l0: 0.2
    kernel_size_l0: 11
    lr: 0.0060236128485301995
    n_layers: 1
    optimizer: RMSprop
    out_channels_l0: 16
    weight_decay: 0.0633961372007977

AUPRC test score: 0.40310341742708894



3-FOLD CROSS-VALIDATION AUPRC TEST SCORE: 0.45291


In [129]:
# Store this run's scores (kf_CV.scores_dict) under cell line -> task -> model name.
# NOTE(review): relies on `model` and `kf_CV` still being the ones from the run
# directly above; re-run that cell if kernel state may be stale.
results_dict[cell_line][task][model.__name__] = kf_CV.scores_dict

In [130]:
# Save the accumulated results to disk. The outer level is converted to an
# OrderedDict before pickling, presumably because the lambda default factory
# of the outer defaultdict cannot be pickled.
with open('results_dict.pickle', 'wb') as results_file:
    pickle.dump(OrderedDict(results_dict), results_file)

---

## 5) ACTIVE ENHANCERS + ACTIVE PROMOTERS vs INACTIVE REST

In [58]:
# Fifth task: TASKS[4] resolves to 'active_EP_vs_inactive_rest' (displayed below).
# Every later cell in this section reads `task`, so run this cell first.
task = TASKS[4]
task

'active_EP_vs_inactive_rest'

In [35]:
# Rebuild the data-loading pipeline for the current task from f'{task}.pickle';
# the call prints "Data Preprocessing Done!" when it finishes.
pipe_data_load = Build_DataLoader_Pipeline(path_name=f'{task}.pickle')

Data Preprocessing Done!


---
### 1. FFNN

In [30]:
# Model evaluated in this subsection: the FFNN class (passed uninstantiated to
# Kfold_CV; its __name__ labels the study and the results entry).
model = FFNN

In [37]:
# 3-fold cross-validation with a hyper-parameter search for the current
# `model` on the current `task`; per-fold results are printed below.
kf_CV = Kfold_CV()

kf_CV(
    # data and task
    build_dataloader_pipeline=pipe_data_load,
    cell_line=cell_line,
    task=task,
    sequence=False,
    # model and training
    model=model,
    device=device,
    num_epochs=100,
    n_folds=3,
    # search and bookkeeping
    sampler='TPE',
    study_name=f'{cell_line}_{task}_{model.__name__}',
    test_model_path=f'{cell_line}_{task}_{model.__name__}_TEST',
)

>>> ITERATION N. 1


  pruner=optuna.pruners.PatientPruner(optuna.pruners.MedianPruner(), patience=2),
[32m[I 2021-11-04 07:14:22,529][0m Using an existing study with name 'MCF7_active_EP_vs_inactive_rest_FFNN_1' instead of creating a new one.[0m



Study statistics: 
  Number of finished trials:  3
  Number of pruned trials:  0
  Number of complete trials:  3
Best trial:
  Value:  0.2354494890833941
  Params: 
    dropout_l0: 0.3
    dropout_l1: 0.2
    lr: 1.4687863218078216e-05
    n_layers: 2
    n_units_l0: 128
    n_units_l1: 64
    optimizer: RMSprop
    weight_decay: 0.0019194900783040768

AUPRC test score: 0.2515267179127115


>>> ITERATION N. 2


  pruner=optuna.pruners.PatientPruner(optuna.pruners.MedianPruner(), patience=2),
[32m[I 2021-11-04 07:14:23,049][0m Using an existing study with name 'MCF7_active_EP_vs_inactive_rest_FFNN_1_2' instead of creating a new one.[0m



Study statistics: 
  Number of finished trials:  3
  Number of pruned trials:  0
  Number of complete trials:  3
Best trial:
  Value:  0.2576535215068615
  Params: 
    dropout_l0: 0.2
    lr: 1.248240999612458e-05
    n_layers: 1
    n_units_l0: 128
    optimizer: Adam
    weight_decay: 0.00013745757441942864



  pruner=optuna.pruners.PatientPruner(optuna.pruners.MedianPruner(), patience=2),
[32m[I 2021-11-04 07:14:23,349][0m Using an existing study with name 'MCF7_active_EP_vs_inactive_rest_FFNN_1_2_3' instead of creating a new one.[0m


AUPRC test score: 0.2533431222624828


>>> ITERATION N. 3

Study statistics: 
  Number of finished trials:  3
  Number of pruned trials:  0
  Number of complete trials:  3
Best trial:
  Value:  0.24405981935568682
  Params: 
    dropout_l0: 0.2
    lr: 0.0392386097223017
    n_layers: 1
    n_units_l0: 64
    optimizer: Nadam
    weight_decay: 0.003052820825160458

AUPRC test score: 0.23152450277260886



3-FOLD CROSS-VALIDATION AUPRC TEST SCORE: 0.24546


In [38]:
# Store this run's scores (kf_CV.scores_dict) under cell line -> task -> model name.
# NOTE(review): relies on `model` and `kf_CV` still being the ones from the run
# directly above; re-run that cell if kernel state may be stale.
results_dict[cell_line][task][model.__name__] = kf_CV.scores_dict

In [39]:
# Save the accumulated results to disk. The outer level is converted to an
# OrderedDict before pickling, presumably because the lambda default factory
# of the outer defaultdict cannot be pickled.
with open('results_dict.pickle', 'wb') as results_file:
    pickle.dump(OrderedDict(results_dict), results_file)

---
### 2. CNN

In [59]:
# Switch the model under evaluation to the CNN class (passed uninstantiated to
# Kfold_CV; its __name__ labels the study and the results entry).
model=CNN

In [35]:
# 3-fold cross-validation with a hyper-parameter search for the current
# `model` on the current `task`; per-fold results are printed below.
kf_CV = Kfold_CV()

kf_CV(
    # data and task
    build_dataloader_pipeline=pipe_data_load,
    cell_line=cell_line,
    task=task,
    sequence=True,  # presumably selects the sequence input for the CNN — TODO confirm
    # model and training
    model=model,
    device=device,
    num_epochs=100,
    n_folds=3,
    # search and bookkeeping
    sampler='TPE',
    study_name=f'{cell_line}_{task}_{model.__name__}',
    test_model_path=f'{cell_line}_{task}_{model.__name__}_TEST',
)

>>> ITERATION N. 1



  pruner=optuna.pruners.PatientPruner(optuna.pruners.MedianPruner(), patience=2),
[32m[I 2021-10-29 09:46:59,070][0m Using an existing study with name 'MCF7_active_EP_vs_inactive_rest_CNN_1' instead of creating a new one.[0m


Study statistics: 
  Number of finished trials:  3
  Number of pruned trials:  0
  Number of complete trials:  3
Best trial:
  Value:  0.19232316739993086
  Params: 
    dropout_l0: 0.4
    dropout_l1: 0.4
    dropout_l2: 0.4
    kernel_size_l0: 15
    kernel_size_l1: 15
    kernel_size_l2: 11
    lr: 1.2178411950646805e-05
    n_layers: 3
    optimizer: Adam
    out_channels_l0: 64
    out_channels_l1: 64
    out_channels_l2: 256
    weight_decay: 0.00021061438429659932

AUPRC test score: 0.13743567831660705




  pruner=optuna.pruners.PatientPruner(optuna.pruners.MedianPruner(), patience=2),
[32m[I 2021-10-29 09:47:01,374][0m Using an existing study with name 'MCF7_active_EP_vs_inactive_rest_CNN_1_2' instead of creating a new one.[0m


>>> ITERATION N. 2

Study statistics: 
  Number of finished trials:  3
  Number of pruned trials:  0
  Number of complete trials:  3
Best trial:
  Value:  0.1918037989342798
  Params: 
    dropout_l0: 0.2
    kernel_size_l0: 5
    lr: 0.004888960842269926
    n_layers: 1
    optimizer: Adam
    out_channels_l0: 32
    weight_decay: 0.0015571389766702508

AUPRC test score: 0.18803344799846733


>>> ITERATION N. 3


  pruner=optuna.pruners.PatientPruner(optuna.pruners.MedianPruner(), patience=2),
[32m[I 2021-10-29 09:47:02,599][0m Using an existing study with name 'MCF7_active_EP_vs_inactive_rest_CNN_1_2_3' instead of creating a new one.[0m



Study statistics: 
  Number of finished trials:  3
  Number of pruned trials:  0
  Number of complete trials:  3
Best trial:
  Value:  0.19392598699652555
  Params: 
    dropout_l0: 0
    dropout_l1: 0
    kernel_size_l0: 5
    kernel_size_l1: 5
    lr: 6.555126827472983e-05
    n_layers: 2
    optimizer: RMSprop
    out_channels_l0: 64
    out_channels_l1: 32
    weight_decay: 0.011102025388347045

AUPRC test score: 0.19705775202192988



3-FOLD CROSS-VALIDATION AUPRC TEST SCORE: 0.17418


In [139]:
# Store this run's scores (kf_CV.scores_dict) under cell line -> task -> model name.
# NOTE(review): relies on `model` and `kf_CV` still being the ones from the run
# directly above; re-run that cell if kernel state may be stale.
results_dict[cell_line][task][model.__name__] = kf_CV.scores_dict

In [140]:
# Save the accumulated results to disk. The outer level is converted to an
# OrderedDict before pickling, presumably because the lambda default factory
# of the outer defaultdict cannot be pickled.
with open('results_dict.pickle', 'wb') as results_file:
    pickle.dump(OrderedDict(results_dict), results_file)

---