# Load packages

In [17]:
%env OMP_THREAD_LIMIT = 1
%env OMP_NUM_THREADS = 1

%load_ext autoreload
%autoreload 2

import os
import pickle
from collections import defaultdict
from sklearn import linear_model
from tqdm import tqdm
from joblib import Parallel, delayed
import multiomics_benchmark.models

env: OMP_THREAD_LIMIT=1
env: OMP_NUM_THREADS=1
The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# Load the data

In [18]:
# Dataset to analyse: this name selects the data directory, the omic names
# and the longitudinal flag used by the cells that follow.
dataset = "head_neck"

In [19]:
# Work from inside the dataset's directory so that the pickles below can be
# opened by bare file name. Note that this changes the process-wide cwd.
data_dir = '/home/mxenoc/workspace/multiomics-benchmark/data/RData/' + dataset
os.chdir(data_dir)

# NOTE(review): pickle.load can execute arbitrary code while deserialising;
# only load files that come from a trusted source.
with open('DF.pkl', 'rb') as df_file:
    DF = pickle.load(df_file)

with open('groups.pkl', 'rb') as groups_file:
    patient_groups = pickle.load(groups_file)

with open('responses.pkl', 'rb') as responses_file:
    responses = pickle.load(responses_file)

In [20]:
# Omic names for each known dataset. A dataset without an explicit entry
# falls back to the default list produced by the factory below.
options = defaultdict(
    lambda: ['Meth450_Gene', 'miRNA_HiSeq_Gene', 'RNA_HiSeq_Gene', 'RPPA_Gene', 'SCNV_Log_Gene']
)
options['pregnancy'] = ['rna', 'plasma_l', 'serum_l', 'microb', 'immune', 'metabol', 'plasma_s']
options['omics'] = ['rna', 'plasma_l', 'serum_l', 'microb', 'immune', 'metabol', 'plasma_s']
options['brain'] = ['Meth450_Gene', 'miRNA_Gene', 'RNA_HiSeq_Gene', 'RPPA_Gene', 'SCNV_Log_Gene']
options['pree'] = ['rna', 'lipid', 'plasma', 'urine', 'somalog', 'microb']
options['PreE'] = ['rna', 'lipid', 'plasma', 'urine', 'somalog', 'microb']
options['glioma'] = ['Meth450_Gene', 'miRNA_Gene', 'RNA_HiSeq_Gene', 'RPPA_Gene', 'SCNV_Log_Gene']
options['ovarian'] = ['Meth27_Gene', 'miRNA_Gene', 'RNA_GA_Gene', 'RPPA_Gene', 'SCNV_Log_Gene']
options['uterine'] = ['Meth450_Gene', 'miRNA_HiSeq_Gene', 'RPPA_Gene', 'SCNV_Log_Gene']

# Omic names for the selected dataset (a missing key is filled in with the default list)
omic_names = options[dataset]

In [21]:
# Datasets flagged as longitudinal; every dataset not listed here maps to False.
# Note: this rebinds the name `options`.
options = defaultdict(bool)  # bool() returns False for missing keys
options['omics'] = True
options['PreE'] = True

# Longitudinal flag for the selected dataset
longitudinal = options[dataset]

In [22]:
# Display the dimensions of the loaded data as a quick sanity check
DF.shape

(176, 64548)

# Run your model

In [23]:
# Model type; it is also used to name the pickle written by the final cell.
# Alternative value: 'group_lasso'
type_model = 'Stacked_Lasso'

# Settings forwarded to the model functions in the cells below
folds, repeats, cv = 10, 1, 3
tries = 50

# Number of parallel tasks per omic (also used as joblib's n_jobs below)
n_samples = 50

# NOTE(review): scale_y is not referenced by any of the visible cells
scale_y = False

In [None]:
# Run stacked lasso once per omic index. For each index, get_X_y builds X and y
# and the remaining omic names are handed to the model. Results are collected
# per index in `results`.
results = defaultdict(list)

for predictor_index in tqdm(range(len(omic_names))):

    # Feature groups for this index, computed once. A second identical call
    # bound to an unused name (`feature_groups_list`) was removed.
    # NOTE(review): Stacked_Lasso below does not receive feature_groups.
    feature_groups = multiomics_benchmark.models.get_feature_groups(DF, omic_names, predictor_index)

    # X and y for this index, as returned by get_X_y
    X, y = multiomics_benchmark.models.get_X_y(DF, omic_names, predictor_index, responses)

    # Omic names with the current index removed; work on a copy so that
    # omic_names itself is left untouched for the next iteration.
    omic_names_left = omic_names.copy()
    del omic_names_left[predictor_index]

    # One Stacked_Lasso task per feat_n in range(n_samples).
    # NOTE(review): n_jobs is tied to n_samples, so up to n_samples workers may
    # run concurrently — confirm this matches the available cores/memory.
    task_outputs = Parallel(n_jobs=n_samples)(
        delayed(multiomics_benchmark.models.Stacked_Lasso)(
            X, y, omic_names_left, feat_n,
            responses, patient_groups,
            longitudinal, folds, repeats,
            tries, cv)
        for feat_n in range(n_samples))

    # Regroup the per-task 4-tuples into four tuples, one per output kind
    prediction_train, observed_train, prediction_test, observed_test = zip(*task_outputs)

    results['prediction_train'].append(prediction_train)
    results['observed_train'].append(observed_train)
    results['prediction_test'].append(prediction_test)
    results['observed_test'].append(observed_test)

In [16]:
# Run SGLasso once per omic index, passing the feature groups for that index
# to the model. Results are collected per index in `results`.
# NOTE(review): the recorded output of this cell ends in an IndexError; its
# cause is not visible from this notebook and must be checked in
# multiomics_benchmark.models.
results = defaultdict(list)

for predictor_index in tqdm(range(len(omic_names))):

    # Feature groups for this index, computed once. A second identical call
    # bound to an unused name (`feature_groups_list`) was removed.
    feature_groups = multiomics_benchmark.models.get_feature_groups(DF, omic_names, predictor_index)

    # X and y for this index, as returned by get_X_y
    X, y = multiomics_benchmark.models.get_X_y(DF, omic_names, predictor_index, responses)

    # Omic names with the current index removed (copy keeps omic_names intact).
    # NOTE(review): SGLasso below does not receive omic_names_left.
    omic_names_left = omic_names.copy()
    del omic_names_left[predictor_index]

    # One SGLasso task per feat_n in range(n_samples).
    # NOTE(review): n_jobs is tied to n_samples, so up to n_samples workers may
    # run concurrently — confirm this matches the available cores/memory.
    task_outputs = Parallel(n_jobs=n_samples)(
        delayed(multiomics_benchmark.models.SGLasso)(
            X, y, feat_n, feature_groups,
            patient_groups, longitudinal,
            folds, repeats, tries, cv)
        for feat_n in range(n_samples))

    # Regroup the per-task 4-tuples into four tuples, one per output kind
    prediction_train, observed_train, prediction_test, observed_test = zip(*task_outputs)

    results['prediction_train'].append(prediction_train)
    results['observed_train'].append(observed_train)
    results['prediction_test'].append(prediction_test)
    results['observed_test'].append(observed_test)


  0%|          | 0/5 [00:00<?, ?it/s][A

IndexError: index 10 is out of bounds for axis 1 with size 10

In [None]:
# Run GFA once per omic index, handing it X, y and the remaining omic names.
# Results are collected per index in `results`.
results = defaultdict(list)

for predictor_index in tqdm(range(len(omic_names))):

    # Feature groups for this index, computed once. A second identical call
    # bound to an unused name (`feature_groups_list`) was removed.
    # NOTE(review): GFA below does not receive feature_groups.
    feature_groups = multiomics_benchmark.models.get_feature_groups(DF, omic_names, predictor_index)

    # X and y for this index, as returned by get_X_y
    X, y = multiomics_benchmark.models.get_X_y(DF, omic_names, predictor_index, responses)

    # Omic names with the current index removed; work on a copy so that
    # omic_names itself is left untouched for the next iteration.
    omic_names_left = omic_names.copy()
    del omic_names_left[predictor_index]

    # A single GFA call per index (no joblib fan-out here); it returns the four outputs directly
    gfa_outputs = multiomics_benchmark.models.GFA(
        X, y, omic_names_left, patient_groups, longitudinal, folds, repeats, tries, cv)
    prediction_train, observed_train, prediction_test, observed_test = gfa_outputs

    results['prediction_train'].append(prediction_train)
    results['observed_train'].append(observed_train)
    results['prediction_test'].append(prediction_test)
    results['observed_test'].append(observed_test)

In [None]:
# Run the model selected by `type_model` (via pymodels) once per omic index.
# Results are collected per index in `results`.
results = defaultdict(list)

for predictor_index in tqdm(range(len(omic_names))):

    # Feature groups for this index, computed once. A second identical call
    # bound to an unused name (`feature_groups_list`) was removed.
    # NOTE(review): pymodels below does not receive feature_groups.
    feature_groups = multiomics_benchmark.models.get_feature_groups(DF, omic_names, predictor_index)

    # X and y for this index, as returned by get_X_y
    X, y = multiomics_benchmark.models.get_X_y(DF, omic_names, predictor_index, responses)

    # One pymodels task per feat_n in range(n_samples).
    # NOTE(review): n_jobs is tied to n_samples, so up to n_samples workers may
    # run concurrently — confirm this matches the available cores/memory.
    task_outputs = Parallel(n_jobs=n_samples)(
        delayed(multiomics_benchmark.models.pymodels)(
            X, y, type_model, feat_n,
            patient_groups, longitudinal,
            folds, repeats, tries, cv)
        for feat_n in range(n_samples))

    # Regroup the per-task 4-tuples into four tuples, one per output kind
    prediction_train, observed_train, prediction_test, observed_test = zip(*task_outputs)

    results['prediction_train'].append(prediction_train)
    results['observed_train'].append(observed_train)
    results['prediction_test'].append(prediction_test)
    results['observed_test'].append(observed_test)

In [None]:
# Persist `results` as a pickle inside the results directory of the current dataset.
# NOTE(review): the file is named after `type_model`, while `results` holds the
# output of whichever model cell ran last — make sure the two match before saving.
results_dir = '/home/mxenoc/workspace/benchmark/results/' + dataset
os.chdir(results_dir)

output_name = type_model + '.pkl'
with open(output_name, 'wb') as out_file:
    pickle.dump(results, out_file)