In [85]:
import sys

import meta_dataloader.TCGA

import models.mlp, models.gcn
import numpy as np
import data.gene_graphs
import collections
import sklearn.metrics
import sklearn.model_selection
import random
from collections import OrderedDict
import pandas as pd
from torch.optim import Optimizer
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset
from torch import Tensor
import matplotlib.pyplot as plt
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [3]:
tasks = meta_dataloader.TCGA.TCGAMeta(download=True, 
                                      min_samples_per_class=10)

Downloading or checking for TCGA_HiSeqV2 using Academic Torrents
Torrent name: HiSeqV2.gz, Size: 513.04MB


In [3]:
for taskid in sorted(tasks.task_ids):
    print(taskid)

('Expression_Subtype', 'LUAD')
('Expression_Subtype', 'LUNG')
('GeneExp_Subtype', 'GBM')
('Metastasis_nature2012', 'BRCA')
('Node_nature2012', 'BRCA')
('PAM50Call_RNAseq', 'BRCA')
('_EVENT', 'ACC')
('_EVENT', 'BLCA')
('_EVENT', 'BRCA')
('_EVENT', 'CESC')
('_EVENT', 'CHOL')
('_EVENT', 'COAD')
('_EVENT', 'COADREAD')
('_EVENT', 'ESCA')
('_EVENT', 'GBM')
('_EVENT', 'GBMLGG')
('_EVENT', 'HNSC')
('_EVENT', 'KICH')
('_EVENT', 'KIRP')
('_EVENT', 'LAML')
('_EVENT', 'LGG')
('_EVENT', 'LIHC')
('_EVENT', 'LUAD')
('_EVENT', 'LUNG')
('_EVENT', 'LUSC')
('_EVENT', 'MESO')
('_EVENT', 'OV')
('_EVENT', 'PAAD')
('_EVENT', 'READ')
('_EVENT', 'SARC')
('_EVENT', 'SKCM')
('_EVENT', 'STAD')
('_EVENT', 'THCA')
('_EVENT', 'UCEC')
('_EVENT', 'UCS')
('_EVENT', 'UVM')
('_PANCAN_DNAMethyl_BLCA', 'BLCA')
('_PANCAN_DNAMethyl_BRCA', 'BRCA')
('_PANCAN_DNAMethyl_HNSC', 'HNSC')
('_PANCAN_DNAMethyl_LUAD', 'LUAD')
('_PANCAN_DNAMethyl_LUSC', 'LUSC')
('_PANCAN_mirna_BLCA', 'BLCA')
('_PANCAN_mirna_BRCA', 'BRCA')
('_PANCAN_mirn

In [8]:
def load_sets(task, valid=False):
     
    X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(task._samples, 
                                                                                task._labels, 
                                                                                stratify=task._labels,
                                                                                train_size=50,
                                                                                test_size=100,
                                                                                shuffle=True,
                                                                                random_state=0
                                                                                 )
    
    train_set = TensorDataset( Tensor(X_train), Tensor(y_train))
    test_set = TensorDataset( Tensor(X_test), Tensor(y_test))

    if valid:
        X_test, X_valid, y_test, y_valid = sklearn.model_selection.train_test_split(X_test, 
                                                                                y_test, 
                                                                                stratify=y_test,
                                                                                train_size=50,
                                                                                test_size=50,
                                                                                shuffle=True,
                                                                                random_state=0
                                                                               )
        valid_set = TensorDataset( Tensor(X_valid), Tensor(y_valid))
        return train_set, valid_set, test_set
    
    return train_set, test_set 

In [9]:
def train(model, dataset, task_id, stop_early=False):
    train_loss = []
    criterion = model.criterion
    if type(model).__name__ == "LogisticRegression":
        optimizer = torch.optim.LBFGS(model.parameters(), lr=1)
    else:
        optimizer = torch.optim.Adam(model.parameters(), lr=model.learning_rate, weight_decay= model.weight_decay)
     
    if stop_early:
        print("")
    else:
        train_set = dataset
        valid_set = None
 
    for i in range(model.epochs):
        for batch, labels in torch.utils.data.DataLoader(train_set, batch_size=model.batch_size, shuffle=True):
            labels = torch.autograd.Variable(labels.long())
            
            def closure():
                # Forward + Backward + Optimize
                optimizer.zero_grad()
                out = model(batch)
                loss = criterion(out, labels)
                loss.backward()
                return loss
            
            intermediate = optimizer.step(closure)
            loss = intermediate.item()
            train_loss.append(loss)

    return model, loss

In [10]:
def test(model, test_set):
    # Test the Model
    batch, labels = next(iter(torch.utils.data.DataLoader(test_set, batch_size=len(test_set), shuffle=False)))
    model.eval()
    outputs = model(batch)
    _, predicted = torch.max(outputs.data, 1)

    predicted = predicted.numpy()
    labels = labels.numpy()
    accuracy = (predicted == labels).mean()*100.
    return accuracy

In [106]:
class MultiLayerPerceptron(torch.nn.Module):
    def __init__(self, seed, input_size, num_classes, num_layers, channels, learning_rate, batch_size, epochs, patience, weight_decay):
        super(MultiLayerPerceptron, self).__init__()
        self.input_size = input_size
        self.learning_rate = learning_rate
        self.batch_size = batch_size
        self.epochs = epochs
        self.patience = patience
        self.weight_decay = weight_decay
        self.num_layers = num_layers
        self.channels = channels
        self.output_size = num_classes
        random.seed(seed)
        torch.manual_seed(seed)
        
        nodes = []
        nodes += channels
        architecture = OrderedDict()
        for i in range(self.num_layers):
            architecture['fc' + str(i)] = nn.Linear(input_size, nodes[i])
            architecture['relu' + str(i)] = torch.nn.ReLU()
            input_size = nodes[i]

        self.features = nn.Sequential(architecture)

        self.classifier = nn.Linear(input_size, num_classes)
    
    @property
    def criterion(self):
        criterion = F.cross_entropy
        return criterion
    
    @property
    def optimizer(self):
        optimizer = torch.optim.Adam(self.parameters(), lr=self.learning_rate, weight_decay=self.weight_decay)
        return optimizer

    def forward(self, x):
        x = x.view(-1, self.input_size)
        features = self.features(x)
        logits = self.classifier(features)
        return logits

In [108]:
tasks_opt = sorted(random.sample(tasks.task_ids,k=25)) #tasks for hyperparameter tunning
print (tasks_opt)

[('PAM50Call_RNAseq', 'BRCA'), ('_EVENT', 'CHOL'), ('_EVENT', 'ESCA'), ('_EVENT', 'GBMLGG'), ('_EVENT', 'LAML'), ('_EVENT', 'LUSC'), ('_EVENT', 'UCEC'), ('_PANCAN_DNAMethyl_LUSC', 'LUSC'), ('alcohol_history_documented', 'HNSC'), ('biochemical_recurrence', 'PRAD'), ('clinical_M', 'ACC'), ('clinical_stage', 'KIRP'), ('colon_polyps_present', 'COADREAD'), ('diabetes', 'UCEC'), ('family_history_of_cancer', 'LGG'), ('gender', 'LIHC'), ('gender', 'STAD'), ('lymphovascular_invasion_present', 'HNSC'), ('oct_embedded', 'ACC'), ('oct_embedded', 'CESC'), ('oct_embedded', 'LUAD'), ('oct_embedded', 'MESO'), ('oct_embedded', 'STAD'), ('oct_embedded', 'THCA'), ('tumor_tissue_site', 'PCPG')]


# Number of Hidden Layers and Channels Optimization

In [109]:
lr = 0.001
weight_decay = 0.00005
batch_size = 32
epochs=100
patience = 10

In [117]:
num_layer= 1
channels1 = [[512], [256], [128], [64], [32]] # best :  [128] :  68.07407407407408
channel1_res = []
for channel in channels1:
    task_res = []
    for taskid in tasks_opt[:10]:
        task = meta_dataloader.TCGA.TCGATask(taskid)
        input_size = task._samples.shape[1]
        num_classes = len(collections.Counter(task._labels))
        try:
            train_set, valid_set, test_set = load_sets(task, valid=True)
            seed_res=[]
            for seed in range(0,3):
                MLP_model = MultiLayerPerceptron(seed, input_size, num_classes, num_layer, channel, lr, batch_size, epochs, patience, weight_decay)
                trained_model, train_loss = train(MLP_model, train_set, task.id, False)
                mlp_result = test(trained_model, valid_set)
                print('task {} ==> {} seed {}' .format(taskid, mlp_result, seed))
                seed_res.append(mlp_result)
            task_res.append(np.mean(seed_res))
            print("Average task performance for channel {}: {}" .format(channel, task_res))
        except:
            print("Not enough number of samples")
        
    channel1_res.append(np.mean(task_res))
print("Channel Average Performace: {}" .format(channel1_res))
best_channel_1 = channels1[channel1_res.index(max(channel1_res))]
print("The most optimal number of nodes for 1 number of layers architecture: {}" .format(best_channel_1))

task ('PAM50Call_RNAseq', 'BRCA') ==> 64.0 seed 0
task ('PAM50Call_RNAseq', 'BRCA') ==> 72.0 seed 1
task ('PAM50Call_RNAseq', 'BRCA') ==> 76.0 seed 2
Average task performance for channel [512]: [70.66666666666667]
Not enough number of samples
task ('_EVENT', 'ESCA') ==> 54.0 seed 0
task ('_EVENT', 'ESCA') ==> 56.00000000000001 seed 1
task ('_EVENT', 'ESCA') ==> 56.00000000000001 seed 2
Average task performance for channel [512]: [70.66666666666667, 55.333333333333336]
task ('_EVENT', 'GBMLGG') ==> 72.0 seed 0
task ('_EVENT', 'GBMLGG') ==> 66.0 seed 1
task ('_EVENT', 'GBMLGG') ==> 70.0 seed 2
Average task performance for channel [512]: [70.66666666666667, 55.333333333333336, 69.33333333333333]
task ('_EVENT', 'LAML') ==> 50.0 seed 0
task ('_EVENT', 'LAML') ==> 54.0 seed 1
task ('_EVENT', 'LAML') ==> 54.0 seed 2
Average task performance for channel [512]: [70.66666666666667, 55.333333333333336, 69.33333333333333, 52.666666666666664]
task ('_EVENT', 'LUSC') ==> 56.00000000000001 seed 0
ta

task ('_EVENT', 'LAML') ==> 56.00000000000001 seed 0
task ('_EVENT', 'LAML') ==> 54.0 seed 1
task ('_EVENT', 'LAML') ==> 56.00000000000001 seed 2
Average task performance for channel [64]: [72.0, 56.00000000000001, 69.33333333333333, 55.333333333333336]
task ('_EVENT', 'LUSC') ==> 62.0 seed 0
task ('_EVENT', 'LUSC') ==> 60.0 seed 1
task ('_EVENT', 'LUSC') ==> 54.0 seed 2
Average task performance for channel [64]: [72.0, 56.00000000000001, 69.33333333333333, 55.333333333333336, 58.666666666666664]
task ('_EVENT', 'UCEC') ==> 72.0 seed 0
task ('_EVENT', 'UCEC') ==> 80.0 seed 1
task ('_EVENT', 'UCEC') ==> 82.0 seed 2
Average task performance for channel [64]: [72.0, 56.00000000000001, 69.33333333333333, 55.333333333333336, 58.666666666666664, 78.0]
task ('_PANCAN_DNAMethyl_LUSC', 'LUSC') ==> 66.0 seed 0
task ('_PANCAN_DNAMethyl_LUSC', 'LUSC') ==> 70.0 seed 1
task ('_PANCAN_DNAMethyl_LUSC', 'LUSC') ==> 56.00000000000001 seed 2
Average task performance for channel [64]: [72.0, 56.0000000000

In [118]:
num_layer= 2
channels2 = [[512,256], [256,128], [128,64]] # best: [128, 64] : 68.14814814814815
channel2_res = []
for channel in channels2:
    task_res = []
    for taskid in tasks_opt[:10]:
        task = meta_dataloader.TCGA.TCGATask(taskid)
        input_size = task._samples.shape[1]
        num_classes = len(collections.Counter(task._labels))
        try:
            train_set, valid_set, test_set = load_sets(task, valid=True)
            seed_res=[]
            for seed in range(0,3):
                MLP_model = MultiLayerPerceptron(seed, input_size, num_classes, num_layer, channel, lr, batch_size, epochs, patience, weight_decay)
                trained_model, train_loss = train(MLP_model, train_set, task.id, False)
                mlp_result = test(trained_model, valid_set)
                print('task {} ==> {} seed {}' .format(taskid, mlp_result, seed))
                seed_res.append(mlp_result)
            task_res.append(np.mean(seed_res))
            print("Average task performance for channel {}: {}" .format(channel, task_res))
        except:
            print("Not enough number of samples")
        
    channel2_res.append(np.mean(task_res))
print("Channel Average Performace: {}" .format(channel2_res))
best_channel_2 = channels2[channel2_res.index(max(channel2_res))]
print("The most optimal number of nodes for 2 number of layers architecture: {}" .format(best_channel_2))

task ('PAM50Call_RNAseq', 'BRCA') ==> 76.0 seed 0
task ('PAM50Call_RNAseq', 'BRCA') ==> 78.0 seed 1
task ('PAM50Call_RNAseq', 'BRCA') ==> 78.0 seed 2
Average task performance for channel [512, 256]: [77.33333333333333]
Not enough number of samples
task ('_EVENT', 'ESCA') ==> 56.00000000000001 seed 0
task ('_EVENT', 'ESCA') ==> 56.00000000000001 seed 1
task ('_EVENT', 'ESCA') ==> 54.0 seed 2
Average task performance for channel [512, 256]: [77.33333333333333, 55.333333333333336]
task ('_EVENT', 'GBMLGG') ==> 68.0 seed 0
task ('_EVENT', 'GBMLGG') ==> 62.0 seed 1
task ('_EVENT', 'GBMLGG') ==> 68.0 seed 2
Average task performance for channel [512, 256]: [77.33333333333333, 55.333333333333336, 66.0]
task ('_EVENT', 'LAML') ==> 60.0 seed 0
task ('_EVENT', 'LAML') ==> 56.00000000000001 seed 1
task ('_EVENT', 'LAML') ==> 50.0 seed 2
Average task performance for channel [512, 256]: [77.33333333333333, 55.333333333333336, 66.0, 55.333333333333336]
task ('_EVENT', 'LUSC') ==> 56.00000000000001 se

In [112]:
num_layer= 3
channels3 = [[128,64,32],[256,128,64],[512,128,64],[256,128,32]] # Best : [128, 64, 32] : 68.24561403508771
channel3_res = []
for channel in channels3:
    task_res = []
    for taskid in tasks_opt:
        task = meta_dataloader.TCGA.TCGATask(taskid)
        input_size = task._samples.shape[1]
        num_classes = len(collections.Counter(task._labels))
        try:
            train_set, valid_set, test_set = load_sets(task, valid=True)
            seed_res=[]
            for seed in range(0,3):
                MLP_model = MultiLayerPerceptron(seed, input_size, num_classes, num_layer, channel, lr, batch_size, epochs, patience, weight_decay)
                trained_model, train_loss = train(MLP_model, train_set, task.id, False)
                mlp_result = test(trained_model, valid_set)
                print('task {} ==> {} seed {}' .format(taskid, mlp_result, seed))
                seed_res.append(mlp_result)
            task_res.append(np.mean(seed_res))
            print("Average task performance for channel {}: {}" .format(channel, task_res))
        except:
            print("Not enough number of samples")
        
    channel3_res.append(np.mean(task_res))
print("Channel Average Performace: {}" .format(channel3_res))
best_channel_3 = channels3[channel3_res.index(max(channel3_res))]
print("The most optimal number of nodes for 3 number of layers architecture: {}" .format(best_channel_3))

task ('PAM50Call_RNAseq', 'BRCA') ==> 74.0 seed 0
task ('PAM50Call_RNAseq', 'BRCA') ==> 76.0 seed 1
task ('PAM50Call_RNAseq', 'BRCA') ==> 80.0 seed 2
Average task performance for channel [128, 64, 32]: [76.66666666666667]
Not enough number of samples
task ('_EVENT', 'ESCA') ==> 56.00000000000001 seed 0
task ('_EVENT', 'ESCA') ==> 54.0 seed 1
task ('_EVENT', 'ESCA') ==> 54.0 seed 2
Average task performance for channel [128, 64, 32]: [76.66666666666667, 54.666666666666664]
task ('_EVENT', 'GBMLGG') ==> 70.0 seed 0
task ('_EVENT', 'GBMLGG') ==> 68.0 seed 1
task ('_EVENT', 'GBMLGG') ==> 70.0 seed 2
Average task performance for channel [128, 64, 32]: [76.66666666666667, 54.666666666666664, 69.33333333333333]
task ('_EVENT', 'LAML') ==> 56.00000000000001 seed 0
task ('_EVENT', 'LAML') ==> 60.0 seed 1
task ('_EVENT', 'LAML') ==> 56.00000000000001 seed 2
Average task performance for channel [128, 64, 32]: [76.66666666666667, 54.666666666666664, 69.33333333333333, 57.333333333333336]
task ('_EV

task ('_EVENT', 'LAML') ==> 52.0 seed 0
task ('_EVENT', 'LAML') ==> 56.00000000000001 seed 1
task ('_EVENT', 'LAML') ==> 48.0 seed 2
Average task performance for channel [256, 128, 68]: [78.0, 54.0, 68.66666666666667, 52.0]
task ('_EVENT', 'LUSC') ==> 56.00000000000001 seed 0
task ('_EVENT', 'LUSC') ==> 62.0 seed 1
task ('_EVENT', 'LUSC') ==> 60.0 seed 2
Average task performance for channel [256, 128, 68]: [78.0, 54.0, 68.66666666666667, 52.0, 59.333333333333336]
task ('_EVENT', 'UCEC') ==> 82.0 seed 0
task ('_EVENT', 'UCEC') ==> 80.0 seed 1
task ('_EVENT', 'UCEC') ==> 80.0 seed 2
Average task performance for channel [256, 128, 68]: [78.0, 54.0, 68.66666666666667, 52.0, 59.333333333333336, 80.66666666666667]
task ('_PANCAN_DNAMethyl_LUSC', 'LUSC') ==> 56.00000000000001 seed 0
task ('_PANCAN_DNAMethyl_LUSC', 'LUSC') ==> 62.0 seed 1
task ('_PANCAN_DNAMethyl_LUSC', 'LUSC') ==> 66.0 seed 2
Average task performance for channel [256, 128, 68]: [78.0, 54.0, 68.66666666666667, 52.0, 59.3333333

task ('biochemical_recurrence', 'PRAD') ==> 88.0 seed 2
Average task performance for channel [512, 128, 64]: [78.0, 55.333333333333336, 71.33333333333333, 52.666666666666664, 60.666666666666664, 80.66666666666667, 64.0, 69.33333333333333, 88.0]
Not enough number of samples
task ('clinical_stage', 'KIRP') ==> 60.0 seed 0
task ('clinical_stage', 'KIRP') ==> 60.0 seed 1
task ('clinical_stage', 'KIRP') ==> 66.0 seed 2
Average task performance for channel [512, 128, 64]: [78.0, 55.333333333333336, 71.33333333333333, 52.666666666666664, 60.666666666666664, 80.66666666666667, 64.0, 69.33333333333333, 88.0, 62.0]
task ('colon_polyps_present', 'COADREAD') ==> 72.0 seed 0
task ('colon_polyps_present', 'COADREAD') ==> 72.0 seed 1
task ('colon_polyps_present', 'COADREAD') ==> 66.0 seed 2
Average task performance for channel [512, 128, 64]: [78.0, 55.333333333333336, 71.33333333333333, 52.666666666666664, 60.666666666666664, 80.66666666666667, 64.0, 69.33333333333333, 88.0, 62.0, 70.0]
Not enough n

task ('gender', 'LIHC') ==> 84.0 seed 0
task ('gender', 'LIHC') ==> 84.0 seed 1
task ('gender', 'LIHC') ==> 84.0 seed 2
Average task performance for channel [256, 128, 32]: [62.666666666666664, 55.333333333333336, 69.33333333333333, 50.0, 58.0, 80.66666666666667, 58.0, 70.66666666666667, 88.0, 64.66666666666667, 68.0, 50.666666666666664, 84.0]
task ('gender', 'STAD') ==> 80.0 seed 0
task ('gender', 'STAD') ==> 78.0 seed 1
task ('gender', 'STAD') ==> 78.0 seed 2
Average task performance for channel [256, 128, 32]: [62.666666666666664, 55.333333333333336, 69.33333333333333, 50.0, 58.0, 80.66666666666667, 58.0, 70.66666666666667, 88.0, 64.66666666666667, 68.0, 50.666666666666664, 84.0, 78.66666666666667]
task ('lymphovascular_invasion_present', 'HNSC') ==> 66.0 seed 0
task ('lymphovascular_invasion_present', 'HNSC') ==> 62.0 seed 1
task ('lymphovascular_invasion_present', 'HNSC') ==> 66.0 seed 2
Average task performance for channel [256, 128, 32]: [62.666666666666664, 55.333333333333336, 

In [114]:
num_layer= 4
channels4 = [[512,256,128,64],[256,128,64,32],[128,64,32,16]] #Best: [256,128,64,32] : 62.833333333333336
channel4_res = []
for channel in channels4:
    task_res = []
    for taskid in tasks_opt[:5]:
        task = meta_dataloader.TCGA.TCGATask(taskid)
        input_size = task._samples.shape[1]
        num_classes = len(collections.Counter(task._labels))
        try:
            train_set, valid_set, test_set = load_sets(task, valid=True)
            seed_res=[]
            for seed in range(0,3):
                MLP_model = MultiLayerPerceptron(seed, input_size, num_classes, num_layer, channel, lr, batch_size, epochs, patience, weight_decay)
                trained_model, train_loss = train(MLP_model, train_set, task.id, False)
                mlp_result = test(trained_model, valid_set)
                print('task {} ==> {} seed {}' .format(taskid, mlp_result, seed))
                seed_res.append(mlp_result)
            task_res.append(np.mean(seed_res))
            print("Average task performance for channel {}: {}" .format(channel, task_res))
        except:
            print("Not enough number of samples")
        
    channel4_res.append(np.mean(task_res))
print("Channel Average Performace: {}" .format(channel4_res))
best_channel_4 = channels4[channel4_res.index(max(channel4_res))]
print("The most optimal number of nodes for 4 number of layers architecture: {}" .format(best_channel_4))

task ('PAM50Call_RNAseq', 'BRCA') ==> 76.0 seed 0
task ('PAM50Call_RNAseq', 'BRCA') ==> 76.0 seed 1
task ('PAM50Call_RNAseq', 'BRCA') ==> 80.0 seed 2
Average task performance for channel [512, 256, 128, 64]: [77.33333333333333]
Not enough number of samples
task ('_EVENT', 'ESCA') ==> 56.00000000000001 seed 0
task ('_EVENT', 'ESCA') ==> 54.0 seed 1
task ('_EVENT', 'ESCA') ==> 54.0 seed 2
Average task performance for channel [512, 256, 128, 64]: [77.33333333333333, 54.666666666666664]
task ('_EVENT', 'GBMLGG') ==> 70.0 seed 0
task ('_EVENT', 'GBMLGG') ==> 56.00000000000001 seed 1
task ('_EVENT', 'GBMLGG') ==> 70.0 seed 2
Average task performance for channel [512, 256, 128, 64]: [77.33333333333333, 54.666666666666664, 65.33333333333333]
task ('_EVENT', 'LAML') ==> 52.0 seed 0
task ('_EVENT', 'LAML') ==> 54.0 seed 1
task ('_EVENT', 'LAML') ==> 56.00000000000001 seed 2
Average task performance for channel [512, 256, 128, 64]: [77.33333333333333, 54.666666666666664, 65.33333333333333, 54.0]


# Learning Rate Optimization

In [121]:
weight_decay = 0.00005
batch_size = 32
epochs=100
num_layer = 2
channels = [128,64]

In [122]:
lr_list = [0.00001,0.0001,0.001,0.01] #0.0001 : 68.33333333333333
lr_res = []

for learning_rate in lr_list:
    task_res = []
    for taskid in tasks_opt[:10]:
        task = meta_dataloader.TCGA.TCGATask(taskid)
        input_size = task._samples.shape[1]
        num_classes = len(collections.Counter(task._labels))
        try:
            train_set, valid_set, test_set = load_sets(task, valid=True)
            seed_res=[]
            for seed in range(0,4):
                MLP_model = MultiLayerPerceptron(seed, input_size, num_classes, num_layer, channels, learning_rate, batch_size, epochs, patience, weight_decay)
                trained_model, train_loss = train(MLP_model, train_set, task.id, False)
                mlp_result = test(trained_model, valid_set)
                print('task {} ==> {} seed {}' .format(taskid, mlp_result, seed))
                seed_res.append(mlp_result)
            task_res.append(np.mean(seed_res))
            print("Average task performance for channel {}: {}" .format(learning_rate, task_res))
        except:
            print("Not enough number of samples")
    lr_res.append(np.mean(task_res))
print("Learning rate Average Performace over tasks: {}" .format(lr_res))
best_lr = lr_list[lr_res.index(max(lr_res))]
print("The most optimal learning rate: {}" .format(best_lr))

task ('PAM50Call_RNAseq', 'BRCA') ==> 70.0 seed 0
task ('PAM50Call_RNAseq', 'BRCA') ==> 74.0 seed 1
task ('PAM50Call_RNAseq', 'BRCA') ==> 74.0 seed 2
task ('PAM50Call_RNAseq', 'BRCA') ==> 74.0 seed 3
Average task performance for channel 1e-05: [73.0]
Not enough number of samples
task ('_EVENT', 'ESCA') ==> 56.00000000000001 seed 0
task ('_EVENT', 'ESCA') ==> 64.0 seed 1
task ('_EVENT', 'ESCA') ==> 60.0 seed 2
task ('_EVENT', 'ESCA') ==> 56.00000000000001 seed 3
Average task performance for channel 1e-05: [73.0, 59.0]
task ('_EVENT', 'GBMLGG') ==> 70.0 seed 0
task ('_EVENT', 'GBMLGG') ==> 66.0 seed 1
task ('_EVENT', 'GBMLGG') ==> 64.0 seed 2
task ('_EVENT', 'GBMLGG') ==> 64.0 seed 3
Average task performance for channel 1e-05: [73.0, 59.0, 66.0]
task ('_EVENT', 'LAML') ==> 50.0 seed 0
task ('_EVENT', 'LAML') ==> 60.0 seed 1
task ('_EVENT', 'LAML') ==> 57.99999999999999 seed 2
task ('_EVENT', 'LAML') ==> 60.0 seed 3
Average task performance for channel 1e-05: [73.0, 59.0, 66.0, 57.0]
task

task ('_EVENT', 'LAML') ==> 64.0 seed 0
task ('_EVENT', 'LAML') ==> 57.99999999999999 seed 1
task ('_EVENT', 'LAML') ==> 50.0 seed 2
task ('_EVENT', 'LAML') ==> 48.0 seed 3
Average task performance for channel 0.01: [54.0, 57.49999999999999, 65.0, 55.0]
task ('_EVENT', 'LUSC') ==> 56.00000000000001 seed 0
task ('_EVENT', 'LUSC') ==> 60.0 seed 1
task ('_EVENT', 'LUSC') ==> 57.99999999999999 seed 2
task ('_EVENT', 'LUSC') ==> 56.00000000000001 seed 3
Average task performance for channel 0.01: [54.0, 57.49999999999999, 65.0, 55.0, 57.5]
task ('_EVENT', 'UCEC') ==> 82.0 seed 0
task ('_EVENT', 'UCEC') ==> 80.0 seed 1
task ('_EVENT', 'UCEC') ==> 82.0 seed 2
task ('_EVENT', 'UCEC') ==> 82.0 seed 3
Average task performance for channel 0.01: [54.0, 57.49999999999999, 65.0, 55.0, 57.5, 81.5]
task ('_PANCAN_DNAMethyl_LUSC', 'LUSC') ==> 46.0 seed 0
task ('_PANCAN_DNAMethyl_LUSC', 'LUSC') ==> 40.0 seed 1
task ('_PANCAN_DNAMethyl_LUSC', 'LUSC') ==> 52.0 seed 2
task ('_PANCAN_DNAMethyl_LUSC', 'LUSC')

# Number of epochs experiments

In [124]:
weight_decay = 0.00005
batch_size = 32
lr= 0.0001 # best_lr
num_layer = 2 # best num_layer
channels = [128,64] # best channels

In [125]:
epoch_list = [100,250,500]
epoch_res = []

for e in epoch_list:
    task_res = []
    for taskid in tasks_opt[:10]:
        task = meta_dataloader.TCGA.TCGATask(taskid)
        input_size = task._samples.shape[1]
        num_classes = len(collections.Counter(task._labels))
        try:
            train_set, valid_set, test_set = load_sets(task, valid=True)
            seed_res=[]
            for seed in range(0,4):
                MLP_model = MultiLayerPerceptron(seed, input_size, num_classes, num_layer, channels, lr, batch_size, e, patience, weight_decay)
                trained_model, train_loss = train(MLP_model, train_set, task.id, False)
                mlp_result = test(trained_model, valid_set)
                print('task {} ==> {} seed {}' .format(taskid, mlp_result, seed))
                seed_res.append(mlp_result)
            task_res.append(np.mean(seed_res))
            print("Average task performance for epoch {}: {}" .format(e, task_res))
        except:
            print("Not enough number of samples")
        
    epoch_res.append(np.mean(task_res))
print("Epoch Average Performace over tasks: {}" .format(epoch_res))
best_epoch = epoch_list[epoch_res.index(max(epoch_res))]
print("The most optimal number of epochs: {}" .format(best_epoch))

task ('PAM50Call_RNAseq', 'BRCA') ==> 78.0 seed 0
task ('PAM50Call_RNAseq', 'BRCA') ==> 76.0 seed 1
task ('PAM50Call_RNAseq', 'BRCA') ==> 72.0 seed 2
task ('PAM50Call_RNAseq', 'BRCA') ==> 70.0 seed 3
Average task performance for epoch 100: [74.0]
Not enough number of samples
task ('_EVENT', 'ESCA') ==> 54.0 seed 0
task ('_EVENT', 'ESCA') ==> 54.0 seed 1
task ('_EVENT', 'ESCA') ==> 57.99999999999999 seed 2
task ('_EVENT', 'ESCA') ==> 54.0 seed 3
Average task performance for epoch 100: [74.0, 55.0]
task ('_EVENT', 'GBMLGG') ==> 62.0 seed 0
task ('_EVENT', 'GBMLGG') ==> 70.0 seed 1
task ('_EVENT', 'GBMLGG') ==> 70.0 seed 2
task ('_EVENT', 'GBMLGG') ==> 68.0 seed 3
Average task performance for epoch 100: [74.0, 55.0, 67.5]
task ('_EVENT', 'LAML') ==> 54.0 seed 0
task ('_EVENT', 'LAML') ==> 57.99999999999999 seed 1
task ('_EVENT', 'LAML') ==> 57.99999999999999 seed 2
task ('_EVENT', 'LAML') ==> 52.0 seed 3
Average task performance for epoch 100: [74.0, 55.0, 67.5, 55.5]
task ('_EVENT', 'LUS

# Weight-decay Experiment

In [126]:
epochs = 100
batch_size = 32
lr= 0.0001 # best_lr
num_layer = 2 # best num_layer
channels = [128,64] # best channels

In [127]:
wd_list=[0, 0.00005, 0.0005, 0.005, 0.05]
wd = []

for wd in wd_list:
    task_res = []
    for taskid in tasks_opt[:10]:
        task = meta_dataloader.TCGA.TCGATask(taskid)
        input_size = task._samples.shape[1]
        num_classes = len(collections.Counter(task._labels))
        try:
            train_set, valid_set, test_set = load_sets(task, valid=True)
            seed_res=[]
            for seed in range(0,4):
                MLP_model = MultiLayerPerceptron(seed, input_size, num_classes, num_layer, channels, lr, batch_size, epochs, patience, wd)
                trained_model, train_loss = train(MLP_model, train_set, task.id, False)
                mlp_result = test(trained_model, valid_set)
                print('task {} ==> {} seed {}' .format(taskid, mlp_result, seed))
                seed_res.append(mlp_result)
            task_res.append(np.mean(seed_res))
            print("Average task performance for weight decay {}: {}" .format(wd, task_res))
        except:
            print("Not enough number of samples")
        
    wd_res.append(np.mean(task_res))
print("Weight Decay Average Performace: {}" .format(wd_res))
best_wd = wd_list[wd_res.index(max(wd_res))]
print("The most optimal value of weight decay: {}" .format(best_wd))

task ('PAM50Call_RNAseq', 'BRCA') ==> 78.0 seed 0
task ('PAM50Call_RNAseq', 'BRCA') ==> 78.0 seed 1
task ('PAM50Call_RNAseq', 'BRCA') ==> 74.0 seed 2
task ('PAM50Call_RNAseq', 'BRCA') ==> 72.0 seed 3
Average task performance for weight decay 0: [75.5]
Not enough number of samples
task ('_EVENT', 'ESCA') ==> 62.0 seed 0
task ('_EVENT', 'ESCA') ==> 57.99999999999999 seed 1
task ('_EVENT', 'ESCA') ==> 56.00000000000001 seed 2
task ('_EVENT', 'ESCA') ==> 57.99999999999999 seed 3
Average task performance for weight decay 0: [75.5, 58.5]
task ('_EVENT', 'GBMLGG') ==> 66.0 seed 0
task ('_EVENT', 'GBMLGG') ==> 68.0 seed 1
task ('_EVENT', 'GBMLGG') ==> 68.0 seed 2
task ('_EVENT', 'GBMLGG') ==> 68.0 seed 3
Average task performance for weight decay 0: [75.5, 58.5, 67.5]
task ('_EVENT', 'LAML') ==> 57.99999999999999 seed 0
task ('_EVENT', 'LAML') ==> 50.0 seed 1
task ('_EVENT', 'LAML') ==> 57.99999999999999 seed 2
task ('_EVENT', 'LAML') ==> 56.00000000000001 seed 3
Average task performance for we

task ('_EVENT', 'GBMLGG') ==> 68.0 seed 1
task ('_EVENT', 'GBMLGG') ==> 68.0 seed 2
task ('_EVENT', 'GBMLGG') ==> 70.0 seed 3
Average task performance for weight decay 0.005: [74.0, 57.0, 68.5]
task ('_EVENT', 'LAML') ==> 57.99999999999999 seed 0
task ('_EVENT', 'LAML') ==> 56.00000000000001 seed 1
task ('_EVENT', 'LAML') ==> 56.00000000000001 seed 2
task ('_EVENT', 'LAML') ==> 52.0 seed 3
Average task performance for weight decay 0.005: [74.0, 57.0, 68.5, 55.5]
task ('_EVENT', 'LUSC') ==> 60.0 seed 0
task ('_EVENT', 'LUSC') ==> 60.0 seed 1
task ('_EVENT', 'LUSC') ==> 62.0 seed 2
task ('_EVENT', 'LUSC') ==> 56.00000000000001 seed 3
Average task performance for weight decay 0.005: [74.0, 57.0, 68.5, 55.5, 59.5]
task ('_EVENT', 'UCEC') ==> 82.0 seed 0
task ('_EVENT', 'UCEC') ==> 82.0 seed 1
task ('_EVENT', 'UCEC') ==> 80.0 seed 2
task ('_EVENT', 'UCEC') ==> 80.0 seed 3
Average task performance for weight decay 0.005: [74.0, 57.0, 68.5, 55.5, 59.5, 81.0]
task ('_PANCAN_DNAMethyl_LUSC', 'L

In [None]:
#for lr in [0.000001,0.00001,0.0001,0.001,0.01]:
#    for seed in [0,1]:
#        model = models.mlp.MLP(name="MLP_lay2_chan512",
                               num_layer=1, 
                               channels=256, 
                               lr=lr,
                               patience=50,
                               cuda=True,
                               metric=sklearn.metrics.accuracy_score,
                               verbose=False,
                               seed=seed)

#        model.fit(X_train, y_train)
#
#        y_valid_pred = model.predict(X_valid)
#        print(seed, lr, sklearn.metrics.accuracy_score(y_valid, np.argmax(y_valid_pred,axis=1)))