# With 10 - fold - cross - validation

In [1]:
import numpy as np
from numpy import random
import pandas as pd

from sklearn.model_selection import KFold
import torch.utils.data
import matplotlib
import os
import sys
matplotlib.use('Agg')

import aitac
import plot_utils

In [2]:
#create output directory
output_file_path = "../outputs/valid10x10/"
directory = os.path.dirname(output_file_path)
if not os.path.exists(directory):
    print("Creating directory %s" % output_file_path)
    os.makedirs(directory)
else:
     print("Directory %s exists" % output_file_path)

Directory ../outputs/valid10x10/ exists


In [3]:
# Device configuration
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')


In [4]:
print(device)

cpu


In [5]:
# Hyper parameters
num_epochs = 10
num_classes = 141
batch_size = 10
learning_rate = 0.001
num_filters = 300
run_num = 'first'


In [6]:
# Load all data
x = np.load('../BRCA_data/mini_sample_one_hot_seqs.npy')
x = x.astype(np.float32)
y = np.load('../BRCA_data/mini_sample_cell_type_array.npy')
y = y.astype(np.float32)
peak_names = np.load('../BRCA_data/mini_sample_peak_names.npy')


In [7]:
def cross_validate(x, y, peak_names, output_file_path):
    kf = KFold(n_splits=10, shuffle=True, random_state = 123)

    pred_all = []
    corr_all = []
    peak_order = []
    for train_index, test_index in kf.split(x):
        train_data, eval_data = x[train_index, :, :], x[test_index, :, :]
        train_labels, eval_labels = y[train_index, :], y[test_index, :]
        train_names, eval_name = peak_names[train_index], peak_names[test_index]

        # Data loader
        train_dataset = torch.utils.data.TensorDataset(torch.from_numpy(train_data), torch.from_numpy(train_labels))
        print(train_dataset)
        train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=False, num_workers=0)
        print(train_loader)
        
        eval_dataset = torch.utils.data.TensorDataset(torch.from_numpy(eval_data), torch.from_numpy(eval_labels))
        eval_loader = torch.utils.data.DataLoader(dataset=eval_dataset, batch_size=batch_size, shuffle=False, num_workers=0)


        # create model 
        model = aitac.ConvNet(num_classes, num_filters).to(device)

        # Loss and optimizer
        criterion = aitac.pearson_loss
        optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

        # train model
        '''
        model, best_loss = aitac.train_model(train_loader, eval_loader, model, device, criterion,  optimizer, num_epochs, output_file_path)

        # Predict on test set
        predictions, max_activations, max_act_index = aitac.test_model(eval_loader, model, device)
        
        # plot the correlations histogram
        correlations = plot_utils.plot_cors(eval_labels, predictions, output_file_path)

        pred_all.append(predictions)
        corr_all.append(correlations)
        peak_order.append(eval_name)
    
    pred_all = np.vstack(pred_all)
    corr_all = np.hstack(corr_all)
    peak_order = np.hstack(peak_order)

    return pred_all, corr_all, peak_order
    '''



In [8]:
cross_validate(x, y, peak_names, output_file_path) # 12' | 17.34

<torch.utils.data.dataset.TensorDataset object at 0x7f5794507280>
<torch.utils.data.dataloader.DataLoader object at 0x7f5710998c70>
<torch.utils.data.dataset.TensorDataset object at 0x7f5710998b80>
<torch.utils.data.dataloader.DataLoader object at 0x7f5711047100>
<torch.utils.data.dataset.TensorDataset object at 0x7f5794494520>
<torch.utils.data.dataloader.DataLoader object at 0x7f5710998d00>
<torch.utils.data.dataset.TensorDataset object at 0x7f5794494400>
<torch.utils.data.dataloader.DataLoader object at 0x7f57944f7400>
<torch.utils.data.dataset.TensorDataset object at 0x7f57944f7040>
<torch.utils.data.dataloader.DataLoader object at 0x7f57109987f0>
<torch.utils.data.dataset.TensorDataset object at 0x7f5710998c40>
<torch.utils.data.dataloader.DataLoader object at 0x7f5710998c70>
<torch.utils.data.dataset.TensorDataset object at 0x7f57944fce80>
<torch.utils.data.dataloader.DataLoader object at 0x7f5711047340>
<torch.utils.data.dataset.TensorDataset object at 0x7f57944fcdc0>
<torch.uti

In [7]:
predictions, correlations, peak_order = cross_validate(x, y, peak_names, output_file_path) # 12' | 17.34

None
None
None
None
None
None
None
None
None
None


TypeError: cannot unpack non-iterable NoneType object

In [10]:
print(torch.utils.data.get_worker_info())

None


In [8]:
np.save(output_file_path + "predictions_trial" + run_num + ".npy", predictions)
np.save(output_file_path + "correlations_trial" + run_num + ".npy", correlations)
np.save(output_file_path + "peak_order" + run_num + ".npy", peak_order)

In [9]:
pre_tr = np.load(output_file_path + "predictions_trial" + run_num + ".npy")


In [10]:
print(pre_tr.shape)
print(pre_tr)

(100, 141)
[[-2.1759357  -2.2475684   1.3170478  ...  0.92555666  0.48664138
   0.4101336 ]
 [-3.8181725  -4.0997725   1.5997989  ...  1.3199861   1.0771232
   1.0127945 ]
 [-3.7817397  -3.943818    1.4862937  ...  1.2669295   1.1656965
   1.1460701 ]
 ...
 [-0.5498084  -0.66493     0.09440688 ...  0.28931335  0.2036487
   0.19308531]
 [-0.16605252 -0.2045662   0.10259235 ...  0.16371793 -0.03135055
  -0.03287304]
 [-0.14722002 -0.13741028  0.21233973 ...  0.02480217 -0.13214831
  -0.10751709]]


In [11]:
cor = np.load(output_file_path + "correlations_trial" + run_num + ".npy")

In [17]:
print(cor.shape)
print(cor)

(100,)
[ 0.11152916  0.20347479  0.7627214   0.64401011  0.74574792  0.67680965
  0.52909116  0.52269603  0.434583    0.6932973   0.14449857 -0.02118328
  0.1050344   0.47564544  0.59199979  0.64276615  0.39676618  0.59357292
  0.57092798  0.30739773 -0.27065144  0.60161778  0.64359082  0.51851467
  0.63183773  0.70501281  0.44582928  0.6512335   0.64564099  0.40760471
  0.55978016  0.5353844   0.73902046  0.67735676  0.74258931  0.63297987
  0.65288257  0.46116633  0.53141075  0.4925484   0.61032634  0.34804016
  0.27505474  0.54817038  0.48851004  0.44129074  0.44406787  0.68675175
  0.67973497  0.36224065  0.05119919  0.75699778  0.5358313   0.55007052
  0.55506001  0.75840302  0.73609826  0.46285019  0.56945155  0.71048137
  0.54536291  0.45592709  0.597265    0.7419421   0.68726956  0.656477
  0.30166545  0.51630278  0.61866179  0.48581838 -0.119609    0.62682722
  0.38330817  0.55082049  0.55513511  0.6083669   0.5360829   0.67323939
  0.59680607  0.22580315 -0.01939756  0.661239