In [1]:
1*2

2

In [2]:
import sys
sys.path.insert(1, '../')

import os
import GPUtil
import importlib

def sgpu():
    GPUtil.showUtilization()

def rl(module):
    importlib.reload(module)

In [3]:
sgpu()

| ID | GPU | MEM |
------------------
|  0 |  0% |  0% |
|  1 |  0% |  0% |
|  2 |  0% |  0% |
|  3 |  0% |  0% |
|  4 |  0% |  0% |
|  5 |  0% |  0% |
|  6 |  0% |  0% |
|  7 |  0% |  0% |


In [4]:
import pandas as pd
import os
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.pyplot import figure

import torch
import gpytorch

from scipy.stats import pearsonr
from scipy.special import binom as binom
from sklearn.metrics import r2_score as r2
from sklearn.metrics import mean_squared_error as mse
from sklearn.model_selection import train_test_split

In [5]:
import EpiK.models as models

In [6]:
output_device = 0
n_devices = torch.cuda.device_count()
models.set_params(output_device, n_devices)
print("number of GPUs = {}; output device = {}".
      format(n_devices, torch.cuda.current_device()))

number of GPUs = 8; output device = 0


In [7]:
from EpiK.functions import get_data, get_envs, set_data_path
set_data_path("../matsui_data/")

### Data

In [8]:
env_list = get_envs()
env = env_list[5]

In [9]:
geno_t, pheno = get_data(env)

  geno_t = torch.tensor(geno_t, dtype=torch.float)


In [10]:
inds_sub = np.where(np.array(pheno.pheno < -0.6) == False)[0]

### Get R2 curve

In [11]:
import EpiK.functions
rl(EpiK.functions)
from EpiK.functions import train_model_cv

EpiK.functions.output_device = output_device
EpiK.functions.n_devices = n_devices

In [12]:
# training sizes

props = [.01, .05, .1, .2, .3, .4, .5, .6, .7, .8, .9, .99]

In [13]:
# check_point sizes

partitions = [2, 2, 2, 2, 4, 4, 4, 4, 16, 32, 64, 80]

In [14]:
pd.DataFrame({"props":props, "partitions":partitions})

Unnamed: 0,props,partitions
0,0.01,2
1,0.05,2
2,0.1,2
3,0.2,2
4,0.3,4
5,0.4,4
6,0.5,4
7,0.6,4
8,0.7,16
9,0.8,32


In [15]:
r2_score_list = []

In [16]:
from EpiK.kernels import DiKernel
ker = DiKernel()
ker.raw_lda = torch.nn.Parameter(torch.tensor(-8.))
ker.raw_eta = torch.nn.Parameter(torch.tensor(-12.))

In [None]:
for i in range(11, len(props)):
    print("working on training proportion %f"%round(props[i],2))

    # define training data
    np.random.seed(100)
    train_size = np.round(props[i]*len(inds_sub)).astype('int')
    sub = np.random.choice(inds_sub, train_size)
    sub_t = np.random.choice(list(set(inds_sub).difference(sub)), 4000)
    train_x = geno_t[sub]
    train_y = torch.tensor(np.array(pheno.pheno[sub]), dtype=torch.float32)
    test_x = geno_t[sub_t]
    test_y = torch.tensor(np.array(pheno.pheno[sub_t]), dtype=torch.float32)
    train_x, train_y = train_x.contiguous(), train_y.contiguous()
    test_x, test_y = test_x.contiguous(), test_y.contiguous()
    train_x, train_y = train_x.to(output_device), train_y.to(output_device)
    test_x, test_y = test_x.to(output_device), test_y.to(output_device)

    # train kernel
    ker, likelihood = train_model_cv(ker, train_x, train_y, 50, .1)

    print("raw_lda = %f"%round(ker.state_dict()['raw_lda'].item(),3), "and", "raw_eta = %f"%round(ker.state_dict()['raw_eta'].item(),3))

    torch.cuda.empty_cache()

    model = models.ExactGPModel(train_x, train_y, likelihood, ker).to(output_device)

    test_x = test_x.cuda()
    model.eval()
    likelihood.eval()

    with gpytorch.beta_features.checkpoint_kernel(train_x.shape[0]//partitions[i]):
        f_preds = model(test_x)

    f_mean = f_preds.mean.cpu().detach().numpy()
    y_test = test_y.detach().cpu().numpy()
    r2_score = r2(y_test, f_mean)
    print('training size = %i'%train_size,'; R2 = %f'%r2_score)
    r2_score_list.append(r2_score)

    del model, likelihood, f_preds
    torch.cuda.empty_cache()

    sgpu() 

working on training proportion 0.990000
working on iteration 0
working on iteration 10
working on iteration 20
working on iteration 30
working on iteration 40
raw_lda = -7.717000 and raw_eta = -10.184000




In [None]:
r2_score_list = [r2_score]
dic = {"tr_prop":0.99, "r2_score":r2_score}

import pandas as pd
table = pd.DataFrame(dic)
table.to_csv("~/r2_epik_.99_220806.csv", header=True, index=None)

In [None]:
r2_list = {}
r2_list[props[i]] = r2_score

In [None]:
r2_score

In [19]:
r2_score

0.6501406457842309

In [36]:
r2_list = np.zeros(len(props))

In [39]:
r2_list[:len(r2_score_list)] = r2_score_list

In [40]:
results = pd.DataFrame({"props":props, "train_size":np.round(geno_t.shape[0]*np.array(props)).astype("int"), "r2":r2_list})

results.to_csv("r2s_epik.csv", index=None)

In [21]:
torch.cuda.empty_cache()

In [22]:
sgpu()

| ID | GPU | MEM  |
-------------------
|  0 |  0% | 100% |
|  1 |  0% |  19% |
|  2 |  0% |  19% |
|  3 |  0% |  19% |
|  4 |  0% |  19% |
|  5 |  0% |  19% |
|  6 |  0% |  19% |
|  7 |  0% |  19% |


### Without loop

In [24]:
# i=10

# # define training data
# np.random.seed(100)
# train_size = np.round(props[i]*len(inds_sub)).astype('int')
# sub = np.random.choice(inds_sub, train_size)
# sub_t = np.random.choice(list(set(inds_sub).difference(sub)), 5000)
# train_x = geno_t[sub]
# train_y = torch.tensor(np.array(pheno.pheno[sub]), dtype=torch.float32)
# test_x = geno_t[sub_t]
# test_y = torch.tensor(np.array(pheno.pheno[sub_t]), dtype=torch.float32)
# train_x, train_y = train_x.contiguous(), train_y.contiguous()
# test_x, test_y = test_x.contiguous(), test_y.contiguous()
# train_x, train_y = train_x.to(output_device), train_y.to(output_device)
# test_x, test_y = test_x.to(output_device), test_y.to(output_device)

# # # train kernel
# # ker, likelihood = train_model_cv(ker, train_x, train_y, 50, .1)

# # print("raw_lda = %f"%round(ker.state_dict()['raw_lda'].item(),3), "and", "raw_eta = %f"%round(ker.state_dict()['raw_eta'].item(),3))


# sgpu()

# likelihood = gpytorch.likelihoods.GaussianLikelihood().to(output_device)
# model = models.ExactGPModel(train_x, train_y, likelihood, ker).to(output_device)


# test_x = test_x.cuda()
# model.eval()
# likelihood.eval()

# with gpytorch.beta_features.checkpoint_kernel(train_x.shape[0]//50):
#     f_preds = model(test_x)

# f_mean = f_preds.mean.cpu().detach().numpy()
# y_test = test_y.detach().cpu().numpy()
# r2_score = r2(y_test, f_mean)
# print('training size = %i'%train_size,'; R2 = %f'%r2_score)
# r2_score_list.append(r2_score)

# del model, likelihood, f_preds
# torch.cuda.empty_cache()

# sgpu() 