### Additive + Dominant

In [65]:
torch.tensor(float('inf'))

tensor(inf)

In [69]:
parameter=torch.nn.Parameter(torch.zeros(1, 1, 1))

In [70]:
parameter

Parameter containing:
tensor([[[0.]]], requires_grad=True)

In [None]:
from gpytorch.constraints import Positive
from gpytorch.constraints import LessThan

class K11(Kernel):
    """Diploid kernel"""

    is_stationary = True

    # We will register the parameter when initializing the kernel
    def __init__(self, 
                par_prior=None, par_constraint=None, 
                **kwargs):
      super().__init__(**kwargs)

      # register the raw parameter
      self.register_parameter(
          name='raw_par', 
          parameter=torch.nn.Parameter(torch.zeros(*self.batch_shape, 2))
      )

      # set the parameter constraint to be positive, when nothing is specified
      if par_constraint is None:
          par_constraint = LessThan(upper_bound=0.)

      # register the constraint
      self.register_constraint("raw_par", par_constraint)


    # now set up the 'actual' paramter
    @property
    def par(self):
      # when accessing the parameter, apply the constraint transform
      return self.raw_par_constraint.transform(self.raw_par)
    @par.setter
    def par(self, value):
      return self._set_par(value)


    def forward(self, geno1, geno2, **params):
        L = geno1.shape[1]
        geno1_ht = 1.*(geno1 == 1.)
        geno2_ht = 1.*(geno2 == 1.)        
        geno1_h0 = 1.*(geno1 == 0.)
        geno1_h1 = 1.*(geno1 == 2.)
        geno2_h0 = 1.*(geno2 == 0.)
        geno2_h1 = 1.*(geno2 == 2.)

        S1 = self.covar_dist(geno1_ht, geno2_ht, **params)
        S2 = self.covar_dist(geno1_h0, geno2_h0, **params) + self.covar_dist(geno1_h1, geno2_h1, **params)
        D2 = self.covar_dist(geno1_h0, geno2_h1, **params) + self.covar_dist(geno1_h1, geno2_h0, **params)
        D1 = L - S1 - S2 - D2


        return par[0]*k_1_0(S1, S2, D1, D2) + par[1]*k_0_1(S1, S2, D1, D2)


In [None]:
def k_1_1(S1, S2, D2, L):
    D1 = L - S1 - S2 - D2
    return 2*(1 + 2*D1 -L)*(D2 - S2)


In [None]:
def k_1_0(S1, S2, D1, D2):
    return -2*D2 + 2*S2

def k_0_1(S1, S2, D1, D2):
    return -2*D1 + L

def k_2_0(S1, S2, D1, D2):
    return 2*(D2 - 1) - 4*D2*S2 + 2*(S2 -1)*S2

def k_1_1(S1, S2, D1, D2):
    return 2*(1 + 2*D1 -L)*(D2 - S2)

def k_0_2(S1, S2, D1, D2):
    return 2*D1**2 - 2*L*D1 + .5*(L-1)*L

### Pairwise Epistatic

### Pairwise Epistatic + Dominant

### All epistatic

### All dominant

### Low order + All order

In [1]:
import pandas as pd
import os
import gc
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.pyplot import figure
import GPUtil

import torch
import gpytorch

from scipy.stats import pearsonr
from scipy.special import binom as binom
from sklearn.metrics import r2_score as r2
from sklearn.metrics import mean_squared_error as mse
from sklearn.model_selection import train_test_split

In [2]:
import sys
sys.path.insert(1, '../')
import EpiK.models as models

In [3]:
output_device = 0
n_devices = torch.cuda.device_count()
models.set_params(output_device, n_devices)
print("number of GPUs = {}; output device = {}".
      format(n_devices, torch.cuda.current_device()))

number of GPUs = 8; output device = 0


In [4]:
from EpiK.functions import get_data, get_envs, set_data_path
set_data_path("../matsui_data/")

In [5]:
# training sizes
props = [.01, .05, .1, .2, .3, .4, .5, .6, .7, .8, .9, .95]

In [6]:
# check_point sizes
partitions = [2, 2, 2, 2, 4, 4, 4, 4, 16, 32, 64, 120]
pd.DataFrame({"props":props, "partitions":partitions}).to_csv("partition_sizes.csv", index=None)

In [7]:
partition_sizes = pd.read_csv("partition_sizes.csv")

### Data

In [9]:
env_list = get_envs()
env = env_list[5]

In [10]:
geno_t, pheno = get_data(env)

  geno_t = torch.tensor(geno_t, dtype=torch.float)


In [11]:
inds_sub = np.where(np.array(pheno.pheno < -0.6) == False)[0]

In [12]:
GPUtil.showUtilization()

| ID | GPU | MEM |
------------------
|  0 |  0% | 22% |
|  1 |  0% |  0% |
|  2 |  0% |  0% |
|  3 |  0% |  0% |
|  4 |  0% |  0% |
|  5 |  0% |  0% |
|  6 |  0% |  0% |
|  7 |  0% |  0% |


In [31]:
def get_train_test(sub, sub_t):
    train_x = geno_t[sub]
    train_y = torch.tensor(np.array(pheno.pheno[sub]), dtype=torch.float32)
    test_x = geno_t[sub_t]
    test_y = torch.tensor(np.array(pheno.pheno[sub_t]), dtype=torch.float32)
    train_x, train_y = train_x.contiguous(), train_y.contiguous()
    test_x, test_y = test_x.contiguous(), test_y.contiguous()
    train_x, train_y = train_x.to(output_device), train_y.to(output_device)
    test_x, test_y = test_x.to(output_device), test_y.to(output_device)
    return train_x, test_x, train_y, test_y

In [32]:
i=0 
np.random.seed(100)
train_size = np.round(props[i]*len(inds_sub)).astype('int')
sub = np.random.choice(inds_sub, train_size)
sub_t = np.random.choice(list(set(inds_sub).difference(sub)), 2000)

In [33]:
train_x, test_x, train_y, test_y = get_train_test(sub, sub_t)

### Train model

In [36]:
import EpiK.functions
from EpiK.functions import train_model_cv

EpiK.functions.output_device = output_device
EpiK.functions.n_devices = n_devices

In [46]:
# define kernel

from EpiK.kernels import DiKernel
ker = DiKernel()
ker.raw_lda = torch.nn.Parameter(torch.tensor(-8.))
ker.raw_eta = torch.nn.Parameter(torch.tensor(-12.))

In [55]:
# define kernel

from EpiK.kernels import RBFKernel
ker = RBFKernel()

EpiK.kernels.L = geno_t.shape[1]
# ls = torch.tensor([[96.04]]).to(output_device)
# ker = gpytorch.kernels.RBFKernel().to(output_device)

# ker.lengthscale = ls

In [59]:
# define kernel

from EpiK.kernels import LinKernel
ker = LinKernel()

In [None]:
# train model
print("training GP model using CV")
ker, likelihood = train_model_cv(ker, train_x, train_y, 50, .1)

In [61]:
# make predictions - build model
torch.cuda.empty_cache()
model = models.ExactGPModel(train_x, train_y, likelihood, ker).to(output_device)

In [62]:
partition_size = partition_sizes.partitions[i]

model.eval()
likelihood.eval()
with gpytorch.beta_features.checkpoint_kernel(train_x.shape[0]//int(partition_size)):
    f_preds = model(test_x)

f_mean = f_preds.mean.cpu().detach().numpy()
y_test = test_y.detach().cpu().numpy()
r2_score = r2(y_test, f_mean)                
print(r2_score)



-4023169547572.15


In [64]:
def k_1_0(S1, S2, D1, D2):
    return -2*D2 + 2*S2

def k_0_1(S1, S2, D1, D2):
    return -2*D1 + L

def k_2_0(S1, S2, D1, D2):
    return 2*(d2 - 1) - 4*d2*s2 + 2*(s2 -1)*s2

def k_1_1(S1, S2, D1, D2):
    return 2*(1 + 2*d1 -L)*(D2 - S2)

def k_0_2(S1, S2, D1, D2):
    return 2*d1**2 - 2*L*d1 + .5*(L-1)*L