In [4]:
import matplotlib.pyplot as plt

## SVR

In [10]:
%%writefile ../scripts/svr.py
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVR
import numpy as np
import pandas as pd
import sys
import os

# Fit a support-vector regressor for one (fold, f_id) split and store the
# test-set predictions together with the fitted grid search.
#
# Usage: python svr.py <path> <fold> <f_id>
#   path -- root directory containing the `data/` and `results/` trees
#   fold -- fold identifier used in the `fold_<fold>` directory names
#   f_id -- base name (no extension) of the .npy / .pickle files to use
if len(sys.argv) < 4:
    sys.exit('usage: python {} <path> <fold> <f_id>'.format(sys.argv[0]))

# Hyper-parameter grid searched with 5-fold cross-validation; refit=True makes
# the search refit the best configuration on the full training set.
C = (1, 5, 10, 20, 50)
kernel = ('linear', 'rbf', 'sigmoid')
param_grid = {'C': C, 'kernel': kernel}
svr = SVR()
cvmodel = GridSearchCV(svr, param_grid, refit=True, cv=5)

# The results sub-directory is named after this script (file name, no extension).
model_name = os.path.splitext(os.path.basename(sys.argv[0]))[0]
path = sys.argv[1]
fold = sys.argv[2]
f_id = sys.argv[3]

# os.path.join keeps the paths valid whether or not `path` ends with a separator.
data_dir = os.path.join(path, 'data', 'fold_' + fold)
out_dir = os.path.join(path, 'results', model_name, 'fold_' + fold)

trn_X = np.load(os.path.join(data_dir, 'train', 'X', f_id + '.npy'))
trn_y = np.load(os.path.join(data_dir, 'train', 'y', f_id + '.npy'))
tst_X = np.load(os.path.join(data_dir, 'test', 'X', f_id + '.npy'))
# NOTE(review): unpickling executes arbitrary code; only load scaler files
# produced by this project's own preprocessing.
scaler = pd.read_pickle(os.path.join(data_dir, 'scaler', f_id + '.pickle'))

cvmodel.fit(trn_X, trn_y.ravel())

# predict() returns a 1-D array.
# NOTE(review): the pickled scaler is assumed to be a scikit-learn style
# transformer fitted on a single target column -- confirm. Such transformers
# expect (n_samples, n_features) input, so the prediction is passed as a column
# and flattened again so the saved array stays 1-D.
scaled_pred = cvmodel.predict(tst_X)
pred_y = scaler.inverse_transform(scaled_pred.reshape(-1, 1)).ravel()

# exist_ok avoids the check-then-create race when several jobs start at once.
os.makedirs(out_dir, exist_ok=True)
np.save(os.path.join(out_dir, f_id + '.npy'), pred_y)
pd.to_pickle(cvmodel, os.path.join(out_dir, f_id + '.model'))

Overwriting ../scripts/svr.py


## Random Forest

In [3]:
%%writefile ../scripts/rf.py
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestRegressor
import numpy as np
import pandas as pd
import sys
import os

# Fit a random-forest regressor for one (fold, f_id) split and store the
# test-set predictions together with the fitted grid search.
#
# Usage: python rf.py <path> <fold> <f_id>
#   path -- root directory containing the `data/` and `results/` trees
#   fold -- fold identifier used in the `fold_<fold>` directory names
#   f_id -- base name (no extension) of the .npy / .pickle files to use
if len(sys.argv) < 4:
    sys.exit('usage: python {} <path> <fold> <f_id>'.format(sys.argv[0]))

# Hyper-parameter grid searched with 5-fold cross-validation; refit=True makes
# the search refit the best configuration on the full training set.
param_grid = {'n_estimators': [50, 100, 150, 200],
              'max_depth': [1, 2, 3, 4, 5, 20],
              'max_features': [1, 2, 3, 4, 5, 6]}
rf = RandomForestRegressor(random_state=0)  # fixed seed for repeatable forests

cvmodel = GridSearchCV(rf, param_grid, refit=True, cv=5)

# The results sub-directory is named after this script (file name, no extension).
model_name = os.path.splitext(os.path.basename(sys.argv[0]))[0]
path = sys.argv[1]
fold = sys.argv[2]
f_id = sys.argv[3]

# os.path.join keeps the paths valid whether or not `path` ends with a separator.
data_dir = os.path.join(path, 'data', 'fold_' + fold)
out_dir = os.path.join(path, 'results', model_name, 'fold_' + fold)

trn_X = np.load(os.path.join(data_dir, 'train', 'X', f_id + '.npy'))
trn_y = np.load(os.path.join(data_dir, 'train', 'y', f_id + '.npy'))
tst_X = np.load(os.path.join(data_dir, 'test', 'X', f_id + '.npy'))

# NOTE(review): unpickling executes arbitrary code; only load scaler files
# produced by this project's own preprocessing.
scaler = pd.read_pickle(os.path.join(data_dir, 'scaler', f_id + '.pickle'))

cvmodel.fit(trn_X, trn_y.ravel())

# predict() returns a 1-D array.
# NOTE(review): the pickled scaler is assumed to be a scikit-learn style
# transformer fitted on a single target column -- confirm. Such transformers
# expect (n_samples, n_features) input, so the prediction is passed as a column
# and flattened again so the saved array stays 1-D.
scaled_pred = cvmodel.predict(tst_X)
pred_y = scaler.inverse_transform(scaled_pred.reshape(-1, 1)).ravel()

# exist_ok avoids the check-then-create race when several jobs start at once.
os.makedirs(out_dir, exist_ok=True)

np.save(os.path.join(out_dir, f_id + '.npy'), pred_y)
pd.to_pickle(cvmodel, os.path.join(out_dir, f_id + '.model'))

Writing ../scripts/rf.py


## Decision tree

In [13]:
%%writefile ../scripts/dt.py
from sklearn.model_selection import GridSearchCV
from sklearn.tree import DecisionTreeRegressor
import numpy as np
import pandas as pd
import sys
import os

# Fit a decision-tree regressor for one (fold, f_id) split and store the
# test-set predictions together with the fitted grid search.
#
# Usage: python dt.py <path> <fold> <f_id>
#   path -- root directory containing the `data/` and `results/` trees
#   fold -- fold identifier used in the `fold_<fold>` directory names
#   f_id -- base name (no extension) of the .npy / .pickle files to use
if len(sys.argv) < 4:
    sys.exit('usage: python {} <path> <fold> <f_id>'.format(sys.argv[0]))

# Hyper-parameter grid searched with 5-fold cross-validation; refit=True makes
# the search refit the best configuration on the full training set.
param_grid = {'max_depth': [1, 2, 3, 4, 5, 20],
              'max_features': [1, 2, 3, 4, 5, 6]}
dt = DecisionTreeRegressor(random_state=0)  # fixed seed for repeatable trees

cvmodel = GridSearchCV(dt, param_grid, refit=True, cv=5)

# The results sub-directory is named after this script (file name, no extension).
model_name = os.path.splitext(os.path.basename(sys.argv[0]))[0]
path = sys.argv[1]
fold = sys.argv[2]
f_id = sys.argv[3]

# os.path.join keeps the paths valid whether or not `path` ends with a separator.
data_dir = os.path.join(path, 'data', 'fold_' + fold)
out_dir = os.path.join(path, 'results', model_name, 'fold_' + fold)

trn_X = np.load(os.path.join(data_dir, 'train', 'X', f_id + '.npy'))
trn_y = np.load(os.path.join(data_dir, 'train', 'y', f_id + '.npy'))
tst_X = np.load(os.path.join(data_dir, 'test', 'X', f_id + '.npy'))

# NOTE(review): unpickling executes arbitrary code; only load scaler files
# produced by this project's own preprocessing.
scaler = pd.read_pickle(os.path.join(data_dir, 'scaler', f_id + '.pickle'))

cvmodel.fit(trn_X, trn_y.ravel())

# predict() returns a 1-D array.
# NOTE(review): the pickled scaler is assumed to be a scikit-learn style
# transformer fitted on a single target column -- confirm. Such transformers
# expect (n_samples, n_features) input, so the prediction is passed as a column
# and flattened again so the saved array stays 1-D.
scaled_pred = cvmodel.predict(tst_X)
pred_y = scaler.inverse_transform(scaled_pred.reshape(-1, 1)).ravel()

# exist_ok avoids the check-then-create race when several jobs start at once.
os.makedirs(out_dir, exist_ok=True)

np.save(os.path.join(out_dir, f_id + '.npy'), pred_y)
pd.to_pickle(cvmodel, os.path.join(out_dir, f_id + '.model'))

Writing ../scripts/dt.py


## Elastic net

In [11]:
%%writefile ../scripts/elst.py
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import ElasticNet 
import numpy as np
import pandas as pd
import sys
import os

# Fit an elastic-net regressor for one (fold, f_id) split and store the
# test-set predictions together with the fitted grid search.
#
# Usage: python elst.py <path> <fold> <f_id>
#   path -- root directory containing the `data/` and `results/` trees
#   fold -- fold identifier used in the `fold_<fold>` directory names
#   f_id -- base name (no extension) of the .npy / .pickle files to use
if len(sys.argv) < 4:
    sys.exit('usage: python {} <path> <fold> <f_id>'.format(sys.argv[0]))

# Hyper-parameter grid searched with 5-fold cross-validation; refit=True makes
# the search refit the best configuration on the full training set.
param_grid = {'alpha': [0.1, 0.5, 1, 5, 10],
              'l1_ratio': [0.1, 0.3, 0.5, 0.7, 0.9]}
elst = ElasticNet(random_state=0)

cvmodel = GridSearchCV(elst, param_grid, refit=True, cv=5)

# The results sub-directory is named after this script (file name, no extension).
model_name = os.path.splitext(os.path.basename(sys.argv[0]))[0]
path = sys.argv[1]
fold = sys.argv[2]
f_id = sys.argv[3]

# os.path.join keeps the paths valid whether or not `path` ends with a separator.
data_dir = os.path.join(path, 'data', 'fold_' + fold)
out_dir = os.path.join(path, 'results', model_name, 'fold_' + fold)

trn_X = np.load(os.path.join(data_dir, 'train', 'X', f_id + '.npy'))
trn_y = np.load(os.path.join(data_dir, 'train', 'y', f_id + '.npy'))
tst_X = np.load(os.path.join(data_dir, 'test', 'X', f_id + '.npy'))

# NOTE(review): unpickling executes arbitrary code; only load scaler files
# produced by this project's own preprocessing.
scaler = pd.read_pickle(os.path.join(data_dir, 'scaler', f_id + '.pickle'))

cvmodel.fit(trn_X, trn_y.ravel())

# predict() returns a 1-D array.
# NOTE(review): the pickled scaler is assumed to be a scikit-learn style
# transformer fitted on a single target column -- confirm. Such transformers
# expect (n_samples, n_features) input, so the prediction is passed as a column
# and flattened again so the saved array stays 1-D.
scaled_pred = cvmodel.predict(tst_X)
pred_y = scaler.inverse_transform(scaled_pred.reshape(-1, 1)).ravel()

# exist_ok avoids the check-then-create race when several jobs start at once.
os.makedirs(out_dir, exist_ok=True)

np.save(os.path.join(out_dir, f_id + '.npy'), pred_y)
pd.to_pickle(cvmodel, os.path.join(out_dir, f_id + '.model'))

Writing ../scripts/elst.py


## GP - Linear

In [3]:
%%writefile ../scripts/gp_linear.py
from stheno import Measure, GP, Linear, Delta
import numpy as np
import pandas as pd
import sys
import os
# Gaussian-process regression with a Linear kernel for one (fold, f_id) split;
# stores the posterior-mean predictions for the test inputs.
#
# Usage: python gp_linear.py <path> <fold> <f_id>
#   path -- root directory containing the `data/` and `results/` trees
#   fold -- fold identifier used in the `fold_<fold>` directory names
#   f_id -- base name (no extension) of the .npy / .pickle files to use
np.random.seed(0)

if len(sys.argv) < 4:
    sys.exit('usage: python {} <path> <fold> <f_id>'.format(sys.argv[0]))

# The results sub-directory is named after this script (file name, no extension).
model_name = os.path.splitext(os.path.basename(sys.argv[0]))[0]
path = sys.argv[1]
fold = sys.argv[2]
f_id = sys.argv[3]

# os.path.join keeps the paths valid whether or not `path` ends with a separator.
data_dir = os.path.join(path, 'data', 'fold_' + fold)
out_dir = os.path.join(path, 'results', model_name, 'fold_' + fold)

trn_X = np.load(os.path.join(data_dir, 'train', 'X', f_id + '.npy'))
trn_y = np.load(os.path.join(data_dir, 'train', 'y', f_id + '.npy'))
tst_X = np.load(os.path.join(data_dir, 'test', 'X', f_id + '.npy'))

# NOTE(review): unpickling executes arbitrary code; only load scaler files
# produced by this project's own preprocessing.
scaler = pd.read_pickle(os.path.join(data_dir, 'scaler', f_id + '.pickle'))

prior = Measure()                        # Construct a prior.
f1 = GP(Linear(), measure=prior)         # Define our probabilistic model.
# NOTE(review): the Delta() component presumably plays the role of observation
# noise -- confirm against the stheno documentation.
f2 = GP(Delta(), measure=prior)
f = f1 + f2
post = prior | (f(trn_X), trn_y)         # Compute the posterior distribution.
pred = post(f).mean(tst_X).mat           # Posterior mean at the test inputs.

pred_y = scaler.inverse_transform(pred)

# exist_ok avoids the check-then-create race when several jobs start at once.
os.makedirs(out_dir, exist_ok=True)

np.save(os.path.join(out_dir, f_id + '.npy'), pred_y)
# No model file is written: both kernels are default-constructed and nothing in
# this script optimises them, so there are no fitted parameters to persist.

Overwriting ../scripts/gp_linear.py


## GP - RBF

In [4]:
%%writefile ../scripts/gp_rbf.py
from stheno import Measure, GP, EQ, Delta
import numpy as np
import pandas as pd
import sys
import os
# Gaussian-process regression with an EQ kernel for one (fold, f_id) split;
# stores the posterior-mean predictions for the test inputs.
#
# Usage: python gp_rbf.py <path> <fold> <f_id>
#   path -- root directory containing the `data/` and `results/` trees
#   fold -- fold identifier used in the `fold_<fold>` directory names
#   f_id -- base name (no extension) of the .npy / .pickle files to use
np.random.seed(0)

if len(sys.argv) < 4:
    sys.exit('usage: python {} <path> <fold> <f_id>'.format(sys.argv[0]))

# The results sub-directory is named after this script (file name, no extension).
model_name = os.path.splitext(os.path.basename(sys.argv[0]))[0]
path = sys.argv[1]
fold = sys.argv[2]
f_id = sys.argv[3]

# os.path.join keeps the paths valid whether or not `path` ends with a separator.
data_dir = os.path.join(path, 'data', 'fold_' + fold)
out_dir = os.path.join(path, 'results', model_name, 'fold_' + fold)

trn_X = np.load(os.path.join(data_dir, 'train', 'X', f_id + '.npy'))
trn_y = np.load(os.path.join(data_dir, 'train', 'y', f_id + '.npy'))
tst_X = np.load(os.path.join(data_dir, 'test', 'X', f_id + '.npy'))

# NOTE(review): unpickling executes arbitrary code; only load scaler files
# produced by this project's own preprocessing.
scaler = pd.read_pickle(os.path.join(data_dir, 'scaler', f_id + '.pickle'))

prior = Measure()                        # Construct a prior.
f1 = GP(EQ(), measure=prior)             # Define our probabilistic model.
# NOTE(review): the Delta() component presumably plays the role of observation
# noise -- confirm against the stheno documentation.
f2 = GP(Delta(), measure=prior)
f = f1 + f2
post = prior | (f(trn_X), trn_y)         # Compute the posterior distribution.
pred = post(f).mean(tst_X).mat           # Posterior mean at the test inputs.

pred_y = scaler.inverse_transform(pred)

# exist_ok avoids the check-then-create race when several jobs start at once.
os.makedirs(out_dir, exist_ok=True)

np.save(os.path.join(out_dir, f_id + '.npy'), pred_y)
# No model file is written: both kernels are default-constructed and nothing in
# this script optimises them, so there are no fitted parameters to persist.

Overwriting ../scripts/gp_rbf.py


## GP - Matern12

In [5]:
%%writefile ../scripts/gp_m12.py
from stheno import Measure, GP, Matern12, Delta
import numpy as np
import pandas as pd
import sys
import os
# Gaussian-process regression with a Matern12 kernel for one (fold, f_id)
# split; stores the posterior-mean predictions for the test inputs.
#
# Usage: python gp_m12.py <path> <fold> <f_id>
#   path -- root directory containing the `data/` and `results/` trees
#   fold -- fold identifier used in the `fold_<fold>` directory names
#   f_id -- base name (no extension) of the .npy / .pickle files to use
np.random.seed(0)

if len(sys.argv) < 4:
    sys.exit('usage: python {} <path> <fold> <f_id>'.format(sys.argv[0]))

# The results sub-directory is named after this script (file name, no extension).
model_name = os.path.splitext(os.path.basename(sys.argv[0]))[0]
path = sys.argv[1]
fold = sys.argv[2]
f_id = sys.argv[3]

# os.path.join keeps the paths valid whether or not `path` ends with a separator.
data_dir = os.path.join(path, 'data', 'fold_' + fold)
out_dir = os.path.join(path, 'results', model_name, 'fold_' + fold)

trn_X = np.load(os.path.join(data_dir, 'train', 'X', f_id + '.npy'))
trn_y = np.load(os.path.join(data_dir, 'train', 'y', f_id + '.npy'))
tst_X = np.load(os.path.join(data_dir, 'test', 'X', f_id + '.npy'))

# NOTE(review): unpickling executes arbitrary code; only load scaler files
# produced by this project's own preprocessing.
scaler = pd.read_pickle(os.path.join(data_dir, 'scaler', f_id + '.pickle'))

prior = Measure()                        # Construct a prior.
f1 = GP(Matern12(), measure=prior)       # Define our probabilistic model.
# NOTE(review): the Delta() component presumably plays the role of observation
# noise -- confirm against the stheno documentation.
f2 = GP(Delta(), measure=prior)
f = f1 + f2
post = prior | (f(trn_X), trn_y)         # Compute the posterior distribution.
pred = post(f).mean(tst_X).mat           # Posterior mean at the test inputs.

pred_y = scaler.inverse_transform(pred)

# exist_ok avoids the check-then-create race when several jobs start at once.
os.makedirs(out_dir, exist_ok=True)

np.save(os.path.join(out_dir, f_id + '.npy'), pred_y)
# No model file is written: both kernels are default-constructed and nothing in
# this script optimises them, so there are no fitted parameters to persist.

Overwriting ../scripts/gp_m12.py


## GP - Matern32

In [6]:
%%writefile ../scripts/gp_m32.py
from stheno import Measure, GP, Matern32, Delta
import numpy as np
import pandas as pd
import sys
import os
# Gaussian-process regression with a Matern32 kernel for one (fold, f_id)
# split; stores the posterior-mean predictions for the test inputs.
#
# Usage: python gp_m32.py <path> <fold> <f_id>
#   path -- root directory containing the `data/` and `results/` trees
#   fold -- fold identifier used in the `fold_<fold>` directory names
#   f_id -- base name (no extension) of the .npy / .pickle files to use
np.random.seed(0)

if len(sys.argv) < 4:
    sys.exit('usage: python {} <path> <fold> <f_id>'.format(sys.argv[0]))

# The results sub-directory is named after this script (file name, no extension).
model_name = os.path.splitext(os.path.basename(sys.argv[0]))[0]
path = sys.argv[1]
fold = sys.argv[2]
f_id = sys.argv[3]

# os.path.join keeps the paths valid whether or not `path` ends with a separator.
data_dir = os.path.join(path, 'data', 'fold_' + fold)
out_dir = os.path.join(path, 'results', model_name, 'fold_' + fold)

trn_X = np.load(os.path.join(data_dir, 'train', 'X', f_id + '.npy'))
trn_y = np.load(os.path.join(data_dir, 'train', 'y', f_id + '.npy'))
tst_X = np.load(os.path.join(data_dir, 'test', 'X', f_id + '.npy'))

# NOTE(review): unpickling executes arbitrary code; only load scaler files
# produced by this project's own preprocessing.
scaler = pd.read_pickle(os.path.join(data_dir, 'scaler', f_id + '.pickle'))

prior = Measure()                        # Construct a prior.
f1 = GP(Matern32(), measure=prior)       # Define our probabilistic model.
# NOTE(review): the Delta() component presumably plays the role of observation
# noise -- confirm against the stheno documentation.
f2 = GP(Delta(), measure=prior)
f = f1 + f2
post = prior | (f(trn_X), trn_y)         # Compute the posterior distribution.
pred = post(f).mean(tst_X).mat           # Posterior mean at the test inputs.

pred_y = scaler.inverse_transform(pred)

# exist_ok avoids the check-then-create race when several jobs start at once.
os.makedirs(out_dir, exist_ok=True)

np.save(os.path.join(out_dir, f_id + '.npy'), pred_y)
# No model file is written: both kernels are default-constructed and nothing in
# this script optimises them, so there are no fitted parameters to persist.

Overwriting ../scripts/gp_m32.py


## NSGP

In [19]:
%%writefile ../scripts/nsgp_rbf.py
# from stheno import Measure, GP, EQ, Delta
import GPy
from polire.placement.base import Base
from NSGPy.NumPy import LLS
import numpy as np
import pandas as pd
import sys
import os
# Non-stationary GP (NSGPy LLS) regression for one (fold, f_id) split.
#
# An ARD-RBF GPy model is optimised first; its training covariance matrix is
# handed to the polire placement object to pick 20 training indices. LLS models
# are then fitted for n = 2..20 using the first n of those indices, and the one
# with the lowest value under params['likelihood (mll)'] is used to predict.
#
# Usage: python nsgp_rbf.py <path> <fold> <f_id>
#   path -- root directory containing the `data/` and `results/` trees
#   fold -- fold identifier used in the `fold_<fold>` directory names
#   f_id -- base name (no extension) of the .npy / .pickle files to use
np.random.seed(0)

if len(sys.argv) < 4:
    sys.exit('usage: python {} <path> <fold> <f_id>'.format(sys.argv[0]))

# The results sub-directory is named after this script (file name, no extension).
model_name = os.path.splitext(os.path.basename(sys.argv[0]))[0]
path = sys.argv[1]
fold = sys.argv[2]
f_id = sys.argv[3]
# NOTE(review): N_l_bar_max is never read; the search below runs up to
# n_placed. Kept in case it is meant to cap the search -- confirm.
N_l_bar_max = 8
# Number of indices requested from the placement step and upper bound
# (inclusive) of the N_l_bar search.
n_placed = 20

# os.path.join keeps the paths valid whether or not `path` ends with a separator.
data_dir = os.path.join(path, 'data', 'fold_' + fold)
out_dir = os.path.join(path, 'results', model_name, 'fold_' + fold)

trn_X = np.load(os.path.join(data_dir, 'train', 'X', f_id + '.npy'))
trn_y = np.load(os.path.join(data_dir, 'train', 'y', f_id + '.npy'))
tst_X = np.load(os.path.join(data_dir, 'test', 'X', f_id + '.npy'))

# NOTE(review): unpickling executes arbitrary code; only load scaler files
# produced by this project's own preprocessing.
scaler = pd.read_pickle(os.path.join(data_dir, 'scaler', f_id + '.pickle'))

# Stationary reference model whose kernel supplies the covariance for placement.
m = GPy.models.GPRegression(trn_X, trn_y, GPy.kern.RBF(trn_X.shape[1], ARD=True))
m.optimize()
K = m.kern.K(trn_X)

greedy = Base(verbose=False)
greedy.cov_np = K
inds, _ = greedy.place(trn_X, N=n_placed)

best_nlml = np.inf
best_model = None
for n in range(2, n_placed + 1):
    model = LLS(trn_X.shape[1], N_l_bar=n, N_l_bar_method='greedy')
    try:
        model.fit(trn_X, trn_y, n_restarts=2, near_opt_inds=inds[:n])
    except Exception as err:
        # A failed fit only skips this candidate. Catching Exception (not a
        # bare except) lets Ctrl-C and SystemExit still stop the script.
        print('N_l_bar={}: fit failed ({!r}), skipping'.format(n, err),
              file=sys.stderr)
        continue
    nlml = model.params['likelihood (mll)']
    print(nlml)
    if nlml < best_nlml:  # lower is treated as better
        best_nlml = nlml
        best_model = model
#         pd.to_pickle(model.params, path+'results/'+model_name+'/fold_'+fold+'/'+f_id+'.model')

if best_model is None:
    # Every candidate failed to fit; report that instead of an AttributeError
    # on None.predict below.
    raise RuntimeError(
        'no LLS model could be fitted for fold {} / {}'.format(fold, f_id))

pred_y = scaler.inverse_transform(best_model.predict(tst_X)[0])

# exist_ok avoids the check-then-create race when several jobs start at once.
os.makedirs(out_dir, exist_ok=True)

# NOTE(review): saving is disabled, so this script currently writes neither the
# predictions nor the model parameters -- re-enable once intended.
# np.save(path+'results/'+model_name+'/fold_'+fold+'/'+f_id+'.npy', pred_y)

Overwriting ../scripts/nsgp_rbf.py
