# Paper Numerical Results

Step 1: Install Dask and the related tooling (follow the instructions in `Dask.ipynb`).

Step 2: Launch the Dask cluster on EC2 with `dask-ec2`:
 
    dask-ec2 up --keyname research --keypair .ssh/research.pem --region-name eu-west-1 --ami ami-d37961b5 --tags research:dp --count 4 --volume-size 30 --type c4.8xlarge

Step 3: On the scheduler, run:

    pip install git+https://github.com/lionfish0/dask_dp4gp.git
    
Step 4: On the scheduler, run this notebook. In particular, run the following cell once; it installs the package on every worker and on the scheduler:

In [None]:
import os

from dask.distributed import Client

# Connect to the scheduler in this cell so that it works on a fresh kernel.
# `client` was previously only created in a later cell, so running the
# notebook from top to bottom raised NameError here.
client = Client('127.0.0.1:8786')

# Shell commands to execute on every node of the cluster.
runlist = ['pip install git+https://github.com/lionfish0/dask_dp4gp.git']

for item in runlist:
    print("Installing '%s' on workers..." % item)
    # client.run returns a {worker_address: return_value} dict; os.system
    # returns the command's exit status, so non-zero means the command failed.
    worker_status = client.run(os.system, item)
    failed_workers = [addr for addr, status in worker_status.items() if status != 0]
    if failed_workers:
        print("WARNING: '%s' failed on workers: %s" % (item, failed_workers))
    print("Installing '%s' on scheduler..." % item)
    scheduler_status = client.run_on_scheduler(os.system, item)
    if scheduler_status != 0:
        print("WARNING: '%s' failed on scheduler (exit status %s)" % (item, scheduler_status))
    #os.system(item) #if you need to install it locally too

In [None]:
import dask_dp4gp

# Calls the dask_dp4gp helper; judging by its name it installs the required
# libraries on the Dask workers - TODO confirm against the dask_dp4gp source.
dask_dp4gp.install_libraries_on_workers()

Next, run the following analysis:

In [1]:
from dp4gp.utils import dp_normalise, dp_unnormalise
from dp4gp import datasets
from dask.distributed import Client
from sklearn.model_selection import KFold
import dask_dp4gp
import numpy as np

# Connect to the Dask scheduler listening on this machine.
client = Client('127.0.0.1:8786')

#### Set up data and parameter search grid
kung = datasets.load_kung()
sensitivity = 100.0
# Only rows whose column 3 equals 0 are used: y is taken from column 0
# (normalised by dp_normalise) and X from columns 1-2.
keep_rows = kung[:,3]==0
y,ac_sens,norm_params = dp_normalise(kung[keep_rows,0:1],sensitivity)
X = kung[keep_rows,1:3]

# TODO: epsilon and delta are not used anywhere yet - they should be moved
# into p_grid so that they actually take effect.
epsilon = 1.0
delta = 0.01
cv = 3        # number of outer folds
inner_cv = 5  # fold count handed to getprobabilities for each outer training split
# Candidate hyperparameters, as powers of 5 ('noisevariance' is not searched yet).
p_grid = {
    "lengthscale": list(5.0**np.arange(0,2)),
    'variance': list(5.0**np.arange(-1,1)),
}

#### Get the -RMSE for each fold/param-combo
scores = dask_dp4gp.getscores(X,y,p_grid,cv,ac_sens)

# Collect the getprobabilities result for the training part of every outer fold.
kf = KFold(n_splits=cv)
probabilities = []
for train_index, test_index in kf.split(X):
    X_train, y_train = X[train_index], y[train_index]
    fold_probabilities = dask_dp4gp.getprobabilities(X_train,y_train,p_grid,inner_cv,ac_sens)
    probabilities.append(fold_probabilities)

# Explicit array form of the list so the element-wise product below is unambiguous.
probability_array = np.array(probabilities)
negated_scores = -scores
print(probability_array)
print(scores)
print(np.sum(probability_array*negated_scores,1))
print(np.mean(negated_scores,1))

[[ 0.0042492   0.02870927  0.2367837   0.69135983]
 [ 0.02662131  0.20103365  0.24542001  0.69299734]
 [ 0.048709    0.43837831  0.33126626  0.84603282]
 [ 0.08270285  0.48933589  0.38952283  1.31397035]
 [ 0.19239383  1.25068521  0.42003588  1.81254158]]
[[  1.04379140e-03   9.39256749e-03   1.46038836e-01   5.91199594e-01]
 [  3.18851435e-02   2.20208406e-01   2.07628670e-01   6.29649828e-01]
 [  4.04117234e-02   3.34054795e-01   2.47510204e-01   7.13719276e-01]
 [  8.87866119e-02   7.84130685e-01   2.83409834e-01   1.04308255e+00]
 [  2.12443756e-01   1.12550033e+00   3.40077077e-01   1.86990344e+00]]
[[  2.48378852e-04   1.82724148e-03   1.72865198e-01   5.04249809e-01]
 [  1.66914053e-02   1.26795300e-01   2.67845278e-01   6.64986382e-01]
 [  2.03271379e-02   1.66820512e-01   3.02653537e-01   7.56399727e-01]
 [  5.73889258e-02   3.80958199e-01   3.02935611e-01   9.89294247e-01]
 [  1.88143169e-01   1.21890455e+00   5.18963556e-01   2.39145729e+00]]
[[ 0.24855971  0.24396606  0.290