### This file predicts cadmium (Cd) using the following stacked GP structure
    ## First layer: has three nodes (one GP per intermediate output)
        # input is location
        # output is Ni, Zn, Co
    ## Second layer:
        # input is Zn, Ni, Co, location
        # output is Cd
### This file is also used to predict cadmium using a single GP

In [79]:
%matplotlib inline 

import sys
sys.path.append('../')
sys.path.append('../../stackedgp_src/')
from __future__ import division
from mpl_toolkits.mplot3d import axes3d
from stackedGPNetwork import StackedGPNetwork
from sklearn.cross_validation import KFold
import GPy, time
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler

## Load Data

In [80]:
#x	y	Landuse	Rock	Cd	Co	Cr	Cu	Ni	Pb	Zn

X = np.genfromtxt('../data/jura_csv/X.txt', delimiter=' ')
Y = np.genfromtxt('../data/jura_csv/Y.txt', delimiter=' ')
Z = np.genfromtxt('../data/jura2.csv', delimiter=',')[:,[5]] # 5 is the index of Co
Y = np.concatenate((Y,Z),axis=1) # add Co to the responses

print 'Min of Y: ', Y.min(axis=0)
Y = np.log(Y)
scaler = StandardScaler()
Y[:259,[0]] = scaler.fit_transform(Y[:259,[0]])
Y[259:,[0]] = scaler.transform(Y[259:,[0]])
print 'normalisation: mean/std',scaler.mean_,scaler.std_

Min of Y:  [  0.135   1.98   25.      1.552]
normalisation: mean/std [ 0.03607936] [ 0.70738224]


## Train Stacked GP

In [81]:
# Fixed split: the first 259 rows are used for training, the rest for testing.
trainX, testX = X[:259, :], X[259:, :]
# Column 0 of Y is the standardised log target of the second layer; the
# remaining columns are the log-transformed intermediate responses that the
# first-layer GPs are trained on.
trainY, testY = Y[:259, 0:1], Y[259:, 0:1]
trainZ, testZ = Y[:259, 1:], Y[259:, 1:]
# Bring the held-out target back to original units: undo the standardisation,
# then undo the log. The scaler was fitted on this single column, so it is
# applied to that column alone instead of to a widened array whose extra
# columns were discarded straight away.
testY = np.exp(scaler.inverse_transform(testY))

In [82]:
stackedNetwork = StackedGPNetwork(2)
no_GPs = trainZ.shape[1]
for i in range(no_GPs):
    stackedNetwork.createNewNode(0,trainX,trainZ[:,i:i+1], normalize=True, ARD = False, useGPU=False)
# stackedNetwork.createNewNode(0,trainX,trainZ[:,1:2], normalize=True, useGPU=False)

traindata = np.concatenate((trainX,trainZ), axis=1)
stackedNetwork.createNewNode(1,traindata,trainY, normalize=True, ARD=True, useGPU=False)


t0= time.clock()
stackedNetwork.optimize(numoptimizationtrails=1)
t= time.clock() - t0 # t is CPU seconds elapsed (floating point)
print 'Training Time = ',t

Training Time =  11.871444


## Evaluate Stacked GP on the Test Set

In [83]:
# Input for the first layer: the test locations repeated side by side, one
# copy per first-layer node (np.tile repeats along the column axis here).
no_GPs = trainZ.shape[1]
fdata = np.tile(testX, no_GPs)
# Predict from the first layer; no input variance is supplied (None).
fmean, fvar = stackedNetwork.predictLayer(0, fdata, None, include_likelihood=True)

# Input for the second layer: test locations followed by the first-layer
# predictive means.
sdata = np.concatenate((testX, fmean), axis=1)
# Matching input variances: zero for the location columns, and the
# first-layer predictive variances for the predicted columns. (Using zeros
# for every column instead would ignore the first-layer uncertainty.)
stv = np.concatenate((np.zeros(testX.shape), fvar), axis=1)
mean, var = stackedNetwork.predictLayer(1, sdata, stv, jitter=1e-1, covoption=1, include_likelihood=True, include_covnoise=True)

# Map the predicted mean back to original units: undo the standardisation,
# then undo the log. The scaler was fitted on the single target column, so
# it is applied to that column directly rather than to a widened array
# whose extra columns were thrown away afterwards.
E_af = np.exp(scaler.inverse_transform(mean[:, 0:1]))

In [84]:
#=========================================================================
#=========================================================================
print 'Applying log-normal distribution inverse...'

# E_af = np.exp(mean+var/2)
# varun = (np.exp(var) -1)*np.exp(2*mean + var)
varun = var
print 'Mean (min/max): ',E_af.min(), E_af.max()
# print 'Variance (min/max): ',varun.min(), varun.max()
# conf1 = E_af + 2*np.sqrt(varun)
# conf2 = E_af - 2*np.sqrt(varun)
rmse = np.sqrt(np.sum(np.square(E_af-testY))/testY.shape[0])
# rmse1,rmse2 =  np.sqrt(np.sum(np.square(conf1-testY))/testY.shape[0]), np.sqrt(np.sum(np.square(conf2-testY))/testY.shape[0])
print 'RMSE: ', rmse
# print '95% conf RMSE: ',rmse1,rmse2
mae = np.sum(abs(E_af-testY))/testY.shape[0]
print 'MAE: ', mae
# mad1,mad2 = np.sum(abs(conf1-testY))/testY.shape[0], np.sum(abs(conf2-testY))/testY.shape[0]
# print '95% conf MAD: ', mad1,mad2
pstd = np.sqrt(varun)
print 'Predicted STD: ', pstd.min(), pstd.max()

Applying log-normal distribution inverse...
Mean (min/max):  0.355365364722 1.71497331813
RMSE:  0.705874645668
MAE:  0.517794639684
Predicted STD:  0.641299199625 0.99529181619
