### This file predicts cadmium (Cd) using the following stacked GP structure
    ## First layer: has two nodes (one per output)
        # input is location, Ni, Zn
        # output is Co, Cr
    ## Second layer:
        # input is location, Ni, Zn and the first-layer outputs (Co, Cr)
        # output is Cd


In [1]:
%matplotlib inline 

import sys
sys.path.append('../')
sys.path.append('../../stackedgp_src/')
from __future__ import division
from mpl_toolkits.mplot3d import axes3d
from stackedGPNetwork import StackedGPNetwork
from sklearn.cross_validation import KFold
import GPy, time
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import FastICA

## Load Data

In [2]:
#x	y	Landuse	Rock	Cd	Co	Cr	Cu	Ni	Pb	Zn

X = np.genfromtxt('../data/jura_csv/X.txt', delimiter=' ')
Y = np.genfromtxt('../data/jura_csv/Y.txt', delimiter=' ')
Z = np.genfromtxt('../data/jura2.csv', delimiter=',')[:,[5,6]] # 5 is the index of Co
X = np.concatenate((X,Y[:,1:]),axis=1) # add Zn, Ni to the input matrix

Y = Y[:,0:1]

print 'Min of Y: ', Y.min(axis=0)
Z[:,1:] = np.log(Z[:,1:]+1)
Y = np.log(Y)
if Z.shape[1] > 1:
    ica = FastICA(n_components=Z.shape[1],random_state=100,fun='exp')
    Z[:259,:] = ica.fit_transform(Z[:259,:])
    Z[259:,:] = ica.transform(Z[259:,:])
    
scaler = StandardScaler()
scaler1 = StandardScaler()
scaler2 = StandardScaler()
Y[:259,:] = scaler.fit_transform(Y[:259,:])
Z[:259,:] = scaler1.fit_transform(Z[:259,:])
X[:259,2:] = scaler2.fit_transform(X[:259,2:])
X[259:,2:] = scaler2.transform(X[259:,2:])
print 'normalisation: mean/std',scaler.mean_,scaler.std_

Min of Y:  [ 0.135]
normalisation: mean/std [ 0.03607936] [ 0.70738224]


## Train Stacked GP

In [3]:
# ntrain = X.shape[0]
trainX,testX = X[:259,:], X[259:,:]
trainY, testY = Y[:259,:], Y[259:,:]
trainZ, testZ = Z[:259,:], Z[259:,:]
# testY = scaler.inverse_transform(testY)
testY = np.exp(testY)
print trainX.shape

(259, 4)


In [4]:
stackedNetwork = StackedGPNetwork(2)
no_GPs = trainZ.shape[1]
for i in range(no_GPs):
    stackedNetwork.createNewNode(0,trainX,trainZ[:,i:i+1], normalize=True, ARD = False, useGPU=False)

traindata = np.concatenate((trainX,trainZ), axis=1)
stackedNetwork.createNewNode(1,traindata,trainY, normalize=True, ARD=False, useGPU=False)


t0= time.clock()
stackedNetwork.optimize(numoptimizationtrails=1)
t= time.clock() - t0 # t is CPU seconds elapsed (floating point)
print 'Training Time = ',t

Training Time =  4.716989


## Test Stacked GP on Testing

In [5]:
# --- First layer ---------------------------------------------------------
# The layer-0 input is testX repeated side by side, once per first-layer node
# (presumably predictLayer hands one copy to each node - confirm).
fdata = np.tile(testX, (1, no_GPs))
fmean, fvar = stackedNetwork.predictLayer(0, fdata, None, include_likelihood=True)

# --- Second layer --------------------------------------------------------
# Inputs: the test inputs followed by the first-layer predictive means.
sdata = np.hstack((testX, fmean))
# Matching per-column values passed as the third argument: zeros for the
# testX columns, the first-layer predictive variances for the predicted ones.
# NOTE(review): assumed to be treated as input variance by predictLayer.
stv = np.hstack((np.zeros(testX.shape), fvar))
mean, var = stackedNetwork.predictLayer(
    1, sdata, stv,
    jitter=0, covoption=1,
    include_likelihood=True, include_covnoise=True)

# Undo the target preprocessing in reverse order: de-standardise, then exp.
# NOTE(review): exp of a Gaussian mean is the median of the corresponding
# log-normal, not its mean (which would also need the variance) - confirm
# this is the intended point estimate.
E_af = np.exp(scaler.inverse_transform(mean))

In [6]:
#=========================================================================
#=========================================================================
print 'Applying log-normal distribution inverse...'
varun = var
print 'Mean (min/max): ',E_af.min(), E_af.max()
rmse = np.sqrt(np.sum(np.square(E_af-testY))/testY.shape[0])
print 'RMSE: ', rmse
mae = np.sum(abs(E_af-testY))/testY.shape[0]
print 'MAE: ', mae
pstd = np.sqrt(varun)
print 'Predicted STD: ', pstd.min(), pstd.max()

Applying log-normal distribution inverse...
Mean (min/max):  0.336360040449 1.96397417148
RMSE:  0.526160635823
MAE:  0.360202603075
Predicted STD:  0.590137126057 0.942228717517
