# Load layer outputs

In [37]:
import pickle
# Restore the cached network activations. The pickle holds a 2-tuple that is
# unpacked into `layer_outs` and `layer_outs_test`.
# NOTE(security): pickle.load can execute arbitrary code while deserialising,
# so only open a NN_layer_outputs.dat that was written by a trusted run.
with open('NN_layer_outputs.dat', 'rb') as f:
    layer_outs, layer_outs_test = pickle.load(f)
# No explicit f.close() here: leaving the `with` block already closed the file.

# Load recording and test data

In [40]:
import numpy as np
from keras.datasets import mnist
from scipy.misc import imresize

def _downsample_and_flatten(images, factor):
    """Shrink every image in a stack and return one flat, [0, 1]-scaled row per image.

    Parameters
    ----------
    images : array of shape (n_images, height, width)
        Image stack; each slice `images[i]` is passed to `imresize`.
    factor : float
        Scale factor forwarded to `imresize`; the output height/width are
        `int(height * factor)` and `int(width * factor)`.

    Returns
    -------
    float32 array of shape (n_images, int(height*factor) * int(width*factor))
        Resized pixel values divided by 255.
    """
    n_images, height, width = images.shape
    new_h, new_w = int(height * factor), int(width * factor)
    small = np.empty((n_images, new_h, new_w), dtype='float32')
    for idx in range(n_images):
        # NOTE(review): scipy.misc.imresize was removed in SciPy 1.3; this cell
        # needs an older SciPy (with Pillow) or a port to PIL.Image.resize.
        small[idx] = imresize(images[idx], factor)
    # Flatten with new_h * new_w (not new_h ** 2) so non-square images work too.
    return small.reshape(n_images, new_h * new_w) / 255


(Xtrain, Ytrain), (Xtest, Ytest) = mnist.load_data()

(ntrain, xdim, ydim) = Xtrain.shape
ntest = Xtest.shape[0]

# Recording data: training samples 30000..59999 and their labels.
X_pr = Xtrain[30000:60000, :, :]
Y_pr = Ytrain[30000:60000]

# Downsampling factor applied to both image axes.
factor = 0.25

# Downsample and vectorise the test set and the recording set in one step each,
# so the *_down names are bound exactly once (re-running the cell is safe).
Xtest_down = _downsample_and_flatten(Xtest, factor)
X_pr_down = _downsample_and_flatten(X_pr, factor)

# Run XGBoost

In [None]:
from copy import copy, deepcopy
from RE_PartialRecData import RE_PartialRecData
from RE_PartialRecData2 import RE_PartialRecData2
import os
import xgboost as xgb
import sklearn
from xgboost.sklearn import XGBClassifier
import pickle

# ---- XGBoost configuration ---------------------------------------------------
params = {}
# 'reg:linear' = linear regression objective; 'multi:softmax' would be the
# multi-class alternative (and would additionally need params['num_class']).
params['objective'] = 'reg:linear'
params['eta'] = 0.5               # shrinks the weights on each step (author's note: typically 0.01-0.2)
params['max_depth'] = 6           # limits over-fitting; deeper trees learn sample-specific relations (3-10)
params['silent'] = 1
params['nthread'] = 4
num_round = 5                     # boosting rounds per model

# ---- Experiment configuration ------------------------------------------------
# how many recordings?
nRecordings = 10
# how many neurons are observed in the recorded hidden layer?
subnetSize = [2**x for x in range(8)]
nSubnetSize = len(subnetSize)
# which layer is recorded?
iLayer = 2
# how many samples per recording (one entry per subnet size)?
# NOTE(review): the dtype of this array depends on Python-2 vs Python-3 division
# semantics -- confirm that RE_PartialRecData2 accepts the values it receives.
nSamples = np.divide(int(X_pr_down.shape[0]/nRecordings),subnetSize)*100
# how many repetitions per subnet size?
nIterations = 5

oLayer = len(layer_outs)-1  # index of output layer
nOutNeurons = layer_outs[oLayer].shape[1]
# rmses[iteration, output neuron, subnet size]
rmses = np.zeros([nIterations, nOutNeurons, nSubnetSize])

# The test inputs/targets do not change inside the loops, so look them up once.
test_inputs = layer_outs_test[iLayer]
test_targets = layer_outs_test[oLayer]

for ss in range(nSubnetSize):
    nLayerNeurons = subnetSize[ss]
    print(subnetSize[ss])
    for it in range(nIterations):
        # Work on a fresh copy each iteration.
        # NOTE(review): presumably RE_PartialRecData2 modifies its inputs; verify.
        layer_outputs = deepcopy(layer_outs)
        # subsample a partial recording
        X_subsample, Y_subsample = RE_PartialRecData2(layer_outputs[iLayer], layer_outputs[oLayer],
                                                      nLayerNeurons, nRecordings, nSamples[ss])
        # fit one regressor per output neuron
        for iN in range(nOutNeurons):
            xg_train  = xgb.DMatrix(X_subsample, label=Y_subsample[:, iN])
            xg_test   = xgb.DMatrix(test_inputs, label=test_targets[:, iN])
            watchlist = [(xg_train, 'train'), (xg_test, 'test')]
            # train XGBoost
            bst = xgb.train(params, xg_train, num_round, watchlist, verbose_eval=False)
            # get predictions
            pred = bst.predict(xg_test)
            # Vectorised RMSE against this neuron's test targets (replaces the
            # element-by-element list comprehension).
            rmses[it, iN, ss] = np.sqrt(np.mean(np.square(pred - test_targets[:, iN])))
            print ('predicting, RMSE=%f' %rmses[it, iN, ss])


# save the RMSEs; the `with` block closes the file, no extra f.close() needed
with open('RMSE_Layer1_NLNN.dat','wb') as f:
    pickle.dump(rmses, f)


1
predicting, RMSE=0.229555
predicting, RMSE=0.263115
predicting, RMSE=0.257719
predicting, RMSE=0.232773
predicting, RMSE=0.268914
predicting, RMSE=0.249644
predicting, RMSE=0.225736
predicting, RMSE=0.224962
predicting, RMSE=0.271521
predicting, RMSE=0.272188
predicting, RMSE=0.197815
predicting, RMSE=0.262854
predicting, RMSE=0.292752
predicting, RMSE=0.242609
predicting, RMSE=0.274589
predicting, RMSE=0.205063
predicting, RMSE=0.269223
predicting, RMSE=0.280863
predicting, RMSE=0.280954
predicting, RMSE=0.275749
predicting, RMSE=0.139959
predicting, RMSE=0.260868
predicting, RMSE=0.271802
predicting, RMSE=0.266622
predicting, RMSE=0.187172
predicting, RMSE=0.243046
predicting, RMSE=0.241345
predicting, RMSE=0.259725
predicting, RMSE=0.274558
predicting, RMSE=0.270341
predicting, RMSE=0.275903
predicting, RMSE=0.272792
predicting, RMSE=0.280719
predicting, RMSE=0.247462
predicting, RMSE=0.246314
predicting, RMSE=0.243843
predicting, RMSE=0.223637
predicting, RMSE=0.241984
predicting

In [None]:
# Sanity check: `rmses` was allocated as (nIterations, nOutNeurons, nSubnetSize).
rmses.shape


In [None]:

import matplotlib.pyplot as pl
%matplotlib inline
import numpy as np

# Mean RMSE (+/- std over iterations) versus number of observed neurons, with a
# second x axis on top showing the samples-per-recording used for each size.
fig = pl.figure(figsize=(10, 6))
ax1 = fig.add_subplot(111)
ax1.set_xlim([0, 128])
ax2 = ax1.twiny()  # top x axis; shares the y axis with ax1

x = subnetSize
per_iteration = np.mean(rmses, axis=1)   # average over output neurons
y = np.mean(per_iteration, axis=0)       # mean over iterations, one value per subnet size
error = np.std(per_iteration, axis=0)    # spread over iterations
# Baseline: std of the test-set outputs (np.std already removes the mean).
bl = np.std(layer_outs_test[oLayer])

# Draw explicitly on ax1 instead of relying on pyplot's "current axes".
ax1.plot(x, y, 'k-')
horiz_line_data = np.full(len(x), bl)    # replaces the Python-2-only xrange loop
ax1.plot(x, horiz_line_data, 'k--')
ax1.fill_between(x, y-error, y+error, alpha=0.2, facecolor='#808080')

ax1.set_xlabel('# observed neurons on Layer 1 (out of 128)', fontsize=18)
ax2.set_xlabel('Samples per recording',  fontsize=16)

# Put each samples-per-recording label at the subnet size it belongs to
# (previously the ticks sat at 0..len-1, unrelated to the data positions).
new_tick_locations = x
ax2.set_xlim(ax1.get_xlim())
ax2.set_xticks(new_tick_locations)
ax2.set_xticklabels(nSamples)

ax1.set_ylabel('RMSE', fontsize=18)
ax1.set_ylim([0, .35])

ax1.text(90, 0.3, 'baseline');
