# Load Layer outputs

In [2]:
nn = 'NN'

import pickle
with open('results/' + nn + '_layer_outputs.dat','rb') as f:
    layer_outs,layer_outs_test= pickle.load(f)
f.close()

# Load recording and test data

In [3]:
import numpy as np
from keras.datasets import mnist
from scipy.misc import imresize

(Xtrain, Ytrain), (Xtest, Ytest) = mnist.load_data()

(ntrain, xdim, ydim) = Xtrain.shape
ntest = Xtest.shape[0]

# Recording data
X_pr = Xtrain[30000:60000, :, :]
Y_pr = Ytrain[30000:60000]


# downsample
factor = 1
if factor<1:
    Xtest_down = np.ones((Xtest.shape[0], int(xdim*factor), int(ydim*factor)))
    for i in range(Xtest.shape[0]):
        Xtest_down[i,:,:] = imresize(Xtest[i,:,:], factor)

    X_pr_down = np.ones((X_pr.shape[0], int(xdim*factor), int(ydim*factor)))
    for i in range(X_pr.shape[0]):
        X_pr_down[i,:,:] = imresize(X_pr[i,:,:], factor)
else:
    Xtest_down = Xtest
    X_pr_down = X_pr
    
# VECTORIZE IMAGES
Xtest_down = Xtest_down.reshape(ntest, int(xdim*factor)**2).astype('float32') / 255
X_pr_down = X_pr_down.reshape(X_pr_down.shape[0], int(xdim*factor)**2).astype('float32') / 255

Using TensorFlow backend.


# Run XGBOOST and Average Results

In [17]:
from copy import copy, deepcopy
from RE_PartialRecData import RE_PartialRecData
from RE_PartialRecData2 import RE_PartialRecData2
import os
import xgboost as xgb
import sklearn
from sklearn import preprocessing

from xgboost.sklearn import XGBClassifier
import pickle

params = {}
# use softmax multi-class classification 'multi:softmax'
# use linear regression 'reg:linear'
params['objective'] = 'reg:linear'
# scale weight of positive examples
params['eta'] = 0.5               # Makes the model more robust by shrinking the weights on each step (0.01-0.2)
params['max_depth'] = 6           # Used to control over-fitting as higher depth will allow model to learn relations 
                                  # very specific to a particular sample. (3-10)
params['silent'] = 1
params['nthread'] = 4
# params['num_class'] = 10
num_round = 5

# how many recordings?
nRecordings = 10
# how many neurons from the firs hidden layer?
subnetSize = [2**x for x in range(8)]
nSubnetSize = len(subnetSize)
# how many samples per recording?
nSamples = np.divide(int(X_pr_down.shape[0]/nRecordings),subnetSize)*100
# how many iterations
nIterations = 20



# baseline prediction error
#bl = np.std(layer_outs_test[oLayer]-np.mean(layer_outs_test[oLayer]));

oLayer = len(layer_outs)-1  # index of output layer
nOutNeurons = layer_outs[oLayer].shape[1]
rmses = np.zeros([nIterations, nOutNeurons, nSubnetSize])

for iLayer in range(0, len(layer_outs)-1, 2):
    print('Layer ', iLayer)
    for ss in range(nSubnetSize):
        #r_coefs =np.zeros((nIterations, 10, 128))
        #nLayerNeurons = [subnetSize[ss], 0, 0, 0, 10]
        nLayerNeurons = subnetSize[ss]
        print(subnetSize[ss])
        for it in range(nIterations):
            # copy data - is this necessary?
            layer_outputs = deepcopy(layer_outs)
            rmses_rec = np.zeros((nRecordings, nOutNeurons))
            for iRec in range(nRecordings):
                # subsample
                X_subsample, Y_subsample = RE_PartialRecData2(layer_outputs[iLayer], layer_outputs[oLayer], \
                                                              nLayerNeurons, 1, nSamples[ss])
                # impute X_subsample with mean value, apply imputation to test set
                imp =preprocessing.Imputer(missing_values='NaN', strategy='mean')
                impf =imp.fit(X_subsample)
                X_new = impf.transform(X_subsample)
                X_test_new= impf.transform(layer_outs_test[iLayer])
                for iN in range(nOutNeurons):
                    xg_train  = xgb.DMatrix(X_new, label=Y_subsample[:, iN])
                    xg_test   = xgb.DMatrix(X_test_new, label=layer_outs_test[oLayer][:, iN])
                    watchlist = [(xg_train, 'train'), (xg_test, 'test')]
                    # train XGboost
                    bst = xgb.train(params, xg_train, num_round, watchlist, verbose_eval=False)
                    # get predictions
                    pred = bst.predict(xg_test)
                    rmses_rec[iRec, iNeuron] = np.sqrt(np.mean((bst.predict(xg_test)-layer_outs_test[oLayer][:,iN])**2))
 
            rmses[it,:,ss] = np.mean(rmses_rec, axis=0)

            print ('predicting, mean RMSEs=%f' %np.mean(rmses[it, :, ss]))

    fName = 'results/XGBMP_RMSES_Layer'+str(iLayer) + 'nRec' + str(nRecordings) + nn + '.dat'
    # save the rmse's
    with open(fName,'wb') as f:
        pickle.dump(rmses, f)
    f.close()


('Layer ', 0)
128
predicting, mean RMSEs=0.015707
('Layer ', 2)
128
predicting, mean RMSEs=0.005183


In [16]:
pred = bst.predict(xg_test)
np.sqrt(np.mean((bst.predict(xg_test)-layer_outs_test[oLayer][:,iN])**2))


0.044559918

# Save RMSE's

In [None]:
fName = 'results/XGB_RMSES_Layer' + str(iLayer) + 'diff_nRec' + nn + '.dat'
# save the rmse's
with open(fName,'wb') as f:
    pickle.dump(rmses, f)
f.close()


# Plot RMSE's

In [None]:
import matplotlib.pyplot as pl
%matplotlib inline
import numpy as np

RMSE = np.mean(np.mean(rmses, axis=0), axis=0)
RMSE = np.transpose(RMSE)
fig = pl.figure(figsize=(10,6))
ax = fig.add_subplot(111)
cax = ax.pcolor(RMSE)
ax.set_xlabel('# observed neurons (out of 128)', fontsize=18)
ax.set_ylabel('# partial recordings', fontsize=18)
ax.set_xticks(np.arange(0.5,nSubnetSize))
ax.set_xticklabels(subnetSize)
ax.set_yticks(np.arange(0.5,nRecordings))
ax.set_yticklabels(recordings)
ax.set_title('RMSE (Layer '+str(iLayer)+')', fontsize=24)
fig.colorbar(cax)
pl.draw()
figName = 'figures/1XGB_RMSES_Layer' + str(iLayer) + '_diff_nRec_' + nn
pl.savefig(figName +'.pdf', format='pdf')
pl.savefig(figName +'.eps', format='eps', dpi=1000)


# Plot RMSE's vs. No. observed neurons per recordings

In [None]:
# import matplotlib.pyplot as pl
# %matplotlib inline
# import numpy as np

# fig=pl.figure(figsize=(10,6))
# ax1 = fig.add_subplot(111)
# ax1.set_xlim([0, 128])
# ax2 = ax1.twiny()

# x = subnetSize;
# y = np.mean(np.median(rmses, axis=1), axis=0)
# error = np.std(np.mean(rmses, axis=1), axis=0)
# bl = np.std(layer_outs_test[oLayer]-np.mean(layer_outs_test[oLayer]));

# pl.semilogx(x, y, 'k-')
# # horiz_line_data = np.array([bl for i in xrange(len(x))])
# # pl.plot(x, horiz_line_data, 'k--') 
# pl.fill_between(x, y-error, y+error, alpha=0.2, facecolor='#808080')

# ax1.set_xlabel('# observed neurons on Layer' + str(iLayer) + '(out of 128)', fontsize=18)
# ax2.set_xlabel('Samples per recording',  fontsize=16)

# new_tick_locations = subnetSize
# ax1.set_xlim(ax1.get_xlim())
# ax1.set_xticks(new_tick_locations)
# ax1.set_xticklabels(new_tick_locations)

# new_tick_locations = subnetSize
# ax2.set_xlim(ax1.get_xlim())
# ax2.set_xticks(new_tick_locations)
# ax2.set_xticklabels(nSamples)


# # ax2.set_xticks(nSamples)
# # ax2.set_xticklabels(nSamples[range(0, 5, 20)])
# ax1.set_ylabel('RMSE', fontsize=18)
# ax1.set_ylim([0, .50])

# # pl.text(110,bl+0.005, 'baseline')
