In [1]:
import numpy  as np
import pandas as pd
from scipy.sparse import load_npz

train_x_path='train_x.npz'
train_y_path='train_y_encoded.npz'
test_x_path='test_x.npz'

print 'load in training set...'
train_x=np.array(load_npz(train_x_path).todense())
train_y = np.array(load_npz(train_y_path).todense())

#put random seed here
selected_sample_index=np.random.choice(int(train_x.shape[0]),1000,replace=False)
train_x=train_x[selected_sample_index,]
train_y=train_y[selected_sample_index,]

print 'train_x:',train_x.shape
print 'train_y:',train_y.shape


def sigmoid(z):
    return 1.0/(1.0+np.exp(-z))

def loss(y,nn_output):
    return 0.5*(sum(np.array(y)-np.array(nn_output))**2)

class NN(object):
    def __init__(self, num_neuron_per_layer,lr=0.01,batch=1,epoch=20):
        self.lr=lr
        self.batch=batch
        self.epoch=epoch
        self.num_layers = len(num_neuron_per_layer)
        self.num_neuron_per_layer = num_neuron_per_layer#dummy neuron for the bias ignored
        self.reset_weights_bias()

    def reset_weights_bias(self):
        self.weights=[]
        self.bias=[]
        for i in range(0,self.num_layers-1):
            weight=np.random.randn(self.num_neuron_per_layer[i], self.num_neuron_per_layer[i+1])
            self.weights.append(weight)
            self.bias.append(np.random.randn(self.num_neuron_per_layer[i+1]))

    #per sample
    def feedforward(self, nn_input):
        layer_output_his=[]
        layer_output_his.append(nn_input)#to facilitate sgd, add the 0-layer input
        layer_input=np.array(nn_input)
        #layer_input.reshape(-1,1)
        for w,b in zip(self.weights,self.bias):
            layer_output = sigmoid(np.dot(w.T, layer_input)+b[0])
            layer_output_his.append(layer_output)
            layer_input=layer_output
        return layer_output_his #layer_output_his[-1] is nn_output
    
    def gradient_descent(self,true_y,layer_output_his):
        delta_weights=[]
        delta_bs=[]
        layer_output=layer_output_his[-1]
        sig=np.dot(layer_output,np.ones(len(layer_output))-layer_output)
        delta_base=np.dot(true_y-layer_output,sig)
        layer_output_his=layer_output_his[:-1]
        for w,layer_output in zip(reversed(self.weights),reversed(layer_output_his)):
            delta_weight=-np.outer(layer_output,delta_base)
            delta_weights.append(delta_weight)
            delta_b=-delta_base
            delta_bs.append(delta_b)
            sig=np.dot(layer_output,np.ones(len(layer_output))-layer_output)
            delta_base=-np.dot(w,np.multiply(sig,delta_base))
        return delta_weights,delta_bs
    
    def batch_gradient_descent(self, input_x,input_y):
        while input_x.shape[0]>self.batch:
            
            current_batch_index=np.random.choice(int(input_x.shape[0]),self.batch,replace=False)
            batch_x=input_x[current_batch_index,]
            batch_y=input_y[current_batch_index,]
            input_x=np.delete(input_x, current_batch_index, axis=0)
            input_y=np.delete(input_y, current_batch_index, axis=0)
            acc_weights=0
            acc_bias=0
            for x,y in zip(batch_x,batch_y):
                layer_output_his=self.feedforward(x)
                delta_weights,delta_bs=self.gradient_descent(y,layer_output_his)
                acc_weights=np.add(acc_weights,delta_weights)
                acc_bias=np.add(acc_bias,delta_bs)
            self.update_model(acc_weights,acc_bias)
        if input_x.shape[0]>0:
            for x,y in zip(input_x,input_y):
                layer_output_his=self.feedforward(x)
                delta_weights,delta_bs=self.gradient_descent(y,layer_output_his)
                acc_weights=np.add(acc_weights,delta_weights)
                acc_bias=np.add(acc_bias,delta_bs)
            self.update_model(acc_weights,acc_bias)
    
    def update_model(self,delta_weights,delta_bs):
        for i in range(0,self.num_layers-1):
            self.weights[i]=np.add(self.weights[i],np.multiply(delta_weights[-(i+1)],self.lr))
            self.bias[i]=np.add(self.bias[i],np.multiply(delta_bs[-(i+1)],self.lr))
    
    def split_dataset(self,input_x,input_y,num_folder=3):
        if len(input_x)!=len(input_y):
            print 'len(input_x) = ',len(input_x),'; len(input_y) = ',len(input_y)
            print 'len(input_x) must be equal to len(input_y).'
            return None
        return (np.linspace(0,len(input_y),num_folder+1,endpoint=True)).astype(int).tolist()[1:]
        
        
    def fit_model(self,input_x,input_y,cv=False):
        if cv:
            split_index_end=self.split_dataset(input_x,input_y)
            mse_his=[]
            start=0
            for end in split_index_end:
                valid_x=input_x[start:end]
                valid_y=input_y[start:end]
                train_x=np.concatenate((input_x[0:start],input_x[end:]))
                train_y=np.concatenate((input_y[0:start],input_y[end:]))
                
                self.reset_weights_bias()
                
                for i in range(0,self.epoch):
                    print 'epoch ',i,' ...'
                    self.batch_gradient_descent(input_x,input_y)
                mse=0
                for x,y in zip(valid_x,valid_y):
                    mse+=loss(y,self.feedforward(x)[-1])
                mse_his.append(float(mse)/len(valid_y))
                start=end
            return np.array(mse_his).mean()
                        
        else:
            train_x=input_x
            train_y=input_y
            
            
            self.reset_weights_bias()
                
            for i in range(0,self.epoch):
                print 'epoch ',i,' ...'
                self.batch_gradient_descent(input_x,input_y)
            
            mse=0
            for x,y in zip(train_x,train_y):
                mse+=loss(y,self.feedforward(x)[-1])
            return float(mse)/len(train_y)
    
    #per sample
    def predict(self,x):
        return np.argmax(self.feedforward(x)[-1])
    
def grid_search_model(input_x,input_y,lr_candidates=[0.01],nn_candidates=[[4096,100,10]]):
    #an exmaple with lr
    #init models
    min_mse=float('inf')
    min_lr=None
    min_cc=None
    c_mse=None
    log=[]
    for c_nn in nn_candidates:
        for c_lr in lr_candidates:
            
            n=NN(num_neuron_per_layer=c_nn,lr=c_lr)
            c_mse=n.fit_model(input_x,input_y,cv=True)
            if min_mse>c_mse:
                min_mse=c_mse
                min_lr=c_lr
                min_nn=c_nn
            print 'best nn=',c_nn,'  c_lr=',c_lr, ' c_mse=',c_mse
            c_log=[]
            c_log.append(c_nn)
            c_log.append(c_lr)
            c_log.append(c_mse)
            log.append(c_log)
    return min_lr,min_nn,log
            

load in training set...
train_x: (1000L, 4096L)
train_y: (1000L, 10L)


In [2]:
lr_candidate =[0.001,0.01,0.1,1]
nn_candidate=[[4096,128,10],[4096,512,10],[4096,1024,10],[4096,2048,10],[4096,128,128,10],[4096,128,512,10],[4096,128,1024,10],[4096,512,128,10],[4096,512,512,10],[4096,512,1024,10]]
#nn_candidate=[[4096,128,10],[4096,512,10],[4096,128,128,10],[4096,128,512,10]]
best_lr,best_nn,log=grid_search_model(train_x,train_y,lr_candidates=lr_candidate,nn_candidates=nn_candidate)


epoch  0  ...
epoch  1  ...
epoch  2  ...
epoch  3  ...
epoch  4  ...
epoch  5  ...
epoch  6  ...
epoch  7  ...
epoch  8  ...
epoch  9  ...
epoch  10  ...
epoch  11  ...
epoch  12  ...
epoch  13  ...
epoch  14  ...
epoch  15  ...
epoch  16  ...
epoch  17  ...
epoch  18  ...
epoch  19  ...
epoch  0  ...
epoch  1  ...
epoch  2  ...
epoch  3  ...
epoch  4  ...
epoch  5  ...
epoch  6  ...
epoch  7  ...
epoch  8  ...
epoch  9  ...
epoch  10  ...
epoch  11  ...
epoch  12  ...
epoch  13  ...
epoch  14  ...
epoch  15  ...
epoch  16  ...
epoch  17  ...
epoch  18  ...
epoch  19  ...
epoch  0  ...
epoch  1  ...
epoch  2  ...
epoch  3  ...
epoch  4  ...
epoch  5  ...
epoch  6  ...
epoch  7  ...
epoch  8  ...
epoch  9  ...
epoch  10  ...
epoch  11  ...
epoch  12  ...
epoch  13  ...
epoch  14  ...
epoch  15  ...
epoch  16  ...
epoch  17  ...
epoch  18  ...
epoch  19  ...
best nn= [4096, 128, 10]   c_lr= 0.001  c_mse= 0.13553599704670172
epoch  0  ...
epoch  1  ...
epoch  2  ...
epoch  3  ...
epoch  



epoch  17  ...
epoch  18  ...
epoch  19  ...
epoch  0  ...
epoch  1  ...
epoch  2  ...
epoch  3  ...
epoch  4  ...
epoch  5  ...
epoch  6  ...
epoch  7  ...
epoch  8  ...
epoch  9  ...
epoch  10  ...
epoch  11  ...
epoch  12  ...
epoch  13  ...
epoch  14  ...
epoch  15  ...
epoch  16  ...
epoch  17  ...
epoch  18  ...
epoch  19  ...
epoch  0  ...
epoch  1  ...
epoch  2  ...
epoch  3  ...
epoch  4  ...
epoch  5  ...
epoch  6  ...
epoch  7  ...
epoch  8  ...
epoch  9  ...
epoch  10  ...
epoch  11  ...
epoch  12  ...
epoch  13  ...
epoch  14  ...
epoch  15  ...
epoch  16  ...
epoch  17  ...
epoch  18  ...
epoch  19  ...
best nn= [4096, 128, 10]   c_lr= 0.01  c_mse= 26.874850207638776
epoch  0  ...
epoch  1  ...
epoch  2  ...
epoch  3  ...
epoch  4  ...
epoch  5  ...
epoch  6  ...
epoch  7  ...
epoch  8  ...
epoch  9  ...
epoch  10  ...
epoch  11  ...
epoch  12  ...
epoch  13  ...
epoch  14  ...
epoch  15  ...
epoch  16  ...
epoch  17  ...
epoch  18  ...
epoch  19  ...
epoch  0  ...
epoch 

epoch  2  ...
epoch  3  ...
epoch  4  ...
epoch  5  ...
epoch  6  ...
epoch  7  ...
epoch  8  ...
epoch  9  ...
epoch  10  ...
epoch  11  ...
epoch  12  ...
epoch  13  ...
epoch  14  ...
epoch  15  ...
epoch  16  ...
epoch  17  ...
epoch  18  ...
epoch  19  ...
epoch  0  ...
epoch  1  ...
epoch  2  ...
epoch  3  ...
epoch  4  ...
epoch  5  ...
epoch  6  ...
epoch  7  ...
epoch  8  ...
epoch  9  ...
epoch  10  ...
epoch  11  ...
epoch  12  ...
epoch  13  ...
epoch  14  ...
epoch  15  ...
epoch  16  ...
epoch  17  ...
epoch  18  ...
epoch  19  ...
epoch  0  ...
epoch  1  ...
epoch  2  ...
epoch  3  ...
epoch  4  ...
epoch  5  ...
epoch  6  ...
epoch  7  ...
epoch  8  ...
epoch  9  ...
epoch  10  ...
epoch  11  ...
epoch  12  ...
epoch  13  ...
epoch  14  ...
epoch  15  ...
epoch  16  ...
epoch  17  ...
epoch  18  ...
epoch  19  ...
best nn= [4096, 1024, 10]   c_lr= 0.1  c_mse= 13.6767912437165
epoch  0  ...
epoch  1  ...
epoch  2  ...
epoch  3  ...
epoch  4  ...
epoch  5  ...
epoch  6  .

epoch  11  ...
epoch  12  ...
epoch  13  ...
epoch  14  ...
epoch  15  ...
epoch  16  ...
epoch  17  ...
epoch  18  ...
epoch  19  ...
best nn= [4096, 128, 128, 10]   c_lr= 0.1  c_mse= 40.495248043824176
epoch  0  ...
epoch  1  ...
epoch  2  ...
epoch  3  ...
epoch  4  ...
epoch  5  ...
epoch  6  ...
epoch  7  ...
epoch  8  ...
epoch  9  ...
epoch  10  ...
epoch  11  ...
epoch  12  ...
epoch  13  ...
epoch  14  ...
epoch  15  ...
epoch  16  ...
epoch  17  ...
epoch  18  ...
epoch  19  ...
epoch  0  ...
epoch  1  ...
epoch  2  ...
epoch  3  ...
epoch  4  ...
epoch  5  ...
epoch  6  ...
epoch  7  ...
epoch  8  ...
epoch  9  ...
epoch  10  ...
epoch  11  ...
epoch  12  ...
epoch  13  ...
epoch  14  ...
epoch  15  ...
epoch  16  ...
epoch  17  ...
epoch  18  ...
epoch  19  ...
epoch  0  ...
epoch  1  ...
epoch  2  ...
epoch  3  ...
epoch  4  ...
epoch  5  ...
epoch  6  ...
epoch  7  ...
epoch  8  ...
epoch  9  ...
epoch  10  ...
epoch  11  ...
epoch  12  ...
epoch  13  ...
epoch  14  ...
e

epoch  12  ...
epoch  13  ...
epoch  14  ...
epoch  15  ...
epoch  16  ...
epoch  17  ...
epoch  18  ...
epoch  19  ...
epoch  0  ...
epoch  1  ...
epoch  2  ...
epoch  3  ...
epoch  4  ...
epoch  5  ...
epoch  6  ...
epoch  7  ...
epoch  8  ...
epoch  9  ...
epoch  10  ...
epoch  11  ...
epoch  12  ...
epoch  13  ...
epoch  14  ...
epoch  15  ...
epoch  16  ...
epoch  17  ...
epoch  18  ...
epoch  19  ...
best nn= [4096, 128, 1024, 10]   c_lr= 1  c_mse= 28.49966068937779
epoch  0  ...
epoch  1  ...
epoch  2  ...
epoch  3  ...
epoch  4  ...
epoch  5  ...
epoch  6  ...
epoch  7  ...
epoch  8  ...
epoch  9  ...
epoch  10  ...
epoch  11  ...
epoch  12  ...
epoch  13  ...
epoch  14  ...
epoch  15  ...
epoch  16  ...
epoch  17  ...
epoch  18  ...
epoch  19  ...
epoch  0  ...
epoch  1  ...
epoch  2  ...
epoch  3  ...
epoch  4  ...
epoch  5  ...
epoch  6  ...
epoch  7  ...
epoch  8  ...
epoch  9  ...
epoch  10  ...
epoch  11  ...
epoch  12  ...
epoch  13  ...
epoch  14  ...
epoch  15  ...
epo

epoch  15  ...
epoch  16  ...
epoch  17  ...
epoch  18  ...
epoch  19  ...
epoch  0  ...
epoch  1  ...
epoch  2  ...
epoch  3  ...
epoch  4  ...
epoch  5  ...
epoch  6  ...
epoch  7  ...
epoch  8  ...
epoch  9  ...
epoch  10  ...
epoch  11  ...
epoch  12  ...
epoch  13  ...
epoch  14  ...
epoch  15  ...
epoch  16  ...
epoch  17  ...
epoch  18  ...
epoch  19  ...
epoch  0  ...
epoch  1  ...
epoch  2  ...
epoch  3  ...
epoch  4  ...
epoch  5  ...
epoch  6  ...
epoch  7  ...
epoch  8  ...
epoch  9  ...
epoch  10  ...
epoch  11  ...
epoch  12  ...
epoch  13  ...
epoch  14  ...
epoch  15  ...
epoch  16  ...
epoch  17  ...
epoch  18  ...
epoch  19  ...
best nn= [4096, 512, 1024, 10]   c_lr= 0.001  c_mse= 0.18859493633620006
epoch  0  ...
epoch  1  ...
epoch  2  ...
epoch  3  ...
epoch  4  ...
epoch  5  ...
epoch  6  ...
epoch  7  ...
epoch  8  ...
epoch  9  ...
epoch  10  ...
epoch  11  ...
epoch  12  ...
epoch  13  ...
epoch  14  ...
epoch  15  ...
epoch  16  ...
epoch  17  ...
epoch  18  .

In [3]:
print log
df = pd.DataFrame(log)
df.to_csv("tuning_log.csv",header=False,index=False)

[[[4096, 128, 10], 0.001, 0.13553599704670172], [[4096, 128, 10], 0.01, 26.874850207638776], [[4096, 128, 10], 0.1, 40.494929814634986], [[4096, 128, 10], 1, 40.49958335280937], [[4096, 512, 10], 0.001, 0.1422741642642227], [[4096, 512, 10], 0.01, 0.21672540728369563], [[4096, 512, 10], 0.1, 40.46449639131627], [[4096, 512, 10], 1, 40.499656410915065], [[4096, 1024, 10], 0.001, 0.19190186472176865], [[4096, 1024, 10], 0.01, 0.2841949474052728], [[4096, 1024, 10], 0.1, 13.6767912437165], [[4096, 1024, 10], 1, 40.483872580157936], [[4096, 2048, 10], 0.001, 0.2700838928069873], [[4096, 2048, 10], 0.01, 0.31257279024200785], [[4096, 2048, 10], 0.1, 0.3296884795481177], [[4096, 2048, 10], 1, 40.396362513838746], [[4096, 128, 128, 10], 0.001, 10.96494014118059], [[4096, 128, 128, 10], 0.01, 40.226515668608094], [[4096, 128, 128, 10], 0.1, 40.495248043824176], [[4096, 128, 128, 10], 1, 40.49951179677259], [[4096, 128, 512, 10], 0.001, 0.12130554841089218], [[4096, 128, 512, 10], 0.01, 5.02434

In [4]:
#build final model configured with best hyperparameter on whole training data
n=NN(num_neuron_per_layer=best_nn,lr=best_lr)
f_mse=n.fit_model(train_x,train_y,cv=False)

epoch  0  ...




epoch  1  ...
epoch  2  ...
epoch  3  ...
epoch  4  ...
epoch  5  ...
epoch  6  ...
epoch  7  ...
epoch  8  ...
epoch  9  ...
epoch  10  ...
epoch  11  ...
epoch  12  ...
epoch  13  ...
epoch  14  ...
epoch  15  ...
epoch  16  ...
epoch  17  ...
epoch  18  ...
epoch  19  ...


In [5]:
#print training accuracy
acc=0
for x,y in zip(train_x,train_y):
    predict_y=n.predict(x)
    if predict_y==np.argmax(y):
        acc+=1
print float(acc)/len(train_x)




0.083


In [6]:
'''
#load in test data
print 'load in dummy test set...'
test_x=train_x[100:]
test_y=train_y[100:]
print 'test_x:',test_x.shape
print 'test_y:',test_y.shape
'''

"\n#load in test data\nprint 'load in dummy test set...'\ntest_x=train_x[100:]\ntest_y=train_y[100:]\nprint 'test_x:',test_x.shape\nprint 'test_y:',test_y.shape\n"

In [7]:
'''
#print testing accuracy
acc=0
for x,y in zip(test_x,test_y):
    predict_y=n.predict(x)
    if predict_y==np.argmax(y):
        acc+=1
print float(acc)/len(test_x)
'''

'\n#print testing accuracy\nacc=0\nfor x,y in zip(test_x,test_y):\n    predict_y=n.predict(x)\n    if predict_y==np.argmax(y):\n        acc+=1\nprint float(acc)/len(test_x)\n'

In [8]:
'''
current_batch_index=np.random.choice(int(train_x.shape[0]),1,replace=False)
print current_batch_index
batch_x=train_x[current_batch_index,]
print batch_x
batch_y=train_y[current_batch_index,]
print batch_y
#input_x=np.delete(train_x, current_batch_index, axis=0)
#input_y=np.delete(train_y, current_batch_index, axis=0)
'''

'\ncurrent_batch_index=np.random.choice(int(train_x.shape[0]),1,replace=False)\nprint current_batch_index\nbatch_x=train_x[current_batch_index,]\nprint batch_x\nbatch_y=train_y[current_batch_index,]\nprint batch_y\n#input_x=np.delete(train_x, current_batch_index, axis=0)\n#input_y=np.delete(train_y, current_batch_index, axis=0)\n'

In [9]:
#print best_model
print n.lr
print n.num_neuron_per_layer
print n.batch
print n.epoch

0.01
[4096, 128, 1024, 10]
1
20


In [10]:
r=pd.read_csv('tuning_log.csv')
r.columns =['nn','lr','loss']

In [11]:
print r

                       nn     lr          loss
0         [4096, 128, 10]  0.010  2.687485e+01
1         [4096, 128, 10]  0.100  4.049493e+01
2         [4096, 128, 10]  1.000  4.049958e+01
3         [4096, 512, 10]  0.001  1.422742e-01
4         [4096, 512, 10]  0.010  2.167254e-01
5         [4096, 512, 10]  0.100  4.046450e+01
6         [4096, 512, 10]  1.000  4.049966e+01
7        [4096, 1024, 10]  0.001  1.919019e-01
8        [4096, 1024, 10]  0.010  2.841949e-01
9        [4096, 1024, 10]  0.100  1.367679e+01
10       [4096, 1024, 10]  1.000  4.048387e+01
11       [4096, 2048, 10]  0.001  2.700839e-01
12       [4096, 2048, 10]  0.010  3.125728e-01
13       [4096, 2048, 10]  0.100  3.296885e-01
14       [4096, 2048, 10]  1.000  4.039636e+01
15   [4096, 128, 128, 10]  0.001  1.096494e+01
16   [4096, 128, 128, 10]  0.010  4.022652e+01
17   [4096, 128, 128, 10]  0.100  4.049525e+01
18   [4096, 128, 128, 10]  1.000  4.049951e+01
19   [4096, 128, 512, 10]  0.001  1.213055e-01
20   [4096, 1

In [12]:
r.groupby('nn').groups

{'[4096, 1024, 10]': Int64Index([7, 8, 9, 10], dtype='int64'),
 '[4096, 128, 1024, 10]': Int64Index([23, 24, 25, 26], dtype='int64'),
 '[4096, 128, 10]': Int64Index([0, 1, 2], dtype='int64'),
 '[4096, 128, 128, 10]': Int64Index([15, 16, 17, 18], dtype='int64'),
 '[4096, 128, 512, 10]': Int64Index([19, 20, 21, 22], dtype='int64'),
 '[4096, 2048, 10]': Int64Index([11, 12, 13, 14], dtype='int64'),
 '[4096, 512, 1024, 10]': Int64Index([35, 36, 37, 38], dtype='int64'),
 '[4096, 512, 10]': Int64Index([3, 4, 5, 6], dtype='int64'),
 '[4096, 512, 128, 10]': Int64Index([27, 28, 29, 30], dtype='int64'),
 '[4096, 512, 512, 10]': Int64Index([31, 32, 33, 34], dtype='int64')}

In [13]:
#r.groupby('lr').groups
for lr in lr_candidate:
    print r.groupby('lr').get_group(lr)

                       nn     lr       loss
3         [4096, 512, 10]  0.001   0.142274
7        [4096, 1024, 10]  0.001   0.191902
11       [4096, 2048, 10]  0.001   0.270084
15   [4096, 128, 128, 10]  0.001  10.964940
19   [4096, 128, 512, 10]  0.001   0.121306
23  [4096, 128, 1024, 10]  0.001   0.000058
27   [4096, 512, 128, 10]  0.001  12.360544
31   [4096, 512, 512, 10]  0.001   0.283857
35  [4096, 512, 1024, 10]  0.001   0.188595
                       nn    lr          loss
0         [4096, 128, 10]  0.01  2.687485e+01
4         [4096, 512, 10]  0.01  2.167254e-01
8        [4096, 1024, 10]  0.01  2.841949e-01
12       [4096, 2048, 10]  0.01  3.125728e-01
16   [4096, 128, 128, 10]  0.01  4.022652e+01
20   [4096, 128, 512, 10]  0.01  5.024346e-06
24  [4096, 128, 1024, 10]  0.01  2.213244e-13
28   [4096, 512, 128, 10]  0.01  2.689979e+01
32   [4096, 512, 512, 10]  0.01  1.730702e-01
36  [4096, 512, 1024, 10]  0.01  1.539755e-09
                       nn   lr          loss
1        

In [14]:
for nn in nn_candidate:
    print r.groupby('nn').get_group(str(nn))

                nn    lr       loss
0  [4096, 128, 10]  0.01  26.874850
1  [4096, 128, 10]  0.10  40.494930
2  [4096, 128, 10]  1.00  40.499583
                nn     lr       loss
3  [4096, 512, 10]  0.001   0.142274
4  [4096, 512, 10]  0.010   0.216725
5  [4096, 512, 10]  0.100  40.464496
6  [4096, 512, 10]  1.000  40.499656
                  nn     lr       loss
7   [4096, 1024, 10]  0.001   0.191902
8   [4096, 1024, 10]  0.010   0.284195
9   [4096, 1024, 10]  0.100  13.676791
10  [4096, 1024, 10]  1.000  40.483873
                  nn     lr       loss
11  [4096, 2048, 10]  0.001   0.270084
12  [4096, 2048, 10]  0.010   0.312573
13  [4096, 2048, 10]  0.100   0.329688
14  [4096, 2048, 10]  1.000  40.396363
                      nn     lr       loss
15  [4096, 128, 128, 10]  0.001  10.964940
16  [4096, 128, 128, 10]  0.010  40.226516
17  [4096, 128, 128, 10]  0.100  40.495248
18  [4096, 128, 128, 10]  1.000  40.499512
                      nn     lr       loss
19  [4096, 128, 512, 10

In [15]:
r.groupby('lr').get_group(0.01).values

array([['[4096, 128, 10]', 0.01, 26.874850207638772],
       ['[4096, 512, 10]', 0.01, 0.21672540728369566],
       ['[4096, 1024, 10]', 0.01, 0.2841949474052728],
       ['[4096, 2048, 10]', 0.01, 0.31257279024200785],
       ['[4096, 128, 128, 10]', 0.01, 40.226515668608094],
       ['[4096, 128, 512, 10]', 0.01, 5.024346128435502e-06],
       ['[4096, 128, 1024, 10]', 0.01, 2.2132435050358262e-13],
       ['[4096, 512, 128, 10]', 0.01, 26.89979455227707],
       ['[4096, 512, 512, 10]', 0.01, 0.17307022456274088],
       ['[4096, 512, 1024, 10]', 0.01, 1.5397547232383618e-09]],
      dtype=object)

In [16]:
#load in real test data
print 'load in real test set...'
test_x=np.array(load_npz(test_x_path).todense())
result=[]
print 'test_x:',test_x.shape
for index, x in enumerate(test_x):
    predict_y=n.predict(x)
    result.append([index,predict_y])
result_df = pd.DataFrame(result)
result_df.columns =['Id','Label']
result_df.to_csv("test_y_predict.csv",header=True,index=False)

load in real test set...
test_x: (10000L, 4096L)


