# Continuation of multilayer perceptron

Here I continue training. In case we turn off the computer or have any other reasons to train again, this piece of code is written.

We import the packages:

In [None]:
import numpy as np
import sys
import theano
import theano.tensor as T
import matplotlib.pyplot as plt
import lasagne
from lasagne.nonlinearities import leaky_rectify, softmax, sigmoid, tanh


Shuffling function, like before, reading the data in.

In [None]:
def shuffle_together(input_1, input_2):
    if input_1.shape[0]!= input_2.shape[0]:
        print ("Problem, y and x array are not of the same shape.")
        return None
    c= np.arange(input_1.shape[0])
    np.random.shuffle(c)
    return input_1[c], input_2[c], c

input_data_all = np.load('train_sample.npy')
output_data_all= np.load('train_target.npy')
n_samples= input_data_all.shape[0]
print(n_samples)

Shuffle input and output data and create Theano variables for input and target minibatch;

In [2]:
input_data_all, output_data_all, _ = shuffle_together(input_data_all, output_data_all)
n_validation_samples= int(0.1*n_samples)
validation_input= input_data_all[-1*n_validation_samples:]
validation_output= output_data_all[-1*n_validation_samples:]
n_train_samples= n_samples-n_validation_samples
train_input= input_data_all[:n_train_samples]
train_output= output_data_all[:n_train_samples]
print(train_input.shape)
print(train_output.shape)
x=T.matrix('x', dtype= theano.config.floatX)
y=T.vector('y', dtype= theano.config.floatX)

(87007, 53)
(87007,)


Import saved data (note, file name might be different).

In [4]:
import pickle as pkl
with open('model5_v2.pkl','rb') as f:
    my_layers = pkl.load(f)

Create Lasagne layers given array of different numbers of dimensions, input to layers, x_variable and which nonlinearities to choose.
Create layers. Here we set up how layers will be assembled. Also, we get the last layer output.

In [5]:
def create_layers(l_dim,my_input,x_variable,l_nonlinearities):
    nr_layers = len(l_dim)
    layers = []
    layers.append(lasagne.layers.InputLayer(shape=my_input.shape, input_var=x_variable))
    if len(l_dim)==1:
        return(layers)
    else:
        i = 1
        while i<len(l_dim):
            layers.append(lasagne.layers.DenseLayer(layers[i-1], num_units=l_dim[i],nonlinearity=l_nonlinearities[i]))
            i += 1
        return(layers)
        
layers_dimensions = [input_data_all.shape[1],40,25,15,8,4,1]
layers_nonlinearities = [None,lasagne.nonlinearities.rectify,lasagne.nonlinearities.tanh,
                     lasagne.nonlinearities.tanh,lasagne.nonlinearities.rectify,lasagne.nonlinearities.linear,None]
network = create_layers(l_dim = layers_dimensions,my_input=train_input,x_variable=x,l_nonlinearities=layers_nonlinearities)
output_layer = network[-1]

Only now we change all current layers with saved ones, that we imported.

In [6]:
lasagne.layers.set_all_param_values(network, my_layers)

In [7]:
output_val =  lasagne.layers.get_output(output_layer)# Here we just check the output layer.
output_layer.get_params()[0].get_value()

array([[0.29602773],
       [0.87249047],
       [0.40673455],
       [0.54428656]])

In [8]:
# create loss function
prediction = lasagne.layers.get_output(output_layer)
loss = lasagne.objectives.squared_error(prediction, y)
loss = lasagne.objectives.aggregate(loss, mode = 'mean')
params = lasagne.layers.get_all_params(output_layer, trainable=True)
### We can adjust learning rate and all other parameters here.
l_rate = 0.000000014
updates = lasagne.updates.adam(loss, params, learning_rate=l_rate, beta1=0.9,
         beta2=0.989, epsilon=1e-10)#,epsilon=1e-1)
sample_function= theano.function(inputs= [x], outputs=[output_val])
train_fn = theano.function([x, y], loss, updates=updates)
[sample_out]= sample_function(validation_input)
sample_out

array([[1.59103475],
       [1.88912109],
       [1.54578102],
       ...,
       [1.61697189],
       [1.41937676],
       [1.60683313]])

In [9]:
print(np.std(sample_out))
print(np.mean(sample_out))

0.20333618750765622
1.6027220394086898


And we begin (re)training. The process here is similar as in the original training

In [15]:
batch_size= 300
num_epochs = 500000
n_batches= train_input.shape[0]//batch_size
total_it= num_epochs*n_batches
if 'r2_error' in locals():
    r2_error= np.concatenate([r2_error, np.zeros(total_it)], axis=0)
else:
    r2_error= np.zeros(total_it)
previous_count=0
train_input, train_output, _= shuffle_together(train_input, train_output)
# train network (assuming you've got some training data in numpy arrays)
ss_tot = np.sum((validation_output-np.mean(validation_output))**2)
for epoch in range(num_epochs):
    train_loss = 0
    for it in range(n_batches):
        curr_it= n_batches*epoch+it +previous_count
        input_batch= train_input[it*batch_size:(it+1)*batch_size,:]
        output_batch= train_output[it*batch_size:(it+1)*batch_size]  
        train_loss += train_fn(input_batch, output_batch)
    [sample_out]= sample_function(validation_input)

    
    if epoch%100==0 and epoch>9:
        SS_res = np.sum((sample_out.reshape(-1)-validation_output)**2)
        r2_error[epoch]= 1 - SS_res/ss_tot
        r2_error[epoch] = r2_error[epoch]*100
        ### We find that as we converge, the prediction improves very gradually. So, we only check each 100 predictions
        ### if there was an improvement.
        print("Epoch %d: Loss %g" % (epoch + 1, train_loss / len(input_data_all)),"r2_error:",r2_error[epoch],".........",
             "change in last 100 epochs:",r2_error[epoch]-r2_error[epoch-100])

        
    train_input, train_output, _= shuffle_together(train_input, train_output)

Epoch 101: Loss 1.66464e-08 r2_error: 99.98372664086975 ......... change in last 100 epochs: 99.98372664086975
Epoch 201: Loss 1.66444e-08 r2_error: 99.98372171707655 ......... change in last 100 epochs: -4.9237931989409844e-06
Epoch 301: Loss 1.66405e-08 r2_error: 99.98373032782847 ......... change in last 100 epochs: 8.610751919491122e-06
Epoch 401: Loss 1.66381e-08 r2_error: 99.98372745977171 ......... change in last 100 epochs: -2.8680567538685864e-06
Epoch 501: Loss 1.66357e-08 r2_error: 99.98372920722302 ......... change in last 100 epochs: 1.7474513072102127e-06
Epoch 601: Loss 1.66309e-08 r2_error: 99.98372963453342 ......... change in last 100 epochs: 4.2731039684440475e-07
Epoch 701: Loss 1.66282e-08 r2_error: 99.9837313703824 ......... change in last 100 epochs: 1.7358489827756785e-06
Epoch 801: Loss 1.66245e-08 r2_error: 99.98372857958066 ......... change in last 100 epochs: -2.7908017443678546e-06
Epoch 901: Loss 1.66206e-08 r2_error: 99.98372498012313 ......... change in 

Epoch 7201: Loss 1.64438e-08 r2_error: 99.98377582945423 ......... change in last 100 epochs: -8.230331332015339e-07
Epoch 7301: Loss 1.6446e-08 r2_error: 99.98376763643842 ......... change in last 100 epochs: -8.193015816004845e-06
Epoch 7401: Loss 1.64421e-08 r2_error: 99.98377942856051 ......... change in last 100 epochs: 1.1792122094789192e-05
Epoch 7501: Loss 1.64348e-08 r2_error: 99.98377892269316 ......... change in last 100 epochs: -5.058673480107245e-07
Epoch 7601: Loss 1.64328e-08 r2_error: 99.98378238957272 ......... change in last 100 epochs: 3.466879562097347e-06
Epoch 7701: Loss 1.64296e-08 r2_error: 99.98378020027394 ......... change in last 100 epochs: -2.1892987831506616e-06
Epoch 7801: Loss 1.64275e-08 r2_error: 99.98378228366606 ......... change in last 100 epochs: 2.083392118379379e-06
Epoch 7901: Loss 1.64238e-08 r2_error: 99.98379175960451 ......... change in last 100 epochs: 9.475938455238975e-06
Epoch 8001: Loss 1.64219e-08 r2_error: 99.98379631058441 ......... 

Epoch 14201: Loss 1.62447e-08 r2_error: 99.98381507403525 ......... change in last 100 epochs: -4.117142964332743e-07
Epoch 14301: Loss 1.62479e-08 r2_error: 99.98381004828371 ......... change in last 100 epochs: -5.02575153404905e-06
Epoch 14401: Loss 1.62425e-08 r2_error: 99.9838188078283 ......... change in last 100 epochs: 8.759544584791001e-06
Epoch 14501: Loss 1.62425e-08 r2_error: 99.98382043300566 ......... change in last 100 epochs: 1.6251773615749698e-06
Epoch 14601: Loss 1.62162e-08 r2_error: 99.98381760897426 ......... change in last 100 epochs: -2.824031398063198e-06
Epoch 14701: Loss 1.62367e-08 r2_error: 99.98381863406465 ......... change in last 100 epochs: 1.0250903841324543e-06
Epoch 14801: Loss 1.62331e-08 r2_error: 99.98381817149412 ......... change in last 100 epochs: -4.6257052588316583e-07
Epoch 14901: Loss 1.62315e-08 r2_error: 99.98381125934262 ......... change in last 100 epochs: -6.9121515053893745e-06
Epoch 15001: Loss 1.62284e-08 r2_error: 99.9838169387425 

Epoch 21201: Loss 1.60643e-08 r2_error: 99.98383486554829 ......... change in last 100 epochs: 1.0293502725744474e-06
Epoch 21301: Loss 1.60619e-08 r2_error: 99.98383301641599 ......... change in last 100 epochs: -1.8491323032776563e-06
Epoch 21401: Loss 1.60597e-08 r2_error: 99.98383219948053 ......... change in last 100 epochs: -8.169354543952068e-07
Epoch 21501: Loss 1.60565e-08 r2_error: 99.98383124387034 ......... change in last 100 epochs: -9.556101900898284e-07
Epoch 21601: Loss 1.6053e-08 r2_error: 99.98382815462062 ......... change in last 100 epochs: -3.0892497164813904e-06
Epoch 21701: Loss 1.60528e-08 r2_error: 99.98383050652319 ......... change in last 100 epochs: 2.351902566033459e-06
Epoch 21801: Loss 1.60505e-08 r2_error: 99.98382332296451 ......... change in last 100 epochs: -7.183558679457747e-06
Epoch 21901: Loss 1.60466e-08 r2_error: 99.98383147324478 ......... change in last 100 epochs: 8.150280265795118e-06
Epoch 22001: Loss 1.60449e-08 r2_error: 99.98382852642543

Epoch 28201: Loss 1.58848e-08 r2_error: 99.98381065603681 ......... change in last 100 epochs: 1.2893677165948247e-06
Epoch 28301: Loss 1.58835e-08 r2_error: 99.98381147669974 ......... change in last 100 epochs: 8.206629331652948e-07
Epoch 28401: Loss 1.58777e-08 r2_error: 99.98380536684112 ......... change in last 100 epochs: -6.109858617264763e-06
Epoch 28501: Loss 1.58795e-08 r2_error: 99.98381065042064 ......... change in last 100 epochs: 5.283579511683456e-06
Epoch 28601: Loss 1.58762e-08 r2_error: 99.98381072303918 ......... change in last 100 epochs: 7.26185476196406e-08
Epoch 28701: Loss 1.58736e-08 r2_error: 99.98381093868292 ......... change in last 100 epochs: 2.156437375333553e-07
Epoch 28801: Loss 1.58693e-08 r2_error: 99.98380973604957 ......... change in last 100 epochs: -1.2026333564563174e-06
Epoch 28901: Loss 1.58693e-08 r2_error: 99.9838102237884 ......... change in last 100 epochs: 4.877388306567809e-07
Epoch 29001: Loss 1.58661e-08 r2_error: 99.98380655790992 ....

Epoch 35201: Loss 1.5709e-08 r2_error: 99.98382575338162 ......... change in last 100 epochs: -5.189875977862357e-06
Epoch 35301: Loss 1.57087e-08 r2_error: 99.98382826003622 ......... change in last 100 epochs: 2.5066545958907227e-06
Epoch 35401: Loss 1.57002e-08 r2_error: 99.98383127778392 ......... change in last 100 epochs: 3.0177477015058685e-06
Epoch 35501: Loss 1.57015e-08 r2_error: 99.98383339445077 ......... change in last 100 epochs: 2.1166668489058793e-06
Epoch 35601: Loss 1.56987e-08 r2_error: 99.98382277943317 ......... change in last 100 epochs: -1.0615017600912324e-05
Epoch 35701: Loss 1.56963e-08 r2_error: 99.98383368839741 ......... change in last 100 epochs: 1.0908964242162256e-05
Epoch 35801: Loss 1.56946e-08 r2_error: 99.98383454083533 ......... change in last 100 epochs: 8.524379211394262e-07
Epoch 35901: Loss 1.56922e-08 r2_error: 99.98383611926022 ......... change in last 100 epochs: 1.5784248859063155e-06
Epoch 36001: Loss 1.56898e-08 r2_error: 99.98383535450225

Epoch 42201: Loss 1.55355e-08 r2_error: 99.98385632767875 ......... change in last 100 epochs: -1.804472049116157e-06
Epoch 42301: Loss 1.55342e-08 r2_error: 99.98385690138308 ......... change in last 100 epochs: 5.73704326711777e-07
Epoch 42401: Loss 1.55307e-08 r2_error: 99.98385736202651 ......... change in last 100 epochs: 4.6064343450780143e-07
Epoch 42501: Loss 1.55276e-08 r2_error: 99.98385809917701 ......... change in last 100 epochs: 7.371505006403822e-07
Epoch 42601: Loss 1.55245e-08 r2_error: 99.98385786506397 ......... change in last 100 epochs: -2.3411304539422417e-07
Epoch 42701: Loss 1.55251e-08 r2_error: 99.9838604563651 ......... change in last 100 epochs: 2.591301125676182e-06
Epoch 42801: Loss 1.55192e-08 r2_error: 99.98386047661097 ......... change in last 100 epochs: 2.0245877863089845e-08
Epoch 42901: Loss 1.55194e-08 r2_error: 99.98386237970877 ......... change in last 100 epochs: 1.9030977966849605e-06
Epoch 43001: Loss 1.5513e-08 r2_error: 99.98385945011616 ...

Epoch 49201: Loss 1.53691e-08 r2_error: 99.98389733598592 ......... change in last 100 epochs: 2.7986811943492285e-06
Epoch 49301: Loss 1.53673e-08 r2_error: 99.98389767237954 ......... change in last 100 epochs: 3.363936258438116e-07
Epoch 49401: Loss 1.5365e-08 r2_error: 99.9838997894665 ......... change in last 100 epochs: 2.117086964403825e-06
Epoch 49501: Loss 1.53599e-08 r2_error: 99.9838967027416 ......... change in last 100 epochs: -3.08672490234585e-06
Epoch 49601: Loss 1.53599e-08 r2_error: 99.9838963345451 ......... change in last 100 epochs: -3.681965097257489e-07
Epoch 49701: Loss 1.53584e-08 r2_error: 99.98390038392454 ......... change in last 100 epochs: 4.049379441539713e-06
Epoch 49801: Loss 1.53549e-08 r2_error: 99.98389871155335 ......... change in last 100 epochs: -1.6723711837585142e-06
Epoch 49901: Loss 1.53546e-08 r2_error: 99.98389897286233 ......... change in last 100 epochs: 2.613089833403137e-07
Epoch 50001: Loss 1.53523e-08 r2_error: 99.9838999841174 .......

Epoch 56201: Loss 1.52134e-08 r2_error: 99.98393380310266 ......... change in last 100 epochs: 1.1818727898571524e-06
Epoch 56301: Loss 1.5209e-08 r2_error: 99.98393528126778 ......... change in last 100 epochs: 1.4781651174189392e-06
Epoch 56401: Loss 1.52071e-08 r2_error: 99.98393438315802 ......... change in last 100 epochs: -8.981097607829724e-07
Epoch 56501: Loss 1.52045e-08 r2_error: 99.98393592904985 ......... change in last 100 epochs: 1.5458918340982564e-06
Epoch 56601: Loss 1.5193e-08 r2_error: 99.98393700592045 ......... change in last 100 epochs: 1.0768706033559283e-06
Epoch 56701: Loss 1.52024e-08 r2_error: 99.9839391327397 ......... change in last 100 epochs: 2.126819254044676e-06
Epoch 56801: Loss 1.51977e-08 r2_error: 99.98393650204885 ......... change in last 100 epochs: -2.630690858040907e-06
Epoch 56901: Loss 1.51939e-08 r2_error: 99.98393761896264 ......... change in last 100 epochs: 1.1169137934530227e-06
Epoch 57001: Loss 1.51941e-08 r2_error: 99.98393240043713 ..

Epoch 63201: Loss 1.50588e-08 r2_error: 99.98397539645327 ......... change in last 100 epochs: 1.3093358859350701e-06
Epoch 63301: Loss 1.50569e-08 r2_error: 99.98397623994555 ......... change in last 100 epochs: 8.434922733613348e-07
Epoch 63401: Loss 1.50484e-08 r2_error: 99.98397849127025 ......... change in last 100 epochs: 2.251324701774138e-06
Epoch 63501: Loss 1.50511e-08 r2_error: 99.98398250749652 ......... change in last 100 epochs: 4.016226270664447e-06
Epoch 63601: Loss 1.50444e-08 r2_error: 99.98398060543983 ......... change in last 100 epochs: -1.90205669525767e-06
Epoch 63701: Loss 1.50478e-08 r2_error: 99.98398274968281 ......... change in last 100 epochs: 2.1442429840590194e-06
Epoch 63801: Loss 1.50462e-08 r2_error: 99.98397729726838 ......... change in last 100 epochs: -5.452414427509211e-06
Epoch 63901: Loss 1.50432e-08 r2_error: 99.98397952016522 ......... change in last 100 epochs: 2.222896839043642e-06
Epoch 64001: Loss 1.50411e-08 r2_error: 99.98398416380788 ...

Epoch 70201: Loss 1.49061e-08 r2_error: 99.98400451578436 ......... change in last 100 epochs: 3.4689506094309763e-06
Epoch 70301: Loss 1.4907e-08 r2_error: 99.9840064769356 ......... change in last 100 epochs: 1.9611512414030585e-06
Epoch 70401: Loss 1.48967e-08 r2_error: 99.98400850885461 ......... change in last 100 epochs: 2.0319190099371554e-06
Epoch 70501: Loss 1.49011e-08 r2_error: 99.98400576554772 ......... change in last 100 epochs: -2.743306893648878e-06
Epoch 70601: Loss 1.48979e-08 r2_error: 99.98400718101888 ......... change in last 100 epochs: 1.415471160726156e-06
Epoch 70701: Loss 1.48968e-08 r2_error: 99.98400743270423 ......... change in last 100 epochs: 2.516853498946148e-07
Epoch 70801: Loss 1.48933e-08 r2_error: 99.98400483655054 ......... change in last 100 epochs: -2.5961536920249273e-06
Epoch 70901: Loss 1.48938e-08 r2_error: 99.98400586758986 ......... change in last 100 epochs: 1.0310393179224775e-06
Epoch 71001: Loss 1.48913e-08 r2_error: 99.9840060450266 ..

KeyboardInterrupt: 

We check R-squares and save the model.

In [16]:
test_x = np.load('test_sample.npy')
test_y= np.load('test_target.npy')
[sample_out]= sample_function(test_x)
ss_tot = np.sum((test_y-np.mean(test_y))**2)
SS_res = np.sum((sample_out.reshape(-1)-test_y)**2)
print("Rsquared:",(1 - SS_res/ss_tot)*100)

Rsquared: 99.99885091790631


The train and validation set are also saved in case we want to reproduce the data and be able to always make more tests on the same data

In [17]:
np.save('train_sample1', train_input)
np.save('train_target1', train_output)
np.save('validation_sample1', validation_input)
np.save('validation_target1', validation_output)


In [81]:
print("mean sample",np.mean(sample_out))
print("mean validation",np.mean(validation_output))

mean sample 1.5994574586520056
mean validation 1.6014248846591497


We train linear and random forest model beore we save predictions of all of them on test set. Predictions can then be compared.

In [120]:
test_x = np.load('test_sample.npy'))
test_y= np.load('test_target.npy')
from sklearn.linear_model import LinearRegression
reg = LinearRegression().fit(train_input, train_output)
linear_prediction = reg.predict(test_x)
from sklearn.ensemble import RandomForestRegressor
regr = RandomForestRegressor(n_estimators=250,criterion="mse")
regr.fit(train_input, train_output)
rf_prediction = regr.predict(test_x)
[sample_out]= sample_function(test_x)
import pandas as pd
export = pd.DataFrame(sample_out)
export['test_output'] = test_y
export['Random_forest'] = rf_prediction
export['Linear_prediction'] = linear_prediction
export.columns = ['sample_output', 'test_output','Random_forest','Linear_prediction']
export.to_csv("model_output5.csv")

In [21]:
values = lasagne.layers.get_all_param_values(network)
values
import pickle as pkl
with open('model5_v2.pkl','wb') as f:
    pkl.dump(values, f)