In [3]:

import numpy as np

from sklearn.metrics import mean_squared_error,accuracy_score,mean_absolute_error
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense,Dropout,Input
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.regularizers import l1,l2
from tensorflow.keras.constraints import max_norm
from tensorflow.keras.optimizers import SGD,Adam,RMSprop
import pandas as pd
import time
from tensorflow.keras import backend as K
import os

In [4]:
!pip install hessianfree
import hessianfree as hf
from hessianfree.loss_funcs import LossFunction
from functools import wraps



In [5]:
# TO TURN GPU for Keras, set devic = cuda or gpu or gpu0 like this
os.environ["THEANO_FLAGS"] = "device=cuda,openmp=1,floatX=float32" 
# TO TURN ON OPENMP
os.environ["THEANO_FLAGS"] = "device=cpu,openmp=1,floatX=float32" 

In [6]:
def keras_NN(n_nodes,optimizer):
    '''This function initializes and return a new neural network with regularization techniques
       
       input: 
       n_nodes: a list of units per layer like [42,24,12,1] 
       optimizer: one of the following:
        sgd = SGD
        rmsprop = RMSprop
        adagrad = Adagrad
        adadelta = Adadelta
        adam = Adam
        adamax = Adamax
        nadam = Nadam
       

       output: an object that contains these methods:
       
       model.predict(X): return predictions corresponding to X
       
       model.get_weights(): return a list of current model weights, in the order of w0,b1,w1,b1,....w4,b4
       
       model.set_weights(): takes in a list of weights in the same format as what model.get_weights() returns
       
       model.fit(X_tr,Y_tr,verbose=0,epochs=50,batch_size=1024,validation_split=0.2, callbacks=[early_stopping]): 
       
       train a model with the inputs and the specification, you can train 1 epoch;  
       and return history of loss during training (using hist.history['loss']) and validation loss if callbacks =
       [EarlyStopping(patience=5)] (using hist.history['val_loss']) 
       
    '''
    # Clear the model
    model = None
    # BUILD INPUT LAYER
    inputs = Input(shape=(n_nodes[0],))

    # CONNECT TO THE FIRST HIDDEN LAYER
    x = Dense(n_nodes[1], kernel_initializer='he_normal', 
                    kernel_regularizer=l2(0.0001),kernel_constraint = max_norm(5), activation='relu')(inputs)
    x = Dropout(0.2)(x) # add dropout 

    # ADD SOME MORE HIDDEN LAYERS
    for i in range(2,len(n_nodes)-1):
        x = Dense(n_nodes[i],  kernel_initializer='he_normal', activation='relu',bias_initializer='he_normal',
            kernel_regularizer=l2(0.0001),kernel_constraint = max_norm(3))(x)
        x = Dropout(0.2)(x) # add dropout 

    # OUTPUT LAYER
    predictions = Dense(1, kernel_initializer='he_normal', activation='linear')(x)

    # INITIALIZE MODEL (now you can call model.get_weights() )
    model = Model(inputs=inputs, outputs=predictions)

    # Compile model with LOSS FUNCTION and ADAM OPTIMIZER
    model.compile(loss='mean_squared_error', optimizer=optimizer)
    return model

In [8]:
# Example OF comparing keras and Hessian Free: 

# read data and define training, validation and test set
data = np.genfromtxt('price_inputs_GS2016.csv',delimiter=',',skip_header=1)
X,ret = data[:,2:],data[:,1:2] # X means features, ret means target 
print('shape of total X and ret:',X.shape,ret.shape)

n_test = int(X.shape[0]*0.25)
N = X.shape[0] - n_test
n_val = int(N*0.2)
X_tr_temp, X_test, ret_tr_temp,ret_test = X[:-n_test],X[-n_test:],ret[:-n_test],ret[-n_test:]
X_tr,X_val,ret_tr,ret_val = X_tr_temp[:-n_val], X_tr_temp[-n_val:],ret_tr_temp[:-n_val],ret_tr_temp[-n_val:]


# define evaluation metrics
accuracy = lambda pred,truth: np.mean((pred>0)==(truth>0))
hit_ratio = lambda x,y: np.mean( ((x[1:] - x[:-1]) * (y[1:]-y[:-1]))>0 )
eval_f = [accuracy,hit_ratio,mean_squared_error,mean_absolute_error]
labels = 'accuracy,hit_ratio,mean_squared_error,mean_absolute_error'.split(',')

n_trials = 1 # run some number of trials for each model for confidence interval 

shape of total X and ret: (19669, 42) (19669, 1)


In [9]:
################### KERAS ONLY ######################

 
# define hyperparameters
n_nodes = [42,24,12,1] # number of units per layer
batch_size = 1024

early_stopping = EarlyStopping(patience=5)
# CHOOSE adam or adagrad 
model = keras_NN(n_nodes=n_nodes,optimizer='sgd')
model.fit(X_tr,ret_tr,verbose=0,epochs=100,batch_size=batch_size,
                 validation_data=(X_val,ret_val),callbacks=[early_stopping])
print('After fitting on the training set for 100 epochs, keras return this weight parameter') 
print(model.get_weights())

After fitting on the training set for 100 epochs, keras return this weight parameter
[array([[-0.11677323,  0.03750786, -0.02175612, ..., -0.05536552,
        -0.06611554, -0.05894994],
       [ 0.02889547, -0.06271525,  0.07103355, ...,  0.13097164,
         0.4106272 ,  0.13271786],
       [ 0.11421447,  0.11550514, -0.03584766, ..., -0.29455692,
         0.25533745, -0.14664645],
       ...,
       [ 0.1610842 , -0.21801807, -0.17005669, ...,  0.28941494,
        -0.08052007,  0.19307862],
       [ 0.30354312, -0.06030658, -0.05299408, ...,  0.18896367,
        -0.04619488, -0.31426293],
       [ 0.01657236, -0.06964859, -0.3642317 , ...,  0.27775666,
        -0.35603642,  0.3276148 ]], dtype=float32), array([ 0.02553064, -0.01172137, -0.05070024, -0.048676  , -0.03211929,
       -0.00596489, -0.02388897,  0.00882523, -0.03264376,  0.00826216,
        0.03675783, -0.01347234,  0.02514146, -0.03406848,  0.0194388 ,
       -0.00542165,  0.00380377, -0.00282063,  0.01813933, -0.0451947

In [10]:
################### Hessian Free ######################



def output_loss(func):
    """Convenience decorator that takes a loss defined for the output layer
    and converts it into the more general form in terms of all layers."""

    @wraps(func)
    def wrapped_loss(self, activities, targets):
        result = [None for _ in activities[:-1]]
        result += [func(self, activities[-1], targets)]

        return result

    return wrapped_loss

class mse(LossFunction):
    
    @output_loss
    def loss(self, output, targets):
        return np.sum(np.nan_to_num(output - targets) ** 2,
                      axis=tuple(range(1, output.ndim))) / 2 /output.shape[0]

    @output_loss
    def d_loss(self, output, targets):
        return np.nan_to_num(output - targets)/output.shape[0]

    @output_loss
    def d2_loss(self, output, _):
        return np.ones_like(output)/output.shape[0]
    
def pack_weights(ff):
    '''
    input: an hessian free model
    output: a list of weight following keras' format
    ff follows this format: [(W_0,b_0),(W_1,b_1)...(W_H,b_H)]'''
    res = []
    for i in range(len(n_nodes)-1):
        weights = ff.get_weights(ff.W,(i,i+1))
        
        res.extend([np.array(weights[0]),np.array(weights[1])])
    return res

pshape = lambda a_list: [ w.shape for w in a_list]


# define hyperparameters
layers = (len(n_nodes)-1)*['ReLU'] + ['Linear'] # all relu except linear for output layer
n_nodes = [42,24,12,1] # number of units per layer
batch_size = 1024


# initialize a hessian free model with GPU use optional
ff = hf.FFNet(n_nodes,layers=layers,loss_type=mse(),
          W_init_params={ "coeff":1.0, "biases":1.0,"init_type":'gaussian'},use_GPU=0)

ff.run_epochs(X,ret,test=(X_val,ret_val),minibatch_size=1024,
                      optimizer=hf.opt.HessianFree(CG_iter=2),
                      max_epochs=50, plotting=True,print_period=None)

print('After fitting on the training set for 100 epochs, hessian free return this weight parameter') 
print(pack_weights(ff))



After fitting on the training set for 100 epochs, hessian free return this weight parameter
[array([[ 0.8292594 , -0.5707942 , -0.44953394, ...,  0.09422408,
         0.8934627 ,  0.6504265 ],
       [ 1.2634635 ,  0.08433252, -0.53944516, ...,  0.9171897 ,
         0.74347943,  1.1688513 ],
       [-1.3517568 ,  1.4753577 ,  0.41765174, ..., -0.73805666,
        -0.75450635, -0.6591747 ],
       ...,
       [ 0.91960186, -0.9872238 , -0.68048793, ...,  0.45513633,
         1.220806  , -0.7912785 ],
       [-0.36187336, -0.73655057,  0.4322151 , ..., -0.3468003 ,
        -1.585897  , -0.03222641],
       [-0.53680503, -0.2690873 , -0.69392437, ..., -0.2054766 ,
        -0.53520006, -0.21770248]], dtype=float32), array([1.0585736 , 0.8601018 , 0.9611594 , 1.045068  , 1.0495595 ,
       0.920602  , 0.91276264, 0.8937233 , 1.0429558 , 1.071606  ,
       0.99281   , 0.9316847 , 1.0238755 , 1.0735124 , 1.1104681 ,
       1.2050701 , 1.0519226 , 1.0810913 , 1.0407579 , 1.0515708 ,
       0.9

In [11]:
############################## Evaluation metrics ##############################

# run some number of trials for each model
n_trials = 1
n_nodes = [42,24,12,1] # number of units per layer
batch_size = 1024
layers = (len(n_nodes)-1)*['ReLU'] + ['Linear'] # all relu except linear for output layer

# define evaluation metrics
accuracy = lambda pred,truth: np.mean((pred>0)==(truth>0))
hit_ratio = lambda x,y: np.mean( ((x[1:] - x[:-1]) * (y[1:]-y[:-1]))>0 )
eval_f = [accuracy,hit_ratio,mean_squared_error,mean_absolute_error]
labels = 'accuracy,hit_ratio,mean_squared_error,mean_absolute_error'.split(',')

timer = np.zeros((n_trials,2))
scores = np.zeros( (n_trials,len(labels), 2) )

for i in range(n_trials):
                      
    # CHOOSE sgd, adam or adagrad 
    early_stopping = EarlyStopping(patience=5)
    start = time.time()
    model = keras_NN(n_nodes=n_nodes,optimizer='sgd')
    hist = model.fit(X_tr,ret_tr,verbose=0,epochs=100,batch_size=batch_size,
                     validation_data=(X_val,ret_val),callbacks=[early_stopping])
    timer[i,0] = time.time()-start
    
    # evaluation metrics
    pred = model.predict(X_test).flatten()
    truth = ret_test.flatten()
    scores[i,:,0] = [ f(pred,truth) for j,f in enumerate(eval_f) ]
               
    
    # initliaze a hessian free model
    ff = hf.FFNet(n_nodes,layers=layers,loss_type=hf.loss_funcs.SquaredError(),
              W_init_params={ "coeff":1.0, "biases":1.0,"init_type":'gaussian'},use_GPU=0)
    
    # Hession free
    start = time.time()
    ff.run_epochs(X,ret,test=(X_val,ret_val),minibatch_size=1024,
                          optimizer=hf.opt.HessianFree(CG_iter=2),
                          max_epochs=50, plotting=True,print_period=None)
    timer[i,1] = time.time()-start
    
    # here I am borrowing Keras' model to evaluate the loss function of weights from Hessian free
    model.set_weights(pack_weights(ff))
    
     # evaluation metrics
    pred = model.predict(X_test).flatten()
    truth = ret_test.flatten()
    scores[i,:,1] = [ f(pred,truth) for j,f in enumerate(eval_f) ]
    


# print 'keras training loss',hist.history['loss']
# print 'valdidation loss',hist.history['val_loss']
# print 'Hessian Free training loss',ff.optimizer.plots['training error (log)'] # it says log but it's not for MSE
# print 'Hessian Free validation loss',ff.test_errs

for jj in range(2):
    print 
    exp = 'keras adagrad,hessian free'.split(',')[jj]
    print('Evaluating ',exp)
    print('running time per trial',timer[:,jj])
    s = scores[:,:,jj]
    print('prediction scores')
    
    mu = s.mean(axis=0)
    sd = s.std(axis=0)

    lower_bound = np.percentile(s, 2.5, axis=0)
    upper_bound = np.percentile(s, 97.5, axis=0)
     
    for i in range(s.shape[1]):
        print(labels[i])
        ##print('mean {} and std {}'.format(mu[i],std[i]))
        print('2.5 and 97.5 percentile [{},{}]'.format(lower_bound[i],upper_bound[i]))





Evaluating  keras adagrad
running time per trial [4.99666834]
prediction scores
accuracy
2.5 and 97.5 percentile [0.49928818385194224,0.49928818385194224]
hit_ratio
2.5 and 97.5 percentile [0.4420260374288039,0.4420260374288039]
mean_squared_error
2.5 and 97.5 percentile [1.1620071253546367,1.1620071253546367]
mean_absolute_error
2.5 and 97.5 percentile [0.7196781295589493,0.7196781295589493]
Evaluating  hessian free
running time per trial [4.99828696]
prediction scores
accuracy
2.5 and 97.5 percentile [0.49928818385194224,0.49928818385194224]
hit_ratio
2.5 and 97.5 percentile [0.49125305126118796,0.49125305126118796]
mean_squared_error
2.5 and 97.5 percentile [5.6297867114931,5.6297867114931]
mean_absolute_error
2.5 and 97.5 percentile [1.471740810557929,1.471740810557929]
