In [1]:
import pandas as pd
import numpy as np 

import os 
from datetime import date
from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn
import math
import torch.nn.functional as F
from torch.nn.modules.pooling import *
import time
import random
import datetime
from torch.autograd import Variable
import tensorflow as tf
from sklearn import preprocessing
import matplotlib.pyplot as plt
from utils import *
from data_processing import * 
import scipy as sp 
from sklearn.metrics import *
from sklearn.linear_model import Ridge
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import BaggingClassifier

# Transform Learning

In [2]:
def calOutShape(input_shape,ksize1=3,ksize2 =3,stride=1,maxpool1=False, maxpool2=False, mpl_ksize=2):
    mpl_stride = 2
    pad = ksize1//2
    dim2 = int((input_shape[2]-ksize1+2*pad)/stride) + 1
    if maxpool1 == True:
        dim2 = (dim2 - mpl_ksize)//mpl_stride + 1
    pad = ksize2//2
    dim2 = int((dim2-ksize2+2*pad)/stride) + 1
    if maxpool2 == True:
        dim2 = (dim2 - mpl_ksize)//mpl_stride + 1
    return dim2



class Transform(nn.Module):
    
    def __init__(self,input_shape, out_planes1 = 8, out_planes2 = 16,ksize1 = 3,ksize2 = 3,maxpool1 = False, 
                 maxpool2 = False,mpl_ksize=2):
        super(Transform, self).__init__()
        self.ksize1 = ksize1
        self.ksize2 = ksize2
        self.mpl_ksize = mpl_ksize
        self.out_planes1 = out_planes1
        self.out_planes2 = out_planes2
        self.init_T()
        self.maxpool1 = maxpool1
        self.maxpool2 = maxpool2
        self.input_shape = input_shape
        self.i = 1
        self.atom_ratio = 0.5
        self.init_X()
        self.gap = AdaptiveAvgPool1d(1)
        
        
        
    
    def init_T(self):
        conv = nn.Conv1d(1, out_channels = self.out_planes1, kernel_size = self.ksize1, stride=1, bias=True)
        self.T1 = conv._parameters['weight']
        conv = nn.Conv1d(in_channels = self.out_planes1, out_channels = self.out_planes2, 
                         kernel_size = self.ksize2, stride=1, bias=True)
        self.T2 = conv._parameters['weight']

       
           
        
    def init_X(self):
        dim2 = calOutShape(self.input_shape,self.ksize1,self.ksize2,stride = 1,maxpool1 = self.maxpool1, 
                           maxpool2 = self.maxpool2, mpl_ksize = self.mpl_ksize)
        X_shape = [self.input_shape[0],self.out_planes2,dim2]
        self.X  = nn.Parameter(torch.randn(X_shape), requires_grad=True)
        self.num_features = self.out_planes2*dim2
        self.num_atoms = int(self.num_features*self.atom_ratio*5) #dim2//2
        T_shape = [self.num_atoms,self.num_features]
        self.T = nn.Parameter(torch.randn(T_shape), requires_grad=True)

        
    def forward(self, inputs):
        x = F.conv1d(inputs, self.T1,padding = self.ksize1//2)
        if self.maxpool1:
            x = F.max_pool1d(x, 2)
        x = F.selu(x)
        x = F.conv1d(x, self.T2, padding = self.ksize2//2)
        if self.maxpool2:
            x = F.max_pool1d(x, 2)
        y = torch.mm(self.T,x.view(x.shape[0],-1).t())
        return x, y
        
          
    def get_params(self):
        return self.T1, self.T2, self.X, self.T
    
    
    def X_step(self):
        self.X.data = torch.clamp(self.X.data, min=0)


    def Z_step(self):
        self.Z.data = torch.clamp(self.Z.data, min=0)
        
        
    def get_TZ_Dims(self):
        return self.num_features,self.num_atoms, self.input_shape[0]
        
        
class Network(nn.Module): 
    def __init__(self,inputs_shape=(4,5,1),out_planes1 = 8, out_planes2 = 16,ksize1 = 3,ksize2 = 3,
             maxpool1=False, maxpool2=False, mpl_ksize=2,num_classes=2):
        super(Network, self).__init__()
        self.Transform1 = Transform(inputs_shape,out_planes1 = out_planes1, out_planes2 = out_planes2,ksize1 = ksize1,
                                    ksize2 = ksize2,maxpool1=maxpool1, maxpool2=maxpool2, mpl_ksize=mpl_ksize)
        self.Transform2 = Transform(inputs_shape,out_planes1 = out_planes1, out_planes2 = out_planes2,ksize1 = ksize1,
                                    ksize2 = ksize2,maxpool1=maxpool1, maxpool2=maxpool2, mpl_ksize=mpl_ksize)
        self.Transform3 = Transform(inputs_shape,out_planes1 = out_planes1, out_planes2 = out_planes2,ksize1 = ksize1,
                                    ksize2 = ksize2,maxpool1=maxpool1, maxpool2=maxpool2, mpl_ksize=mpl_ksize)
        self.Transform4 = Transform(inputs_shape,out_planes1 = out_planes1, out_planes2 = out_planes2,ksize1 = ksize1,
                                    ksize2 = ksize2,maxpool1=maxpool1, maxpool2=maxpool2, mpl_ksize=mpl_ksize)
        self.Transform5 = Transform(inputs_shape,out_planes1 = out_planes1, out_planes2 = out_planes2,ksize1 = ksize1,
                                    ksize2 = ksize2,maxpool1=maxpool1, maxpool2=maxpool2, mpl_ksize=mpl_ksize)
        self.num_features,self.num_atoms, self.input_shape = self.Transform1.get_TZ_Dims()
        Z_shape = [self.num_atoms,self.input_shape]
        self.Z = nn.Parameter(torch.randn(Z_shape), requires_grad=True)
        self.pred_list = []
        self.init_TX()
        

    def init_TX(self):
        self.T11,self.T21, self.X1, self.Tp1 = self.Transform1.get_params()
        self.T12,self.T22, self.X2, self.Tp2 = self.Transform2.get_params()
        self.T13,self.T23, self.X3, self.Tp3 = self.Transform3.get_params()
        self.T14,self.T24, self.X4, self.Tp4 = self.Transform4.get_params()
        self.T15,self.T25, self.X5, self.Tp5 = self.Transform5.get_params()
        self.T1 = torch.stack((self.T11,self.T12,self.T13,self.T14,self.T15),1)
        self.T2 = torch.stack((self.T21,self.T22,self.T23,self.T24,self.T25),1)
        self.X = torch.stack((self.X1,self.X2,self.X3,self.X4,self.X5),1) 
        self.T = torch.stack((self.Tp1,self.Tp2,self.Tp3,self.Tp4,self.Tp5),1) 
        
        
        
    def forward(self,x):
        batch_size, no_of_series, no_of_days = x.shape
        
        close = np.reshape(x[:,0],(batch_size,1,no_of_days))
        out1,out1p = self.Transform1(close)
        
        _open = np.reshape(x[:,1],(batch_size,1,no_of_days))
        out2,out2p = self.Transform2(_open)
        
        
        high = np.reshape(x[:,2],(batch_size,1,no_of_days))
        out3,out3p = self.Transform3(high)
        
        low = np.reshape(x[:,3],(batch_size,1,no_of_days))
        out4,out4p = self.Transform4(low)
        
        volume = np.reshape(x[:,4],(batch_size,1,no_of_days))
        out5, out5p = self.Transform5(volume)
        
        self.pred_list = [out1,out2,out3,out4,out5]

        gp1 = out1p + out2p + out3p + out4p + out5p
        return gp1
    
    
    def X_step(self):
        self.Transform1.X_step()
        self.Transform2.X_step()
        self.Transform3.X_step()
        self.Transform4.X_step()
        self.Transform5.X_step()
        
        
    def Z_step(self):
        self.Z.data = torch.clamp(self.Z.data, min=0)
        
    
    def conv_loss_distance(self):
        self.init_TX()
        
        loss = 0.0
        X_list = [self.X1,self.X2,self.X3,self.X4,self.X5]
        for i in range(len(self.pred_list)): 
            X = X_list[i].view(X_list[i].size(0), -1)
            predictions = self.pred_list[i].view(self.pred_list[i].size(0), -1)
            Y = predictions - X
            loss += Y.pow(2).mean()
            
        return loss
    
        
    def conv_loss_logdet(self):

        loss = 0.0
        for T in [self.T11,self.T21,self.T12,self.T22,self.T13,self.T23,self.T14,self.T24,self.T15,self.T25]:
            T = T.view(T.shape[0],-1)
            U, s, V = torch.svd(T)
            loss += -s.log().sum()
        return loss
        
        
    def conv_loss_frobenius(self):
        loss = 0.0
        for T in [self.T11,self.T21,self.T12,self.T22,self.T13,self.T23,self.T14,self.T24,self.T15,self.T25]:
            loss += T.pow(2).sum()
        return loss
    

    def loss_distance(self,predictions):

        loss = 0.0
        Y = predictions - self.Z
        loss += Y.pow(2).mean()    
        
        return loss
        
    def loss_logdet(self):
        loss = 0.0
        T = torch.stack((self.Tp1,self.Tp2,self.Tp3,self.Tp4,self.Tp5),1)
        T = T.view(T.shape[0],-1)
        U, s, V = torch.svd(T)
        loss = -s.log().sum()
        return loss
        
        
    def loss_frobenius(self):
        loss = 0.0
        t_p = torch.stack((self.Tp1,self.Tp2,self.Tp3,self.Tp4,self.Tp5),1)
        loss = t_p.pow(2).sum()
        return loss


    def computeLoss(self,predictions,mu,eps,lam):
        loss1 = self.conv_loss_distance()
        loss2 = self.conv_loss_frobenius() * eps
        loss3 = self.conv_loss_logdet() * mu
        loss4 = self.loss_distance(predictions)
        loss5 = self.loss_frobenius() * eps
        loss6 = self.loss_logdet() * mu
        loss = loss1 + loss2 + loss3 + loss4 + loss5 + loss6
        return loss

    
    def getTZ(self):
        return self.T.view(self.T.shape[0],-1), self.Z

## Training Related Functions 

In [3]:
def train_model(epoch, model, optimizer, train_loader, batch_size, mu, eps, lam):
    model.train()
    t0 = time.time()
    correct = 0
    total = 0
    final_loss = 0
    i = 0 
    j = 0
    T_list = []
    Z_list = []
    for batch_idx, (X,future_prices) in enumerate(train_loader):
        #print('batch:',batch_idx)
        data,future_prices = map(lambda x: Variable(x), [X,future_prices])
        data_size1 = data.shape[0]
        if j == 0: 
            prev_data = data
            prev_future_prices = future_prices
            j += 1
        if data.shape[0]<batch_size:
            diff = batch_size - data.shape[0]
            temp_data,temp_labels = prev_data[-diff:,:,:], prev_future_prices[-diff:]
            i = 1
            data, temp_future_prices = torch.cat((data,temp_data),0),torch.cat((future_prices,temp_future_prices),0)
            print('appended data')
            
        optimizer.zero_grad()
        

        output = model(data)

        
        final_output = output
        
        loss = model.computeLoss(final_output,mu,eps,lam)
        if epoch%plot_epoch_interval==0:
            train_loss.append(loss)
            epochs_list.append(epoch)
        final_loss += loss
        loss.backward()
        optimizer.step()
        model.X_step()
        model.Z_step()
        prev_data = data
        prev_future_prices = future_prices
    print('Epoch : {} , Training loss : {:.4f}\n'.format(epoch, final_loss.item()))
    return train_loss#, T_list, Z_list

 
    
def train_on_batch(lr,epochs,momentum,X_train,Y_train,X_test,Y_test,batch_size):
    print('seed:',seed)
    cuda = False
    torch.manual_seed(seed)
    np.random.seed(seed)
    train_loader = DataLoader(RegFinancialData(X_train,Y_train),batch_size=batch_size,shuffle=True) 
    test_loader = DataLoader(RegFinancialData(X_test,Y_test),batch_size=batch_size,shuffle=False) 
    
    
    mu = 0.01
    eps = 0.0001
    lam = 0 
    out_planes1 = out_pl1
    out_planes2 = out_pl2
    ksize1 = ks1
    ksize2 = ks2
    maxpool1 = maxpl1
    maxpool2 = maxpl2
    mpl_ksize = mpl_ks
    model = Network(inputs_shape=(batch_size,1,window_size),out_planes1 = out_planes1,out_planes2 = out_planes2,  
                    ksize1 = ksize1,ksize2 = ksize2, maxpool1 = maxpool1, maxpool2 = maxpool2, mpl_ksize=mpl_ksize)
#     for params in model.parameters():
#         print(params)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr, betas=(0.9, 0.999), eps=1e-08, weight_decay=5e-5, 
                                 amsgrad=False)

    for epoch in range(1, epochs + 1):
        train_loss = train_model(epoch, model, optimizer, train_loader, batch_size, mu, eps, lam)

    model.eval()

    S_train = Variable(torch.from_numpy(X_train).float(), requires_grad=False)
    S_test  = Variable(torch.from_numpy(X_test).float(), requires_grad=False)
    Z_train =  model(S_train).cpu().data.numpy()
    Z_test  = model(S_test).cpu().data.numpy()
    print('*'*100)
    print("Shape of Z_train: " + str(Z_train.shape))
    print("Shape of Z_test:  " + str(Z_test.shape))
    print('*'*100)

    return Z_train.transpose(),Z_test.transpose(),train_loss

# Main

In [4]:
window_size = 5
fileName = 'phd_research_data.csv'
data_df = getData(fileName)
if fileName == 'phd_research_data.csv':
    data_df.drop(['Unnamed: 0'],inplace=True,axis=1)
data_df,labels_df = labelData(data_df.copy())
data = np.asarray(data_df)

Unnamed: 0.1,Unnamed: 0,SYMBOL,CLOSE,OPEN_INT,OPEN,HIGH,LOW,CONTRACTS,CHG_IN_OI,DATE
0,0,ABIRLANUVO,1247.8,627750,1253.05,1259.0,1243.0,448,-9750,2014-01-01
1,1,ACC,1119.2,912750,1119.15,1123.85,1109.5,789,-250,2014-01-01
2,2,ADANIENT,271.55,3606000,266.0,272.3,264.35,1248,62000,2014-01-01
3,3,ADANIPORTS,158.05,5568000,157.3,159.35,157.3,585,356000,2014-01-01
4,4,ADANIPOWER,39.85,28656000,39.65,40.0,39.35,719,104000,2014-01-01


In [5]:
stocks_list = getStocksList(data_df)

length :  150


In [6]:
def checkClassImbal(Y_train):
    Ytrain_df= pd.DataFrame(Y_train,columns=[0])
    print(Ytrain_df.shape)
    print(Ytrain_df.columns)
    df = Ytrain_df.groupby(0).size()
    print(df)
    return df

In [7]:
start = 0
end = 150
seed_range = 10

In [8]:
train_loss = []
train_accuracies = []
epochs_list = []
learning_rates = []
epoch_interval = 10
plot_epoch_interval = 5
test_accuracies = []

In [9]:
lr = 0.001
momentum = 0.9
epochs = 100
test_size = 0.2
features_list = ['CLOSE','OPEN','HIGH','LOW','CONTRACTS']

In [12]:
out_pl1 = 4
out_pl2 = 8
maxpl1 = True
maxpl2 = False
ks1 = 5
ks2 = 3
mpl_ks = 2
custom_batch_size_flag = False
bs = 32
if custom_batch_size_flag == True:
    param_path = '_op1_' + str(out_pl1) + '_op2_' + str(out_pl2) \
            +'_mp1_' + str(maxpl1) + '_mp2_' + str(maxpl2) + '_ks1_' + str(ks1) + '_ks2_' + str(ks2) \
            + '_bs_' + str(bs) + '_new'
else:
    param_path = '_op1_' + str(out_pl1) + '_op2_' + str(out_pl2) \
                +'_mp1_' + str(maxpl1) + '_mp2_' + str(maxpl2) + '_ks1_' + str(ks1) + '_ks2_' + str(ks2) + '_new'
print(param_path)

_op1_4_op2_8_mp1_True_mp2_False_ks1_5_ks2_3_new


In [13]:
t_0 = time.time()
tr_loss_dict = {}
#stocks_list = ['ABIRLANUVO','ACC','ADANIENT','ADANIPORTS','ADANIPOWER','AJANTPHARM', 'ALBK']#['ADANIPORTS']
for stock in stocks_list[start:end]:
    t0 = time.time()
    _,windowed_data,_, next_day_values = getWindowedDataReg(data_df,stock,window_size)
    feat_wise_data = getFeatWiseData(windowed_data,features_list)
    feat_wise_data = feat_wise_data[:feat_wise_data.shape[0]-1]
    prev_day_values = getPrevDayFeatures(feat_wise_data)
    next_day_values = next_day_values[:,0]
    next_day_values = next_day_values[0:next_day_values.shape[0]-1]
    X_train,Y_train,X_test,Y_test = splitData(feat_wise_data,next_day_values,test_size=test_size)
    #X_test = X_test[0:X_test.shape[0]-1]
    prev_day_values = prev_day_values[X_train.shape[0]:][:,0]
    #prev_day_values = prev_day_values[0:prev_day_values.shape[0]-1]
    print('prev_day_values.shape:',prev_day_values.shape)
    print('X_test.shape:',X_test.shape)
    print('Y_test.shape:',Y_test.shape)
    print('next_day_values.shape:', next_day_values.shape)
    prev_val_path = base_path + 'data/Reg2/TL_Test/' + stock + param_path +  '_' + str(test_size) + '_tl_yprev_cp.npy'
    np.save(prev_val_path,prev_day_values)
    for sd in range(1,seed_range+1):
        t01 = time.time()
        seed = sd
        print('Training for stock :',stock)
        print('seed : ',seed)
        print('starting at time:',t0)
        print('*'*100)
        if custom_batch_size_flag:
            batch_size = bs
        else:
            batch_size = X_train.shape[0]
        Ztrain, Ztest,train_loss = train_on_batch(lr,epochs,momentum,X_train,Y_train,X_test,Y_test,batch_size)
        tr_loss_dict[stock] = {}
        tr_loss_dict[stock] = train_loss
        train_loss = []
        xtr_path = base_path + 'data/Reg2/TL_Train/' + stock + param_path +'_' + str(test_size) + '_tl_xtrain' + str(seed) + '.npy'
        ytr_path = base_path + 'data/Reg2/TL_Train/' + stock + param_path +  '_' + str(test_size) + '_tl_ytrain' + str(seed) + '.npy'
        xte_path = base_path + 'data/Reg2/TL_Test/' + stock + param_path +  '_' + str(test_size) + '_tl_xtest' + str(seed) + '.npy'
        yte_path = base_path + 'data/Reg2/TL_Test/' + stock + param_path +  '_' + str(test_size) + '_tl_ytest' + str(seed) + '.npy'
        np.save(xtr_path,Ztrain)
        np.save(ytr_path,Y_train)
        np.save(xte_path,Ztest)
        np.save(yte_path,Y_test)
        t11 = time.time()
        print('*'*100)
        #print('*'*100)
        print('\n')
        print('time taken for training one stock:',datetime.timedelta(seconds = t11-t01))
    t1 = time.time()
    print('time taken for one stock through all seeds',datetime.timedelta(seconds = t1-t0))
t_1 = time.time()
print('time taken for 125 stocks through all seeds : ',str(datetime.timedelta(seconds = t_1-t_0)))


prev_day_values.shape: (171,)
X_test.shape: (171, 5, 5)
Y_test.shape: (171,)
next_day_values.shape: (855,)
Training for stock : ABIRLANUVO
seed :  1
starting at time: 1571845163.8744543
****************************************************************************************************
seed: 1
Epoch : 1 , Training loss : 7059385.0000

Epoch : 2 , Training loss : 6717116.0000

Epoch : 3 , Training loss : 6392193.5000

Epoch : 4 , Training loss : 6083825.5000

Epoch : 5 , Training loss : 5791593.0000

Epoch : 6 , Training loss : 5514709.5000

Epoch : 7 , Training loss : 5252405.0000

Epoch : 8 , Training loss : 5004113.5000

Epoch : 9 , Training loss : 4769313.5000

Epoch : 10 , Training loss : 4547394.5000

Epoch : 11 , Training loss : 4337705.5000

Epoch : 12 , Training loss : 4139599.0000

Epoch : 13 , Training loss : 3952617.5000

Epoch : 14 , Training loss : 3776108.7500

Epoch : 15 , Training loss : 3609572.5000

Epoch : 16 , Training loss : 3452510.7500

Epoch : 17 , Training loss

Epoch : 73 , Training loss : 360192.6875

Epoch : 74 , Training loss : 350005.5000

Epoch : 75 , Training loss : 340297.9375

Epoch : 76 , Training loss : 331066.3438

Epoch : 77 , Training loss : 322276.4375

Epoch : 78 , Training loss : 313920.1875

Epoch : 79 , Training loss : 305970.8125

Epoch : 80 , Training loss : 298408.0312

Epoch : 81 , Training loss : 291222.1562

Epoch : 82 , Training loss : 284392.7188

Epoch : 83 , Training loss : 277897.3438

Epoch : 84 , Training loss : 271725.8125

Epoch : 85 , Training loss : 265863.5938

Epoch : 86 , Training loss : 260288.5938

Epoch : 87 , Training loss : 254995.2812

Epoch : 88 , Training loss : 249965.9688

Epoch : 89 , Training loss : 245189.0000

Epoch : 90 , Training loss : 240646.3438

Epoch : 91 , Training loss : 236330.1875

Epoch : 92 , Training loss : 232226.5625

Epoch : 93 , Training loss : 228327.3438

Epoch : 94 , Training loss : 224616.1875

Epoch : 95 , Training loss : 221085.9375

Epoch : 96 , Training loss : 21772

Epoch : 38 , Training loss : 3952888.5000

Epoch : 39 , Training loss : 3813799.5000

Epoch : 40 , Training loss : 3681810.2500

Epoch : 41 , Training loss : 3556666.5000

Epoch : 42 , Training loss : 3437927.0000

Epoch : 43 , Training loss : 3325295.0000

Epoch : 44 , Training loss : 3218431.0000

Epoch : 45 , Training loss : 3117041.2500

Epoch : 46 , Training loss : 3020817.2500

Epoch : 47 , Training loss : 2929478.0000

Epoch : 48 , Training loss : 2842783.2500

Epoch : 49 , Training loss : 2760442.2500

Epoch : 50 , Training loss : 2682200.0000

Epoch : 51 , Training loss : 2607926.5000

Epoch : 52 , Training loss : 2537323.2500

Epoch : 53 , Training loss : 2470187.5000

Epoch : 54 , Training loss : 2406337.2500

Epoch : 55 , Training loss : 2345602.5000

Epoch : 56 , Training loss : 2287761.7500

Epoch : 57 , Training loss : 2232688.0000

Epoch : 58 , Training loss : 2180174.2500

Epoch : 59 , Training loss : 2130139.7500

Epoch : 60 , Training loss : 2082410.6250

Epoch : 61 

Epoch : 6 , Training loss : 8549072.0000

Epoch : 7 , Training loss : 8096836.5000

Epoch : 8 , Training loss : 7667460.0000

Epoch : 9 , Training loss : 7260292.0000

Epoch : 10 , Training loss : 6874511.5000

Epoch : 11 , Training loss : 6509334.0000

Epoch : 12 , Training loss : 6163870.0000

Epoch : 13 , Training loss : 5837289.5000

Epoch : 14 , Training loss : 5528854.5000

Epoch : 15 , Training loss : 5237817.5000

Epoch : 16 , Training loss : 4963263.5000

Epoch : 17 , Training loss : 4704309.5000

Epoch : 18 , Training loss : 4460220.0000

Epoch : 19 , Training loss : 4230236.0000

Epoch : 20 , Training loss : 4013575.0000

Epoch : 21 , Training loss : 3809524.2500

Epoch : 22 , Training loss : 3617468.2500

Epoch : 23 , Training loss : 3436830.2500

Epoch : 24 , Training loss : 3266817.0000

Epoch : 25 , Training loss : 3106795.0000

Epoch : 26 , Training loss : 2956083.5000

Epoch : 27 , Training loss : 2814185.7500

Epoch : 28 , Training loss : 2680577.5000

Epoch : 29 , Tr

Epoch : 87 , Training loss : 736678.6250

Epoch : 88 , Training loss : 726703.8125

Epoch : 89 , Training loss : 716930.8125

Epoch : 90 , Training loss : 707387.2500

Epoch : 91 , Training loss : 698014.9375

Epoch : 92 , Training loss : 688840.8750

Epoch : 93 , Training loss : 679850.0000

Epoch : 94 , Training loss : 671024.0625

Epoch : 95 , Training loss : 662385.5000

Epoch : 96 , Training loss : 653906.4375

Epoch : 97 , Training loss : 645592.3750

Epoch : 98 , Training loss : 637444.1250

Epoch : 99 , Training loss : 629437.1875

Epoch : 100 , Training loss : 621550.8750

****************************************************************************************************
Shape of Z_train: (40, 684)
Shape of Z_test:  (40, 171)
****************************************************************************************************
****************************************************************************************************


time taken for training one stock: 0:00:05.723663


Epoch : 54 , Training loss : 1114724.0000

Epoch : 55 , Training loss : 1085255.2500

Epoch : 56 , Training loss : 1057253.0000

Epoch : 57 , Training loss : 1030616.0000

Epoch : 58 , Training loss : 1005287.7500

Epoch : 59 , Training loss : 981155.5000

Epoch : 60 , Training loss : 958179.3125

Epoch : 61 , Training loss : 936282.3750

Epoch : 62 , Training loss : 915395.0000

Epoch : 63 , Training loss : 895455.0000

Epoch : 64 , Training loss : 876441.6250

Epoch : 65 , Training loss : 858256.8750

Epoch : 66 , Training loss : 840891.0625

Epoch : 67 , Training loss : 824273.8750

Epoch : 68 , Training loss : 808377.1875

Epoch : 69 , Training loss : 793145.1250

Epoch : 70 , Training loss : 778547.1250

Epoch : 71 , Training loss : 764558.9375

Epoch : 72 , Training loss : 751139.9375

Epoch : 73 , Training loss : 738251.1250

Epoch : 74 , Training loss : 725861.0625

Epoch : 75 , Training loss : 713961.8750

Epoch : 76 , Training loss : 702525.6250

Epoch : 77 , Training loss : 

KeyboardInterrupt: 

# External Regressor + Thresholding

In [14]:
def ridge_regressor(Xtrain, Ytrain, Xtest, Ytest, alpha = 1.0, random_state = 1):
    clf = Ridge(alpha=alpha,random_state = random_state)
    clf.fit(Xtrain, Ytrain) 
    y_pred = clf.predict(Xtest)
    mae = mean_absolute_error(Ytest, y_pred)
    mse = mean_squared_error(Ytest, y_pred)
    rmse = math.sqrt(mse)
    return y_pred, mae, mse,rmse

In [15]:
res_file_name = base_path+'Results2/Reg2/res_' + param_path + 'final.csv'
pred_file_name = base_path+'Results2/Reg2/res_' + param_path + '_pred_global_final.csv'
if os.path.exists(res_file_name):
    os.remove(res_file_name)
if os.path.exists(pred_file_name):
    os.remove(pred_file_name)

In [17]:
gap = 5.0
log_interval = 1
cnt = 0 
alpha = 0.1
random_state = 1
test_acc_dict  = {}
final_results_df = pd.DataFrame()
t0 = time.time()
for stock in stocks_list[start:end]:
    t01 = time.time()
    temp_dict = {}
    test_acc_dict[stock] = {}
    best_ar = 0.0
    best_mae = 100000.0
    best_f1_score = 0.0
    mae, mse, rmse = 0.0, 0.0, 0.0
    found = 0
    ytr_pred, yte_pred, tr_scores, te_score = [],[],[],[]
    AR = 0
    temp_ytr_pred, temp_yte_pred, temp_tr_scores, temp_te_scores = [],[],[],[]
    temp_AR = 0
    prev_val_path = base_path + 'data/Reg2/TL_Test/' + stock + param_path +  '_' + str(test_size) +  '_tl_yprev_cp.npy'
    prev_day_value = np.load(prev_val_path)
    for sd in range(1,seed_range+1):
        seed = sd
        print('stock : ', stock)
        xtr_path = base_path + 'data/Reg2/TL_Train/' + stock + param_path +'_' + str(test_size) + '_tl_xtrain' + str(seed) + '.npy'
        ytr_path = base_path + 'data/Reg2/TL_Train/' + stock + param_path +  '_' + str(test_size) + '_tl_ytrain' + str(seed) + '.npy'
        xte_path = base_path + 'data/Reg2/TL_Test/' + stock + param_path +  '_' + str(test_size) + '_tl_xtest' + str(seed) + '.npy'
        yte_path = base_path + 'data/Reg2/TL_Test/' + stock + param_path +  '_' + str(test_size) + '_tl_ytest' + str(seed) + '.npy'
        Ztrain = np.load(xtr_path)
        Y_train = np.load(ytr_path)
        Ztest = np.load(xte_path)
        Y_test = np.load(yte_path)
        print('seed : {: }'.format(seed))
        yte_pred, mae, mse, rmse = ridge_regressor(Ztrain, Y_train, Ztest, Y_test, alpha = alpha, random_state = random_state)
        pred_labels = np.where((yte_pred - prev_day_value)>0,1,0)
        true_labels = np.where((Y_test - prev_day_value)>0,1,0)
        te_acc = round(accuracy_score(true_labels, pred_labels)*100,3)
        precision, recall, f1_score,_ = precision_recall_fscore_support(true_labels, pred_labels, pos_label=1, average='binary')
        limit = Ztrain.shape[0]
        #print(pred_labels)
        mae2 = np.sum(np.abs(yte_pred-Y_test))/np.sum(Y_test)
        print('mae2 :', mae2)
        print('precision :{:.4f}, recall:{:.4f}, f1_score : {:.4f}'.format(precision, recall, f1_score))
        if  f1_score > best_f1_score:
            best_te_acc = te_acc
            best_mae = mae
            found = 1
            best_f1_score = f1_score
            test_acc_dict[stock]['seed'] = seed
            test_acc_dict[stock]['mae'] = mae
            test_acc_dict[stock]['mae2'] = mae2
            test_acc_dict[stock]['mse'] = mse
            test_acc_dict[stock]['rmse'] = rmse
            test_acc_dict[stock]['Test Accuracy'] = te_acc
            AR = compAnnualReturns(stock,pred_labels,data_df,window_size,limit)
            test_acc_dict[stock]['AR'] = AR
            test_acc_dict[stock]['Precision'] = precision
            test_acc_dict[stock]['Recall'] = recall
            test_acc_dict[stock]['F1_score'] = f1_score
            temp_dict['pred_cp'] = yte_pred
            temp_dict['Stock_Class'] = true_labels
            temp_dict['ypred'] = pred_labels
            temp_dict['found'] = found
        
    temp_final_df = pd.DataFrame(Y_test,columns=['actual_close_price'])
    temp_final_df['predicted_close_price'] = temp_dict['pred_cp']
    temp_final_df['prev_day_close_price'] = prev_day_value
    temp_final_df['Stock_Class'] = temp_dict['Stock_Class']
    temp_final_df['ypred'] = temp_dict['ypred']
    temp_final_df['difference'] = temp_final_df['predicted_close_price'] - temp_final_df['actual_close_price']
    temp_final_df['abs_difference'] = temp_final_df['difference'].abs()
    temp_final_df['SYMBOL'] = stock
    temp_final_df['found'] = found
    final_results_df = pd.concat([final_results_df,temp_final_df],axis = 0)
    cnt += 1
    if cnt%log_interval==0:
        df = pd.DataFrame.from_dict(data = test_acc_dict, orient = 'index').reset_index()
        print('test_acc_dict data:')
        print(df.head(2))
        if not os.path.exists(res_file_name):
            df.to_csv(res_file_name,index=None, header='column_names')
        else: # else it exists so append without writing the header
            df.to_csv(res_file_name, mode='a',index=None, header=False)
        if not os.path.exists(pred_file_name):
            final_results_df.to_csv(pred_file_name,index=None, header='column_names')
        else: # else it exists so append without writing the header
            final_results_df.to_csv(pred_file_name, mode='a',index=None, header=False)
        print('final_results_df data:')
        print(final_results_df.head(2))
        test_acc_dict = {}
        final_results_df = pd.DataFrame()
    t11 = time.time()
    print('time taken for one stock with ridge: ' ,datetime.timedelta(seconds = t11 - t01))
    print('*'*100)
t1 = time.time()
print('time taken for all stocks with ridge: ' ,datetime.timedelta(seconds = t1 - t0))


stock :  ABIRLANUVO
seed :  1
mae2 : 0.016774158058202034
precision :0.5446, recall:0.6932, f1_score : 0.6100


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X.sort_values(by = ['DATE'],inplace=True)


days: 251
stock :  ABIRLANUVO
seed :  2
mae2 : 0.022962603931086855
precision :0.5474, recall:0.5909, f1_score : 0.5683
stock :  ABIRLANUVO
seed :  3
mae2 : 0.020643607185934054
precision :0.5242, recall:0.7386, f1_score : 0.6132


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X.sort_values(by = ['DATE'],inplace=True)


days: 251
stock :  ABIRLANUVO
seed :  4
mae2 : 0.016827133810959165
precision :0.5422, recall:0.5114, f1_score : 0.5263
stock :  ABIRLANUVO
seed :  5
mae2 : 0.01581606129574119
precision :0.4778, recall:0.4886, f1_score : 0.4831
stock :  ABIRLANUVO
seed :  6
mae2 : 0.021776766351177333
precision :0.5288, recall:0.6250, f1_score : 0.5729
stock :  ABIRLANUVO
seed :  7
mae2 : 0.018084398954094198
precision :0.5534, recall:0.6477, f1_score : 0.5969
stock :  ABIRLANUVO
seed :  8
mae2 : 0.022936751294701005
precision :0.5514, recall:0.6705, f1_score : 0.6051
stock :  ABIRLANUVO
seed :  9
mae2 : 0.016069937478851104
precision :0.5517, recall:0.5455, f1_score : 0.5486
stock :  ABIRLANUVO
seed :  10
mae2 : 0.01917293870642169
precision :0.4958, recall:0.6705, f1_score : 0.5700
test_acc_dict data:
        index  seed        mae      mae2          mse       rmse  \
0  ABIRLANUVO     3  30.262568  0.020644  1448.177533  38.054928   

   Test Accuracy     AR  Precision    Recall  F1_score  
0      

FileNotFoundError: [Errno 2] No such file or directory: '../Results2/Reg2/res__op1_4_op2_8_mp1_True_mp2_False_ks1_5_ks2_3_newfinal.csv'

# External Classifier

In [18]:
rf_res_file_name = base_path+'Results2/Reg2/Classification/convTL_TL_classification_2layers_results.csv'
rf_pred_file_name = base_path+'Results2/Reg2/Classification/convTL_TL_classification_2layers_pred.csv'
if os.path.exists(rf_res_file_name):
     os.remove(rf_res_file_name)
if os.path.exists(rf_pred_file_name):
     os.remove(rf_pred_file_name)

In [19]:
def clfRF(Ztrain,Y_train,Ztest,Y_test,n_clf=5,depth=1,rnd_state=11):
    clf_rf = RandomForestClassifier(n_estimators=n_clf, max_depth=depth,random_state=rnd_state)
    clf_rf.fit(Ztrain, Y_train)
    ytr_rf_pred = clf_rf.predict(Ztrain)
    yte_rf_pred = clf_rf.predict(Ztest)
    tr_scores = clf_rf.predict_proba(Ztrain)
    te_scores = clf_rf.predict_proba(Ztest)
    return ytr_rf_pred, yte_rf_pred, tr_scores, te_scores

In [22]:
gap = 5.0
log_interval = 1
tr_acc_min = 54
cnt = 0 
pos_label = 1
depth = 4
num_clfs = 5
rf_test_acc_dict  = {}
final_results_df = pd.DataFrame()
t0 = time.time()
for stock in stocks_list[start:end]:
    t01 = time.time()
    temp_dict = {}
    rf_test_acc_dict[stock] = {}
    best_tr_acc = 0.0
    best_te_acc = 0.0
    best_f1_score = 0.0
    f1_score = 0.0
    best_ar = 0.0
    found = 0
    ytr_pred, yte_pred, tr_scores, te_score = [],[],[],[]
    AR = 0
    temp_ytr_pred, temp_yte_pred, temp_tr_scores, temp_te_scores = [],[],[],[]
    temp_f1_score = 0
    temp_auc = 0
    temp_AR= 0
    _,windowed_data,_, _ = getWindowedDataReg(data_df,stock,window_size)
    feat_wise_data = getFeatWiseData(windowed_data,features_list)
    prev_day_values = getPrevDayFeatures(feat_wise_data)
    prev_day_values = prev_day_values[:,0]
    for sd in range(1,seed_range+1):
        seed = sd
        print('stock : ', stock)
        xtr_path = base_path + 'data/Reg2/TL_Train/' + stock + param_path +'_' + str(test_size) + '_tl_xtrain' + str(seed) + '.npy'
        ytr_path = base_path + 'data/Reg2/TL_Train/' + stock + param_path +  '_' + str(test_size) + '_tl_ytrain' + str(seed) + '.npy'
        xte_path = base_path + 'data/Reg2/TL_Test/' + stock + param_path +  '_' + str(test_size) + '_tl_xtest' + str(seed) + '.npy'
        yte_path = base_path + 'data/Reg2/TL_Test/' + stock + param_path +  '_' + str(test_size) + '_tl_ytest' + str(seed) + '.npy'
        Ztrain = np.load(xtr_path)
        Y_train = np.load(ytr_path)
        Ztest = np.load(xte_path)
        Y_test = np.load(yte_path)
        ytr_prev_day = prev_day_values[:Y_train.shape[0]]
        yte_prev_day  = prev_day_values[Y_train.shape[0]:]
        yte_prev_day  = yte_prev_day[:yte_prev_day.shape[0]-1]

        for random_state in range(1,21):
            print('seed : {: }, num_clfs : {: }, depth : {: }, random_state : {: }'.format(seed,num_clfs,depth,random_state))
            Y_train_true_labels = np.where((Y_train - ytr_prev_day)>0,1,0)
            Y_test_true_labels = np.where((Y_test - yte_prev_day)>0,1,0)
            ytr_pred, yte_pred, tr_scores, te_scores = clfRF(Ztrain,Y_train_true_labels, Ztest, Y_test_true_labels, n_clf=num_clfs,
                                                                        depth=depth, rnd_state=random_state)
            limit = Ztrain.shape[0]
            precision, recall, f1_score,_ = precision_recall_fscore_support(Y_test_true_labels, yte_pred, pos_label=1, average='binary')
            
            print('F1_score : ',f1_score)

            if f1_score > best_f1_score:
                found = 1
                AR = compAnnualReturns(stock,yte_pred,data_df,window_size,limit)
                print('AR: ',AR)
                fpr, tpr, thresholds = roc_curve(Y_test_true_labels, te_scores[:,pos_label], pos_label = pos_label)
                AUC_val = auc(fpr, tpr)
                best_ar = AR
                best_te_acc = te_acc
                best_f1_score = f1_score
                rf_test_acc_dict[stock]['seed'] = seed
                rf_test_acc_dict[stock]['depth'] = depth
                rf_test_acc_dict[stock]['num_clfs'] = num_clfs
                rf_test_acc_dict[stock]['random_state'] = random_state
                rf_test_acc_dict[stock]['F1_score'] = round(f1_score,3)
                rf_test_acc_dict[stock]['Precision'] = round(precision,3)
                rf_test_acc_dict[stock]['Recall'] = round(recall,3)
                rf_test_acc_dict[stock]['AUC'] = round(AUC_val,3)
                rf_test_acc_dict[stock]['AR'] = best_ar
                rf_test_acc_dict[stock]['found'] = found
                temp_dict['yte_pred'] = yte_pred
                temp_dict['te_scores'] = te_scores
            
    temp_final_df = pd.DataFrame(Y_test,columns=['ytrue'])
    temp_final_df['ypred'] = temp_dict['yte_pred']
    temp_scores_df = pd.DataFrame(temp_dict['te_scores']) 
    temp_final_df = pd.concat([temp_final_df,temp_scores_df],axis = 1)
    temp_final_df['SYMBOL'] = stock
    final_results_df = pd.concat([final_results_df,temp_final_df],axis = 0)
    cnt += 1
    if cnt%log_interval==0:
        df = pd.DataFrame.from_dict(data = rf_test_acc_dict, orient = 'index').reset_index()
        print('rf_test_acc_dict data:')
        print(df.head(2))
        if not os.path.exists(rf_res_file_name):
            df.to_csv(rf_res_file_name,index=None, header='column_names')
        else: # else it exists so append without writing the header
            df.to_csv(rf_res_file_name, mode='a',index=None, header=False)
        if not os.path.exists(rf_pred_file_name):
            final_results_df.to_csv(rf_pred_file_name,index=None, header='column_names')
        else: # else it exists so append without writing the header
            final_results_df.to_csv(rf_pred_file_name, mode='a',index=None, header=False)
        print('final_results_df data:')
        print(final_results_df.head(2))
        rf_test_acc_dict = {}
        final_results_df = pd.DataFrame()
    t11 = time.time()
    print('time taken for one stock grid-search tuning with RF: ' ,datetime.timedelta(seconds = t11 - t01))
    print('*'*100)
t1 = time.time()
print('time taken for all stocks grid-search tuning with RF: ' ,datetime.timedelta(seconds = t1 - t0))

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X.sort_values(by = ['DATE'],inplace=True)


stock :  ABIRLANUVO
seed :  1, num_clfs :  5, depth :  4, random_state :  1
F1_score :  0.6600985221674877


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X.sort_values(by = ['DATE'],inplace=True)


days: 251
AR:  -0.41
seed :  1, num_clfs :  5, depth :  4, random_state :  2
F1_score :  0.5759162303664922
seed :  1, num_clfs :  5, depth :  4, random_state :  3
F1_score :  0.5728155339805825
seed :  1, num_clfs :  5, depth :  4, random_state :  4
F1_score :  0.5656565656565656
seed :  1, num_clfs :  5, depth :  4, random_state :  5
F1_score :  0.5376344086021506
seed :  1, num_clfs :  5, depth :  4, random_state :  6
F1_score :  0.576923076923077
seed :  1, num_clfs :  5, depth :  4, random_state :  7
F1_score :  0.5700483091787439
seed :  1, num_clfs :  5, depth :  4, random_state :  8
F1_score :  0.5520833333333334
seed :  1, num_clfs :  5, depth :  4, random_state :  9
F1_score :  0.6339285714285714
seed :  1, num_clfs :  5, depth :  4, random_state :  10
F1_score :  0.5846153846153845
seed :  1, num_clfs :  5, depth :  4, random_state :  11
F1_score :  0.5560975609756097
seed :  1, num_clfs :  5, depth :  4, random_state :  12
F1_score :  0.582010582010582
seed :  1, num_clfs :

F1_score :  0.5714285714285715
seed :  5, num_clfs :  5, depth :  4, random_state :  19
F1_score :  0.5858585858585857
seed :  5, num_clfs :  5, depth :  4, random_state :  20
F1_score :  0.6226415094339622
stock :  ABIRLANUVO
seed :  6, num_clfs :  5, depth :  4, random_state :  1
F1_score :  0.5572139303482587
seed :  6, num_clfs :  5, depth :  4, random_state :  2
F1_score :  0.4971751412429378
seed :  6, num_clfs :  5, depth :  4, random_state :  3
F1_score :  0.62882096069869
seed :  6, num_clfs :  5, depth :  4, random_state :  4
F1_score :  0.544502617801047
seed :  6, num_clfs :  5, depth :  4, random_state :  5
F1_score :  0.6124401913875598
seed :  6, num_clfs :  5, depth :  4, random_state :  6
F1_score :  0.5578947368421052
seed :  6, num_clfs :  5, depth :  4, random_state :  7
F1_score :  0.6146788990825689
seed :  6, num_clfs :  5, depth :  4, random_state :  8
F1_score :  0.5803108808290155
seed :  6, num_clfs :  5, depth :  4, random_state :  9
F1_score :  0.5727699530

F1_score :  0.6153846153846153
seed :  10, num_clfs :  5, depth :  4, random_state :  16
F1_score :  0.5154639175257733
seed :  10, num_clfs :  5, depth :  4, random_state :  17
F1_score :  0.6325581395348837
seed :  10, num_clfs :  5, depth :  4, random_state :  18
F1_score :  0.5631067961165048
seed :  10, num_clfs :  5, depth :  4, random_state :  19
F1_score :  0.5876288659793815
seed :  10, num_clfs :  5, depth :  4, random_state :  20
F1_score :  0.5933014354066986
rf_test_acc_dict data:
        index  seed  depth  num_clfs  random_state  F1_score  Precision  \
0  ABIRLANUVO     1      4         5             1  0.660099      0.583   

   Recall    AUC    AR  found  
0   0.761  0.572 -0.41      1  
final_results_df data:
     ytrue  ypred         0         1      SYMBOL
0  1390.35      0  0.524921  0.475079  ABIRLANUVO
1  1387.30      0  0.514610  0.485390  ABIRLANUVO
time taken for one stock grid-search tuning with RF:  0:00:06.342249
********************************************

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X.sort_values(by = ['DATE'],inplace=True)


stock :  ACC


FileNotFoundError: [Errno 2] No such file or directory: '../data/Reg2/TL_Train/ACC_op1_4_op2_8_mp1_True_mp2_False_ks1_5_ks2_3_new_0.2_tl_xtrain1.npy'

# Predict other Regression Values

In [15]:
alpha = 0.1
random_state = 1
test_pred_dict = {}
test_measures_dict = {}
cnt = 0
t0 = time.time()
for stock in stocks_list[start:end]: 
    t01 = time.time()
    print('cnt:',cnt)
    print('stock:',stock)
    test_pred_dict[stock] = {}
    test_measures_dict[stock] = {}
    _, _, _, next_day_values = getWindowedDataReg(data_df,stock,window_size)
    next_day_values = next_day_values[0:next_day_values.shape[0]-1]
    seed = int(layers2_res_df.loc[layers2_res_df['index']==stock]['seed'].values.tolist()[0])
    print('seed : ',seed)
    xtr_path = base_path + 'data/Reg2/TL_Train/' + stock + param_path +'_' + str(test_size) + '_tl_xtrain' + str(seed) + '.npy'
    xte_path = base_path + 'data/Reg2/TL_Test/' + stock + param_path +  '_' + str(test_size) + '_tl_xtest' + str(seed) + '.npy'
    Ztrain = np.load(xtr_path)
    Ztest = np.load(xte_path)
    #for i in range(1,5):
    limit = Ztrain.shape[0]
    next_open_prices, next_high_prices, next_low_prices, next_day_volume = next_day_values[:,1],next_day_values[:,2],\
                                                                        next_day_values[:,3], next_day_values[:,4]
    Y_train_op = next_open_prices[0:limit] 
    Y_test_op = next_open_prices[limit:]
    y_pred_op, mae_op, mse_op, rmse_op = ridge_regressor(Ztrain, Y_train_op, Ztest, Y_test_op, alpha = alpha, random_state = random_state)
    mae2_op = np.sum(np.abs(y_pred_op - Y_test_op))/np.sum(Y_test_op)
    test_pred_dict[stock]['True_OP'] = Y_test_op
    test_pred_dict[stock]['Predicted_OP'] = y_pred_op
    test_measures_dict[stock]['MAE_OP'] = mae_op
    test_measures_dict[stock]['MAE2_OP'] = mae2_op
    test_measures_dict[stock]['MSE_OP'] = mse_op
    test_measures_dict[stock]['RMSE_OP'] = rmse_op
    
    
    Y_train_h = next_high_prices[0:limit] 
    Y_test_h = next_high_prices[limit:]
    y_pred_h, mae_h, mse_h, rmse_h = ridge_regressor(Ztrain, Y_train_h, Ztest, Y_test_h, alpha = alpha, random_state = random_state)
    mae2_h = np.sum(np.abs(y_pred_h - Y_test_h))/np.sum(Y_test_h)
    test_pred_dict[stock]['True_HP'] = Y_test_h
    test_pred_dict[stock]['Predicted_HP'] = y_pred_h
    test_measures_dict[stock]['MAE_HP'] = mae_h
    test_measures_dict[stock]['MAE2_HP'] = mae2_h
    test_measures_dict[stock]['MSE_HP'] = mse_h
    test_measures_dict[stock]['RMSE_HP'] = rmse_h
    
    Y_train_l = next_low_prices[0:limit] 
    Y_test_l = next_low_prices[limit:]
    y_pred_l, mae_l, mse_l, rmse_l = ridge_regressor(Ztrain, Y_train_l, Ztest, Y_test_l, alpha = alpha, random_state = random_state)
    mae2_l = np.sum(np.abs(y_pred_l - Y_test_l))/np.sum(Y_test_l)
    test_pred_dict[stock]['True_LP'] = Y_test_l
    test_pred_dict[stock]['Predicted_LP'] = y_pred_l
    test_measures_dict[stock]['MAE_LP'] = mae_l
    test_measures_dict[stock]['MAE2_LP'] = mae2_l
    test_measures_dict[stock]['MSE_LP'] = mse_l
    test_measures_dict[stock]['RMSE_LP'] = rmse_l
    
    Y_train_vol = next_day_volume[0:limit] 
    Y_test_vol = next_day_volume[limit:]
    y_pred_vol, mae_vol, mse_vol, rmse_vol = ridge_regressor(Ztrain, Y_train_vol, Ztest, Y_test_vol, alpha = alpha, random_state = random_state)
    mae2_vol = np.sum(np.abs(y_pred_vol - Y_test_vol))/np.sum(Y_test_vol)
    test_pred_dict[stock]['True_Vol'] = Y_test_vol
    test_pred_dict[stock]['Predicted_Vol'] = y_pred_vol
    test_measures_dict[stock]['MAE_Vol'] = mae_vol
    test_measures_dict[stock]['MAE2_Vol'] = mae2_vol
    test_measures_dict[stock]['MSE_Vol'] = mse_vol
    test_measures_dict[stock]['RMSE_Vol'] = rmse_vol
    
    t11 = time.time()
    print('time taken for one stock with ridge: ' ,datetime.timedelta(seconds = t11 - t01))
    print('*'*100)
t1 = time.time()
print('time taken for all stocks with ridge: ' ,datetime.timedelta(seconds = t1 - t0))

cnt: 0
stock: ABIRLANUVO


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  X.sort_values(by = ['DATE'],inplace=True)


seed :  3
time taken for one stock with ridge:  0:00:00.787045
****************************************************************************************************
cnt: 0
stock: ACC
seed :  1
time taken for one stock with ridge:  0:00:01.047060
****************************************************************************************************
cnt: 0
stock: ADANIENT
seed :  5
time taken for one stock with ridge:  0:00:01.007057
****************************************************************************************************
cnt: 0
stock: ADANIPORTS
seed :  6
time taken for one stock with ridge:  0:00:00.988056
****************************************************************************************************
cnt: 0
stock: ADANIPOWER
seed :  10
time taken for one stock with ridge:  0:00:00.996057
****************************************************************************************************
cnt: 0
stock: AJANTPHARM
seed :  2
time taken for one stock with ridge:  0:00:00.730042


seed :  3
time taken for one stock with ridge:  0:00:01.040059
****************************************************************************************************
cnt: 0
stock: EICHERMOT
seed :  8
time taken for one stock with ridge:  0:00:00.912052
****************************************************************************************************
cnt: 0
stock: ENGINERSIN
seed :  1
time taken for one stock with ridge:  0:00:00.855049
****************************************************************************************************
cnt: 0
stock: EXIDEIND
seed :  1
time taken for one stock with ridge:  0:00:01.042059
****************************************************************************************************
cnt: 0
stock: FEDERALBNK
seed :  1
time taken for one stock with ridge:  0:00:01.067061
****************************************************************************************************
cnt: 0
stock: GAIL
seed :  7
time taken for one stock with ridge:  0:00:01.062060
*

seed :  6
time taken for one stock with ridge:  0:00:01.056060
****************************************************************************************************
cnt: 0
stock: L&TFH
seed :  10
time taken for one stock with ridge:  0:00:00.995057
****************************************************************************************************
cnt: 0
stock: LICHSGFIN
seed :  6
time taken for one stock with ridge:  0:00:01.062061
****************************************************************************************************
cnt: 0
stock: LT
seed :  4
time taken for one stock with ridge:  0:00:01.034059
****************************************************************************************************
cnt: 0
stock: LUPIN
seed :  9
time taken for one stock with ridge:  0:00:01.054060
****************************************************************************************************
cnt: 0
stock: M&M
seed :  7
time taken for one stock with ridge:  0:00:01.064061
*****************

seed :  6
time taken for one stock with ridge:  0:00:00.995057
****************************************************************************************************
cnt: 0
stock: TATAPOWER
seed :  6
time taken for one stock with ridge:  0:00:00.977056
****************************************************************************************************
cnt: 0
stock: TATASTEEL
seed :  3
time taken for one stock with ridge:  0:00:00.969055
****************************************************************************************************
cnt: 0
stock: TCS
seed :  1
time taken for one stock with ridge:  0:00:00.987056
****************************************************************************************************
cnt: 0
stock: TECHM
seed :  6
time taken for one stock with ridge:  0:00:01.011058
****************************************************************************************************
cnt: 0
stock: TITAN
seed :  1
time taken for one stock with ridge:  0:00:00.990057
***********

In [16]:
res_file_name = base_path+'Results2/Reg2/res_reg2_' + param_path + 'final.csv'
pred_file_name = base_path+'Results2/Reg2/res_reg2_' + param_path + '_pred_global_final.csv'
if os.path.exists(res_file_name):
    os.remove(res_file_name)
if os.path.exists(pred_file_name):
    os.remove(pred_file_name)

In [17]:
measures_df = pd.DataFrame.from_dict(data = test_measures_dict, orient = 'index').reset_index()
test_pred_df = pd.DataFrame.from_dict(data = test_pred_dict, orient = 'index').reset_index()

In [18]:
measures_df.to_csv(res_file_name,index=None, header='column_names')
test_pred_df.to_csv(pred_file_name,index=None, header='column_names')