In [3]:

import torch 
import torch.nn as nn
import numpy as np
import pandas as pd 
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
  
raw_data = pd.read_csv('data/targetfirm_prediction_dataset_small.csv')
raw_data = raw_data.fillna(0)
data = np.array(raw_data.values)
data = data[:,1:]

labels = data[:,2]
target_indices  = np.nonzero(labels)
target_indices = target_indices[0]

data_tensor = torch.FloatTensor(data)

def prepare_data_and_split(source_data, window_size, target_indices):
    returndata = [ ]
    for i in target_indices:
        curr_year = i
        prev_year = curr_year - 1
        count = 0 
        while(source_data[:,1][curr_year] > source_data[:,1][prev_year] and count < window_size):
            curr_year-=1
            prev_year = curr_year - 1
            count+=1
        if(curr_year == i):
            continue
        returndata.append((source_data[curr_year:i,3:17], source_data[i,2]))
    test_size = int(np.round(0.3 * len(returndata)))
    train = returndata[:-test_size]
    test = returndata[-test_size:]
    return train, test 

year_window = 5 
train, test = prepare_data_and_split(data_tensor,year_window,target_indices)

input_size = 14
hidden_size = 100 
num_layers = 2 
output_size = 1 
num_epochs = 100 

class LSTM(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, output_size):
        super(LSTM, self).__init__()
        self.hidden_size = hidden_size 
        self.input_size = input_size
        self.num_layers = num_layers
    
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers)
        self.fc = nn.Linear(hidden_size, output_size)
        self.h_cell = (torch.zeros(self.num_layers,1, self.hidden_size),
                       torch.zeros(self.num_layers,1, self.hidden_size))
        
    def forward(self,x): 
        out, self.h_cell = self.lstm(x.view(len(x),1,-1),self.h_cell)
        output = self.fc(out.view(len(x),-1))
        return output[-1]
       
class GRU(nn.Module):
    def __init__(self, input_size, hidden_size ,num_layers, output_size):
        super(GRU, self).__init__()
        self.hidden_size = hidden_size 
        self.input_size = input_size
        self.num_layers = num_layers
        
        self.gru = nn.GRU(input_size, hidden_size, num_layers)
        self.fc = nn.Linear(hidden_size, output_size) 
        self.h_cell = torch.zeros(self.num_layers,1, self.hidden_size)
        
    def forward(self, x):
        out, self.h_cell = self.gru(x.view(len(x),1,-1),self.h_cell)
        output = self.fc(out.view(len(x),-1))
        return output[-1]
    
def train_model(model, model_name, train_data,  num_epochs, print_every = 1000, learning_rate = 0.05):
    model.train()
    print("Training " + model_name + f" model with {num_epochs} epochs:")
    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), learning_rate)
    for i in range(num_epochs):
        for training_data, y_lstm_targets in train_data:
            optimizer.zero_grad()
            if (model_name == "LSTM"):
                model.h_cell = (torch.zeros(model.num_layers,1, model.hidden_size),
                                torch.zeros(model.num_layers,1, model.hidden_size))
            else:
                model.h_cell = torch.zeros(model.num_layers,1, model.hidden_size)
            y_pred = model(training_data)
            loss = criterion(y_pred, y_lstm_targets)
            loss.backward()
            optimizer.step()
            
        if(i % print_every == 0 and i > 0):
            print(f"Epoch:{i} loss: {loss.item():10.8f}")
        print(f"Epoch {i} loss : {loss.item():10.10f}")

lstm_model = LSTM(input_size = input_size, hidden_size = hidden_size, num_layers = num_layers, output_size = output_size)
train_model(lstm_model, "LSTM", train, num_epochs)

gru_model = GRU(input_size = input_size, hidden_size = hidden_size, num_layers = num_layers, output_size = output_size)
train_model(gru_model, "GRU", train, num_epochs)

Training LSTM model with 100 epochs:


  return F.mse_loss(input, target, reduction=self.reduction)


Epoch 0 loss : 0.0000018946
Epoch 1 loss : 0.0000574173
Epoch 2 loss : 0.0003795095
Epoch 3 loss : 0.0000005094
Epoch 4 loss : 0.0000000295
Epoch 5 loss : 0.0004931306
Epoch 6 loss : 0.0001669609
Epoch 7 loss : 0.0000797699
Epoch 8 loss : 0.0003386066
Epoch 9 loss : 0.0003888956
Epoch 10 loss : 0.0004903444
Epoch 11 loss : 0.0004585612
Epoch 12 loss : 0.0000634198
Epoch 13 loss : 0.0012398710
Epoch 14 loss : 0.0009581835
Epoch 15 loss : 0.0013662095
Epoch 16 loss : 0.0013200453
Epoch 17 loss : 0.0013402490
Epoch 18 loss : 0.0013338849
Epoch 19 loss : 0.0013293781
Epoch 20 loss : 0.0013273968
Epoch 21 loss : 0.0013283091
Epoch 22 loss : 0.0013288131
Epoch 23 loss : 0.0013283351
Epoch 24 loss : 0.0013279659
Epoch 25 loss : 0.0013280050
Epoch 26 loss : 0.0000211243
Epoch 27 loss : 0.0000006862
Epoch 28 loss : 0.0002457601
Epoch 29 loss : 0.2156550139
Epoch 30 loss : 0.0000014737
Epoch 31 loss : 0.0006424822
Epoch 32 loss : 0.0016129023
Epoch 33 loss : 0.0013670733
Epoch 34 loss : 0.001434