# Training the Model

In this notebook, we build a simple Neural Network and train it on the data generated by `data-integration.ipynb`.

We then save the trained weights (the model's `state_dict`) to a file named `bang_model`. That file can be copied into the deployment folder, where the deployment script loads it to make predictions.

In [None]:
import torch
import torch.nn as nn
import torch.utils.data as data_utils
import pandas as pd
import numpy as np
import os

In [None]:
# CSV inputs for training (the notebook intro says they come from data-integration.ipynb).
csv_files = ['bb_final.csv', 'amc_final.csv', 'nok_final.csv', 'gme_final.csv']

# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0; a single
# pd.concat stacks the frames in the same order and renumbers the index 0..N-1.
df = pd.concat([pd.read_csv(csv_file) for csv_file in csv_files], ignore_index=True)

In [None]:
# Display the combined frame to check the columns and row count.
df

Unnamed: 0.1,Unnamed: 0,Date,accumulated_n,accumulated_s,accumulated_r,accumulated_sentiment,positive_count,negative_count,neutral_count,price_change
0,0,2021-01-04,9.000000,1.000000,0.600000,2.000000,1,0,0,-0.017910
1,1,2021-01-05,126.000000,1449.500000,0.940000,2.000000,2,0,0,0.022659
2,2,2021-01-06,30.000000,1.500000,0.520000,1.500000,2,0,0,0.000000
3,3,2021-01-07,142.666667,231.333333,0.826667,2.000000,3,0,0,0.045926
4,4,2021-01-08,45.666667,36.666667,0.850000,1.000000,2,1,0,0.047091
...,...,...,...,...,...,...,...,...,...,...
507,123,2021-06-24,19.384615,38.153846,0.857179,0.358974,20,14,5,-0.040016
508,124,2021-06-25,71.650000,65.300000,0.815000,1.550000,17,2,1,-0.020981
509,125,2021-06-28,24.772727,49.909091,0.839091,1.090909,17,4,1,0.009467
510,126,2021-06-29,22.000000,73.120000,0.870000,1.280000,20,2,3,-0.012688


In [None]:
# Keep the four model inputs plus the regression target; everything else is dropped.
feature_columns = ['accumulated_n', 'accumulated_s', 'accumulated_r', 'accumulated_sentiment']
df = df.loc[:, feature_columns + ['price_change']]
df

Unnamed: 0,accumulated_n,accumulated_s,accumulated_r,accumulated_sentiment,price_change
0,9.000000,1.000000,0.600000,2.000000,-0.017910
1,126.000000,1449.500000,0.940000,2.000000,0.022659
2,30.000000,1.500000,0.520000,1.500000,0.000000
3,142.666667,231.333333,0.826667,2.000000,0.045926
4,45.666667,36.666667,0.850000,1.000000,0.047091
...,...,...,...,...,...
507,19.384615,38.153846,0.857179,0.358974,-0.040016
508,71.650000,65.300000,0.815000,1.550000,-0.020981
509,24.772727,49.909091,0.839091,1.090909,0.009467
510,22.000000,73.120000,0.870000,1.280000,-0.012688


In [None]:
# Shuffle the rows before the positional train/test split below.
# A fixed random_state makes the shuffle, and therefore the split, repeatable
# across kernel restarts.
df = df.sample(frac=1, random_state=42)

In [None]:
def train_test_split(df, percentage_train=0.9):
    """Split ``df`` by row position into a training part and a test part.

    The leading rows become the test set and the remaining rows the training
    set. No shuffling happens here; the caller is expected to shuffle first.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to split.
    percentage_train : float, default 0.9
        Fraction of rows assigned to the training set, between 0 and 1.

    Returns
    -------
    (train_df, test_df) : tuple of pandas.DataFrame

    Raises
    ------
    ValueError
        If ``percentage_train`` is outside ``[0, 1]``.
    """
    if not 0.0 <= percentage_train <= 1.0:
        raise ValueError('percentage_train must be between 0 and 1')

    percentage_test = 1 - percentage_train

    # 1 - 0.9 evaluates to 0.09999999999999998, so a bare int() would truncate
    # e.g. 10 rows * 0.1 down to 0 test rows. Round away the float noise first,
    # then truncate as before.
    n_test = int(round(len(df) * percentage_test, 9))

    # One boundary for both slices guarantees the parts never overlap or leave gaps.
    test_df = df.iloc[:n_test]
    train_df = df.iloc[n_test:]

    return train_df, test_df

# Split the shuffled frame into training rows and held-out test rows.
train_df, test_df = train_test_split(df)

In [None]:
def _frame_to_loader(frame, batch_size=8):
    """Turn a frame into float32 tensors, a TensorDataset and a shuffling DataLoader.

    ``price_change`` is used as the target; every other column is a feature.
    Returns ``(features, targets, dataset, loader)``.
    """
    targets = torch.tensor(frame['price_change'].values.astype(np.float32))
    features = torch.tensor(frame.drop('price_change', axis=1).values.astype(np.float32))
    dataset = data_utils.TensorDataset(features, targets)
    loader = data_utils.DataLoader(dataset=dataset, batch_size=batch_size, shuffle=True)
    return features, targets, dataset, loader


train, train_target, train_tensor, train_loader = _frame_to_loader(train_df)
test, test_target, test_tensor, test_loader = _frame_to_loader(test_df)

In [None]:
class FeedforwardNeuralNetModel(nn.Module):
    """Three stacked linear layers (input -> 4 -> 2 -> output), no activations in between."""

    def __init__(self, input_dim, output_dim):
        super().__init__()

        # Widths of the two hidden layers.
        first_hidden, second_hidden = 4, 2

        # The attribute names fc1/fc2/fc3 become the state-dict keys, so they
        # must stay stable for saved weights to load.
        self.fc1 = nn.Linear(input_dim, first_hidden)
        self.fc2 = nn.Linear(first_hidden, second_hidden)
        self.fc3 = nn.Linear(second_hidden, output_dim)

    def forward(self, x):
        """Feed ``x`` through fc1, fc2 and fc3 in order and return the result."""
        return self.fc3(self.fc2(self.fc1(x)))

In [None]:
# Seed PyTorch so the random weight initialisation is repeatable between runs.
torch.manual_seed(42)

# 4 input features -> 1 predicted value, trained as a regression with MSE.
model = FeedforwardNeuralNetModel(4, 1)
criterion = nn.MSELoss()
learning_rate = 0.01
num_epochs = 20

optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Show the freshly initialised weights and biases of every layer.
for parameter in model.parameters():
    print(parameter)

Parameter containing:
tensor([[-0.0858, -0.1880,  0.2764,  0.2023],
        [ 0.1701, -0.3416, -0.1855, -0.2859],
        [ 0.0607,  0.0095,  0.3775,  0.0980],
        [-0.1837,  0.2091,  0.3222,  0.0162]], requires_grad=True)
Parameter containing:
tensor([ 0.2056, -0.2588,  0.1123, -0.0746], requires_grad=True)
Parameter containing:
tensor([[ 0.1043, -0.2275, -0.2595,  0.1389],
        [-0.2573, -0.3383,  0.3099, -0.2891]], requires_grad=True)
Parameter containing:
tensor([-0.2928,  0.2395], requires_grad=True)
Parameter containing:
tensor([[-0.5258, -0.3553]], requires_grad=True)
Parameter containing:
tensor([0.2899], requires_grad=True)


In [None]:
# Mini-batch training with a periodic evaluation on the held-out test loader.
iter_count = 0
for epoch in range(num_epochs):
    for features, targets in train_loader:

        # Clear gradients left over from the previous step
        optimizer.zero_grad()

        # Forward pass to get output
        outputs = model(features)

        # The model emits (batch, 1) while the loader yields (batch,) targets.
        # Without aligning the shapes MSELoss broadcasts them to (batch, batch)
        # and averages over every output/target pair, which is the wrong loss.
        loss = criterion(outputs, targets.reshape(outputs.shape))

        # Getting gradients and updating parameters with backpropagation
        loss.backward()
        optimizer.step()

        iter_count += 1

        if iter_count % 100 == 0:
            total = 0
            same_sign = 0
            test_loss_sum = 0.0

            # Evaluation only: no autograd bookkeeping, and distinct variable
            # names so the training batch/loss are not overwritten.
            model.eval()
            with torch.no_grad():
                for test_features, test_targets in test_loader:
                    test_outputs = model(test_features)
                    test_targets = test_targets.reshape(test_outputs.shape)
                    batch_count = test_targets.numel()

                    # Matching signs (or a zero product) count as a hit, as an
                    # indicator of the model's effectiveness.
                    total += batch_count
                    same_sign += int((test_outputs * test_targets >= 0).sum())

                    # Weight each batch by its size so the reported value is
                    # the mean over the whole test set, not just the last batch.
                    test_loss_sum += criterion(test_outputs, test_targets).item() * batch_count
            model.train()

            print('Iteration: {}. Loss: {}. Same-sign accuracy: {}'.format(iter_count, test_loss_sum / total, same_sign / total))
        

  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)


Iteration: 100. Loss: 11.627667427062988. Same-sign accuracy: 0.6470588235294118
Iteration: 200. Loss: 0.10956484824419022. Same-sign accuracy: 0.6274509803921569
Iteration: 300. Loss: 0.04278528690338135. Same-sign accuracy: 0.5882352941176471
Iteration: 400. Loss: 0.005025673191994429. Same-sign accuracy: 0.5882352941176471
Iteration: 500. Loss: 0.00043049504165537655. Same-sign accuracy: 0.6274509803921569
Iteration: 600. Loss: 0.0009650642750784755. Same-sign accuracy: 0.6078431372549019
Iteration: 700. Loss: 0.0016456940211355686. Same-sign accuracy: 0.6274509803921569
Iteration: 800. Loss: 0.0013848482631146908. Same-sign accuracy: 0.5686274509803921
Iteration: 900. Loss: 0.001629339181818068. Same-sign accuracy: 0.5294117647058824
Iteration: 1000. Loss: 0.00016336588305421174. Same-sign accuracy: 0.5686274509803921
Iteration: 1100. Loss: 0.0013650002656504512. Same-sign accuracy: 0.5686274509803921


In [None]:
# Persist only the learned parameters (the state dict) under the file name 'bang_model'.
trained_weights = model.state_dict()
torch.save(trained_weights, 'bang_model')

## Deployment code

In [None]:
# Rebuild the same 4-in / 1-out architecture, then restore the saved parameters into it.
bang_bang = FeedforwardNeuralNetModel(4, 1)
saved_weights = torch.load('bang_model')
bang_bang.load_state_dict(saved_weights)

<All keys matched successfully>

In [None]:
# One example observation with four feature values, matching the model's input size.
new_input = [10.0, 10.10, 90.90, -1.0]

# Build the (1, 4) batch in one step: wrapping an existing tensor in
# torch.tensor() again triggers PyTorch's copy-construct UserWarning.
input_batch = torch.tensor([new_input], dtype=torch.float32)

# Pure inference, so skip gradient tracking.
with torch.no_grad():
    output = bang_bang(input_batch)
print(output)

tensor([[0.3483]], grad_fn=<AddmmBackward>)


  This is separate from the ipykernel package so we can avoid doing imports until


## Retraining

In [None]:
class FeedforwardNeuralNetModel(nn.Module):
    """Linear stack input -> 4 -> 2 -> output, without activations.

    Same layer names (fc1, fc2, fc3) and sizes as the class used for training,
    so a state dict saved from that model loads into this one.
    """

    def __init__(self, input_dim, output_dim):
        super().__init__()

        # Widths of the two hidden layers.
        first_hidden, second_hidden = 4, 2

        self.fc1 = nn.Linear(input_dim, first_hidden)
        self.fc2 = nn.Linear(first_hidden, second_hidden)
        self.fc3 = nn.Linear(second_hidden, output_dim)

    def forward(self, x):
        """Feed ``x`` through fc1, fc2 and fc3 in order and return the result."""
        return self.fc3(self.fc2(self.fc1(x)))

In [None]:
# Start retraining from the previously saved parameters.
bang_bang = FeedforwardNeuralNetModel(4, 1)
saved_weights = torch.load('bang_model')
bang_bang.load_state_dict(saved_weights)

<All keys matched successfully>

In [None]:
# One new observation with four feature values; presumably in the training
# column order (accumulated_n, accumulated_s, accumulated_r,
# accumulated_sentiment) -- confirm against the data pipeline.
new_input = [10.0,10.100,90.90,2.0]
# Observed value used as the regression target for new_input in the retraining step.
actual_change = 0.5

In [None]:
# Single gradient step on one new (input, target) pair.
criterion = nn.MSELoss()
learning_rate = 0.01
optimizer = torch.optim.Adam(bang_bang.parameters(), lr=learning_rate)

# Clear gradients
optimizer.zero_grad()

# Forward pass on a (1, 4) batch. Build the tensor in one step: wrapping a
# tensor in torch.tensor() again triggers the copy-construct UserWarning.
input_batch = torch.tensor([new_input], dtype=torch.float32)
outputs = bang_bang(input_batch)

# Mean-squared-error loss. The target is shaped (1, 1) like the output so
# MSELoss does not have to broadcast (and does not warn about a size mismatch).
target = torch.tensor([[actual_change]], dtype=torch.float32)
loss = criterion(outputs, target)

# Getting gradients and updating parameters with backpropagation
loss.backward()
optimizer.step()

  if __name__ == '__main__':
  return F.mse_loss(input, target, reduction=self.reduction)


In [None]:
# Save the retrained network. The retraining step updates `bang_bang`, so that
# is the state dict to persist; saving `model` would write back the old
# weights and discard the update.
torch.save(bang_bang.state_dict(), 'bang_model')