In [1]:
! pip install torchviz -q
! pip install torch-summary

  Preparing metadata (setup.py) ... [?25l[?25hdone
  Building wheel for torchviz (setup.py) ... [?25l[?25hdone
Collecting torch-summary
  Downloading torch_summary-1.4.5-py3-none-any.whl (16 kB)
Installing collected packages: torch-summary
Successfully installed torch-summary-1.4.5


In [2]:
# namespaces
import sklearn
import numpy as np
import matplotlib.patches as mpatches
import matplotlib.pyplot as plt
import pandas as pd
import plotly.io as pio
pio.renderers.default = 'notebook'
import seaborn as sns
import time
import torch
import torch.nn as nn
import torch.nn.functional as F

# functions
from sklearn.preprocessing import MinMaxScaler
from torchsummary import summary

In [None]:
class Model(nn.Module):
    """Small fully connected network: 4 inputs -> 16 -> 32 -> 1 output.

    Both hidden layers are followed by a sigmoid activation; the output
    layer is left linear.
    """

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(in_features=4, out_features=16)
        self.fc2 = nn.Linear(in_features=16, out_features=32)
        self.fc3 = nn.Linear(in_features=32, out_features=1)

    def forward(self, x):
        """Pass x through the two sigmoid hidden layers, then the linear output layer."""
        hidden = x
        for layer in (self.fc1, self.fc2):
            hidden = torch.sigmoid(layer(hidden))
        return self.fc3(hidden)

In [3]:
# load the places dataset straight from the GitHub-hosted CSV
PLACES_DATA_URL = 'https://raw.githubusercontent.com/markstiles/us-data-analysis/main/places_data.csv'
places_df = pd.read_csv(PLACES_DATA_URL)

In [5]:
# show the distinct column labels of the loaded frame
column_labels = places_df.columns.unique()
print(column_labels)

Index(['Unnamed: 0', 'GeoId', 'Place_Name', 'State_Abbr', 'State_Name', 'Type',
       'All_Employers', 'All_Employees', 'All_Payroll', 'All_Revenue',
       ...
       'Wholesale_Employee_Per_Employer', 'Wholesale_Revenue_Per_Employer',
       'Wholesale_Avg_Payroll_Per_Employee',
       'Wholesale_Population_Per_Employer', 'Income_Per_Revenue',
       'Industry_Count', 'Revenue_Per_Person', 'Profit_Per_Person',
       'Performance', 'Population_Range'],
      dtype='object', length=301)


In [None]:
# min-max scale the data, then flatten the result to one dimension
# NOTE(review): `gme` is not defined in any cell visible here - confirm where it comes from
scaler = MinMaxScaler()
scaler.fit(gme)
fit_data = scaler.transform(gme).reshape(-1)

In [None]:
# build the input/target sequences from the scaled data
# (the 4 is presumably the window length, matching the model's 4 inputs - confirm
# against create_sequences, which is defined outside the cells shown here)
x_data, y_data = create_sequences(fit_data, 4)

# boundaries of the training / validation / test ranges
pos_one = 4600
pos_two = 4900

# training set: everything before pos_one
x_train, y_train = x_data[:pos_one], y_data[:pos_one]

# validation set: from pos_one up to (not including) pos_two
x_val, y_val = x_data[pos_one:pos_two], y_data[pos_one:pos_two]

# test set: everything from pos_two onwards
x_test, y_test = x_data[pos_two:], y_data[pos_two:]

In [None]:
# the network instance that the training cell optimizes
model = Model()

# summarize the architecture using a separate, freshly built instance
summary_input_size = (4022, 4)
summary(Model(), input_size=summary_input_size)

In [None]:
# Goal: generate a series of networks with a randomly selected number of layers and neurons per layer, and search them for the best-performing one. The loss function could also be varied across runs.
# First, a vanilla network needs to be run to set a baseline.

In [None]:
# train settings
num_epochs = 300
learning_rate = 0.01
batch_size = 10
epoch_batch_size = 10  # record and print the losses once every this many epochs

# lists for storing the recorded losses (one entry per logged epoch)
train_lss = []
val_loss = []

# loss function
criterion = torch.nn.MSELoss() # mean-squared error for regression

# initialization of Adam optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# fail early with a clear message instead of dividing by zero when averaging below
if len(x_train) == 0:
    raise ValueError("x_train is empty; nothing to train on")

# validation targets as a column so they line up with the model's (N, 1) output
# (assumes y_val is laid out like y_train, one target per sample - confirm)
y_val_col = y_val.reshape(-1, 1)

# train the model
for epoch in range(num_epochs):

    # accumulate the sample-weighted loss so the logged value covers the whole
    # epoch rather than only the final batch
    epoch_loss_sum = 0.0
    epoch_samples = 0

    # take a batch at a time
    for i in range(0, len(x_train), batch_size):

        # pull the current batch; reshape(-1, 1) works for any batch_size and
        # for a shorter final batch
        x_batch = x_train[i:i+batch_size]
        y_batch = y_train[i:i+batch_size].reshape(-1, 1)

        # calculate the loss
        loss = criterion(model(x_batch), y_batch)

        # run back prop
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        epoch_loss_sum += loss.item() * len(x_batch)
        epoch_samples += len(x_batch)

    # store and print the training and validation loss every epoch batch
    if epoch % epoch_batch_size == 0:
        train_mse = epoch_loss_sum / epoch_samples
        train_lss.append(train_mse)

        # evaluate on the held-out validation set; no gradients are needed
        # because this data is not used for training
        with torch.no_grad():
            val_lss = criterion(model(x_val), y_val_col)
            val_loss.append(val_lss.item())

        print(f"Epoch: {epoch:d}, training loss: {train_mse:1.5f} , validation loss: {val_lss.item():1.5f}")

In [None]:
# plot the recorded training and validation loss
plt.figure(figsize=(10,6))

# epochs at which the training cell recorded a loss (every epoch_batch_size epochs,
# starting at epoch 0); derived from the number of stored values so that x and y
# always have the same length
logged_epochs = [k * epoch_batch_size for k in range(len(train_lss))]

# let the legend be built from the line labels so its entries always match the
# colours of the lines actually drawn
sns.lineplot(x=logged_epochs, y=train_lss, label='Train MSE')
sns.lineplot(x=logged_epochs, y=val_loss, label='Validation MSE')

plt.xlabel('EPOCH')
plt.ylabel('MSE')
plt.legend()
plt.title('Training and Validation loss');