In [1]:
import torch
from torch.utils.data import DataLoader
from datetime import datetime
from utils.data.CountyDataset import (
    CountyDataset,
)
from torch.utils.data import random_split
from torch import nn

In [None]:
### 
def get_dataset(master_path):
    dataset = CountyDataset(master_path)
    X, Y = next(iter(dataset))

    print('-'*89)
    print('Total Dataset Summary')
    print('-'*89)

    print(f'total samples {len(dataset):,} '
          f'X {X.size()} | Y {Y.size()}')

    return dataset

def get_numpy(dataset):
    data = []
    for i in range(len(dataset)):
        X, Y = dataset[i]
        data.append((X.numpy(), Y.numpy()))

    print('-'*89)
    print(f'Numpy Dataset Summary')
    print('-'*89)
    X, Y = zip(*data)
    print(f'total samples {len(X)} '
          f'| input time periods {len(X[0])} '
          f'| input features {X[0].shape[1]} '
          f'| output time periods {len(Y[0])}'
          f'| target col {dataset.target_col}')

    return data, dataset.xcols, dataset.target_col, dataset.date_index



In [None]:
### LOAD DATASET FROM CHRIS'S THING ###
master_path = 'utils/data/county_table.csv'
data = get_numpy(get_dataset(master_path))


import pickle
with open('dataset.pickle', 'wb') as handle:
    pickle.dump(data[0], handle)

In [2]:
### LOAD DATASET FROM PICKLE ###
import pickle
with open('dataset.pickle', 'rb') as handle:
    full_dataset = pickle.load(handle)

In [3]:
# full_dataset = data[0] ## comment this if loaded from pickle
train_dataset, test_dataset = torch.utils.data.random_split(full_dataset,
                                                            [1267, 317])
train_loader = DataLoader(train_dataset, batch_size=128,
                          shuffle=True, pin_memory=False,
                          num_workers=4)
test_loader = DataLoader(test_dataset, batch_size=128,
                          shuffle=True, pin_memory=False,
                          num_workers=4)



In [4]:
### TRAINING ###
### PICK A MODEL ###
from models.LSTM import LSTM
model = LSTM(input_dim=235, hidden_dim=10, output_dim=10, num_layers=2)

# from models.RNN import RNN
# model = RNN(D_in=235, H=10, D_out=10, L=1, dropout=0.0)

num_epochs = 100
loss_function = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

for i in range(num_epochs):
    for idx, data in enumerate(train_loader):
        X_train, y_train = data
        
        # zero out gradient between epochs
        optimizer.zero_grad()

        # forward pass
        y_pred = model(X_train)

        # calculate loss
        loss = loss_function(y_pred, y_train)
        if idx == len(train_loader) - 1:
            print("Epoch: ", i, "RMSE: ", torch.sqrt(loss).item())
        # backward pass
        loss.backward()

        # update params
        optimizer.step()
#         print('here')

Epoch:  0 RMSE:  0.9375051856040955
Epoch:  1 RMSE:  0.6939074993133545
Epoch:  2 RMSE:  0.6970921754837036
Epoch:  3 RMSE:  0.8845365643501282
Epoch:  4 RMSE:  0.7640889286994934
Epoch:  5 RMSE:  0.8462446928024292
Epoch:  6 RMSE:  0.6250738501548767
Epoch:  7 RMSE:  0.6101812720298767
Epoch:  8 RMSE:  0.516707718372345
Epoch:  9 RMSE:  0.6501412391662598
Epoch:  10 RMSE:  1.1952619552612305
Epoch:  11 RMSE:  0.5555658936500549
Epoch:  12 RMSE:  0.4847874045372009
Epoch:  13 RMSE:  0.6915591955184937
Epoch:  14 RMSE:  0.7074861526489258
Epoch:  15 RMSE:  0.35185757279396057
Epoch:  16 RMSE:  0.41349104046821594
Epoch:  17 RMSE:  0.46522027254104614
Epoch:  18 RMSE:  0.424944669008255
Epoch:  19 RMSE:  0.3326265513896942
Epoch:  20 RMSE:  0.6706739068031311
Epoch:  21 RMSE:  0.4908897876739502
Epoch:  22 RMSE:  0.4214400351047516
Epoch:  23 RMSE:  0.29118970036506653
Epoch:  24 RMSE:  0.26830029487609863
Epoch:  25 RMSE:  0.2999345660209656
Epoch:  26 RMSE:  0.3490277826786041
Epoch:  

In [5]:
### TESTING ###
import numpy as np
with torch.no_grad():
    rmses = []
    for idx, data in enumerate(test_loader):
        X_test, y_test = data
#         X_test = X_test.permute(1, 0, 2)
        y_pred = model(X_test)
        loss = loss_function(y_pred, y_test)
        rmse_i = torch.sqrt(loss).item()
        rmses += [rmse_i]
    print(np.mean(rmses))

0.4458644986152649
