In [165]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor
from torch.utils.data import Dataset

In [166]:
#torch data set
class dataSetAll(Dataset):
    def __init__(self, yearLow, yearHigh):
        #import data from CDC
        self.df = pd.read_csv("data\FluViewPhase2Data\WHO_NREVSS_Combined_prior_to_2015_16.csv")
        self.df = self.df[(yearLow <= self.df["YEAR"]) & (self.df["YEAR"] < yearHigh)]["TOTAL"]
        #turn data into features and output
        #features: 5 previous + one from last year for predicted
        #output: prediction for next time

        #create test data
        self.numFeat = 5 #------------------------
        self.numOut = 1
        self.data = np.asarray(self.df,dtype=np.float32)

    def __len__(self):
        return len(self.data)-self.numFeat-self.numOut
    
    def __getitem__(self,idx):
        return self.data[idx:idx+self.numFeat],self.data[idx+self.numFeat]

In [167]:
#create data loaders
train_data = dataSetAll(1900,2013)
test_data = dataSetAll(2013,2100)
train_dataloader = DataLoader(train_data, batch_size=64)
test_dataloader = DataLoader(test_data, batch_size=64)

In [168]:
# create our RNN based network with an RNN followed by a linear layer
inputSize = 1
sequenceLength = 5
numLayers = 1
hiddenSize = 256
batchSize = 64

class RNN(nn.Module):
    def __init__(self, inputSize, hiddenSize, numLayers,sequenceLength=1):
        super(RNN, self).__init__()
        self.inputSize = inputSize
        self.hiddenSize = hiddenSize
        self.numLayers = numLayers
        self.RNN = nn.RNN(inputSize,hiddenSize,numLayers,nonlinearity='relu',batch_first=True)
        self.fc = nn.Linear(hiddenSize*sequenceLength,1)
        
    def forward(self,x):
        h0 = torch.zeros(self.numLayers,x.size(0),self.hiddenSize)
        out, _ = self.RNN(x,h0)
        out = self.fc(out[:,-1,:])
        return out

model = RNN(inputSize,hiddenSize,numLayers)

In [169]:
#train/test loop
def train_loop(dataloader, model, loss_fn, optimizer,t):
    size = len(dataloader.dataset)
    for batch, (X, y) in enumerate(dataloader):
        # Compute prediction and loss
        X = X[:,:,None]
        # print(X.size())
        pred = model(X).squeeze()
        loss = loss_fn(pred, y)
        # print("pred",pred)
        # print("Y",y)
        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if batch % size == 0:
            loss, current = loss.item(), batch * len(X)
            print(f"loss({t}): {loss:>7f}  [{current:>5d}/{size:>5d}]")

In [170]:
learning_rate = .0001
batch_size = 64
epochs = 500
loss_fn = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# print(list(model.parameters()))
for t in range(epochs):
    # print(f"Epoch {t+1}\n-------------------------------")
    train_loop(train_dataloader, model, loss_fn, optimizer,t)
    # test_loop(test_dataloader, model, loss_fn)

print("Done!")
# print(list(model.parameters()))

loss: 7895528.500000  [    0/  787]
loss: 7446405.000000  [    0/  787]
loss: 7134137.000000  [    0/  787]
loss: 6802432.500000  [    0/  787]
loss: 6427969.000000  [    0/  787]
loss: 5983659.500000  [    0/  787]
loss: 5415042.500000  [    0/  787]
loss: 4665453.500000  [    0/  787]
loss: 3686128.000000  [    0/  787]
loss: 2554547.500000  [    0/  787]
loss: 1622550.000000  [    0/  787]
loss: 1337516.125000  [    0/  787]
loss: 1297299.500000  [    0/  787]
loss: 1262819.250000  [    0/  787]
loss: 1213488.500000  [    0/  787]
loss: 1164117.375000  [    0/  787]
loss: 1121127.125000  [    0/  787]
loss: 1078239.125000  [    0/  787]
loss: 1035556.625000  [    0/  787]
loss: 992582.250000  [    0/  787]
loss: 949375.437500  [    0/  787]
loss: 906107.875000  [    0/  787]
loss: 862471.500000  [    0/  787]
loss: 819443.625000  [    0/  787]
loss: 778086.062500  [    0/  787]
loss: 739185.375000  [    0/  787]
loss: 702710.750000  [    0/  787]
loss: 670236.312500  [    0/  787]
l

In [179]:
with torch.no_grad():
    model.eval()
    yearLow = 2013
    yearHigh = 2020
    
    for i in range(90,110):
        feat = test_data[i:i+1][0].numpy()
        pred = model(test_data[i:i+1][0]).numpy()
        act = test_data[i:i+1][1].numpy()
        full_back = data[i-10:i+5]
        print(full_back)
        print(feat)
        print(pred)
        print(act)
        # plt.plot(full_back)
        plt.plot(np.append(full_back,pred))
        plt.plot(np.append(full_back,act))
        plt.show()

def test_loop(dataloader, model):
    df = pd.read_csv("data\FluViewPhase2Data\WHO_NREVSS_Combined_prior_to_2015_16.csv")
    df = df[(yearLow <= df["YEAR"]) & (df["YEAR"] < yearHigh)]["TOTAL"]
    data = np.array(df)

    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    
    with torch.no_grad():
        for X, y in dataloader:
            X = X[:,:,None]
            pred = model(X).squeeze().numpy()
            act = y.numpy()
            

test_loop(test_dataloader,model)


tensor(-62.8506)
tensor(-322.9737)
tensor(-316.4032)
