In [99]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor
from torch.utils.data import Dataset

In [100]:
#torch data set
class dataSetAll(Dataset):
    def __init__(self, yearLow, yearHigh,numFeat,numOut):
        #import data from CSV
        self.df = pd.read_csv("data\FluViewPhase2Data\WHO_NREVSS_Combined_prior_to_2015_16.csv")
        self.df = self.df[(yearLow <= self.df["YEAR"]) & (self.df["YEAR"] < yearHigh)][["TOTAL","PERCENT POSITIVE"]]

        self.numFeat = numFeat #------------------------
        self.numOut = numOut
        
        self.data = np.asarray(self.df,dtype=np.float32)
        # print(self.data)
        self.norm = np.linalg.norm(self.data, axis=0)
        # print(self.norm)
        self.data = self.data / self.norm
        self.data = torch.as_tensor(self.data)

    def __len__(self):
        return len(self.data)-self.numFeat-self.numOut
    
    def __getitem__(self,idx):
        # idx = 0
        return self.data[idx:idx+self.numFeat],self.data[idx+self.numFeat:idx+self.numFeat+self.numOut]
    
    def getNorm(self):
        return self.norm

In [101]:
#create data loaders
numFeat = 10
numOut = 4
batchSize = 64
train_data = dataSetAll(1900,2009,numFeat,numOut)
print(train_data[0])
train_dataloader = DataLoader(train_data, batch_size=batchSize,drop_last=False,shuffle=True)

(tensor([[0.0176, 0.0017],
        [0.0200, 0.0021],
        [0.0222, 0.0011],
        [0.0232, 0.0014],
        [0.0267, 0.0036],
        [0.0236, 0.0065],
        [0.0274, 0.0061],
        [0.0325, 0.0126],
        [0.0359, 0.0282],
        [0.0353, 0.0499]]), tensor([[0.0557, 0.0764],
        [0.0579, 0.0828],
        [0.0652, 0.1014],
        [0.0708, 0.1111]]))


In [102]:
# create our RNN based network with an RNN followed by a linear layer
inputSize = 2
sequenceLength = numFeat
numLayers = 1
hiddenSize = 16

class RNN(nn.Module):
    def __init__(self,inputSize,hiddenSize,numLayers,numOut,sequenceLength,future=0):
        super(RNN, self).__init__()
        self.inputSize = inputSize
        self.hiddenSize = hiddenSize
        self.numLayers = numLayers
        self.numOut = numOut
        self.future = future
        # print(batchSize,sequenceLength,inputSize)
        # self.LSTM = nn.LSTM(inputSize,hiddenSize,numLayers,batch_first=True)
        self.rnn = nn.RNNCell(inputSize,hiddenSize,nonlinearity='relu')
        # self.rnn2 = nn.RNNCell(1,hiddenSize,nonlinearity='tanh')
        self.fc = nn.Linear(hiddenSize,inputSize)
        
    def forward(self,x):
        outputs = []
        nSamples = x.size(0)
        h_1 = torch.zeros(nSamples, self.hiddenSize, dtype=torch.float32)
        h_2 = torch.zeros(nSamples, self.hiddenSize, dtype=torch.float32)

        for input in x.split(1,dim=1):
            print(input[:,0,:].size())
            h_1 = self.rnn(input[:,0,:], h_1)
            out = self.fc(h_1)
        outputs.append(out)
            # h_2 = h_1
        for i in range(self.numOut-1):
            h_1 = self.rnn(out, h_1)
            out = self.fc(h_1)
            outputs.append(out)

        outputs = torch.cat(outputs, dim=1)
        return outputs

model = RNN(inputSize,hiddenSize,numLayers,numOut,sequenceLength)

In [103]:
#train/test loop
def train_loop(dataloader, model, loss_fn, optimizer,t):
    size = len(dataloader.dataset)
    for batch, (X, y) in enumerate(dataloader):
        # Compute prediction and loss
        # print(X.size())
        # X = X[:,:,None]
        print(X.size())
        pred = model(X)
        break
        # print(pred.size())
        # print(y.size())
        # print("pred",pred.size())
        # print("Y",y.size())
        loss = loss_fn(pred, y)
        
        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if batch % size == 0:
            loss, current = loss.item(), batch * len(X)
            print(f"loss({t}): {loss:>7f}  [{current:>5d}/{size:>5d}]")

In [104]:
learning_rate = .001
epochs = 200
loss_fn = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
# print(list(model.parameters()))

# print(list(model.parameters()))
for t in range(epochs):
    # print(f"Epoch {t+1}\n-------------------------------")
    train_loop(train_dataloader, model, loss_fn, optimizer,t)
    break
    # test_loop(test_dataloader, model, loss_fn)

print("Done!")
# print(list(model.parameters()))

torch.Size([64, 10, 2])
torch.Size([64, 2])


ValueError: too many values to unpack (expected 2)

In [None]:
def test_loop(model):
    test_data = dataSetAll(2010,2100,numFeat,numOut)

    with torch.no_grad():
        for idx in range(len(test_data)):
            X = test_data[idx][0]
            Y = test_data[idx][1]
            feat = X[None,:]
            pred = model(feat)[0]
            plt.plot(Y)
            plt.plot(pred)
            
            plt.show()


def graph(model):
    test_data = dataSetAll(2010,2100,numFeat,numOut)
    norm = test_data.getNorm()
    with torch.no_grad():
        predY = []
        actY = []
        for idx in range(len(test_data)):
            X = test_data[idx][0]
            y = test_data[idx][1]*norm
            feat = X[None,:]

            pred = model(feat)[0]*norm
            future = 3

            # predY.append(pred[numFeat-1::][future])
            # actY.append(y[numFeat-1::][future])    
            predY.append(pred[future])
            actY.append(y[future])    
            
        plt.plot(actY) 
        plt.plot(predY)
        plt.show()

graph(model)
# test_loop(model)


ValueError: operands could not be broadcast together with shapes (4,) (2,) 