In [56]:
import numpy as np
import torch
from torch.autograd import Variable
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import pandas as pd
import utils

# Seed both NumPy's and PyTorch's global random generators with the same
# value so weight initialisation and any sampling repeat across runs.
SEED = 0
np.random.seed(SEED)
# torch.manual_seed returns the Generator object; as the last expression of
# the cell it is what the notebook displays.
torch.manual_seed(SEED)

<torch._C.Generator at 0x11a8c00d0>

In [57]:
# Columns kept from the cleaned CSV, in the order the model sees them.
rows = [
    'Zonal_Winds',
    'Meridional_Winds',
    'Humidity',
    'Air_Temp',
    'Sea_Surface_Temp',
]

# Stage 1: load only the selected columns.
features_raw = pd.read_csv('../../Cleaning-the-data/tao-cleaned.csv').loc[:, rows]

# Stage 2: standardise each column (zero mean, unit std). The statistics are
# computed over every row of the file, not just the rows kept below.
features_std = features_raw.sub(features_raw.mean()).div(features_raw.std())

# Stage 3: keep the first 15000 rows as a plain NumPy array.
train_data = features_std.to_numpy()[:15000]
train_data.shape

(15000, 5)

In [53]:
class Sequence(nn.Module):
    """Two stacked multi-layer LSTMs: 5 -> 50 (3 layers), then 50 -> 1 (2 layers).

    NOTE(review): a later cell defines another class with the same name, so
    this definition is shadowed once that cell has been executed.
    """

    def __init__(self):
        super().__init__()
        self.lstm1 = nn.LSTM(input_size=5, hidden_size=50, num_layers=3)
        self.lstm2 = nn.LSTM(input_size=50, hidden_size=1, num_layers=2)

    def forward(self, seq):
        """Run ``seq`` through both LSTMs without supplying an initial state.

        Returns the second LSTM's per-step output with axis 1 removed when
        that axis has size 1 (otherwise the tensor is returned unchanged).
        """
        features, _state1 = self.lstm1(seq)
        prediction, _state2 = self.lstm2(features)
        return prediction.squeeze(dim=1)

In [63]:
# https://lirnli.wordpress.com/2017/09/01/simple-pytorch-rnn-examples/
class Sequence(nn.Module):
    """Two chained LSTMs mapping 5 input features to 1 output per time step.

    ``lstm1`` is a 2-layer LSTM (5 -> 16) and ``lstm2`` a single-layer LSTM
    (16 -> 1). Both use nn.LSTM's default layout, i.e. inputs are shaped
    ``(seq_len, batch, features)``.
    """

    def __init__(self):
        super(Sequence, self).__init__()
        self.lstm1 = nn.LSTM(5, 16, 2)
        self.lstm2 = nn.LSTM(16, 1)

    def forward(self, seq, hc=None):
        """Run a whole sequence through both LSTMs.

        Args:
            seq: tensor of shape ``(seq_len, batch, 5)``.
            hc: optional ``(hc1, hc2)`` pair holding the ``(h, c)`` state of
                each LSTM, as returned by a previous call. ``None`` starts
                both LSTMs from their default (zero) state.

        Returns:
            ``(out, (hc1, hc2))`` where ``out`` has shape
            ``(seq_len, batch, 1)`` and ``hc1`` / ``hc2`` are the final
            states, suitable for passing back in as ``hc``.
        """
        hc1, hc2 = (None, None) if hc is None else hc

        # nn.LSTM already iterates over the time axis and carries its state
        # from step to step, so no manual chunking is needed. The previous
        # implementation split axis 0 into ``seq.size(1)`` (= batch size)
        # chunks, which produced a (1, T, 1, 1) result for batch size 1 and
        # failed in torch.stack whenever the chunks had unequal lengths.
        hidden, hc1 = self.lstm1(seq, hc1)
        out, hc2 = self.lstm2(hidden, hc2)
        return out, (hc1, hc2)

In [70]:
# The model trained by the later cells: a 6-layer LSTM taking 5 features and
# producing a single hidden unit per step, cast to float64 so it accepts
# tensors built directly from the NumPy training array.
seq = nn.LSTM(input_size=5, hidden_size=1, num_layers=6).double()

# Smoke test: the first 32 rows viewed as (16, 2, 5).
# NOTE(review): with this view consecutive rows are paired along the batch
# axis (row 0 and row 1 become step 0 of two "sequences") - confirm that is
# intended rather than 32 steps of a single sequence.
test_data = torch.from_numpy(train_data[:32]).view(-1, 2, 5)
seq(test_data)

(tensor([[[-0.0617],
          [-0.0617]],
 
         [[-0.0749],
          [-0.0749]],
 
         [[-0.0775],
          [-0.0775]],
 
         [[-0.0780],
          [-0.0780]],
 
         [[-0.0781],
          [-0.0781]],
 
         [[-0.0781],
          [-0.0781]],
 
         [[-0.0781],
          [-0.0781]],
 
         [[-0.0781],
          [-0.0781]],
 
         [[-0.0782],
          [-0.0782]],
 
         [[-0.0782],
          [-0.0782]],
 
         [[-0.0782],
          [-0.0782]],
 
         [[-0.0782],
          [-0.0782]],
 
         [[-0.0782],
          [-0.0782]],
 
         [[-0.0782],
          [-0.0782]],
 
         [[-0.0782],
          [-0.0782]],
 
         [[-0.0782],
          [-0.0782]]], dtype=torch.float64, grad_fn=<CatBackward>),
 (tensor([[[-0.0853],
           [-0.0825]],
  
          [[-0.2546],
           [-0.2546]],
  
          [[ 0.0875],
           [ 0.0875]],
  
          [[ 0.1799],
           [ 0.1799]],
  
          [[-0.1975],
           [-0.1975]],

In [71]:
# --- Training hyper-parameters -------------------------------------------
# NOTE(review): 0.2 is far above Adam's default step size of 1e-3, and the
# logged loss stops moving after the first few epochs - worth revisiting.
lr = 0.2
seq_size = 128    # consecutive rows per training window
batch_size = 1    # sequences per window

# --- Objective and optimiser ---------------------------------------------
criterion = nn.MSELoss()
optimizer = optim.Adam(params=seq.parameters(), lr=lr)
# optimizer = optim.LBFGS(seq.parameters(), lr=lr)

In [72]:
for epoch in range(200):
    # Per-epoch accumulators, averaged over the number of windows below.
    running_loss, running_error, iterations = 0, 0, 0

    # Slide over rows 10000..14999 in windows of `seq_size`. Each window
    # carries one extra row so the targets can be the inputs shifted by one
    # step; the final window is shorter because the array ends at row 15000.
    for start in range(10000, 15000, seq_size):
        window = train_data[start:start + seq_size + 1]
        inputs = torch.from_numpy(window[:-1]).view(-1, batch_size, 5)
        # Target for each step is the last column of the following row.
        targets = torch.from_numpy(window[1:][:, -1]).view(-1, batch_size)

        optimizer.zero_grad()
        lstm_out, _ = seq(inputs)
        loss = criterion(lstm_out.view(1, -1), targets.view(1, -1))
        loss.backward()
        optimizer.step()

        iterations += 1
        running_loss += loss.item()
        # The views handed to the metric are created with autograd disabled,
        # so they do not carry gradient tracking into utils.get_error.
        with torch.no_grad():
            running_error += utils.get_error(lstm_out.view(1, -1), targets.view(1, -1))

    # Report the epoch averages on every fifth epoch only.
    if epoch % 5 != 0:
        continue
    total_loss = running_loss/iterations
    total_error = running_error/iterations
    print('epoch=',epoch, '\t loss=', total_loss, '\t error={:2.2%}'.format(total_error))

epoch= 0 	 loss= 1.5287090306810291 	 error=62.46%
epoch= 5 	 loss= 1.1095753196015639 	 error=74.36%
epoch= 10 	 loss= 1.1095753381331137 	 error=74.36%
epoch= 15 	 loss= 1.1095753379436952 	 error=74.36%
epoch= 20 	 loss= 1.1095753377054764 	 error=74.36%


KeyboardInterrupt: 

In [73]:
def predict_all(n):
    """Predict the next-step value of the last feature over the final ``n`` rows.

    Reads the module-level ``train_data`` array and ``seq`` model. Every one
    of the selected rows except the last is fed to the model as a single
    sequence (batch size 1); the matching target is the last column of the
    row that follows it.

    Args:
        n: number of trailing rows of ``train_data`` to use. Must be at
            least 2 so that one (input, target) pair exists. If
            ``train_data`` has fewer than ``n`` rows, all of them are used.

    Returns:
        ``(predictions, actual)``: two 1-D NumPy arrays. ``actual`` has one
        entry per (input, target) pair; ``predictions`` is the model output
        flattened to 1-D.

    Raises:
        ValueError: if ``n`` (or the number of available rows) is below 2.
            Previously ``n == 0`` silently selected the whole array because
            ``[-0:]`` is the full slice, and ``n == 1`` failed with an
            opaque reshape error.
    """
    if n < 2:
        raise ValueError("n must be at least 2 to form one (input, target) pair")
    data = train_data[-n:]
    if len(data) < 2:
        raise ValueError("train_data must contain at least 2 rows")

    xs = torch.from_numpy(data[:-1]).reshape(-1, 1, 5)
    ys = torch.from_numpy(data[1:][:, -1]).reshape(-1)

    # Pure inference: skip building the autograd graph.
    with torch.no_grad():
        predictions, _ = seq(xs)
    return predictions.reshape(-1).detach().numpy(), ys.numpy()

# Compare the model's one-step-ahead predictions with the true values over
# the last 1000 rows of the training array.
pred, actual = predict_all(1000)

# predict_all returns flat 1-D arrays, so no transpose is needed.
p = pd.Series(pred)
a = pd.Series(actual)

# Build the comparison table in one step instead of growing an empty frame.
df = pd.DataFrame({'Predictions': p, 'Actual': a})
df.to_csv('./compared.csv')

# Keep the preview as the cell's last expression: a bare expression in the
# middle of a cell is evaluated and discarded, so it was never displayed.
df.head()