In [3]:
import numpy as np
import torch
from torch.autograd import Variable
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import pandas as pd
import utils

np.random.seed(0)
torch.manual_seed(0)

<torch._C.Generator at 0x106d6f0d0>

In [2]:
rows = ['Zonal_Winds', 'Meridional_Winds', 'Humidity', 'Air_Temp', 'Sea_Surface_Temp']
train_data = pd.read_csv('../../Cleaning-the-data/tao-cleaned.csv')[rows]

train_data = (train_data - train_data.mean())/train_data.std()
train_data = train_data.values[:17000]
train_data.shape

(17000, 5)

In [11]:
class Sequence(nn.Module):
    def __init__(self):
        super(Sequence,self).__init__()
        self.lstm1 = nn.LSTM(5,50, 3)
        self.lstm2 = nn.LSTM(50,1, 2)
        
 
    def forward(self, seq):
        lstm1_out, _ = self.lstm1(seq, None)
        lstm2_out, _ = self.lstm2(lstm1_out, None)
        return lstm2_out.squeeze(1)

In [143]:
# https://github.com/pytorch/examples/blob/master/time_sequence_prediction/train.py
class Sequence(nn.Module):
    def __init__(self):
        super(Sequence, self).__init__()
#         self.linear1 = nn.Linear(5, 1)
        self.lstm1 = nn.LSTMCell(5, 51)
        self.lstm2 = nn.LSTMCell(51, 51)
        self.linear = nn.Linear(51, 1)

    def forward(self, input, future = 0):
        outputs = []
        h_t = torch.zeros(input.size(0), 51, dtype=torch.double)
        c_t = torch.zeros(input.size(0), 51, dtype=torch.double)
        h_t2 = torch.zeros(input.size(0), 51, dtype=torch.double)
        c_t2 = torch.zeros(input.size(0), 51, dtype=torch.double)

        for i, input_t in enumerate(input.chunk(input.size(1), dim=1)):
#             input_t = self.linear1(input_t)
            h_t, c_t = self.lstm1(input_t, (h_t, c_t))
            h_t2, c_t2 = self.lstm2(h_t, (h_t2, c_t2))
            output = self.linear(h_t2)
            outputs += [output]
        for i in range(future):# if we should predict the future
            h_t, c_t = self.lstm1(output, (h_t, c_t))
            h_t2, c_t2 = self.lstm2(h_t, (h_t2, c_t2))
            output = self.linear(h_t2)
            outputs += [output]
        outputs = torch.stack(outputs, 1).squeeze(2)
        return outputs

In [144]:
# https://lirnli.wordpress.com/2017/09/01/simple-pytorch-rnn-examples/
class Sequence(nn.Module):
    def __init__(self):
        super(Sequence,self).__init__()
        self.lstm1 = nn.LSTM(5,64, 2)
        self.lstm2 = nn.LSTM(64,1)
 
    def forward(self,seq, hc = None):
        out = []
        if hc == None:
            hc1, hc2 = None, None
        else:
            hc1, hc2 = hc
        
        for X in seq.chunk(seq.size(1),dim=0):
#             print(X)
            tmp, hc1 = self.lstm1(X, hc1)
            X_in, hc2 = self.lstm2(tmp, hc2)
            out.append(X_in)
        return torch.stack(out).squeeze(1),(hc1,hc2)

In [145]:
seq = Sequence()
seq.double()
test_data = torch.from_numpy(train_data[:32]).view(-1, 2, 5)
seq(test_data)

(tensor([[[[-0.0080],
           [-0.0091]],
 
          [[-0.0231],
           [-0.0261]],
 
          [[-0.0422],
           [-0.0438]],
 
          [[-0.0580],
           [-0.0592]],
 
          [[-0.0712],
           [-0.0694]],
 
          [[-0.0776],
           [-0.0755]],
 
          [[-0.0809],
           [-0.0797]],
 
          [[-0.0851],
           [-0.0805]]],
 
 
         [[[-0.0845],
           [-0.0789]],
 
          [[-0.0808],
           [-0.0749]],
 
          [[-0.0751],
           [-0.0700]],
 
          [[-0.0691],
           [-0.0746]],
 
          [[-0.0734],
           [-0.0839]],
 
          [[-0.0828],
           [-0.0955]],
 
          [[-0.0939],
           [-0.1056]],
 
          [[-0.1042],
           [-0.1144]]]], dtype=torch.float64, grad_fn=<SqueezeBackward1>),
 ((tensor([[[ 0.0413,  0.1396,  0.1010,  0.0795,  0.0717,  0.1796,  0.1217,
             -0.0779,  0.1609,  0.0360,  0.1251,  0.0682,  0.0109,  0.0449,
             -0.1989, -0.1712, -0.2070,  0.

In [148]:
criterion = nn.MSELoss()
lr = 0.1
optimizer = optim.Adam(seq.parameters(), lr=lr)
# optimizer = optim.LBFGS(seq.parameters(), lr=lr)
seq_size = 128
batch_size = 4

In [None]:
for epoch in range(200):
    running_loss = 0
    running_error = 0
    iterations = 0
    for i in range(0, 12800, seq_size):
        data = train_data[i:i+seq_size+1]
        xs = torch.from_numpy(data[:-1]).view(-1, batch_size, 5)
        ys = torch.from_numpy(data[1:][:,-1]).view(-1, batch_size)
    
        optimizer.zero_grad()
        lstm_out, _ = seq(xs)
        loss = criterion(lstm_out.view(1, -1), ys.view(1, -1))
        loss.backward()
        optimizer.step()
        
        iterations += 1
        with torch.no_grad():
            running_loss += loss.item()
            error = utils.get_error(lstm_out.view(1, -1), ys.view(1, -1))
            running_error += error 

    # once the epoch is finished we divide the "running quantities"
    # by the number of batches
    if epoch % 5 == 0:
        total_loss = running_loss/iterations
        total_error = running_error/iterations
        print('epoch=',epoch, '\t loss=', total_loss, '\t error={:2.2%}'.format(total_error) ,'percent')

epoch= 0 	 loss= 1.4754435234243772 	 error=72.91% percent
epoch= 5 	 loss= 1.6262214025295234 	 error=71.34% percent
epoch= 10 	 loss= 1.4581966626345175 	 error=70.63% percent
epoch= 15 	 loss= 1.4723582172141254 	 error=73.02% percent
epoch= 20 	 loss= 1.4538356176133107 	 error=71.27% percent
epoch= 25 	 loss= 1.3638736767095532 	 error=68.03% percent
epoch= 30 	 loss= 1.4617069275700316 	 error=73.23% percent
