In [1]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
from sklearn import preprocessing
from torch import nn, optim
from torch.optim import lr_scheduler
from torch.utils.data import Dataset, DataLoader

In [2]:
# Folder containing the FordA text files. The default is a machine-specific
# absolute path; set the FORDA_DATA_DIR environment variable to override it
# without editing the notebook.
directory = os.environ.get('FORDA_DATA_DIR', '/home/roboto/Documents/GitHub/tutorials/data/FordA')

In [3]:
class SeriesDataset(Dataset):
    """In-memory dataset of labelled time series, standardized per series.

    Each row of the input array is one series: column 0 holds the class label
    and the remaining columns hold the series values. All samples are built
    eagerly and stored in ``self.samples`` as ``(series, label)`` tensor pairs.
    """

    def __init__(self, dataset_array):
        """Build every sample from a 2-D array.

        Args:
            dataset_array: 2-D numpy array. Column 0 is the label (``-1`` maps
                to class 0, every other value to class 1); columns 1 onward
                are the series values.
        """
        super(SeriesDataset, self).__init__()
        self.generate_samples(dataset_array)

    def generate_samples(self, dataset_array):
        """Populate ``self.samples`` with ``(series, label)`` tensor pairs.

        Each series tensor is float with shape ``(series_length, 1)``; each
        label is a 0-dim long tensor.
        """
        # np.where is vectorized and, unlike np.vectorize, works on empty input.
        labels = np.where(dataset_array[:, 0] == -1, 0, 1)
        classes = torch.tensor(labels, dtype=torch.long)
        series = torch.tensor(self.normalize(dataset_array[:, 1:]), dtype=torch.float)
        # Append a feature axis of size 1: (n_series, series_length, 1).
        series = series.unsqueeze(-1)
        self.samples = list(zip(series, classes))

    def normalize(self, array):
        """Standardize each row to zero mean and unit standard deviation.

        Args:
            array: 2-D numpy array with one series per row.

        Returns:
            Array of the same shape with ``(row - mean(row)) / std(row)``.
            Rows with zero spread are only centred (they become all zeros)
            rather than producing NaN from a 0/0 division.
        """
        mean = np.mean(array, axis=1, keepdims=True)
        # Scale by the standard deviation, not the variance, so every series
        # ends up with unit spread regardless of its original scale.
        std = np.std(array, axis=1, keepdims=True)
        std = np.where(std == 0, 1.0, std)
        return (array - mean) / std

    def __len__(self):
        """Return the number of stored samples."""
        return len(self.samples)

    def __getitem__(self, idx):
        """Return the sample at ``idx``.

        An integer index yields one ``(series, label)`` pair; a slice yields a
        plain list of such pairs, because samples are stored in a list.
        """
        return self.samples[idx]
               

In [5]:
# Read the whole FordA training file, wrap it in a SeriesDataset, and keep
# only the first 500 samples (slicing the dataset returns a plain list).
train_dataset = SeriesDataset(
    np.loadtxt(directory + '/FordA_TRAIN.txt')
)[:500]
# Test split is currently disabled:
# test_dataset  = SeriesDataset(np.loadtxt(directory+'/FordA_TEST.txt'))[:200]

In [6]:
# Mini-batch size shared by the loaders below.
batch_size = 100

# Reshuffle the training samples at the start of every epoch.
train_dataloader = DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
)
# Test loader is currently disabled:
# test_dataloader  = DataLoader(test_dataset, shuffle = True, batch_size = batch_size)

In [7]:
# Pull a single batch from the training loader for inspection.
sample = next(iter(train_dataloader))

In [8]:
# Sanity-check the batch layout: element 0 holds the inputs, element 1 the labels.
print(sample[0].shape, sample[1].shape)

torch.Size([100, 500, 1]) torch.Size([100])


In [9]:
class RNN(nn.Module):
    """Stacked ``nn.RNN`` classifier that scores the final hidden state.

    The recurrent stack reads a batch-first sequence; the last layer's final
    hidden state is passed through a linear head that produces one raw score
    (logit) per class.
    """

    def __init__(self, input_size, hidden_size, num_layers, n_classes):
        """Create the recurrent stack and the classification head.

        Args:
            input_size: Number of features per time step.
            hidden_size: Width of each recurrent layer.
            num_layers: Number of stacked recurrent layers.
            n_classes: Number of output classes.
        """
        super(RNN, self).__init__()
        self.hidden_size = hidden_size
        self.n_layers = num_layers
        self.rnn = nn.RNN(input_size, hidden_size, num_layers, batch_first=True)
        # The head emits raw logits. nn.CrossEntropyLoss applies log-softmax
        # internally, so a Softmax layer here would be applied twice and
        # flatten the gradients. Callers that need probabilities can apply
        # torch.softmax(logits, dim=1) themselves.
        # Kept as nn.Sequential so parameter names stay "output.0.weight" /
        # "output.0.bias" for previously saved state dicts.
        self.output = nn.Sequential(nn.Linear(hidden_size, n_classes))

    def forward(self, x, hidden):
        """Run the sequence through the RNN and classify its final state.

        Args:
            x: Input batch laid out as (batch, sequence, features), since the
                RNN is built with ``batch_first=True``.
            hidden: Initial hidden state, or ``None`` to start from zeros.

        Returns:
            Tuple ``(logits, hidden)``: ``logits`` has shape
            (batch, n_classes); ``hidden`` is the final hidden state of every
            layer as returned by ``nn.RNN``.
        """
        # Only the final hidden state is used; per-step outputs are discarded.
        _, hidden = self.rnn(x, hidden)
        logits = self.output(hidden[-1])
        return logits, hidden

In [10]:
# Number of samples backing the training loader.
len(train_dataloader.dataset)

500

In [11]:
def train_rnn(model, dataloader, loss_function, optimizer, epochs, scheduler = None):
    """Train ``model`` for a fixed number of epochs and return the loss curve.

    Every batch starts from a fresh hidden state (``None``), so no state or
    autograd graph is carried from one batch to the next.

    Args:
        model: Module called as ``model(inputs, hidden)`` and returning
            ``(output, hidden)``, where ``output`` holds one score per class.
        dataloader: Iterable of batches in which element 0 is the inputs and
            element 1 the targets. It must expose ``.dataset`` with a length,
            which is used as the accuracy denominator.
        loss_function: Callable ``loss_function(output, targets)`` returning a
            scalar tensor.
        optimizer: Optimizer bound to the model's parameters.
        epochs: Number of passes over ``dataloader``.
        scheduler: Optional learning-rate scheduler, stepped once per epoch.

    Returns:
        List with the mean batch loss of each epoch, in order.
    """
    n_samples = len(dataloader.dataset)
    epoch_losses = []
    epoch_accuracy = []

    # Make sure train-time behaviour (e.g. dropout) is active.
    model.train()

    for i in range(1, epochs + 1):
        batch_losses = []
        n_correct = 0
        for sample in dataloader:
            inputs = sample[0]
            targets = sample[1]

            optimizer.zero_grad()
            # hidden=None: each batch starts from a zero hidden state.
            output, _ = model(inputs, None)
            loss = loss_function(output, targets)
            # The graph is rebuilt for every batch, so there is nothing to
            # retain; retain_graph=True would only keep its buffers alive.
            loss.backward()
            optimizer.step()

            # Store plain Python numbers so no tensors are kept around.
            batch_losses.append(loss.item())
            pred = output.argmax(dim=1)
            n_correct += (pred == targets).sum().item()

        if scheduler is not None:
            scheduler.step()

        epoch_losses.append(sum(batch_losses) / len(batch_losses))
        epoch_accuracy.append(n_correct / n_samples)
        if i % 10 == 0:
            print('Epoch: {}/{}.............'.format(i, epochs), end=' ')
            print("Loss: {:.4f} ; Accuracy: {:.4f}".format(epoch_losses[-1], epoch_accuracy[-1]))

    return epoch_losses

In [14]:
# Model: one input feature per time step, 50 hidden units per layer,
# 10 stacked recurrent layers, 2 output classes.
model = RNN(input_size=1, hidden_size=50, num_layers=10, n_classes=2)

# Optimisation setup: cross-entropy loss and SGD with momentum.
criterion = nn.CrossEntropyLoss()
lr = 0.01
optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0.9)
# Learning-rate schedule is currently disabled:
# scheduler = lr_scheduler.StepLR(optimizer, step_size = 50, gamma=0.5)
epochs = 125

In [None]:
# Run the training loop; `losses` receives the mean loss of each epoch.
losses = train_rnn(
    model=model,
    dataloader=train_dataloader,
    loss_function=criterion,
    optimizer=optimizer,
    epochs=epochs,
)