In [1]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
from torch.nn.utils.rnn import pad_sequence
from torch.utils.data import Dataset, DataLoader, Subset

In [85]:
class TimeSeriesDataset(Dataset):
    """Dataset of per-``id`` feature matrices taken from a long-format table.

    The frame is grouped by its ``id`` column (groups come out in sorted key
    order, the pandas ``groupby`` default). For every group the ``supplier``
    and ``id`` columns are dropped and the remaining columns become one 2-D
    array with one row per original row of that group.

    Args:
        df: DataFrame containing at least the columns ``id`` and ``supplier``;
            every other column is treated as a feature.

    Attributes:
        data: ``pandas.Series`` of 2-D NumPy arrays, indexed by ``id``.
    """

    def __init__(self, df):
        keys = []
        matrices = []
        # Iterate the groups instead of using ``groupby.apply``: the iterated
        # sub-frames always contain the ``id`` column, so the drop below does
        # not depend on whether ``apply`` passes grouping columns through.
        for key, group in df.groupby('id'):
            keys.append(key)
            matrices.append(group.drop(['supplier', 'id'], axis=1).values)

        # Fill an object array element by element so NumPy never tries to
        # stack equally shaped matrices into a single 3-D array.
        boxed = np.empty(len(matrices), dtype=object)
        for position, matrix in enumerate(matrices):
            boxed[position] = matrix

        self.data = pd.Series(boxed, index=pd.Index(keys, name='id'), dtype=object)

    def __len__(self):
        """Return the number of distinct ``id`` groups."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return the ``idx``-th group as a float tensor.

        ``idx`` is a position (0 .. len-1, negatives allowed), not an ``id``
        label. ``.iloc`` is required because ``self.data`` is indexed by
        ``id``: plain ``self.data[idx]`` would be a label lookup and fail for
        ids that are not exactly 0..n-1.

        Raises:
            IndexError: If ``idx`` is outside the valid positional range.
        """
        return torch.tensor(self.data.iloc[idx], dtype=torch.float)

def pad_collate(batch):
    """Combine variable-length sequence tensors into one zero-padded batch.

    Args:
        batch: List of tensors that may differ in their first (sequence)
            dimension.

    Returns:
        A single tensor with the batch dimension first; shorter sequences are
        filled with 0 up to the length of the longest one.
    """
    padded_batch = pad_sequence(batch, padding_value=0, batch_first=True)
    return padded_batch

In [167]:
class Seq2SeqAutoencoder(nn.Module):
    """LSTM autoencoder that rebuilds a sequence from one summary vector.

    The encoder LSTM reads the input; the final hidden state of its top layer
    is used as a fixed-size summary. That summary is repeated once per input
    time step and fed to the decoder LSTM, whose hidden size equals
    ``input_dim`` so that its outputs have the same shape as the input.

    Args:
        input_dim: Number of features per time step.
        hidden_dim: Size of the encoder hidden state (the summary vector).
        num_layers: Number of stacked LSTM layers in encoder and decoder.

    NOTE(review): the reconstruction is the raw hidden output of an LSTM,
    which is bounded to (-1, 1); inputs outside that range cannot be
    reproduced exactly unless they are scaled beforehand.
    """

    def __init__(self, input_dim, hidden_dim, num_layers):
        super().__init__()
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers

        self.encoder = nn.LSTM(input_dim, hidden_dim, num_layers, batch_first=True)
        self.decoder = nn.LSTM(hidden_dim, input_dim, num_layers, batch_first=True)

    def forward(self, x):
        """Encode ``x`` and decode a reconstruction of the same shape.

        Args:
            x: Batched input of shape ``(batch, seq_len, input_dim)``.

        Returns:
            Tensor of shape ``(batch, seq_len, input_dim)``.
        """
        # Encode: ``hidden`` is (num_layers, batch, hidden_dim).
        _, (hidden, _) = self.encoder(x)

        # Use only the top layer's state as the summary. Repeating the full
        # ``hidden`` tensor would mix the layer axis into the time axis and
        # produce ``num_layers * seq_len`` decoder steps when num_layers > 1.
        summary = hidden[-1]

        # Repeat the summary for each time step: (batch, seq_len, hidden_dim).
        decoder_input = summary.unsqueeze(1).repeat(1, x.shape[1], 1)

        # Decode
        output, _ = self.decoder(decoder_input)

        return output

In [177]:
# Load the raw table, group it into per-id sequences and serve them in
# fixed-order, padded mini-batches.
DATA_PATH = '../data/final_data/data.csv'
BATCH_SIZE = 8

data = pd.read_csv(DATA_PATH)
dataset = TimeSeriesDataset(data)
dataloader = DataLoader(
    dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    collate_fn=pad_collate,
)

In [200]:
# Autoencoder with 5 features per time step, a 10-unit encoder state and a
# single LSTM layer on each side.
model = Seq2SeqAutoencoder(input_dim=5, hidden_dim=10, num_layers=1)

In [215]:
# Reconstruction objective (mean squared error) and the Adam optimiser that
# updates every parameter of `model`.
LEARNING_RATE = 0.0001

criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)

In [216]:
# Train the autoencoder to reproduce every batch it is fed.
# NOTE(review): the loss is averaged over the whole batch tensor; if the
# batches are zero-padded, the padded steps are scored as well - confirm this
# is intended.
NUM_EPOCHS = 500

for epoch in range(NUM_EPOCHS):
    for sequences in dataloader:
        # Clear gradients left over from the previous step before the new pass.
        optimizer.zero_grad()

        output = model(sequences)
        loss = criterion(output, sequences)

        loss.backward()
        optimizer.step()

    # Uncomment to show the loss of the last batch of each epoch:
    # print(loss.item())

KeyboardInterrupt: 