In [61]:
import pandas as pd
import torch
import torch.nn as nn
import numpy as np
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader
import torchvision.transforms as transforms
import torchvision
import pickle

In [62]:
# Prefer the GPU when one is available, otherwise fall back to the CPU so the
# notebook still runs end-to-end on machines without CUDA.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

device(type='cuda')

# Load Data

In [63]:
from torch.utils.data import Dataset


def binary(x, bits):
    """Expand each element of ``x`` into its ``bits`` lowest bits.

    Args:
        x: Tensor whose elements are decomposed; a new trailing axis is added.
        bits: Number of bit positions to extract per element.

    Returns:
        A float tensor of shape ``x.shape + (bits,)`` containing 0.0 / 1.0,
        ordered from the least-significant bit upwards.
    """
    # Exponents 0..bits-1, moved to x's device and dtype before forming 2**k.
    exponents = torch.arange(bits).to(x.device, x.dtype)
    bit_masks = 2 ** exponents
    # Broadcast every element against every mask along the new last axis.
    selected = torch.bitwise_and(x.unsqueeze(-1), bit_masks)
    return (selected != 0).float()


class VariableLenDataset(Dataset):
    """Dataset pairing variable-length sequences with one integer label each.

    Every sequence is stored as its own float32 tensor (so lengths may differ),
    while all labels live together in a single int64 tensor.
    """

    def __init__(self, data, targets):
        """Convert the raw sequences and labels to tensors.

        Args:
            data: Iterable of sequences; each one is converted independently.
            targets: Labels, converted as a whole to a ``torch.long`` tensor.
        """
        sequences = []
        for sequence in data:
            sequences.append(torch.tensor(sequence, dtype=torch.float32))
        self.X = sequences
        self.y = torch.tensor(targets, dtype=torch.long)

    def __len__(self):
        """Return the number of stored sequences."""
        return len(self.X)

    def __getitem__(self, idx):
        """Return the ``(sequence, label)`` pair stored at position ``idx``."""
        sequence = self.X[idx]
        label = self.y[idx]
        return sequence, label

In [64]:
# torchvision transform pipeline consisting of a single ToTensor step.
# NOTE(review): `train_transform` is not referenced by any other cell visible
# in this notebook — confirm whether it is still needed.
train_transform = transforms.Compose(
    [transforms.ToTensor()])

In [65]:
# Load the pickled training set from disk into `dataset`.
# NOTE(security): pickle.load can execute arbitrary code while deserialising,
# so only open files that come from a trusted source.
with open("data/train.pkl", "rb") as train_file:
    dataset = pickle.load(train_file)

In [66]:
# Wrap the loaded records in a DataFrame for inspection. In the rendered output
# column 0 holds the per-sample value sequences and column 1 the integer label.
df = pd.DataFrame(dataset)
df

Unnamed: 0,0,1
0,"[-1.0, -1.0, -1.0, -1.0, 144.0, 144.0, 144.0, ...",0
1,"[-1.0, -1.0, 144.0, 144.0, 34.0, 0.0, 4.0, 2.0...",0
2,"[66.0, 100.0, 148.0, 148.0, 146.0, 64.0, 146.0...",0
3,"[147.0, 65.0, 64.0, 146.0, 8.0, 50.0, 160.0, 5...",0
4,"[144.0, 144.0, 144.0, 144.0, 144.0, 64.0, 64.0...",0
...,...,...
2934,"[144.0, 144.0, 144.0, 66.0, 82.0, 132.0, 8.0, ...",4
2935,"[32.0, 33.0, 162.0, 121.0, 68.0, 68.0, 120.0, ...",4
2936,"[33.0, 33.0, 33.0, 33.0, 78.0, 2.0, 68.0, 34.0...",4
2937,"[96.0, 96.0, 96.0, 96.0, 176.0, 176.0, 176.0, ...",4


In [67]:
# Column 0 (sequences) is the model input, column 1 (labels) the target.
train_dataset = VariableLenDataset(df[0].values, df[1].values)

In [68]:
from torch.nn.utils.rnn import pad_sequence, pack_padded_sequence, pad_packed_sequence

# Sentinel that the evaluation cell compares targets against (`targets != pad`).
# NOTE(review): the DataLoader in this notebook does not pass this value to
# `pad_collate`, so batches are padded with that function's default instead.
pad = -1000

def pad_collate(batch, pad_value=0):
    """Collate ``(x, y)`` samples of differing lengths into padded batch tensors.

    Args:
        batch: Iterable of ``(x, y)`` tensor pairs. A 0-dim tensor is treated
            as a sequence of length 1.
        pad_value: Fill value written past the end of the shorter sequences.

    Returns:
        ``(xx_pad, yy_pad, x_lens, y_lens)`` where the padded tensors are
        time-major (``batch_first=False``: longest length first, batch second)
        and the length lists give each sample's unpadded length.
    """
    xx, yy = zip(*batch)

    # Promote scalars to length-1 sequences *before* measuring, so the reported
    # lengths describe the tensors that are actually padded. Measuring first
    # reported 0 for scalars, which contradicts the padded output and is not a
    # valid length for pack_padded_sequence.
    xx = [x.unsqueeze(0) if x.dim() == 0 else x for x in xx]
    yy = [y.unsqueeze(0) if y.dim() == 0 else y for y in yy]

    x_lens = [len(x) for x in xx]
    y_lens = [len(y) for y in yy]

    xx_pad = pad_sequence(xx, batch_first=False, padding_value=pad_value)
    yy_pad = pad_sequence(yy, batch_first=False, padding_value=pad_value)

    return xx_pad, yy_pad, x_lens, y_lens

In [69]:
# Shuffled mini-batches of up to 256 samples, assembled by `pad_collate`.
# `pad_value` is not supplied here, so the collate function's default is used.
train_loader = DataLoader(train_dataset, batch_size=256, shuffle=True, collate_fn=pad_collate)

In [70]:
# Sanity check: print the padded inputs (a) and the labels (b) of the first
# batch only; the two length lists (c, d) are unpacked but not shown.
for a, b, c, d in train_loader:
    print(a)
    print(b)
    break

tensor([[ -1.,  32.,  -1.,  ...,  74.,  64.,  -1.],
        [ -1.,  32.,  -1.,  ...,  60., 131.,  -1.],
        [ -1.,  45., 144.,  ...,  78.,  34.,  -1.],
        ...,
        [  0.,   0.,   0.,  ...,   0.,   0.,   0.],
        [  0.,   0.,   0.,  ...,   0.,   0.,   0.],
        [  0.,   0.,   0.,  ...,   0.,   0.,   0.]])
tensor([[0, 2, 0, 0, 1, 0, 1, 0, 0, 2, 1, 3, 3, 0, 3, 4, 4, 1, 2, 0, 4, 0, 3, 0,
         0, 0, 0, 3, 1, 0, 1, 2, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 4,
         0, 0, 0, 0, 1, 0, 3, 1, 0, 0, 4, 4, 3, 0, 1, 1, 1, 3, 3, 0, 0, 0, 3, 4,
         3, 0, 0, 0, 0, 0, 1, 0, 3, 0, 3, 2, 2, 2, 3, 2, 0, 0, 3, 0, 0, 0, 0, 3,
         1, 0, 0, 0, 3, 0, 0, 2, 0, 0, 0, 2, 2, 3, 4, 4, 3, 0, 3, 3, 0, 0, 0, 0,
         0, 0, 0, 3, 0, 0, 0, 0, 1, 0, 1, 0, 3, 4, 1, 1, 0, 0, 1, 0, 3, 0, 0, 0,
         4, 0, 1, 2, 0, 3, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 3, 4, 0, 4, 0, 0, 3,
         1, 0, 0, 1, 0, 2, 4, 0, 4, 4, 0, 2, 0, 4, 0, 3, 0, 3, 1, 0, 0, 0, 0, 0,
         3, 2, 0, 1, 0, 0,

# Model

In [71]:
class ComposerClasifier(nn.Module):
    """LSTM sequence classifier: final hidden state -> linear layer -> class scores.

    Args:
        input_size: Number of features per time step.
        hidden_size: Width of each LSTM layer.
        num_layers: Number of stacked LSTM layers.
        out_size: Number of output classes.
        bidirectional: Run the LSTM in both directions when True.
        dropout: Dropout probability between stacked LSTM layers.
    """

    def __init__(self, input_size, hidden_size, num_layers, out_size, bidirectional = False, dropout = 0.4):
        super().__init__()
        self.num_layers = num_layers
        self.hidden_size = hidden_size
        # Stored as the number of directions (1 or 2), not as a bool.
        self.bidirectional = 2 if bidirectional else 1
        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            bidirectional=bidirectional,
            # nn.LSTM applies dropout only between stacked layers and warns
            # when a non-zero value is requested for a single layer.
            dropout=dropout if num_layers > 1 else 0.0,
        )
        # The classifier sees one final state per direction of the top layer.
        self.fc = nn.Linear(hidden_size * self.bidirectional, out_size)

    def init_hidden(self, batch_size):
        """Return zero ``(hidden, cell)`` states shaped for this LSTM.

        The tensors are created on the same device and with the same dtype as
        the model's parameters, so they can be passed straight to ``forward``.
        """
        reference = next(self.parameters())
        shape = (self.num_layers * self.bidirectional, batch_size, self.hidden_size)
        hidden = torch.zeros(shape, device=reference.device, dtype=reference.dtype)
        state = torch.zeros(shape, device=reference.device, dtype=reference.dtype)
        return hidden, state

    def forward(self, x, hidden=None):
        """Classify the sequences in ``x``.

        Args:
            x: Input accepted by ``nn.LSTM`` (padded tensor or PackedSequence).
            hidden: Optional ``(h_0, c_0)`` tuple, e.g. from ``init_hidden``;
                ``None`` lets the LSTM start from zeros.

        Returns:
            ``(scores, h_n)``: class scores per sequence, and the LSTM's final
            hidden states for every layer and direction.
        """
        _, (hidden, _) = self.lstm(x, hidden)
        if self.bidirectional == 2:
            # The last two entries of h_n are the top layer's forward and
            # reverse states; use both rather than the reverse one alone.
            summary = torch.cat((hidden[-2], hidden[-1]), dim=1)
        else:
            summary = hidden[-1]
        x = self.fc(summary)
        return x, hidden


In [72]:
# Hyperparameters, passed positionally to ComposerClasifier below.
hidden_dim = 6  # LSTM hidden size
layer_dim = 8  # number of stacked LSTM layers (not a width, despite the name)
input_dim = 1  # one scalar feature per time step
out_dim = df[1].nunique()  # one output per distinct label in column 1
model = ComposerClasifier(input_dim, hidden_dim, layer_dim, out_dim)
model.to(device)

ComposerClasifier(
  (lstm): LSTM(1, 6, num_layers=8, dropout=0.4)
  (fc): Linear(in_features=6, out_features=5, bias=True)
)

In [73]:
# Adam over all model parameters.
optimizer = torch.optim.Adam(model.parameters(), lr = 0.001)
# The tensor is CrossEntropyLoss's first positional argument, i.e. the
# per-class `weight`. NOTE(review): the five values are hard-coded —
# presumably chosen to offset class imbalance; confirm they line up with
# labels 0..4 and with `out_dim`.
loss_fun = nn.CrossEntropyLoss(torch.tensor([1.0, 4.0, 4.0, 8.0, 9.0]).to(device))

In [74]:
# Training loop
# Training loop: 101 passes over the loader, reporting the summed batch loss
# for each of the first ten epochs and every tenth epoch after that.
for epoch in range(101):
    loss_sum = 0
    for x, targets, x_lens, target_lens in train_loader:
        # Add a trailing feature axis, then swap to batch-first layout.
        x = x.to(device).unsqueeze(2).transpose(0, 1)
        # Labels arrive with a leading length-1 axis; reduce to one per sample.
        targets = targets.transpose(0, 1).to(device).squeeze(1)

        # Pack so the LSTM skips the padded tail of each sequence.
        x = pack_padded_sequence(x, x_lens, batch_first=True, enforce_sorted=False)

        optimizer.zero_grad()
        preds, _ = model(x)
        preds = preds.squeeze(1)
        loss = loss_fun(preds, targets)
        loss.backward()
        optimizer.step()
        loss_sum += loss.item()
    if epoch < 10 or epoch % 10 == 0:
        print(f"Epoch: {epoch}, loss: {loss_sum:.3}")

Epoch: 0, loss: 19.8
Epoch: 1, loss: 19.5


In [15]:
# Print the labels and the predicted class scores for every training batch.
# The inputs are prepared exactly as in the training loop (batch-first, packed)
# and the model is called with the input only, matching `forward(self, x)`.
was_training = model.training
model.eval()  # switch off the LSTM's inter-layer dropout while predicting
with torch.no_grad():
    for x, targets, x_len, target_len in train_loader:
        # Add a trailing feature axis, then swap to batch-first layout.
        x = x.to(device).unsqueeze(2).transpose(0, 1)
        targets = targets.to(device)

        # Pack so padded positions do not influence the final hidden state.
        x_packed = pack_padded_sequence(x, x_len, batch_first=True, enforce_sorted=False)
        preds, _ = model(x_packed)

        # Each sample has a single label and the loader never pads with `pad`,
        # so this mask is all True; retained from the original cell.
        mask_tgt = targets != pad
        print(targets)
        print(preds)
        print()
model.train(was_training)  # restore whichever mode the model was in before

tensor([[0, 3, 0, 1, 0, 3, 0, 0, 0, 3, 0, 1, 0, 4, 1, 0, 0, 3, 0, 2, 3, 4, 0, 0,
         3, 0, 3, 0, 0, 0, 1, 0, 0, 0, 0, 0, 3, 0, 0, 3, 3, 3, 3, 4, 1, 0, 0, 1,
         1, 0]], device='cuda:0')
tensor([[ 1.2212,  2.1442,  1.2251, -2.2661, -2.6063],
        [ 1.2902,  2.2527,  1.2693, -2.4418, -2.7908],
        [ 1.2221,  2.1997,  1.2457, -2.3306, -2.6878],
        ...,
        [ 1.2807,  2.2434,  1.2712, -2.4408, -2.7784],
        [ 1.2732,  2.2447,  1.2673, -2.4199, -2.7724],
        [ 1.2681,  2.2347,  1.2615, -2.4113, -2.7602]], device='cuda:0')

tensor([[0, 0, 0, 3, 3, 3, 3, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 3, 4, 0, 3, 1, 0, 0,
         3, 1, 0, 4, 0, 0, 0, 1, 3, 0, 0, 2, 3, 3, 0, 1, 1, 4, 0, 2, 0, 0, 0, 4,
         1, 0]], device='cuda:0')
tensor([[ 1.2665,  2.2231,  1.2599, -2.4038, -2.7439],
        [ 1.2793,  2.2325,  1.2610, -2.4200, -2.7629],
        [ 1.2851,  2.2492,  1.2702, -2.4356, -2.7830],
        ...,
        [ 1.2855,  2.2524,  1.2729, -2.4454, -2.7897],
        [ 1.2