In [231]:
import torch
import torch.nn as nn
import torch.optim as optimizer
import numpy as np

from torch.utils.data import Dataset, DataLoader

In [232]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

cuda


In [233]:
# custom dataset class
data = [
    [0, 1, 2, 3],
    [4, 5, 6, 7],
    [8, 9, 10, 11]
]

labels = [0, 1, 0]

data, labels = torch.tensor(data, dtype=torch.long), torch.tensor(labels, dtype=torch.long)

class CustomDataset(torch.utils.data.Dataset):
    def __init__(self, data, labels):
        self.data = data
        self.labels = labels
    def __len__(self):
        return len(self.data)
    def __getitem__(self, index):
        return self.data[index].to(device), self.labels[index].to(device)

dataset = CustomDataset(data, labels)
dataloader = DataLoader(dataset, batch_size=2, shuffle=True)

In [234]:
# rnn model class
class RNN(nn.Module):
    def __init__(self, input_size, hidden_size, num_classes):
        super(RNN, self).__init__()
        self.hidden = hidden_size
        self.RNN = nn.RNN(input_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, num_classes)
    def forward(self, x, h0=None):
        if h0 == None:
            h0 = torch.zeros(1, x.size(0), self.hidden).to(x.device)
        out, _ = self.RNN(x, h0)
        out = self.fc(out[:, -1, :])
        return out

In [235]:
# define the model
model = RNN(12, 8, 2)
criterion = nn.CrossEntropyLoss()
optim = optimizer.Adam(model.parameters(), lr=0.01)

model.to(device)

RuntimeError: CUDA error: unspecified launch failure
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.


In [None]:
# training
for epoch in range(5):
    epoch_losses = 0
    for batch_data, batch_labels in dataloader:
        batch_data = torch.nn.functional.one_hot(batch_data, num_classes=12).float()
        preds = model(batch_data)
        losses = criterion(preds, batch_labels)
        epoch_losses += losses.item()
        optim.zero_grad()
        losses.backward()
        optim.step()
    print(f"Epoch {epoch}, losses: {epoch_losses}")

Epoch 0, losses: 1.2585248351097107
Epoch 1, losses: 1.1206048130989075
Epoch 2, losses: 0.9930064678192139
Epoch 3, losses: 0.8816869556903839
Epoch 4, losses: 0.9785471260547638


In [222]:
with torch.no_grad():
    for data, label in zip(data, labels):
        data = torch.nn.functional.one_hot(data, num_classes=12).float()
        data = data.unsqueeze(0)
        data = data.to(device)
        preds = model(data)
        prediction = torch.argmax(preds, 1).item()
        print(f"input: {data}, model pred: {prediction} and answer: {label}")

input: tensor([[[1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0.]]], device='cuda:0'), model pred: 0 and answer: 0
input: tensor([[[0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0.]]], device='cuda:0'), model pred: 1 and answer: 1
input: tensor([[[0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1.]]], device='cuda:0'), model pred: 0 and answer: 0


In [223]:
for i in data:
    print(i)

tensor([[0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1.]], device='cuda:0')


In [229]:
print(torch.randn(3, 4))

tensor([[-0.3494, -0.9174, -0.1603, -0.1243],
        [ 0.7697,  0.2627, -0.6702, -0.0033],
        [ 0.1780,  0.4627,  0.1028, -1.2646]])


In [228]:
print(torch.randn(3, 4).unsqueeze(0))

tensor([[[ 0.5067, -0.5689,  0.7198,  0.2690],
         [-0.4862, -0.3179, -1.6379, -0.4124],
         [-1.1420,  0.9365, -0.7519,  0.5378]]])


In [230]:
print(torch.rand(3, 4).unsqueeze(2))

tensor([[[0.0317],
         [0.1701],
         [0.0448],
         [0.8054]],

        [[0.9560],
         [0.9487],
         [0.9379],
         [0.9601]],

        [[0.5440],
         [0.0466],
         [0.8501],
         [0.2512]]])


In [226]:
for data, labels in dataloader:
    print(torch.nn.functional.one_hot(data, num_classes=12))

tensor([[[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
         [0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
         [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
         [0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0]],

        [[0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0],
         [0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0],
         [0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0],
         [0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0]]], device='cuda:0')
tensor([[[0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0],
         [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0],
         [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0],
         [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1]]], device='cuda:0')


In [224]:
for data, labels in dataloader:
    print(torch.nn.functional.one_hot(data, num_classes=12).float().unsqueeze(0))

tensor([[[[0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0.],
          [0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0.],
          [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0.],
          [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1.]],

         [[1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
          [0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
          [0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
          [0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0.]]]], device='cuda:0')
tensor([[[[0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0.],
          [0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0.],
          [0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0.],
          [0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0.]]]], device='cuda:0')


In [225]:
# evaluation
with torch.no_grad():
    for data, label in zip(data, labels):
        data = torch.nn.functional.one_hot(data, num_classes=12).float()
        data = data.unsqueeze(0)
        preds = model(data)
        prediction = torch.argmax(preds, 1).item()
        print(f"input: {data}, model pred: {prediction} and answer: {label}")

input: tensor([[[0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0.]]], device='cuda:0'), model pred: 1 and answer: 1
