# Training Playground

In [1]:
import torch
import torch.nn as nn

from constants import *
from os.path import join
from torch.utils.data import DataLoader
from messenger_dataset import MessengerDataset
from tqdm import tqdm

In [14]:
# Training hyperparameters, kept together so they can be tuned in one place.
# NOTE(review): the DataLoader reads BATCH_SIZE when it is constructed, so the
# cell that builds `dl` has to be re-run after this cell is edited.
EPOCHS = 5  # number of full passes over the training DataLoader
BATCH_SIZE = 2 ** 12  # items per DataLoader batch (4096)
LEARNING_RATE = 0.001  # step size passed to the Adam optimizer

In [3]:
# Number of CUDA devices visible to this process; helps sanity-check the
# GPU index that is selected for `device`.
torch.cuda.device_count()

2

In [4]:
# Pick the compute device. The second GPU ("cuda:1") is preferred, but only when
# it actually exists: torch.cuda.is_available() is also True on single-GPU
# machines, where "cuda:1" is an invalid device ordinal and the first
# .to(device) call would fail. Fall back to "cuda:0", and to the CPU when no
# GPU is usable at all.
PREFERRED_GPU_INDEX = 1
if not torch.cuda.is_available():
    device = "cpu"
elif torch.cuda.device_count() > PREFERRED_GPU_INDEX:
    device = f"cuda:{PREFERRED_GPU_INDEX}"
else:
    device = "cuda:0"
# device = "cpu"  # uncomment to force CPU execution
print(f"Device: {device}")

Device: cuda:1


In [5]:
# Build the training dataset from the merged training file (window_size=7) and
# wrap it in a DataLoader that serves BATCH_SIZE items per batch. No sampler or
# shuffle flag is passed, so the DataLoader defaults apply.
train_path = join(MERGED_DIR, TRAIN_FILE)
ds = MessengerDataset(train_path, window_size=7)
dl = DataLoader(dataset=ds, batch_size=BATCH_SIZE)

  0%|          | 0/6 [00:00<?, ?it/s]

>>> Data loaded.


100%|██████████| 6/6 [00:53<00:00,  8.85s/it]


>>> Normalization complete.


In [6]:
# Spot-check one arbitrary dataset item to eyeball what the dataset returns.
probe_index = 43457
print(ds[probe_index])

(tensor([[[-0.5304,  0.9792,  0.1287],
         [-1.0022, -0.6590,  0.0381],
         [ 0.9120,  0.7436, -1.0008],
         [ 0.3739, -0.0550, -0.0728],
         [-0.1141,  0.2617,  0.2578],
         [ 0.0132,  0.0026,  0.1002]],

        [[-0.5304,  0.9792,  0.1287],
         [-1.0022, -0.6590,  0.0381],
         [ 0.9120,  0.7437, -1.0007],
         [ 0.3758, -0.0517, -0.0567],
         [ 0.0757,  0.0641,  0.4429],
         [ 0.0132,  0.0026,  0.1002]],

        [[-0.5304,  0.9792,  0.1287],
         [-1.0022, -0.6590,  0.0381],
         [ 0.9120,  0.7437, -1.0007],
         [ 0.3708, -0.0122, -0.0698],
         [-0.0347,  0.4038,  0.4409],
         [ 0.0132,  0.0026,  0.1002]],

        [[-0.5304,  0.9792,  0.1287],
         [-1.0022, -0.6590,  0.0381],
         [ 0.9121,  0.7438, -1.0007],
         [ 0.3862, -0.0220, -0.0767],
         [-0.2242, -0.0250, -0.0880],
         [ 0.0132,  0.0026,  0.1002]],

        [[-0.5304,  0.9792,  0.1287],
         [-1.0022, -0.6590,  0.0381],
   

In [7]:
class Net(nn.Module):
    """Small two-layer perceptron applied to the flattened last two input dims.

    The last two dimensions of the input are merged into one feature axis, then
    passed through Linear -> ReLU -> Linear. All leading dimensions are kept.

    Args:
        in_features: Size of the flattened feature axis, i.e. the product of
            the input's last two dimensions. ``None`` (the default) keeps the
            original value ``3 * len(COLS_3D)``.
        hidden_features: Width of the hidden layer (default 9).
        num_classes: Number of output scores per position (default 5).

    ``Net()`` with no arguments builds exactly the original network, with the
    same submodule names (``flatten``, ``linear1``, ``relu``, ``linear2``).
    """

    def __init__(self, in_features=None, hidden_features=9, num_classes=5):
        super().__init__()
        if in_features is None:
            # Resolved lazily so that an explicit size can be used without the
            # project-level COLS_3D constant being in scope.
            in_features = 3 * len(COLS_3D)
        self.flatten = nn.Flatten(-2, -1)
        self.linear1 = nn.Linear(in_features, hidden_features)
        self.relu = nn.ReLU()
        self.linear2 = nn.Linear(hidden_features, num_classes)

    def forward(self, x):
        """Return raw scores of shape ``x.shape[:-2] + (num_classes,)``.

        No softmax is applied here; the output is the plain result of the
        final linear layer.
        """
        x = self.flatten(x)  # (..., a, b) -> (..., a * b)
        x = self.linear1(x)
        x = self.relu(x)
        x = self.linear2(x)
        return x


# Instantiate the model, move it to the selected device and switch its
# parameters to float64. Module.to() and Module.double() both return the
# module itself, so the calls can be chained in the same order as before.
net = Net().to(device).double()  # TODO maybe rather convert input to float
print(net)

Net(
  (flatten): Flatten(start_dim=-2, end_dim=-1)
  (linear1): Linear(in_features=18, out_features=9, bias=True)
  (relu): ReLU()
  (linear2): Linear(in_features=9, out_features=5, bias=True)
)


In [8]:
# Sanity check: report whether the model's first parameter tensor lives on a
# CUDA device.
first_param = next(net.parameters())
print(first_param.is_cuda)

True


In [9]:
# Pull a single (sample, label) pair out of the dataset and print both parts,
# one after the other.
sample, label = ds[16604]

print(sample, label, sep="\n")

tensor([[[-0.5076,  0.9924,  0.1277],
         [-1.0211, -0.6348,  0.0418],
         [ 0.7661,  0.9682,  0.7297],
         [-0.5027,  0.3149,  0.2412],
         [ 0.9384, -0.0698,  1.1794],
         [ 0.0297,  0.0254,  0.1379]],

        [[-0.5076,  0.9924,  0.1277],
         [-1.0211, -0.6348,  0.0418],
         [ 0.7660,  0.9681,  0.7298],
         [-0.5610,  0.2096,  0.1142],
         [ 0.9294,  1.4560,  0.5762],
         [ 0.0297,  0.0254,  0.1379]],

        [[-0.5076,  0.9924,  0.1277],
         [-1.0211, -0.6348,  0.0418],
         [ 0.7659,  0.9681,  0.7300],
         [-0.4673,  0.1037,  0.1492],
         [ 0.3662,  0.6644,  0.5978],
         [ 0.0297,  0.0254,  0.1380]],

        [[-0.5076,  0.9924,  0.1277],
         [-1.0211, -0.6348,  0.0418],
         [ 0.7658,  0.9680,  0.7302],
         [-0.4267,  0.0605,  0.2339],
         [ 0.2587, -0.1269,  0.0420],
         [ 0.0297,  0.0254,  0.1380]],

        [[-0.5076,  0.9924,  0.1277],
         [-1.0211, -0.6348,  0.0418],
    

In [10]:
# Smoke-test the forward pass: move the inspected sample to the model's device,
# run it through the network, then print the output and its shape.
sample = sample.to(device)
out = net(sample)
print(out, out.shape, sep="\n")

tensor([[ 0.6064, -0.3969,  0.0175,  0.0672, -0.1668],
        [ 0.6293, -0.4663, -0.0184,  0.0745, -0.0751],
        [ 0.5899, -0.3653,  0.0762,  0.1257, -0.1497],
        [ 0.5208, -0.2739,  0.2015,  0.1516, -0.2326],
        [ 0.6151, -0.3746,  0.0707,  0.1307, -0.1426],
        [ 0.6298, -0.4497,  0.0253,  0.1143, -0.0351],
        [ 0.6633, -0.4082,  0.0469,  0.1397, -0.1129]], device='cuda:1',
       dtype=torch.float64, grad_fn=<AddmmBackward>)
torch.Size([7, 5])


In [11]:
# Optimisation setup: Adam over all model parameters with the configured
# learning rate, and cross-entropy as the training objective.
optimizer = torch.optim.Adam(params=net.parameters(), lr=LEARNING_RATE)
criterion = nn.CrossEntropyLoss()

In [12]:
# Total number of scalar parameters in the model (weights and biases).
sum(param.numel() for param in net.parameters())

221

In [None]:
# Train `net` for EPOCHS passes over `dl`.
#
# The loss of every 10th batch is shown in the tqdm progress bar. Previously the
# loss was extracted into locals that nothing read (the print was commented
# out), so a run gave no feedback on whether the loss was decreasing.
net.train()  # explicit training mode, in case the model was switched to eval earlier
for epoch in range(EPOCHS):
    print(f"Epoch {epoch + 1}/{EPOCHS}\n-------------------------------")
    progress = tqdm(dl)
    for batch, (X, y) in enumerate(progress):
        X = X.to(device)
        y = y.to(device)

        # nn.CrossEntropyLoss expects the class dimension at index 1, so swap
        # the last two axes of the network output before computing the loss.
        # assumes net(X) is 3-D with classes on the last axis — TODO confirm
        pred = net(X).permute(0, 2, 1)
        loss = criterion(pred, y)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if batch % 10 == 0:
            # .item() forces a device sync, so only do it on reporting steps.
            progress.set_postfix(loss=f"{loss.item():>7f}", refresh=False)

print("DONE.")

  0%|          | 0/1595611 [00:00<?, ?it/s]

Epoch 1/5
-------------------------------


  0%|          | 5977/1595611 [31:20<137:50:16,  3.20it/s]

In [None]:
# Persist the trained model to disk.
# NOTE(review): this pickles the whole module object, not only its weights, so
# loading the file later requires the Net class to be importable. Saving
# net.state_dict() is the commonly recommended alternative — confirm what the
# consumers of "model.pth" expect before changing the format.
torch.save(obj=net, f="model.pth")

In [None]:
# Scratch check of the loss on a hand-made example with two samples and two
# classes; in each row the larger score sits on the target class.
# NOTE: this rebinds `pred`, which the training loop also assigns.
pred = torch.tensor([[0.0, 1.0], [1.0, 0.0]])
target = torch.tensor([1, 0])
print(pred, target, sep="\n")
criterion(pred, target)