# Training Playground

In [2]:
import torch
import torch.nn as nn
import utils

from constants import *
from os.path import join
from torch.utils.data import DataLoader
from messenger_dataset import ClassificationDataset

from models import BaseNet

Hyperparameters

In [2]:
EPOCHS = 5
BATCH_SIZE = 4096
LEARNING_RATE = 1e-3
NUM_WORKERS = 8

WINDOW_SIZE = 7

Choose device

In [8]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Device: {device}", f"#GPUs: {torch.cuda.device_count()}", sep="\n")

Device: cuda
#GPUs: 8


Ensure reproducability

In [5]:
utils.apply_global_seed(42)

Load training data

In [6]:
ds = ClassificationDataset(join(MERGED_DIR, TRAIN_FILE), window_size=WINDOW_SIZE) #, partial=True)
train_ds, test_ds = ds.split(0.2)

In [7]:
print(len(train_ds))
print(len(test_ds))

79337823
19770192


In [8]:
train_dl = DataLoader(train_ds, batch_size=BATCH_SIZE, num_workers=NUM_WORKERS, pin_memory=True)
test_dl = DataLoader(test_ds, batch_size=BATCH_SIZE, num_workers=NUM_WORKERS, pin_memory=True)

Prepare model

In [12]:
model = BaseNet()
if device != "cpu" and torch.cuda.device_count() > 1:
    model = nn.DataParallel(model)
model.to(device)
# net.double()  # TODO maybe rather convert input to float
print(model)

DataParallel(
  (module): BaseNet(
    (flatten): Flatten(start_dim=-2, end_dim=-1)
    (linear1): Linear(in_features=6, out_features=9, bias=True)
    (relu): ReLU()
    (linear2): Linear(in_features=9, out_features=5, bias=True)
  )
)


Check device the model resides on

In [13]:
print(next(model.parameters()).is_cuda)

True


View a sample

In [14]:
sample, label = ds[16604]

print(sample)
print(label)

tensor([[[-0.5459,  0.3407,  0.2613],
         [ 0.9859,  1.2614,  0.0596]],

        [[-0.6090,  0.2266,  0.1237],
         [ 0.9858,  1.2613,  0.0598]],

        [[-0.5075,  0.1119,  0.1617],
         [ 0.9857,  1.2612,  0.0601]],

        [[-0.4636,  0.0651,  0.2534],
         [ 0.9856,  1.2611,  0.0603]],

        [[-0.5077, -0.0035,  0.3700],
         [ 0.9855,  1.2610,  0.0605]],

        [[-0.4241,  0.0400,  0.5428],
         [ 0.9854,  1.2609,  0.0608]],

        [[-0.4270,  0.2957,  0.5386],
         [ 0.9853,  1.2608,  0.0610]]])
tensor([0, 0, 0, 1, 1, 1, 1])


In [15]:
sample = sample.to(device) 
with torch.no_grad():
    out = model(sample)
print(out)
print(out.shape)

tensor([[ 0.2852, -0.3059,  0.4336, -0.2418, -0.2868],
        [ 0.2949, -0.3099,  0.4685, -0.2361, -0.2803],
        [ 0.2538, -0.2719,  0.4209, -0.2696, -0.2576],
        [ 0.2234, -0.2481,  0.3780, -0.2918, -0.2490],
        [ 0.1934, -0.2354,  0.3466, -0.3161, -0.2417],
        [ 0.1602, -0.2164,  0.2914, -0.3436, -0.2344],
        [ 0.2138, -0.2511,  0.3203, -0.2917, -0.2737]], device='cuda:0')
torch.Size([7, 5])


Define optimization evironment

In [16]:
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)

Count model parameters

In [17]:
sum(p.numel() for p in model.parameters())

113

Training loop

In [18]:
for epoch in range(EPOCHS):
    print(f"Epoch {epoch + 1}/{EPOCHS}\n-------------------------------")
    size = len(train_dl)

    for batch, (X, y) in enumerate(train_dl):
        X = X.to(device)
        y = y.to(device)
  
        pred = model(X)
        pred = pred.permute(0, 2, 1) # rather add to model definition, compare permute and transpose
        loss = criterion(pred, y)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if batch % 10 == 0:
            loss = loss.item()
            print(f"loss: {loss:>7f}  [{batch:>10d}/{size:>5d}]")

print("DONE.")

Epoch 1/5
-------------------------------
loss: 1.552299  [         0/19370]
loss: 1.416142  [        10/19370]
loss: 1.439756  [        20/19370]
loss: 1.376310  [        30/19370]
loss: 1.358946  [        40/19370]
loss: 1.715465  [        50/19370]
loss: 1.256953  [        60/19370]
loss: 1.282374  [        70/19370]
loss: 1.121977  [        80/19370]
loss: 1.514401  [        90/19370]
loss: 1.166446  [       100/19370]
loss: 1.243490  [       110/19370]
loss: 1.103608  [       120/19370]
loss: 1.102670  [       130/19370]
loss: 1.635991  [       140/19370]
loss: 0.881132  [       150/19370]
loss: 1.378297  [       160/19370]
loss: 1.839020  [       170/19370]
loss: 1.821632  [       180/19370]
loss: 0.805479  [       190/19370]
loss: 0.873678  [       200/19370]
loss: 1.832850  [       210/19370]
loss: 0.767278  [       220/19370]
loss: 0.623160  [       230/19370]
loss: 0.479124  [       240/19370]
loss: 0.595882  [       250/19370]
loss: 1.112457  [       260/19370]
loss: 0.50531

Save the model

In [19]:
torch.save(model, "models/model.pth")

In [20]:
pred = torch.tensor([[0, 1, 0, 0], [0, 0, 1, 0]], dtype=torch.float)
target = torch.tensor([1, 3])
print(pred)
print(target)
criterion(pred, target)

tensor([[0., 1., 0., 0.],
        [0., 0., 1., 0.]])
tensor([1, 3])


tensor(1.2437)

In [21]:
x, y = test_ds[98347]

In [22]:
print(x)

tensor([[[-0.1080, -0.1275,  0.2128],
         [ 0.6720, -0.1290, -2.4018]],

        [[-0.1115, -0.1231,  0.2152],
         [ 0.6721, -0.1289, -2.4017]],

        [[-0.1053, -0.1213,  0.2201],
         [ 0.6722, -0.1289, -2.4017]],

        [[-0.1068, -0.1193,  0.2218],
         [ 0.6723, -0.1289, -2.4017]],

        [[-0.1054, -0.1154,  0.2241],
         [ 0.6724, -0.1289, -2.4017]],

        [[-0.1064, -0.1122,  0.2272],
         [ 0.6725, -0.1288, -2.4017]],

        [[-0.1059, -0.1176,  0.2247],
         [ 0.6726, -0.1288, -2.4017]]])


In [23]:
print(y)

tensor([0, 0, 0, 0, 0, 0, 0])


In [24]:
pred = torch.nn.functional.softmax(model(x), dim=1)
pred

tensor([[9.9998e-01, 2.4854e-05, 2.1472e-08, 2.7317e-19, 9.5700e-28],
        [9.9998e-01, 2.4835e-05, 2.1501e-08, 2.7690e-19, 9.4915e-28],
        [9.9998e-01, 2.4855e-05, 2.1496e-08, 2.7544e-19, 9.5085e-28],
        [9.9998e-01, 2.4849e-05, 2.1517e-08, 2.7757e-19, 9.4724e-28],
        [9.9998e-01, 2.4819e-05, 2.1476e-08, 2.7679e-19, 9.4032e-28],
        [9.9998e-01, 2.4816e-05, 2.1504e-08, 2.7943e-19, 9.3598e-28],
        [9.9998e-01, 2.4834e-05, 2.1507e-08, 2.7822e-19, 9.4219e-28]],
       device='cuda:0', grad_fn=<SoftmaxBackward>)

In [25]:
torch.argmax(pred, dim=1)

tensor([0, 0, 0, 0, 0, 0, 0], device='cuda:0')