In [1]:
from tqdm import tqdm
import math
import numpy as np
import torch
import matplotlib.pyplot as plt

from sklearn.datasets import make_classification
from tsa import TSAScheduleCrossEntropy

import time

In [2]:
N_EPOCHS = 5
N_SAMPLES_TRAIN = 500000
N_SAMPLES_TEST = N_SAMPLES_TRAIN // 10
K_CLASSES = 3
batch_size = 512
N_STEPS = N_SAMPLES_TRAIN // batch_size
N_FEATURES = 150

x, y = make_classification(n_samples=N_SAMPLES_TRAIN+N_SAMPLES_TEST,
                           n_classes=K_CLASSES, n_informative=3, n_features=N_FEATURES)

In [3]:
x_train, x_test = x[:-N_SAMPLES_TEST], x[-N_SAMPLES_TEST:]
y_train, y_test = y[:-N_SAMPLES_TEST], y[-N_SAMPLES_TEST:]

In [4]:
net = torch.nn.Sequential(
    torch.nn.Linear(in_features=N_FEATURES, out_features=50),
    torch.nn.ReLU(),
    torch.nn.Linear(in_features=50, out_features=K_CLASSES))

optimizer = torch.optim.Adam(net.parameters())

criterion = torch.nn.CrossEntropyLoss()
tsa = TSAScheduleCrossEntropy(total_steps=N_EPOCHS * N_STEPS,
                              n_classes=K_CLASSES,
                              schedule_type='log')

In [5]:
for n_epoch in range(N_EPOCHS):

#     pg = tqdm(total=N_STEPS, desc=f'Epoch: {n_epoch}')
    
    train_loss = []
    
    for i in range(N_STEPS):
        
        optimizer.zero_grad()

        x_batch = torch.Tensor(x_train[i:i + batch_size])
        y_batch = torch.Tensor(y_train[i:i + batch_size]).long()
        
        pred = net(x_batch)
        
        correct_pred, correct_y = tsa(pred, y_batch)
        
        if correct_pred.shape[0] == 0:
            continue
        
        loss = criterion(correct_pred, correct_y)
        
        loss.backward()
        optimizer.step()
        
#         pg.update()
        
#         pg.set_postfix(loss=loss.item())
        
        train_loss.append(loss.item())
        
#         time.sleep(0.01)
        
        
    test_loss = []

    for i in range(N_SAMPLES_TEST // batch_size):

        x_batch = torch.Tensor(x_test[i:i + batch_size])
        y_batch = torch.Tensor(y_test[i:i + batch_size]).long()

        with torch.no_grad():
            pred = net(x_batch)

        loss = criterion(pred, y_batch)

        test_loss.append(loss.item())
        
    print(f'Train loss: {np.mean(train_loss):.3f} | Test loss: {np.mean(test_loss):.3f}')
        
# pg.close()

Train loss: 0.779 | Test loss: 1.054
Train loss: 0.332 | Test loss: 1.405
Train loss: 0.120 | Test loss: 1.500
Train loss: 0.042 | Test loss: 1.586
Train loss: 0.014 | Test loss: 1.658


In [6]:
net = torch.nn.Sequential(
    torch.nn.Linear(in_features=N_FEATURES, out_features=50),
    torch.nn.ReLU(),
    torch.nn.Linear(in_features=50, out_features=K_CLASSES))

optimizer = torch.optim.Adam(net.parameters())

criterion = torch.nn.CrossEntropyLoss()

In [7]:
for n_epoch in range(N_EPOCHS):
    
    train_loss = []
    
    for i in range(N_STEPS):
        
        optimizer.zero_grad()

        x_batch = torch.Tensor(x_train[i:i + batch_size])
        y_batch = torch.Tensor(y_train[i:i + batch_size]).long()
        
        pred = net(x_batch)
        
        loss = criterion(pred, y_batch)
        
        loss.backward()
        optimizer.step()
        
        train_loss.append(loss.item())
        
        
    test_loss = []

    for i in range(N_SAMPLES_TEST // batch_size):

        x_batch = torch.Tensor(x_test[i:i + batch_size])
        y_batch = torch.Tensor(y_test[i:i + batch_size]).long()

        with torch.no_grad():
            pred = net(x_batch)

        loss = criterion(pred, y_batch)

        test_loss.append(loss.item())
        
    print(f'Train loss: {np.mean(train_loss):.3f} | Test loss: {np.mean(test_loss):.3f}')

Train loss: 0.107 | Test loss: 1.863
Train loss: 0.011 | Test loss: 2.065
Train loss: 0.001 | Test loss: 2.247
Train loss: 0.000 | Test loss: 2.405
Train loss: 0.000 | Test loss: 2.549
