In [1]:
import math

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from torch.utils.tensorboard import SummaryWriter

# TensorBoard event writer shared by the whole script; log_dir=None keeps
# SummaryWriter's default run directory.
writer = SummaryWriter(log_dir=None)
# Run configuration.
path = "./cifar10_model.pt"   # where the model state_dict checkpoint is stored
batch_size = 32               # mini-batch size used by both data loaders
# Effectively unbounded: the training loop is expected to end through early
# stopping rather than by exhausting the epoch range.
num_epochs = 10_000_000_000

class EarlyStopping():
    """Checkpoint the best model seen so far and signal when to stop training.

    Each call to :meth:`should_stop` compares the given loss with the best
    (lowest) loss observed so far. A strictly lower loss saves the model's
    ``state_dict`` to ``save_path`` and resets the patience counter; any other
    value (higher, equal, or NaN) counts as an epoch without improvement.
    """

    def __init__(self, patience=2, save_path=path):
        """
        Args:
            patience: Number of consecutive non-improving calls after which
                :meth:`should_stop` returns True.
            save_path: File the best ``state_dict`` is written to. The default
                is the module-level ``path``, bound when the class is defined.
        """
        self.patience = patience
        self.save_path = save_path
        self.min_loss = float("inf")  # best loss seen so far
        self.count = 0                # non-improving calls since the last best

    def should_stop(self, model, loss):
        """Record ``loss`` and report whether training should stop.

        Args:
            model: Module whose ``state_dict`` is saved when ``loss`` improves.
            loss: Loss value for the epoch that just finished.

        Returns:
            True once ``patience`` consecutive calls have failed to improve on
            the best loss, otherwise False.
        """
        if loss < self.min_loss:
            self.min_loss = loss
            self.count = 0
            torch.save(model.state_dict(), self.save_path)
            return False

        # Not a strict improvement. Equal and NaN losses land here as well, so
        # a plateaued or diverged run still runs out of patience instead of
        # training forever.
        self.count += 1
        return self.count >= self.patience

    def load(self, model):
        """Load the checkpoint stored at ``save_path`` into ``model``."""
        model.load_state_dict(torch.load(self.save_path))
# Early-stopping helper configured with a patience of 3.
early_stopper = EarlyStopping(patience=3)

# CIFAR-10 train and test splits stored under the current directory
# (download=True); every image is converted with ToTensor.
train_data = datasets.CIFAR10(
    root="./",
    train=True,
    transform=transforms.ToTensor(),
    download=True,
)
test_data = datasets.CIFAR10(
    root="./",
    train=False,
    transform=transforms.ToTensor(),
    download=True,
)

# Only the training loader shuffles; the test loader keeps dataset order.
train = DataLoader(dataset=train_data, batch_size=batch_size, shuffle=True)
test = DataLoader(dataset=test_data, batch_size=batch_size, shuffle=False)

def lecun_normal_(tensor):
    """Fill ``tensor`` in place with LeCun-normal values, N(0, 1 / fan_in).

    The fan-in is read from the last dimension of ``tensor``.

    Args:
        tensor: Tensor to overwrite in place. Its last dimension must be
            non-zero, otherwise the standard deviation is undefined.

    Raises:
        ZeroDivisionError: If the last dimension of ``tensor`` has size 0.
    """
    fan_in = tensor.shape[-1]
    std = math.sqrt(1 / fan_in)
    with torch.no_grad():
        # normal_ takes (mean, std), not a (low, high) range: LeCun
        # initialization is zero-mean with standard deviation sqrt(1 / fan_in).
        tensor.normal_(0.0, std)

class MyModel(nn.Module):
    """Fully connected classifier for 3x32x32 inputs with 10 output logits.

    Layout: flatten -> Linear(3072, 100) + ELU -> 19 x [Linear(100, 100) +
    SELU + AlphaDropout] -> Linear(100, 10). The output layer has no
    activation, so ``forward`` returns raw logits.
    """

    def __init__(self):
        super().__init__()
        self.flatten = nn.Flatten()
        self.input = self.he_initialize(nn.Linear(3 * 32 * 32, 100))

        # Hidden stack: each layer is created and initialized in turn.
        hidden_layers = []
        for _ in range(19):
            hidden_layers.append(
                self.he_initialize(nn.Linear(100, 100), nonlinearity="linear")
            )
        self.linear_layers = nn.ModuleList(hidden_layers)

        # One AlphaDropout per hidden layer, paired with it in forward().
        self.alpha_dropouts = nn.ModuleList([nn.AlphaDropout() for _ in range(19)])
        self.output = self.he_initialize(nn.Linear(100, 10))

    def he_initialize(self, layer, nonlinearity="leaky_relu"):
        """Kaiming-normal initialize ``layer.weight`` and zero ``layer.bias``.

        Args:
            layer: Layer exposing ``weight`` and ``bias`` tensors.
            nonlinearity: Passed to ``nn.init.kaiming_normal_`` to pick the gain.

        Returns:
            The same ``layer`` object, so the call can wrap a constructor.
        """
        weight, bias = layer.weight, layer.bias
        nn.init.kaiming_normal_(weight, nonlinearity=nonlinearity)
        nn.init.zeros_(bias)
        return layer

    def forward(self, x):
        """Return the 10 class logits for a batch of images ``x``."""
        hidden = F.elu(self.input(self.flatten(x)))
        # Each hidden block is Linear -> SELU -> AlphaDropout.
        for fc, drop in zip(self.linear_layers, self.alpha_dropouts):
            hidden = drop(F.selu(fc(hidden)))
        return self.output(hidden)
# Network on the GPU, plus the loss, optimizer and learning-rate schedule.
model = MyModel()
model = model.cuda()
criterion = nn.CrossEntropyLoss()
optimizer = optim.NAdam(model.parameters())
# The one-cycle schedule is sized for num_epochs * len(train) optimizer steps.
# NOTE(review): no scheduler.step() call is visible in this file's training
# loop, so the schedule never advances - confirm whether that is intended.
scheduler = optim.lr_scheduler.OneCycleLR(
    optimizer,
    max_lr=0.01,
    epochs=num_epochs,
    steps_per_epoch=len(train),
)

# Training loop, currently disabled by being wrapped in a string literal.
# Per epoch it accumulates the summed batch loss and the number of correct
# predictions, logs both to TensorBoard, and asks the early stopper whether
# to break. "Accuracy/train" logs the accuracy fraction (it previously
# logged the loss a second time).
'''
print(f"Train on {len(train_data)}, test on {len(test_data)} samples")
for epoch in range(num_epochs):
    model.train()
    total_loss = 0
    accuracy = 0
    for datas, labels in train:
        datas = datas.cuda()
        labels = labels.cuda()
        result = model(datas)
        loss = criterion(result, labels)
        correct = torch.sum(result.argmax(dim=1) == labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        accuracy += correct.item()
        total_loss += loss.item()
    writer.add_scalar("Loss/train", total_loss, epoch)
    writer.add_scalar("Accuracy/train", accuracy / len(train_data), epoch)
    print(f"Epoch {epoch + 1}, loss: {total_loss / len(train)}, accuracy: {accuracy / len(train_data)}")
    if early_stopper.should_stop(model, total_loss):
        print(f"EarlyStopping: [Epoch: {epoch - early_stopper.count}]")
        break
'''
# Close the TensorBoard writer; nothing is logged after this point.
writer.close()
# Load the checkpoint at the early stopper's save_path into the model. With
# the loop above disabled, that file must already exist from an earlier run.
early_stopper.load(model)

# Test-set evaluation, currently disabled by being wrapped in a string
# literal. In eval mode and without gradients it accumulates the batch losses
# and correct predictions over the test loader, then prints the mean batch
# loss and the accuracy over the whole test set.
'''
with torch.no_grad():
    model.eval()
    total_loss = 0
    accuracy = 0
    for datas, labels in test:
        datas = datas.cuda()
        labels = labels.cuda()
        result = model(datas)
        loss = criterion(result, labels)
        correct = torch.sum(result.argmax(dim=1) == labels)
        accuracy += correct.item()
        total_loss += loss.item()
    print(f"test_loss: {total_loss / len(test)}, test_accuracy: {accuracy / len(test_data)}")
'''

# Monte Carlo dropout on the first test image: run 100 stochastic forward
# passes and average the softmax outputs.
with torch.no_grad():
    # train() is used on purpose: it leaves the dropout layers active, so
    # repeated passes over the same input give different predictions.
    model.train()

    # Build the (1, C, H, W) input once instead of re-indexing the dataset and
    # copying it to the GPU on every pass.
    sample = test_data[0][0].unsqueeze(0).cuda()

    # Single stochastic pass; not read again in this cell.
    # NOTE(review): drop it if nothing else depends on `result`.
    result = model(sample)
    stacked = torch.stack([F.softmax(model(sample), dim=-1) for _ in range(100)])
    print(stacked.size())
    print(stacked.mean(dim=0))
    # Ground-truth label of the same image, for comparison with the mean.
    print(test_data[0][1])


Files already downloaded and verified
Files already downloaded and verified
torch.Size([100, 1, 10])
tensor([[0.1002, 0.1055, 0.0962, 0.1043, 0.1052, 0.0951, 0.0942, 0.0967, 0.1161,
         0.0865]], device='cuda:0')
3
