In [1]:
import numpy as np
import os
import gc

import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split

import wandb

import tarfile
import pickle
from collections import Counter

import torch
import torch.nn as nn
from torch import optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import torchvision
import torchvision.transforms as transforms

In [2]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

device(type='cuda')

In [3]:
# import requests

# import urllib

# testfile = urllib.URLopener()
# testfile.retrieve("https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz", "cifar-100-python.tar.gz")

In [3]:

with tarfile.open("cifar-100-python.tar.gz", 'r|gz') as f:
        f.extractall(path="./")
        f.close()

# Extract cifar-100-python.tar.gz to separate data

In [4]:
data = pickle.load(open(os.path.join("cifar-100-python", "train"), 'rb'), encoding='latin1')
X_train_np = data['data']
y_train_np = np.asarray(data['fine_labels'], np.int8)
y_super_train_np = np.asarray(data['coarse_labels'], np.int8)
data = pickle.load(open(os.path.join('cifar-100-python', 'test'), 'rb'), encoding='latin1')
X_test_np = data['data']
y_test_np = np.asarray(data['fine_labels'], np.int8)
y_super_test_np = np.asarray(data['coarse_labels'], np.int8)

In [5]:
# reshape
X_train_np = X_train_np.reshape(-1, 3, 32, 32)
X_test_np = X_test_np.reshape(-1, 3, 32, 32)


In [7]:
# X_train_np.shape
# sns.countplot(y_train_np)

x_max,y_max = 3,3
offset = 0

# f, axarr = plt.subplots(y_max,x_max)
# for y in range(y_max):
#     for x in range(x_max):
#         axarr[y,x].imshow(X_train_np[y*y_max+x+offset])
# print(y_train[i])
# plt.imshow(np.rollaxis(X_train[i], 0, 3))

In [6]:
mean = X_train_np.mean(axis=(0,1,2), keepdims=True).astype(np.float32)
std = X_train_np.std(axis=(0,1,2), keepdims=True).astype(np.float32)

X_train_np = (X_train_np - mean) / std
X_test_np = (X_test_np - mean) / std

#TODO POROVNAT S -MEAN)/MAX

In [7]:
def train_test_split_v2(X_train,y_train,test_size,random_state=None,sample_limit=None):
    total_count = len(X_train)
    if sample_limit:
        _, X_train, _, y_train = train_test_split(X_train, y_train, test_size=1/(total_count/sample_limit), random_state=random_state)
    return train_test_split(X_train, y_train, test_size=test_size, random_state=random_state)

In [8]:
X_train,X_valid,y_train,y_valid= train_test_split_v2(X_train_np, y_train_np,sample_limit=None, test_size=1/6, random_state=42)

print(X_train.shape)
print(X_valid.shape)
print(X_train.shape[0]+X_valid.shape[0])


(41666, 3, 32, 32)
(8334, 3, 32, 32)
50000


In [9]:
class CIFAR(Dataset):
    def __init__(self, X,y,transform=None):
        self.X=X
        self.y = torch.LongTensor(y)
        self.transform = transform

    def __len__(self):
        return len(self.X)

    def __getitem__(self, index):

        image = np.rollaxis(self.X[index],0,3)
        label = self.y[index]

        if self.transform is not None:
            image = self.transform(image)

        return image, label


In [10]:
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.RandomResizedCrop(32)
])

train_data = CIFAR(X_train,y_train, transform=transform)
valid_data = CIFAR(X_valid,y_valid, transform=transforms.ToTensor())

train_loader = DataLoader(dataset=train_data,batch_size=256)
valid_loader = DataLoader(dataset=valid_data,batch_size=256)


# dataiter=iter(train_loader)
# images, labels = dataiter.next()

# print('images shape: ', images.size())
# print('labels shape: ', labels.size())


# x_max,y_max = 3,3
# offset = 0

# f, axarr = plt.subplots(y_max,x_max)
# for y in range(y_max):
#     for x in range(x_max):
#         axarr[y,x].imshow(images[y*y_max+x].permute(1,2,0))

In [17]:
vgg11 = nn.Sequential(
            nn.Conv2d(3,64, kernel_size=3, padding=1),
            nn.ReLU(True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(64,128, kernel_size=3, padding=1),
            nn.ReLU(True),
            nn.Conv2d(128,128, kernel_size=3, padding=1),
            nn.ReLU(True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(128,256, kernel_size=3, padding=1),
            nn.ReLU(True),
            nn.Conv2d(256,256, kernel_size=3, padding=1),
            nn.ReLU(True),
            nn.Conv2d(256,256, kernel_size=3, padding=1),
            nn.ReLU(True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(256,512, kernel_size=3, padding=1),
            nn.ReLU(True),
            nn.Conv2d(512,512, kernel_size=3, padding=1),
            nn.ReLU(True),
            nn.Conv2d(512,512, kernel_size=3, padding=1),
            nn.ReLU(True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(512,512, kernel_size=3, padding=1),
            nn.ReLU(True),
            nn.Conv2d(512,512, kernel_size=3, padding=1),
            nn.ReLU(True),
            nn.Conv2d(512,512, kernel_size=3, padding=1),
            nn.ReLU(True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(512,512, kernel_size=3, padding=1),
            nn.ReLU(True),
        )


vgg11_bn = nn.Sequential(
            nn.Conv2d(3,64, kernel_size=3, padding=1),
            nn.BatchNorm2d(64),
            nn.LeakyReLU(True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(64,128, kernel_size=3, padding=1),
            nn.BatchNorm2d(128),
            nn.LeakyReLU(True),
            nn.Dropout(),
#             nn.Conv2d(128,128, kernel_size=3, padding=1),
#             nn.BatchNorm2d(128),
#             nn.LeakyReLU(True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(128,256, kernel_size=3, padding=1),
            nn.BatchNorm2d(256),
            nn.LeakyReLU(True),
            nn.Dropout(),
            nn.Conv2d(256,256, kernel_size=3, padding=1),
            nn.BatchNorm2d(256),
            nn.LeakyReLU(True),
#             nn.Conv2d(256,256, kernel_size=3, padding=1),
#             nn.BatchNorm2d(256),
#             nn.LeakyReLU(True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(256,512, kernel_size=3, padding=1),
            nn.BatchNorm2d(512),
            nn.LeakyReLU(True),
            nn.Dropout(),
            nn.Conv2d(512,512, kernel_size=3, padding=1),
            nn.BatchNorm2d(512),
            nn.LeakyReLU(True),
#             nn.Conv2d(512,512, kernel_size=3, padding=1),
#             nn.BatchNorm2d(512),
#             nn.LeakyReLU(True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(512,512, kernel_size=3, padding=1),
            nn.BatchNorm2d(512),
            nn.LeakyReLU(True),
            nn.Dropout(),
            nn.Conv2d(512,512, kernel_size=3, padding=1),
            nn.BatchNorm2d(512),
            nn.LeakyReLU(True),
            nn.MaxPool2d(kernel_size=2, stride=2)
#             nn.Conv2d(512,512, kernel_size=3, padding=1),
#             nn.BatchNorm2d(512),
#             nn.LeakyReLU(True)
        )

# vgg13_bn = nn.Sequential(
#     nn.Conv2d(3,64, kernel_size=3, padding=1),
#     nn.BatchNorm2d(64),
#     nn.ReLU(True),
#     # ---
#     nn.Conv2d(64,64, kernel_size=3, padding=1),
#     nn.BatchNorm2d(64),
#     nn.ReLU(True),
#     # ---
#     nn.MaxPool2d(kernel_size=2, stride=2),
#     # ---
#     nn.Conv2d(64,128, kernel_size=3, padding=1),
#     nn.BatchNorm2d(128),
#     nn.ReLU(True),
#     # ---
#     nn.Conv2d(128,128, kernel_size=3, padding=1),
#     nn.BatchNorm2d(128),
#     nn.ReLU(True),
#     # ---
#     nn.MaxPool2d(kernel_size=2, stride=2),
#     # ---
#     nn.Conv2d(128,256, kernel_size=3, padding=1),
#     nn.BatchNorm2d(256),
#     nn.ReLU(True),
#     # ---
#     nn.Conv2d(256,256, kernel_size=3, padding=1),
#     nn.BatchNorm2d(256),
#     nn.ReLU(True),
#     # ---
#     nn.MaxPool2d(kernel_size=2, stride=2),
#     # ---
#     nn.Conv2d(256,512, kernel_size=3, padding=1),
#     nn.BatchNorm2d(512),
#     nn.ReLU(True),
#     # ---
#     nn.Conv2d(512,512, kernel_size=3, padding=1),
#     nn.BatchNorm2d(512),
#     nn.ReLU(True),
#     # ---
#     nn.MaxPool2d(kernel_size=2, stride=2),
#     # ---
#     nn.Conv2d(512,512, kernel_size=3, padding=1),
#     nn.BatchNorm2d(512),
#     nn.ReLU(True),
#     # ---
#     nn.Conv2d(512,512, kernel_size=3, padding=1),
#     nn.BatchNorm2d(512),
#     nn.ReLU(True),
#     # ---
#     nn.MaxPool2d(kernel_size=2, stride=2)
# )

# vgg16_bn = nn.Sequential(
#     nn.Conv2d(3,64, kernel_size=3, padding=1),
#     nn.BatchNorm2d(64),
#     nn.ReLU(True),
#     # ---
#     nn.Conv2d(64,64, kernel_size=3, padding=1),
#     nn.BatchNorm2d(64),
#     nn.ReLU(True),
#     # ---
#     nn.MaxPool2d(kernel_size=2, stride=2),
#     # ---
#     nn.Conv2d(64,128, kernel_size=3, padding=1),
#     nn.BatchNorm2d(128),
#     nn.ReLU(True),
#     # ---
#     nn.Conv2d(128,128, kernel_size=3, padding=1),
#     nn.BatchNorm2d(128),
#     nn.ReLU(True),
#     # ---
#     nn.MaxPool2d(kernel_size=2, stride=2),
#     # ---
#     nn.Conv2d(128,256, kernel_size=3, padding=1),
#     nn.BatchNorm2d(256),
#     nn.ReLU(True),
#     # ---
#     nn.Conv2d(256,256, kernel_size=3, padding=1),
#     nn.BatchNorm2d(256),
#     nn.ReLU(True),
#     # ---
#     nn.Conv2d(256,256, kernel_size=3, padding=1),
#     nn.BatchNorm2d(256),
#     nn.ReLU(True),
#     # ---
#     nn.MaxPool2d(kernel_size=2, stride=2),
#     # ---
#     nn.Conv2d(256,512, kernel_size=3, padding=1),
#     nn.BatchNorm2d(512),
#     nn.ReLU(True),
#     # ---
#     nn.Conv2d(512,512, kernel_size=3, padding=1),
#     nn.BatchNorm2d(512),
#     nn.ReLU(True),
#     # ---
#     nn.Conv2d(512,512, kernel_size=3, padding=1),
#     nn.BatchNorm2d(512),
#     nn.ReLU(True),
#     # ---
#     nn.MaxPool2d(kernel_size=2, stride=2),
#     # ---
#     nn.Conv2d(512,512, kernel_size=3, padding=1),
#     nn.BatchNorm2d(512),
#     nn.ReLU(True),
#     # ---
#     nn.Conv2d(512,512, kernel_size=3, padding=1),
#     nn.BatchNorm2d(512),
#     nn.ReLU(True),
#     # ---
#     nn.Conv2d(512,512, kernel_size=3, padding=1),
#     nn.BatchNorm2d(512),
#     nn.ReLU(True),
#     # ---
#     nn.MaxPool2d(kernel_size=2, stride=2)
# )

# vgg19_bn = nn.Sequential(
#     nn.Conv2d(3,64, kernel_size=3, padding=1),
#     nn.BatchNorm2d(64),
#     nn.ReLU(True),
#     # ---
#     nn.Conv2d(64,64, kernel_size=3, padding=1),
#     nn.BatchNorm2d(64),
#     nn.ReLU(True),
#     # ---
#     nn.MaxPool2d(kernel_size=2, stride=2),
#     nn.Conv2d(64,128, kernel_size=3, padding=1),
#     nn.BatchNorm2d(128),
#     nn.ReLU(True),
#     # ---
#     nn.Conv2d(128,128, kernel_size=3, padding=1),
#     nn.BatchNorm2d(128),
#     nn.ReLU(True),
#     # ---
#     nn.Conv2d(128,128, kernel_size=3, padding=1),
#     nn.BatchNorm2d(128),
#     nn.ReLU(True),
#     # ---
#     nn.MaxPool2d(kernel_size=2, stride=2),
#     nn.Conv2d(128,256, kernel_size=3, padding=1),
#     nn.BatchNorm2d(256),
#     nn.ReLU(True),
#     # ---
#     nn.Conv2d(256,256, kernel_size=3, padding=1),
#     nn.BatchNorm2d(256),
#     nn.ReLU(True),
#     # ---
#     nn.Conv2d(256,256, kernel_size=3, padding=1),
#     nn.BatchNorm2d(256),
#     nn.ReLU(True),
#     # ---
#     nn.Conv2d(256,256, kernel_size=3, padding=1),
#     nn.BatchNorm2d(256),
#     nn.ReLU(True),
#     # ---
#     nn.Conv2d(256,256, kernel_size=3, padding=1),
#     nn.BatchNorm2d(256),
#     nn.ReLU(True),
#     # ---
#     nn.MaxPool2d(kernel_size=2, stride=2),
#     nn.Conv2d(256,512, kernel_size=3, padding=1),
#     nn.BatchNorm2d(512),
#     nn.ReLU(True),
#     # ---
#     nn.Conv2d(512,512, kernel_size=3, padding=1),
#     nn.BatchNorm2d(512),
#     nn.ReLU(True),
#     # ---
#     nn.Conv2d(512,512, kernel_size=3, padding=1),
#     nn.BatchNorm2d(512),
#     nn.ReLU(True),
#     # ---
#     nn.Conv2d(512,512, kernel_size=3, padding=1),
#     nn.BatchNorm2d(512),
#     nn.ReLU(True),
#     # ---
#     nn.Conv2d(512,512, kernel_size=3, padding=1),
#     nn.BatchNorm2d(512),
#     nn.ReLU(True),
#     # ---
#     nn.MaxPool2d(kernel_size=2, stride=2),
#     nn.Conv2d(512,512, kernel_size=3, padding=1),
#     nn.BatchNorm2d(512),
#     nn.ReLU(True),
#     # ---
#     nn.Conv2d(512,512, kernel_size=3, padding=1),
#     nn.BatchNorm2d(512),
#     nn.ReLU(True),
#     # ---
#     nn.Conv2d(512,512, kernel_size=3, padding=1),
#     nn.BatchNorm2d(512),
#     nn.ReLU(True),
#     # ---
#     nn.Conv2d(512,512, kernel_size=3, padding=1),
#     nn.BatchNorm2d(512),
#     nn.ReLU(True),
#     # ---
#     nn.Conv2d(512,512, kernel_size=3, padding=1),
#     nn.BatchNorm2d(512),
#     nn.ReLU(True),
#     # ---
#     nn.MaxPool2d(kernel_size=2, stride=2),
#     nn.Conv2d(512,512, kernel_size=3, padding=1),
#     nn.BatchNorm2d(512),
#     nn.ReLU(True)
# )

In [18]:
class SimpleNet(nn.Module):
    def __init__(self, in_chanels, num_classes):
        super(SimpleNet, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(in_chanels,128, kernel_size=3, padding=1),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(128,256, kernel_size=3, padding=1),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(256,512, kernel_size=3, padding=1),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )
        self.classifier = nn.Sequential(
            nn.Dropout(),
            nn.Linear(8192,),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(512,256),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(256,num_classes),
            nn.Softmax()
        )

    def forward(self, x):
        x = self.features(x)

        x = x.view(x.size(0), -1)
        x = self.classifier(x)
        return x


class Net(nn.Module):
    def __init__(self, in_chanels, num_classes):
        super(Net, self).__init__()
        self.features = vgg11_bn
        self.classifier = nn.Sequential(
            nn.Linear(512,2042),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(2042,2042),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(2042,num_classes)
        )

    def forward(self, x):
        x = self.features(x)
        x = x.view(x.size(0), -1)
        x = self.classifier(x)
        return x

In [19]:

learning_rate=1e-4
betas = (0.9,0.999)
batch_size = 128
sample_limit = 6000
epochs = 50

n_epochs_stop = 6
epochs_no_improve = 0
early_stop = False

In [15]:
#model saving

def save_model(epoch, model, optimizer, train_loss, valid_loss, accuracy):
    PATH = 'current_state.pt'
    torch.save({
        'epoch': epoch,
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'train_loss': train_loss,
        'valid_loss': valid_loss,
        'accuracy' : accuracy
    }, PATH)

In [20]:
run = wandb.init(project='Zadanie2-CIFAR', entity='xpetricko')
run.name = f"3run-{epochs}e-{batch_size}bs-vgg11_bn"


config = wandb.config
config.learning_rate = learning_rate
config.sample_limit=sample_limit
config.batch_size=batch_size
config.epochs_with_no_improve =  6
config.notes='droput added'


model = Net(3,100)
model.to(device)
optimizer = optim.AdamW(model.parameters(),lr=learning_rate) # pri adamW weight_decay = 1e-2
loss_fn = nn.CrossEntropyLoss()

X_train,X_valid,y_train,y_valid = train_test_split_v2(X_train_np,
                                                      y_train_np,
                                                      sample_limit=sample_limit,
                                                      test_size=1/6,
                                                      random_state=7)

train_data = CIFAR(X_train,y_train, transform=transforms.ToTensor())
valid_data = CIFAR(X_valid,y_valid, transform=transforms.ToTensor())

train_loader = DataLoader(dataset=train_data,batch_size=batch_size)
valid_loader = DataLoader(dataset=valid_data,batch_size=batch_size)

mean_train_losses = []
mean_valid_losses = []
valid_acc_list = []
min_valid_loss = np.Inf


torch.manual_seed(7)

for epoch in range(epochs):
    model.train()

    train_losses = []
    valid_losses = []

    for i, (images,labels) in enumerate(train_loader):
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        output = model.forward(images)
        loss = loss_fn(output,labels)
        loss.backward()
        optimizer.step()

        train_losses.append(loss.item())


    model.eval()
    correct = 0
    total = 0

    with torch.no_grad():
        for i,(images,labels) in enumerate(valid_loader):
            images, labels = images.to(device), labels.to(device)
            output = model.forward(images)
            loss = loss_fn(output,labels)

            valid_losses.append(loss.item())

            _, predicted = torch.max(output.data,1)
            correct += (predicted==labels).sum().item()
            total += labels.size(0)

    mean_train_losses.append(np.mean(train_losses))
    mean_valid_losses.append(np.mean(valid_losses))
    accuracy = 100*correct/total
    valid_acc_list.append(accuracy)
      

    wandb.log({"Train mean loss":mean_train_losses[-1],"Valid mean loss":mean_valid_losses[-1],"Accuracy":accuracy})

    print('Epoch: {}, train loss: {:.4f}, valid loss: {:.4f}, valid acc: {:.2f}%'.format(epoch+1,mean_train_losses[-1],mean_valid_losses[-1],accuracy))
    
    if(mean_valid_losses[-1] < min_valid_loss):
        save_model(epoch, model, optimizer, mean_train_losses[-1], mean_valid_losses[-1], accuracy)
        epochs_no_improve = 0
        min_valid_loss = mean_valid_losses[-1]
    else:
        epochs_no_improve += 1
    
    if epoch > 5 and epochs_no_improve == n_epochs_stop:
        print(f'Early Stopping on epoch {epoch}/{epochs}, min valid loss: {min_valid_loss}, last valid loss: {mean_valid_losses[-1]}')
#         early_stop = True
        break
torch.cuda.empty_cache()        
run.finish()

wandb: wandb version 0.10.25 is available!  To upgrade, please run:
wandb:  $ pip install wandb --upgrade


Epoch: 1, train loss: 4.5858, valid loss: 4.5525, valid acc: 2.00%
Epoch: 2, train loss: 4.4385, valid loss: 4.5106, valid acc: 1.90%
Epoch: 3, train loss: 4.2969, valid loss: 4.6198, valid acc: 2.30%
Epoch: 4, train loss: 4.1825, valid loss: 4.6031, valid acc: 3.00%
Epoch: 5, train loss: 4.1112, valid loss: 4.6315, valid acc: 4.60%
Epoch: 6, train loss: 4.0176, valid loss: 4.5966, valid acc: 4.40%
Epoch: 7, train loss: 3.8916, valid loss: 4.5874, valid acc: 5.30%
Epoch: 8, train loss: 3.8247, valid loss: 4.4556, valid acc: 7.00%
Epoch: 9, train loss: 3.7351, valid loss: 4.4104, valid acc: 6.80%
Epoch: 10, train loss: 3.6738, valid loss: 4.3468, valid acc: 7.40%
Epoch: 11, train loss: 3.6018, valid loss: 4.3403, valid acc: 7.00%
Epoch: 12, train loss: 3.5324, valid loss: 4.2951, valid acc: 7.00%
Epoch: 13, train loss: 3.4674, valid loss: 4.1881, valid acc: 7.60%
Epoch: 14, train loss: 3.3936, valid loss: 4.2035, valid acc: 8.60%
Epoch: 15, train loss: 3.3322, valid loss: 4.1831, valid 

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
Train mean loss,2.44824
Valid mean loss,4.07913
Accuracy,13.8
_runtime,150.0
_timestamp,1618266669.0
_step,28.0


0,1
Train mean loss,██▇▇▆▆▆▆▅▅▅▅▄▄▄▄▃▃▃▃▃▃▂▂▂▂▁▁▁
Valid mean loss,▇▇█████▆▆▅▅▅▄▄▄▄▄▄▃▁▂▃▁▃▁▂▁▁▃
Accuracy,▁▁▁▂▃▂▃▄▄▄▄▄▄▅▄▅▅▆▆▆▆▆█▆▇▇▇██
_runtime,▁▁▂▂▂▂▃▃▃▃▃▄▄▄▅▅▅▅▅▆▆▆▇▇▇▇▇██
_timestamp,▁▁▂▂▂▂▃▃▃▃▃▄▄▄▅▅▅▅▅▆▆▆▇▇▇▇▇██
_step,▁▁▁▂▂▂▃▃▃▃▃▄▄▄▅▅▅▅▅▆▆▆▇▇▇▇▇██


In [18]:
print(torch.cuda.memory_summary(device=device))

|                  PyTorch CUDA memory summary, device ID 0                 |
|---------------------------------------------------------------------------|
|            CUDA OOMs: 0            |        cudaMalloc retries: 0         |
|        Metric         | Cur Usage  | Peak Usage | Tot Alloc  | Tot Freed  |
|---------------------------------------------------------------------------|
| Allocated memory      |  321359 KB |    1290 MB |    2740 GB |    2740 GB |
|       from large pool |  314283 KB |    1282 MB |    2734 GB |    2734 GB |
|       from small pool |    7075 KB |      11 MB |       5 GB |       5 GB |
|---------------------------------------------------------------------------|
| Active memory         |  321359 KB |    1290 MB |    2740 GB |    2740 GB |
|       from large pool |  314283 KB |    1282 MB |    2734 GB |    2734 GB |
|       from small pool |    7075 KB |      11 MB |       5 GB |       5 GB |
|---------------------------------------------------------------

In [19]:

# trfs = [
#     # {"name":"No transform", "tr":transforms.ToTensor()},
#     # {
#     #     "name":"Normalize",
#     #     "tr": transforms.Compose([
#     #         transforms.ToTensor(),
#     #         transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
#     #     ]),
#     #     "tr_v": transforms.Compose([
#     #         transforms.ToTensor(),
#     #         transforms.Normalize((0.5,0.5,0.5),(0.5,0.5,0.5))
#     #     ])
#     # },
#     {
#         "name":"Random crop",
#         "tr":transforms.Compose([
#             transforms.ToTensor(),
#             transforms.RandomResizedCrop(32)
#         ]),
#         "tr_v": transforms.Compose([
#             transforms.ToTensor()
#         ])
#     },
#     {
#         "name":"Norm + Crop",
#         "tr":transforms.Compose([
#             transforms.ToTensor(),
#             transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
#             transforms.RandomResizedCrop(32)
#         ]),
#         "tr_v": transforms.Compose([
#             transforms.ToTensor(),
#             transforms.Normalize((0.5,0.5,0.5),(0.5,0.5,0.5))
#         ])
#     },
#     {
#         "name":"RandFlip + Norm + Crop",
#         "tr":transforms.Compose([
#             transforms.ToTensor(),
#             transforms.RandomHorizontalFlip(),
#             transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
#             transforms.RandomResizedCrop(32)
#         ]),
#         "tr_v": transforms.Compose([
#             transforms.ToTensor(),
#             transforms.Normalize((0.5,0.5,0.5),(0.5,0.5,0.5))
#         ])
#     }
# ]

# MULTITASK LEARNING PART

In [21]:
class CIFAR_Multitask(Dataset):
    def __init__(self, X,y, y_super,transform=None):
        self.X=X
        self.y = torch.LongTensor(y)
        self.y_super = torch.LongTensor(y_super)
        self.transform = transform

    def __len__(self):
        return len(self.X)

    def __getitem__(self, index):

        image = np.rollaxis(self.X[index],0,3)
        label = self.y[index]
        super_label = self.y_super[index]

        if self.transform is not None:
            image = self.transform(image)

        return image, label, super_label


class NetMultitask(nn.Module):
    def __init__(self, in_chanels, num_classes,num_super_classes):
        super(NetMultitask, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(in_chanels,64, kernel_size=3, padding=1),
            nn.BatchNorm2d(64),
            nn.LeakyReLU(True),
            # ---
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(64,128, kernel_size=3, padding=1),
            nn.BatchNorm2d(128),
            nn.LeakyReLU(True),
            nn.Dropout(),
            # ---
#             nn.Conv2d(128,128, kernel_size=3, padding=1),
#             nn.BatchNorm2d(128),
#             nn.LeakyReLU(True),
            # ---
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(128,256, kernel_size=3, padding=1),
            nn.BatchNorm2d(256),
            nn.LeakyReLU(True),
            nn.Dropout(),
            # ---
            nn.Conv2d(256,256, kernel_size=3, padding=1),
            nn.BatchNorm2d(256),
            nn.LeakyReLU(True),
            # ---
#             nn.Conv2d(256,256, kernel_size=3, padding=1),
#             nn.BatchNorm2d(256),
#             nn.LeakyReLU(True),
            # ---
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(256,512, kernel_size=3, padding=1),
            nn.BatchNorm2d(512),
            nn.LeakyReLU(True),
            nn.Dropout(),
            # ---
            nn.Conv2d(512,512, kernel_size=3, padding=1),
            nn.BatchNorm2d(512),
            nn.LeakyReLU(True),
            # ---
#             nn.Conv2d(512,512, kernel_size=3, padding=1),
#             nn.BatchNorm2d(512),
#             nn.LeakyReLU(True),
            # ---
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(512,512, kernel_size=3, padding=1),
            nn.BatchNorm2d(512),
            nn.LeakyReLU(True),
            nn.Dropout(),
            # ---
            nn.Conv2d(512,512, kernel_size=3, padding=1),
            nn.BatchNorm2d(512),
            nn.LeakyReLU(True),
            # ---
            nn.MaxPool2d(kernel_size=2, stride=2),
#             nn.Conv2d(512,512, kernel_size=3, padding=1),
#             nn.BatchNorm2d(512),
#             nn.LeakyReLU(True)
            # ---------------------
#             nn.Conv2d(in_chanels,32, kernel_size=3, padding=1),
#             nn.BatchNorm2d(32),
#             nn.LeakyReLU(True),
#             # ---
#             nn.MaxPool2d(kernel_size=2, stride=2),
#             nn.Conv2d(32,64, kernel_size=3, padding=1),
#             nn.BatchNorm2d(64),
#             nn.LeakyReLU(True),
#             nn.Dropout(inplace=True, p=0.2),
#             # ---
# #             nn.Conv2d(64,64, kernel_size=3, padding=1),
# #             nn.BatchNorm2d(64),
# #             nn.LeakyReLU(True),
#             # ---
#             nn.MaxPool2d(kernel_size=2, stride=2),
#             nn.Conv2d(64,128, kernel_size=3, padding=1),
#             nn.BatchNorm2d(128),
#             nn.LeakyReLU(True),
#             nn.Dropout(inplace=True, p=0.2),
#             # ---
#             nn.Conv2d(128,128, kernel_size=3, padding=1),
#             nn.BatchNorm2d(128),
#             nn.LeakyReLU(True),
#             # ---
#             nn.Conv2d(128,128, kernel_size=3, padding=1),
#             nn.BatchNorm2d(128),
#             nn.LeakyReLU(True),
#             # ---
#             nn.MaxPool2d(kernel_size=2, stride=2),
#             nn.Conv2d(128,256, kernel_size=3, padding=1),
#             nn.BatchNorm2d(256),
#             nn.LeakyReLU(True),
#             nn.Dropout(inplace=True, p=0.2),
#             # ---
#             nn.Conv2d(256,256, kernel_size=3, padding=1),
#             nn.BatchNorm2d(256),
#             nn.LeakyReLU(True),
#             # ---
#             nn.Conv2d(256,256, kernel_size=3, padding=1),
#             nn.BatchNorm2d(256),
#             nn.LeakyReLU(True),
#             # ---
#             nn.MaxPool2d(kernel_size=2, stride=2),
#             nn.Conv2d(256,256, kernel_size=3, padding=1),
#             nn.BatchNorm2d(256),
#             nn.LeakyReLU(True),
#             nn.Dropout(inplace=True, p=0.2),
#             # ---
#             nn.Conv2d(256,256, kernel_size=3, padding=1),
#             nn.BatchNorm2d(256),
#             nn.LeakyReLU(True),
#             # ---
#             nn.MaxPool2d(kernel_size=2, stride=2),
#             nn.Conv2d(256,256, kernel_size=3, padding=1),
#             nn.BatchNorm2d(256),
#             nn.LeakyReLU(True),
        )
        self.classifier = nn.Sequential(
            nn.Linear(512, 2048),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(2048,2048),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(2048,num_classes),
            nn.Softmax()
        )

        self.super_clasifier = nn.Sequential(
            nn.Linear(512,2048),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(2048,2048),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(2048,num_super_classes),
            nn.Softmax()
        )

    def forward(self, x):
        x = self.features(x)
        x = x.view(x.size(0), -1)
        x_c = self.classifier(x)
        x_sc = self.super_clasifier(x)
        return x_c,x_sc

In [22]:
learning_rate=1e-4
betas = (0.9,0.999)
batch_size = 128
sample_limit = 6000
epochs = 50

n_epochs_stop = 6
epochs_no_improve = 0
early_stop = False

In [23]:
run = wandb.init(project='Zadanie2-CIFAR', entity='xpetricko')
run.name = f"Multitask run 9-{epochs}e-{batch_size}bs-vgg11_bn"


config = wandb.config
config.learning_rate = learning_rate
config.sample_limit=sample_limit
config.batch_size=batch_size
config.epochs_with_no_improve = n_epochs_stop
config.notes='upd arch'


model = NetMultitask(3,100,20)
model.to(device)
optimizer = optim.AdamW(model.parameters(),lr=learning_rate, weight_decay = 0.1) # pri adamW weight_decay = 1e-2
#optimizer = optim.SGD(model.parameters(), lr=learning_rate, momentum=0.2, nesterov=True, weight_decay = 0.1, dampening=0)
loss_fn = nn.CrossEntropyLoss()

X_train,X_valid,y_train,y_valid = train_test_split_v2(X_train_np,
                                                      y_train_np,
                                                      sample_limit=sample_limit,
                                                      test_size=1/6,
                                                      random_state=7)

_,_,y_super_train,y_super_valid = train_test_split_v2(X_train_np,
                                                      y_super_train_np,
                                                      sample_limit=sample_limit,
                                                      test_size=1/6,
                                                      random_state=7)

train_data = CIFAR_Multitask(X_train,y_train,y_super_train, transform=transforms.ToTensor())
valid_data = CIFAR(X_valid,y_valid, transform=transforms.ToTensor())

train_loader = DataLoader(dataset=train_data,batch_size=batch_size)
valid_loader = DataLoader(dataset=valid_data,batch_size=batch_size)

mean_train_losses = []
mean_valid_losses = []
valid_acc_list = []
min_valid_loss = np.Inf


torch.manual_seed(7)

for epoch in range(epochs):
    model.train()

    train_losses = []
    valid_losses = []

    for i, (images,labels, super_labels) in enumerate(train_loader):
        images, labels, super_labels = images.to(device), labels.to(device), super_labels.to(device)
        optimizer.zero_grad()
        output, super_output = model.forward(images)
        loss1 = loss_fn(output,labels)
        loss2 = loss_fn(super_output,super_labels)
        loss= loss1+loss2
        loss.backward()
        optimizer.step()

        train_losses.append(loss.item())


    model.eval()
    correct = 0
    total = 0

    with torch.no_grad():
        for i,(images,labels) in enumerate(valid_loader):
            images, labels = images.to(device), labels.to(device)
            output,_ = model.forward(images)
            loss = loss_fn(output,labels)

            valid_losses.append(loss.item())

            _, predicted = torch.max(output.data,1)
            correct += (predicted==labels).sum().item()
            total += labels.size(0)

    mean_train_losses.append(np.mean(train_losses))
    mean_valid_losses.append(np.mean(valid_losses))
    accuracy = 100*correct/total
    valid_acc_list.append(accuracy)


    wandb.log({"Train mean loss":mean_train_losses[-1],"Valid mean loss":mean_valid_losses[-1],"Accuracy":accuracy})

    print('Epoch: {}, train loss: {:.4f}, valid loss: {:.4f}, valid acc: {:.2f}%'.format(epoch+1,mean_train_losses[-1],mean_valid_losses[-1],accuracy))

    if(mean_valid_losses[-1] < min_valid_loss):
        save_model(epoch, model, optimizer, mean_train_losses[-1], mean_valid_losses[-1], accuracy)
        epochs_no_improve = 0
        min_valid_loss = mean_valid_losses[-1]
    else:
        epochs_no_improve += 1

    if epoch > 5 and epochs_no_improve == n_epochs_stop:
        print(f'Early Stopping on epoch {epoch}/{epochs}, min valid loss: {min_valid_loss}, last valid loss: {mean_valid_losses[-1]}')
#         early_stop = True
        break
torch.cuda.empty_cache()
run.finish()

wandb: wandb version 0.10.25 is available!  To upgrade, please run:
wandb:  $ pip install wandb --upgrade


  input = module(input)


Epoch: 1, train loss: 7.5814, valid loss: 4.5984, valid acc: 1.90%
Epoch: 2, train loss: 7.5408, valid loss: 4.5997, valid acc: 1.60%
Epoch: 3, train loss: 7.5204, valid loss: 4.5974, valid acc: 2.70%
Epoch: 4, train loss: 7.5071, valid loss: 4.5958, valid acc: 2.20%
Epoch: 5, train loss: 7.4928, valid loss: 4.5971, valid acc: 2.00%
Epoch: 6, train loss: 7.4855, valid loss: 4.5880, valid acc: 3.20%
Epoch: 7, train loss: 7.4704, valid loss: 4.5920, valid acc: 2.60%
Epoch: 8, train loss: 7.4599, valid loss: 4.5871, valid acc: 3.00%
Epoch: 9, train loss: 7.4441, valid loss: 4.5811, valid acc: 3.70%
Epoch: 10, train loss: 7.4289, valid loss: 4.5843, valid acc: 3.50%
Epoch: 11, train loss: 7.4181, valid loss: 4.5767, valid acc: 4.60%
Epoch: 12, train loss: 7.4002, valid loss: 4.5788, valid acc: 4.20%
Epoch: 13, train loss: 7.4065, valid loss: 4.5771, valid acc: 4.50%
Epoch: 14, train loss: 7.3874, valid loss: 4.5811, valid acc: 4.00%
Epoch: 15, train loss: 7.3821, valid loss: 4.5782, valid 

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
Train mean loss,7.21098
Valid mean loss,4.55575
Accuracy,6.6
_runtime,244.0
_timestamp,1618267515.0
_step,43.0


0,1
Train mean loss,█▇▇▇▆▆▆▆▅▅▅▅▄▄▄▄▄▄▄▃▃▃▃▃▃▃▂▂▂▂▂▂▂▂▂▁▁▂▁▁
Valid mean loss,█████▇▇▇▆▆▆▆▆▆▆▅▅▄▄▅▅▄▄▄▄▄▅▄▄▃▃▃▂▃▁▂▄▃▂▄
Accuracy,▁▁▂▂▁▂▂▂▃▃▃▄▃▃▄▄▄▄▅▄▄▅▅▅▅▅▄▅▅▆▆▆▇▆█▇▅▆▆▆
_runtime,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
_timestamp,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
_step,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
