In [13]:
import numpy as np
import pandas as pd
import os
import torch
import torch.nn as nn
import torch.optim as optim
from tqdm.notebook import tqdm
from PIL import Image
import wandb

from torch.utils.data import Dataset, DataLoader, random_split, SubsetRandomSampler

import torchvision.transforms as transforms
import csv

In [14]:
from pathlib import Path
# Make sure the directory that receives the checkpoints exists before training.
checkpoint_dir = Path('/kaggle/working/model')
checkpoint_dir.mkdir(parents=True, exist_ok=True)

In [15]:
# Root of the competition data; the image folders and the label file live below it.
path = '/kaggle/input/bhw-dl/bhw1'
train_img_dir, test_img_dir, labels_filename = (
    os.path.join(path, relative_name)
    for relative_name in ('trainval/', 'test/', 'labels.csv')
)

In [16]:
# Use the first CUDA device when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')

In [17]:
class ImagesDataset(Dataset):
    """Dataset of image files stored in a single directory.

    Args:
        img_dir: Directory containing the image files.
        labels: Iterable of ``(filename, label)`` rows; required when
            ``train`` is true. Each label is converted with ``int``.
        transform: Optional callable applied to every loaded RGB image.
        train: When true, samples are ``(image, int_label)`` built from
            ``labels``. Otherwise every file in ``img_dir`` is listed and
            samples are ``(image, filename)``.

    Raises:
        ValueError: If ``train`` is true and ``labels`` is ``None``.
    """

    def __init__(self, img_dir, labels=None, transform=None, train=True):
        if train:
            if labels is None:
                raise ValueError("labels must be provided when train=True")
            self.samples = [
                (os.path.join(img_dir, row[0]), int(row[1])) for row in labels
            ]
        else:
            # os.listdir returns entries in arbitrary order; sorting makes the
            # order of the test samples reproducible between runs.
            self.samples = [
                (os.path.join(img_dir, filename), filename)
                for filename in sorted(os.listdir(img_dir))
            ]

        self.transform = transform

    def __len__(self):
        """Return the number of samples."""
        return len(self.samples)

    def load_image(self, img_path):
        """Open ``img_path``, convert it to RGB and apply the transform, if any."""
        # The context manager closes the file handle as soon as the pixel
        # data has been copied into the converted image.
        with Image.open(img_path) as raw_img:
            img = raw_img.convert('RGB')
        if self.transform is not None:
            img = self.transform(img)
        return img

    def __getitem__(self, index):
        """Return ``(image, label)`` in train mode, ``(image, filename)`` otherwise."""
        img_path, label = self.samples[index]
        return self.load_image(img_path), label

In [18]:
def loaders(train_img_dir, labels_filename, train_transform, test_transform, batch_size=64, val_size=0.5):
    """Build the training and validation data loaders from a labels CSV.

    The first row of ``labels_filename`` is skipped; every remaining row is
    passed to ``ImagesDataset`` as a label row. Rows are split at random
    into a training part (fraction ``1 - val_size``) and a validation part
    (fraction ``val_size``).

    Args:
        train_img_dir: Directory holding the labelled images.
        labels_filename: Path of the CSV file with the label rows.
        train_transform: Transform applied to training images.
        test_transform: Transform applied to validation images.
        batch_size: Batch size used by both loaders.
        val_size: Fraction of the rows that goes to the validation split.

    Returns:
        ``(train_loader, val_loader)``; only the training loader shuffles.
    """
    with open(labels_filename, "r") as labels_file:
        rows = list(csv.reader(labels_file))
    # Drop the first row of the file, keep everything after it.
    labels = rows[1:]

    row_ids = torch.arange(len(labels))
    train_part, val_part = random_split(row_ids, (1 - val_size, val_size))

    train_rows = [labels[i] for i in train_part.indices]
    val_rows = [labels[i] for i in val_part.indices]

    train_dataset = ImagesDataset(train_img_dir, train_rows, transform=train_transform, train=True)
    val_dataset = ImagesDataset(train_img_dir, val_rows, transform=test_transform, train=True)

    return (
        DataLoader(train_dataset, batch_size=batch_size, shuffle=True),
        DataLoader(val_dataset, batch_size=batch_size, shuffle=False),
    )

In [19]:
batch_size = 128

# Per-channel statistics shared by the training and evaluation pipelines.
norm_mean = (0.485, 0.456, 0.406)
norm_std = (0.229, 0.224, 0.225)

# Training pipeline: random 40x40 crop (after 4 px of reflect padding),
# random horizontal flip and a random rotation, then tensor conversion and
# normalisation.
train_transform = transforms.Compose([
    transforms.RandomCrop(40, padding=4, padding_mode='reflect'),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(degrees=(0, 10)),
    transforms.ToTensor(),
    transforms.Normalize(mean=norm_mean, std=norm_std),
])

# Evaluation pipeline: no augmentation, only tensor conversion and normalisation.
test_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=norm_mean, std=norm_std),
])

# 80 % of the labelled rows are used for training, 20 % for validation.
train_loader, val_loader = loaders(
    train_img_dir,
    labels_filename,
    train_transform,
    test_transform,
    batch_size=batch_size,
    val_size=0.2,
)


In [20]:
# Test images are yielded together with their file names (train=False) and
# are read in a fixed, unshuffled order.
test_dataset = ImagesDataset(img_dir=test_img_dir, transform=test_transform, train=False)
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

In [21]:
def save_model(epoch, model, optimizer, loss, scheduler = None, PATH = "/kaggle/working/model/checkpoint.pth"):
    """Write a training checkpoint to ``PATH`` with ``torch.save``.

    The checkpoint is a dict with the keys ``epoch``, ``model_state_dict``,
    ``optimizer_state_dict``, ``scheduler_state_dict`` and ``loss``. When no
    scheduler is given, ``scheduler_state_dict`` is an empty dict.
    """
    if scheduler:
        scheduler_state = scheduler.state_dict()
    else:
        scheduler_state = {}

    checkpoint = {
        'epoch': epoch,
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'scheduler_state_dict': scheduler_state,
        'loss': loss,
    }
    torch.save(checkpoint, PATH)

In [22]:
def load_model(model, optimizer, scheduler = None, PATH = "/kaggle/working/model/checkpoint.pth"):
    """Restore training state from a checkpoint written by ``save_model``.

    Args:
        model: Module whose parameters are overwritten in place.
        optimizer: Optimizer whose state is overwritten in place.
        scheduler: Optional LR scheduler; it is restored only when the
            checkpoint holds a non-empty scheduler state.
        PATH: Checkpoint location (anything ``torch.load`` accepts).

    Returns:
        ``(model, optimizer, epoch, scheduler, loss)`` where ``epoch`` and
        ``loss`` are the values stored in the checkpoint.
    """
    # Deserialise onto the CPU so that a checkpoint written on a GPU can be
    # opened on a machine without one. load_state_dict copies the values
    # into the existing parameters / optimizer state on whatever device they
    # already live on, so the restored objects are unaffected by this.
    checkpoint = torch.load(PATH, map_location="cpu")
    model.load_state_dict(checkpoint['model_state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])

    # save_model stores {} when it was called without a scheduler; passing
    # that empty dict to load_state_dict fails for some schedulers, so it
    # is skipped.
    scheduler_state = checkpoint.get('scheduler_state_dict')
    if scheduler is not None and scheduler_state:
        scheduler.load_state_dict(scheduler_state)

    return model, optimizer, checkpoint['epoch'], scheduler, checkpoint['loss']

In [47]:
@torch.no_grad()
def test(model,criterion, loader, tqdm_desc):
    """Evaluate ``model`` on ``loader`` without tracking gradients.

    Args:
        model: Module to evaluate; it is switched to eval mode.
        criterion: Loss function called as ``criterion(output, target)``.
        loader: Iterable of ``(data, target)`` batches.
        tqdm_desc: Description shown on the progress bar.

    Returns:
        ``(loss, acc)`` averaged over the samples yielded by ``loader``.
    """
    model.eval()
    correct = 0
    loss_sum = 0.0
    n_samples = 0
    for data, target in tqdm(loader, desc=tqdm_desc):
        data = data.to(device)
        target = target.to(device)

        output = model(data)
        n_batch = target.shape[0]

        # Weight the batch loss by the batch size so a smaller final batch
        # does not skew the average (assumes criterion returns a per-batch
        # mean -- TODO confirm for non-default criteria).
        loss_sum += criterion(output, target).item() * n_batch
        correct += (output.argmax(dim=1) == target).sum().item()
        n_samples += n_batch

    # Normalise by the samples actually seen in *this* loader rather than by
    # the size of the global validation set.
    return loss_sum / n_samples, correct / n_samples

def train_epoch(model, optimizer, criterion, train_loader, tqdm_desc):
    """Run one optimisation pass over ``train_loader``.

    Args:
        model: Module to train; it is switched to train mode.
        optimizer: Optimizer stepped once per batch.
        criterion: Loss function called as ``criterion(output, target)``.
        train_loader: Iterable of ``(data, target)`` batches.
        tqdm_desc: Description shown on the progress bar.

    Returns:
        ``(loss, acc)`` as Python floats, averaged over the samples seen
        during the epoch.
    """
    model.train()
    correct = 0
    loss_sum = 0.0
    n_samples = 0
    for data, target in tqdm(train_loader, desc=tqdm_desc):
        data = data.to(device)
        target = target.to(device)

        optimizer.zero_grad()
        output = model(data)

        batch_loss = criterion(output, target)
        batch_loss.backward()
        optimizer.step()

        n_batch = target.shape[0]
        # .item() turns the running totals into plain Python numbers, so the
        # returned accuracy is a float rather than a tensor on `device`.
        correct += (output.argmax(dim=1) == target).sum().item()
        loss_sum += batch_loss.item() * n_batch
        n_samples += n_batch

    return loss_sum / n_samples, correct / n_samples

def train(model, optimizer, criterion, scheduler,train_loader,  val_loader, n_epochs):
    """Train ``model`` for ``n_epochs`` epochs, validating after each one.

    After every epoch the function
      * writes a regular checkpoint on every fifth epoch (0, 5, 10, ...),
      * writes ``best_model.pth`` whenever validation accuracy improves,
      * prints the epoch metrics and logs them to Weights & Biases,
      * advances ``scheduler`` (if any) by one step.

    Args:
        model: Module to train.
        optimizer: Optimizer used by ``train_epoch``.
        criterion: Loss function.
        scheduler: LR scheduler stepped once per epoch, or ``None``.
        train_loader: Loader with the training batches.
        val_loader: Loader with the validation batches.
        n_epochs: Number of epochs to run.
    """
    best_acc = 0
    for epoch in range(n_epochs):
        train_loss, train_acc = train_epoch(model, optimizer, criterion, train_loader,  f'Training epoch {epoch + 1}/{n_epochs}')
        val_loss, val_acc = test(model,criterion, val_loader,  f'Validating epoch {epoch + 1}/{n_epochs}')

        if epoch % 5 == 0:
            save_model(epoch, model, optimizer, val_loss, scheduler)
            print('saved model')
        if val_acc > best_acc:
            best_acc = val_acc
            save_model(epoch, model, optimizer, val_loss, scheduler, PATH = "/kaggle/working/model/best_model.pth")
            print('saved better model')

        print(f"Epoch {epoch+1}")
        print(f" train loss: {train_loss}, train acc: {train_acc}")
        print(f" val loss: {val_loss}, val acc: {val_acc}\n")

        if scheduler is not None:
            # Only ReduceLROnPlateau consumes a metric. Epoch-based
            # schedulers must be stepped without arguments: SequentialLR
            # accepts none, and others would read the loss as an epoch index.
            if isinstance(scheduler, optim.lr_scheduler.ReduceLROnPlateau):
                scheduler.step(val_loss)
            else:
                scheduler.step()
        wandb.log({'train_loss': train_loss, 'train_acc': train_acc, 'val_loss': val_loss, 'val_acc': val_acc})


In [26]:
import torch.nn

class Block(nn.Module):
    """Convolution -> batch normalisation -> ReLU.

    The convolution uses "same" padding and has no bias term.
    """

    def __init__(self, in_channels, out_channels, kernel_size=3):
        super().__init__()
        self.conv = nn.Conv2d(
            in_channels,
            out_channels,
            kernel_size=kernel_size,
            padding="same",
            bias=False,
        )
        self.bn = nn.BatchNorm2d(out_channels)

    def forward(self, x):
        normalized = self.bn(self.conv(x))
        return torch.relu(normalized)
    
def space_to_depth(x):
    """Move every non-overlapping 2x2 spatial patch of ``x`` into channels.

    An ``(N, C, H, W)`` input becomes ``(N, 4 * C, H // 2, W // 2)``.
    """
    factor = 2
    batch, channels, height, width = x.shape
    # unfold returns (N, C * factor**2, number_of_patches); the view below
    # lays the patches back out on a 2-D grid.
    patches = torch.nn.functional.unfold(x, kernel_size=factor, stride=factor)
    out_shape = (batch, channels * factor ** 2, height // factor, width // factor)
    return patches.view(out_shape)


class CustomWideModel(nn.Module):
    """Three-stage convolutional classifier with space-to-depth skip connections.

    Each stage is a chain of five ``Block`` layers followed by a 2x2
    max-pool. The input of stages two and three is also routed around the
    stage through ``space_to_depth`` and concatenated with the stage output
    along the channel axis. A 1x1 convolution, batch normalisation and
    global average pooling turn the final feature map into ``num_classes``
    scores.
    """

    @staticmethod
    def _stage(channels):
        """Chain ``Block`` layers through ``channels`` and end with a 2x2 max-pool."""
        layers = [
            Block(c_in, c_out) for c_in, c_out in zip(channels[:-1], channels[1:])
        ]
        layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
        return nn.Sequential(*layers)

    def __init__(self, num_classes):
        super().__init__()
        self.num_classes = num_classes

        self.block1 = self._stage((3, 32, 64, 128, 256, 512))
        self.block2 = self._stage((512, 64, 128, 256, 512, 1024))
        # 3072 = 1024 (block2 output) + 4 * 512 (space-to-depth of block1 output).
        self.block3 = self._stage((3072, 32, 128, 256, 512, 1024))

        # 13312 = 1024 (block3 output) + 4 * 3072 (space-to-depth of its input).
        self.final_classification = nn.Sequential(
            nn.Conv2d(13312, num_classes, kernel_size=1, bias=False),
            nn.BatchNorm2d(num_classes),
            nn.AdaptiveAvgPool2d(1),
        )

    def forward(self, x):
        stage1 = self.block1(x)
        stage2 = self.block2(stage1)

        # Join stage-2 features with the rearranged stage-1 features.
        merged = torch.cat((stage2, space_to_depth(stage1)), dim=1)

        stage3 = self.block3(merged)
        features = torch.cat((stage3, space_to_depth(merged)), dim=1)

        scores = self.final_classification(features)
        return torch.flatten(scores, 1)

In [31]:
# Start the Weights & Biases run that receives the metrics logged during training.
run = wandb.init(
    entity="polina-kadeyshvili",
    project="bhw1",
)

[34m[1mwandb[0m: Currently logged in as: [33mpolina-kadeyshvili[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [32]:
# Model with 200 output classes, placed on the selected device.
model = CustomWideModel(200).to(device)

# Plain SGD; 0.5 is the base learning rate that the schedulers below scale.
optimizer = optim.SGD(model.parameters(), lr=0.5)




n_epochs = 27
lr_warmup_epochs = 5  # number of scheduler steps spent in the linear warm-up
lr_warmup_decay = 0.01  # warm-up starts at this fraction of the base learning rate

# Cosine annealing over the epochs that remain after the warm-up, down to eta_min=0.
main_lr_scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=n_epochs - lr_warmup_epochs, eta_min=0)
# Linear warm-up from lr_warmup_decay * base LR over lr_warmup_epochs steps.
warmup_lr_scheduler = optim.lr_scheduler.LinearLR(optimizer, start_factor=lr_warmup_decay, total_iters=lr_warmup_epochs)

# Warm-up first, then switch to the cosine schedule at the lr_warmup_epochs milestone.
scheduler = torch.optim.lr_scheduler.SequentialLR(
    optimizer, schedulers=[warmup_lr_scheduler, main_lr_scheduler], milestones=[lr_warmup_epochs], verbose=True
)
criterion = nn.CrossEntropyLoss()

# Runs the whole training loop; checkpoints are written by train() itself.
train(model, optimizer, criterion, scheduler, train_loader, val_loader, n_epochs)


Training epoch 1/27:   0%|          | 0/625 [00:00<?, ?it/s]

Validating epoch 1/27:   0%|          | 0/157 [00:00<?, ?it/s]

saved model
saved better model
Epoch 1
 train loss: 4.89284186630249, train acc: 0.04923749715089798
 val loss: 4.73124247970581, val acc: 0.06745



Training epoch 2/27:   0%|          | 0/625 [00:00<?, ?it/s]

Validating epoch 2/27:   0%|          | 0/157 [00:00<?, ?it/s]

saved better model
Epoch 2
 train loss: 4.523900067901612, train acc: 0.09349999576807022
 val loss: 4.241651620483398, val acc: 0.12665



Training epoch 3/27:   0%|          | 0/625 [00:00<?, ?it/s]

Validating epoch 3/27:   0%|          | 0/157 [00:00<?, ?it/s]

saved better model
Epoch 3
 train loss: 4.088504000473023, train acc: 0.14606249332427979
 val loss: 3.948042236328125, val acc: 0.15625



Training epoch 4/27:   0%|          | 0/625 [00:00<?, ?it/s]

Validating epoch 4/27:   0%|          | 0/157 [00:00<?, ?it/s]

saved better model
Epoch 4
 train loss: 3.7096623653411864, train acc: 0.2005624920129776
 val loss: 3.6595038703918457, val acc: 0.20095



Training epoch 5/27:   0%|          | 0/625 [00:00<?, ?it/s]

Validating epoch 5/27:   0%|          | 0/157 [00:00<?, ?it/s]

saved better model
Epoch 5
 train loss: 3.410859957885742, train acc: 0.24198749661445618
 val loss: 3.571474597930908, val acc: 0.2116



Training epoch 6/27:   0%|          | 0/625 [00:00<?, ?it/s]

Validating epoch 6/27:   0%|          | 0/157 [00:00<?, ?it/s]

saved model
saved better model
Epoch 6
 train loss: 3.1864252769470216, train acc: 0.27787500619888306
 val loss: 3.286305950546265, val acc: 0.2559



Training epoch 7/27:   0%|          | 0/625 [00:00<?, ?it/s]

Validating epoch 7/27:   0%|          | 0/157 [00:00<?, ?it/s]

saved better model
Epoch 7
 train loss: 2.9446265636444093, train acc: 0.3205749988555908
 val loss: 3.2525047790527344, val acc: 0.2658



Training epoch 8/27:   0%|          | 0/625 [00:00<?, ?it/s]

Validating epoch 8/27:   0%|          | 0/157 [00:00<?, ?it/s]

saved better model
Epoch 8
 train loss: 2.7677571517944335, train acc: 0.35189998149871826
 val loss: 2.9195181076049805, val acc: 0.32465



Training epoch 9/27:   0%|          | 0/625 [00:00<?, ?it/s]

Validating epoch 9/27:   0%|          | 0/157 [00:00<?, ?it/s]

Epoch 9
 train loss: 2.623229961013794, train acc: 0.3795749843120575
 val loss: 2.988152321624756, val acc: 0.32175



Training epoch 10/27:   0%|          | 0/625 [00:00<?, ?it/s]

Validating epoch 10/27:   0%|          | 0/157 [00:00<?, ?it/s]

saved better model
Epoch 10
 train loss: 2.4879010627746583, train acc: 0.4059875011444092
 val loss: 2.7622142498016355, val acc: 0.3628



Training epoch 11/27:   0%|          | 0/625 [00:00<?, ?it/s]

Validating epoch 11/27:   0%|          | 0/157 [00:00<?, ?it/s]

saved model
Epoch 11
 train loss: 2.3647033123016357, train acc: 0.4302624762058258
 val loss: 2.7996716102600097, val acc: 0.36085



Training epoch 12/27:   0%|          | 0/625 [00:00<?, ?it/s]

Validating epoch 12/27:   0%|          | 0/157 [00:00<?, ?it/s]

saved better model
Epoch 12
 train loss: 2.2514193626403807, train acc: 0.45362499356269836
 val loss: 2.7069067962646485, val acc: 0.3797



Training epoch 13/27:   0%|          | 0/625 [00:00<?, ?it/s]

Validating epoch 13/27:   0%|          | 0/157 [00:00<?, ?it/s]

Epoch 13
 train loss: 2.141164287567139, train acc: 0.4737749993801117
 val loss: 2.7485998832702636, val acc: 0.3757



Training epoch 14/27:   0%|          | 0/625 [00:00<?, ?it/s]

Validating epoch 14/27:   0%|          | 0/157 [00:00<?, ?it/s]

saved better model
Epoch 14
 train loss: 2.030530623435974, train acc: 0.4972374737262726
 val loss: 2.6554602138519288, val acc: 0.38985



Training epoch 15/27:   0%|          | 0/625 [00:00<?, ?it/s]

Validating epoch 15/27:   0%|          | 0/157 [00:00<?, ?it/s]

Epoch 15
 train loss: 1.921744550895691, train acc: 0.5214874744415283
 val loss: 2.6620146530151367, val acc: 0.39665



Training epoch 16/27:   0%|          | 0/625 [00:00<?, ?it/s]

Validating epoch 16/27:   0%|          | 0/157 [00:00<?, ?it/s]

saved model
saved better model
Epoch 16
 train loss: 1.80878835811615, train acc: 0.5460749864578247
 val loss: 2.475108197402954, val acc: 0.4284



Training epoch 17/27:   0%|          | 0/625 [00:00<?, ?it/s]

Validating epoch 17/27:   0%|          | 0/157 [00:00<?, ?it/s]

saved better model
Epoch 17
 train loss: 1.6913613719940186, train acc: 0.5711749792098999
 val loss: 2.4596848533630373, val acc: 0.4336



Training epoch 18/27:   0%|          | 0/625 [00:00<?, ?it/s]

Validating epoch 18/27:   0%|          | 0/157 [00:00<?, ?it/s]

Epoch 18
 train loss: 1.575386520576477, train acc: 0.5984249711036682
 val loss: 2.48339637298584, val acc: 0.43585



Training epoch 19/27:   0%|          | 0/625 [00:00<?, ?it/s]

Validating epoch 19/27:   0%|          | 0/157 [00:00<?, ?it/s]

Epoch 19
 train loss: 1.45741285572052, train acc: 0.6263124942779541
 val loss: 2.5216723766326905, val acc: 0.43635



Training epoch 20/27:   0%|          | 0/625 [00:00<?, ?it/s]

Validating epoch 20/27:   0%|          | 0/157 [00:00<?, ?it/s]

saved better model
Epoch 20
 train loss: 1.3333507364273072, train acc: 0.6580125093460083
 val loss: 2.416116764640808, val acc: 0.45345



Training epoch 21/27:   0%|          | 0/625 [00:00<?, ?it/s]

Validating epoch 21/27:   0%|          | 0/157 [00:00<?, ?it/s]

saved model
saved better model
Epoch 21
 train loss: 1.2146734455108643, train acc: 0.6879749894142151
 val loss: 2.379168648147583, val acc: 0.46205



Training epoch 22/27:   0%|          | 0/625 [00:00<?, ?it/s]

Validating epoch 22/27:   0%|          | 0/157 [00:00<?, ?it/s]

saved better model
Epoch 22
 train loss: 1.1041375819206238, train acc: 0.7184500098228455
 val loss: 2.3572550800323486, val acc: 0.46815



Training epoch 23/27:   0%|          | 0/625 [00:00<?, ?it/s]

Validating epoch 23/27:   0%|          | 0/157 [00:00<?, ?it/s]

saved better model
Epoch 23
 train loss: 1.002918815612793, train acc: 0.7476499676704407
 val loss: 2.338772137451172, val acc: 0.474



Training epoch 24/27:   0%|          | 0/625 [00:00<?, ?it/s]

Validating epoch 24/27:   0%|          | 0/157 [00:00<?, ?it/s]

saved better model
Epoch 24
 train loss: 0.9185935404777527, train acc: 0.772137463092804
 val loss: 2.319649579620361, val acc: 0.47785



Training epoch 25/27:   0%|          | 0/625 [00:00<?, ?it/s]

Validating epoch 25/27:   0%|          | 0/157 [00:00<?, ?it/s]

saved better model
Epoch 25
 train loss: 0.85895641746521, train acc: 0.7906374931335449
 val loss: 2.3152177055358885, val acc: 0.47785



Training epoch 26/27:   0%|          | 0/625 [00:00<?, ?it/s]

Validating epoch 26/27:   0%|          | 0/157 [00:00<?, ?it/s]

saved model
saved better model
Epoch 26
 train loss: 0.8164782009124756, train acc: 0.8041625022888184
 val loss: 2.30552091255188, val acc: 0.48105



Training epoch 27/27:   0%|          | 0/625 [00:00<?, ?it/s]

Validating epoch 27/27:   0%|          | 0/157 [00:00<?, ?it/s]

Epoch 27
 train loss: 0.7920232939720154, train acc: 0.8144124746322632
 val loss: 2.31318782043457, val acc: 0.48



In [44]:
import gc
# Collect unreachable Python objects first: the CUDA tensors they held go
# back to PyTorch's caching allocator, and only then can empty_cache()
# release those blocks. The reverse order leaves that memory cached.
gc.collect()
torch.cuda.empty_cache()

336

In [33]:
# Close the active Weights & Biases run once training is over.
wandb.finish()

VBox(children=(Label(value='0.050 MB of 0.050 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
train_acc,▁▁▂▂▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▇▇▇▇████
train_loss,█▇▇▆▅▅▅▄▄▄▄▃▃▃▃▃▃▂▂▂▂▂▁▁▁▁▁
val_acc,▁▂▃▃▃▄▄▅▅▆▆▆▆▆▇▇▇▇▇████████
val_loss,█▇▆▅▅▄▄▃▃▂▂▂▂▂▂▁▁▂▂▁▁▁▁▁▁▁▁

0,1
train_acc,0.81441
train_loss,0.79202
val_acc,0.48
val_loss,2.31319


In [49]:
# Rebuild the network and restore the weights from best_model.pth, the
# checkpoint train() writes whenever validation accuracy improves.
# map_location keeps the load working when the checkpoint was saved on a GPU
# and this session runs on a different device.
loaded_model = torch.load('/kaggle/working/model/best_model.pth', map_location=device)
best_model = CustomWideModel(200).to(device)
best_model.load_state_dict(loaded_model['model_state_dict'])


<All keys matched successfully>

In [35]:
@torch.no_grad()
def predict(model, test_loader):
    """Predict a class index for every image yielded by ``test_loader``.

    Gradients are disabled, so no autograd graph is built during inference.

    Args:
        model: Trained module; it is switched to eval mode.
        test_loader: Iterable of ``(images, filenames)`` batches.

    Returns:
        A list of rows starting with the header ``["Id", "Category"]``,
        followed by one ``[filename, predicted_class]`` row per image.
    """
    model.eval()
    labels = [["Id", "Category"]]

    for images, filenames in tqdm(test_loader, desc='Testing'):
        output = model(images.to(device))
        # One transfer per batch instead of one .item() call per prediction.
        preds = output.argmax(dim=1).tolist()
        labels.extend([filename, pred] for filename, pred in zip(filenames, preds))

    return labels

In [36]:
# Header row plus one [filename, predicted class] row per test image.
labels = predict(best_model, test_loader)

Testing:   0%|          | 0/79 [00:00<?, ?it/s]

In [37]:
# Write the prediction rows (header included) to the submission file.
# newline='' is what the csv module requires of files it writes to; without
# it, platforms that translate line endings emit an extra blank line after
# every row.
with open('labels_test.csv', 'w', newline='') as csvfile:
    csv.writer(csvfile).writerows(labels)