In [None]:
import os

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms as T
import wandb
from PIL import Image
from torch.utils.data import Dataset, DataLoader
from tqdm.notebook import tqdm

In [None]:
# wandb.login(key='')

In [None]:
# Seed PyTorch's global RNG. get_datasets() below calls random_split without its own
# generator, so the train/val split depends on this global state.
torch.manual_seed(42)

def get_datasets(root, load_to_ram=True, train_transform=None, test_transform=None, val_size=0.2,
                 labels_path=None, seed=None):
    """Build per-category (stratified) train/validation ``ImageDataset`` objects.

    The labels CSV is grouped by its ``Category`` column; within every category
    a fraction ``val_size`` of the file ids is randomly assigned to validation
    and the rest to training.

    Args:
        root: Directory containing the image files named in the CSV ``Id`` column.
        load_to_ram: Forwarded to ``ImageDataset``.
        train_transform: Transform applied to training images.
        test_transform: Transform applied to validation images.
        val_size: Fraction (0..1) of every category that goes to validation.
        labels_path: Path of the labels CSV. Defaults to the Kaggle location
            that was previously hard-coded.
        seed: Optional seed for a dedicated split generator. When ``None``
            (default) the global torch RNG is used, exactly as before.

    Returns:
        ``(train_dataset, val_dataset)``.

    Raises:
        ValueError: If ``val_size`` is outside ``[0, 1]``.

    Note:
        The label stored for a sample is the position of its category in the
        grouped frame (pandas sorts group keys by default), not the raw
        ``Category`` value. NOTE(review): these coincide only if categories are
        the integers 0..N-1 — confirm against the CSV.
    """
    if not 0.0 <= val_size <= 1.0:
        raise ValueError(f"val_size must be in [0, 1], got {val_size}")
    if labels_path is None:
        labels_path = '/kaggle/input/hse-dl-bhw-1-dataset/bhw1/labels.csv'

    labels = pd.read_csv(labels_path)
    per_category = labels.groupby('Category').agg({'Id': 'unique'}).Id.to_numpy()

    # Only pass a generator when a seed was requested, so the default call
    # consumes the global RNG exactly like the original implementation.
    split_kwargs = {}
    if seed is not None:
        split_kwargs['generator'] = torch.Generator().manual_seed(seed)

    train_all_files, train_all_labels = [], []
    val_all_files, val_all_labels = [], []

    for i, files in tqdm(enumerate(per_category), total=len(per_category)):
        valid_size = int(val_size * len(files))
        train_size = len(files) - valid_size
        train_part, val_part = torch.utils.data.random_split(
            files, [train_size, valid_size], **split_kwargs
        )

        # Materialise the chosen file names explicitly instead of relying on
        # implicit iteration over the Subset objects.
        train_files = [files[j] for j in train_part.indices]
        val_files = [files[j] for j in val_part.indices]

        train_all_files += train_files
        train_all_labels += [i] * len(train_files)

        val_all_files += val_files
        val_all_labels += [i] * len(val_files)

    train_dataset = ImageDataset(root, train_all_files, train_all_labels, load_to_ram=load_to_ram, transform=train_transform)
    val_dataset = ImageDataset(root, val_all_files, val_all_labels, load_to_ram=load_to_ram, transform=test_transform)

    return train_dataset, val_dataset
    

class ImageDataset(Dataset):
    """Labelled image dataset backed by files under a single root directory.

    ``all_files[k]`` is a file name relative to ``root`` and ``all_labels[k]``
    is its label. With ``load_to_ram=True`` every image is opened and converted
    to RGB in the constructor; otherwise images are read on each access.
    """
    NUM_CLASSES = 200
    SPLIT_RANDOM_SEED = 42

    def __init__(self, root, all_files, all_labels, load_to_ram=True, transform=None):
        super().__init__()
        self.root = root
        self.load_to_ram = load_to_ram
        self.transform = transform
        self.all_files = all_files
        self.all_labels = all_labels
        self.classes = np.arange(self.NUM_CLASSES)
        # Eager mode fills the cache now; lazy mode keeps it empty.
        self.images = self._load_images(self.all_files) if self.load_to_ram else []

    def _open_rgb(self, filename):
        """Open ``root/filename`` and return it converted to RGB."""
        return Image.open(os.path.join(self.root, filename)).convert('RGB')

    def _load_images(self, image_files):
        """Return a list with the RGB image for every name in ``image_files``."""
        return [self._open_rgb(name) for name in tqdm(image_files)]

    def __len__(self):
        """Number of samples (one per file name)."""
        return len(self.all_files)

    def __getitem__(self, item):
        """Return ``(image, label)`` for index ``item``, applying ``transform`` if set."""
        label = self.all_labels[item]
        if self.load_to_ram:
            picture = self.images[item]
        else:
            picture = self._open_rgb(self.all_files[item])

        if self.transform is None:
            return picture, label
        return self.transform(picture), label
    

In [None]:
class TestImageDataset(Dataset):
    """Unlabelled image dataset over every entry returned by ``os.listdir(root)``.

    Items are ``(image, file_name)`` pairs so predictions can be matched back to
    files. With ``load_to_ram=True`` all images are opened and converted to RGB
    in the constructor; otherwise they are read on each access.
    """
    NUM_CLASSES = 200
    SPLIT_RANDOM_SEED = 42

    def __init__(self, root, load_to_ram=True, transform=None):
        super().__init__()
        self.root = root
        self.load_to_ram = load_to_ram
        self.transform = transform
        self.to_tensor = T.ToTensor()
        self.classes = np.arange(self.NUM_CLASSES)

        self.all_files = []
        self.images = []
        for name in tqdm(os.listdir(self.root)):
            self.all_files.append(name)
            if not self.load_to_ram:
                continue
            self.images.append(Image.open(os.path.join(self.root, name)).convert('RGB'))

    def __len__(self):
        """Number of files found in ``root``."""
        return len(self.all_files)

    def __getitem__(self, item):
        """Return ``(image, file_name)`` for index ``item``, applying ``transform`` if truthy."""
        name = self.all_files[item]
        if self.load_to_ram:
            picture = self.images[item]
        else:
            picture = Image.open(os.path.join(self.root, name)).convert('RGB')

        if not self.transform:
            return picture, name
        return self.transform(picture), name

In [None]:
# Channel-wise normalisation shared by both pipelines. The values match the widely used
# ImageNet statistics. NOTE(review): they were not computed from this dataset — confirm they fit.
normalize = T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])

# Training pipeline: random crop resized to 40x40, random horizontal flip and AugMix on the
# PIL image, then tensor conversion and normalisation.
train_transform = T.Compose([
    T.RandomResizedCrop(40),
    T.RandomHorizontalFlip(),
    T.AugMix(),
    T.ToTensor(),
    normalize,
])

# Evaluation pipeline: no augmentation and no resizing, only tensor conversion + normalisation.
# NOTE(review): training sees 40x40 crops while evaluation sees images at their stored size —
# confirm this resolution gap is intentional.
test_transform = T.Compose([
    T.ToTensor(),
    normalize,
])

In [None]:
# Build the per-category train/validation split. With load_to_ram=True every image is opened
# once in the dataset constructor and kept in memory.
train_dataset, val_dataset = get_datasets(root='/kaggle/input/hse-dl-bhw-1-dataset/bhw1/trainval', load_to_ram=True, train_transform=train_transform, test_transform=test_transform)

In [None]:
# Only the training loader is shuffled; validation order stays fixed.
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, pin_memory=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False, pin_memory=True)
# Use the first CUDA device when available, otherwise fall back to the CPU.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

In [None]:
# def log_train(train_loss, train_acc, val_loss, val_acc):   
#     wandb.log({"train_loss": train_loss, 
#                    "train_acc": train_acc,
#                    "val_loss": val_loss, 
#                    "val_acc": val_acc})

In [None]:
# NOTE(review): train() defines its own local best_accuracy, so this module-level value is
# not read by any code visible in this notebook.
best_accuracy = 0.0

In [None]:
@torch.no_grad()
def test(model, loader):
    """Evaluate ``model`` on ``loader`` and return ``(mean_loss, accuracy)``.

    Both metrics are aggregated per sample over the whole loader, so a smaller
    final batch is not over-weighted (averaging per-batch means would weight
    every batch equally regardless of its size).

    Args:
        model: Module mapping a batch of inputs to class logits.
        loader: Iterable of ``(data, target)`` batches.

    Returns:
        Tuple of floats ``(cross-entropy per sample, fraction correct)``.
        Both are ``nan`` when the loader yields no samples.
    """
    # Sum reduction lets the loss be divided once by the true sample count.
    criterion = nn.CrossEntropyLoss(reduction='sum')
    total_loss = 0.0
    total_correct = 0
    total_samples = 0
    model.eval()

    for data, target in tqdm(loader):
        data = data.to(device)
        target = target.to(device)

        logits = model(data)

        total_loss += criterion(logits, target).item()
        total_correct += (logits.argmax(dim=1) == target).sum().item()
        total_samples += target.shape[0]

    if total_samples == 0:
        return float('nan'), float('nan')

    return total_loss / total_samples, total_correct / total_samples

def train_epoch(model, optimizer, train_loader):
    """Run one optimisation pass over ``train_loader``.

    Args:
        model: Module mapping a batch of inputs to class logits.
        optimizer: Optimizer stepping the model parameters.
        train_loader: Iterable of ``(data, target)`` batches.

    Returns:
        ``(loss_log, acc_log)``: two lists with one float per batch — the
        cross-entropy loss and the fraction of correct predictions.
    """
    loss_fn = nn.CrossEntropyLoss()
    model.train()
    batch_losses, batch_accuracies = [], []

    for inputs, labels in tqdm(train_loader):
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(inputs)
        batch_loss = loss_fn(outputs, labels)
        batch_loss.backward()
        optimizer.step()

        hits = (outputs.argmax(dim=1) == labels).sum()
        batch_losses.append(batch_loss.item())
        batch_accuracies.append((hits / inputs.shape[0]).item())

    return batch_losses, batch_accuracies

def train(model, optimizer, n_epochs, train_loader, val_loader, scheduler):
    """Train for ``n_epochs`` epochs, validating and checkpointing after each one.

    After every epoch the model is evaluated on ``val_loader``. Whenever the
    validation accuracy exceeds the best value seen so far, a checkpoint is
    written to ``best_model.pt``.

    Args:
        model: Module to train.
        optimizer: Optimizer stepping the model parameters.
        n_epochs: Number of epochs to run.
        train_loader: Training batches.
        val_loader: Validation batches.
        scheduler: Learning-rate scheduler stepped once per epoch, or ``None``.

    Returns:
        ``(train_loss_log, train_acc_log, val_loss_log, val_acc_log)`` where the
        train logs hold one entry per batch and the val logs one per epoch.
    """
    best_accuracy = 0.0
    train_loss_log, train_acc_log, val_loss_log, val_acc_log = [], [], [], []

    for epoch in range(n_epochs):
        train_loss, train_acc = train_epoch(model, optimizer, train_loader)
        val_loss, val_acc = test(model, val_loader)

        train_loss_log.extend(train_loss)
        train_acc_log.extend(train_acc)

        val_loss_log.append(val_loss)
        val_acc_log.append(val_acc)

        print(f"Epoch {epoch}")
        print(f" train loss: {np.mean(train_loss)}, train acc: {np.mean(train_acc)}")
        print(f" val loss: {val_loss}, val acc: {val_acc}\n")
#         log_train(np.mean(train_loss).item(), np.mean(train_acc).item(), val_loss, val_acc)

        if scheduler is not None:
            scheduler.step()

        if val_acc > best_accuracy:
            best_accuracy = val_acc
            torch.save({
                'epoch': epoch,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                # scheduler is optional (see the guard above); store None
                # instead of crashing with AttributeError when it is absent.
                'scheduler_state_dict': scheduler.state_dict() if scheduler is not None else None,
            }, "best_model.pt")

    return train_loss_log, train_acc_log, val_loss_log, val_acc_log

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class SuperBlockNet(nn.Module):
    """Three-stage convolutional classifier with space-to-depth skip connections.

    Each stage is five Conv-BN-ReLU blocks followed by a 2x2 max-pool. The
    input of stages 2 and 3 is also rearranged by a 2x space-to-depth and
    concatenated with that stage's output. A 1x1 Conv-BN head followed by
    global average pooling yields ``num_classes`` logits.

    Sub-modules are registered as ``layer1`` .. ``layer12``, ``layer14`` ..
    ``layer19``, ``layer21`` and ``layer22``.
    """

    def _conv_block(self, in_channels, out_channels, kernel_size=3, stride=1, padding='same', bias=False, relu=True):
        """Return ``Sequential(Conv2d, BatchNorm2d[, ReLU])``; the ReLU is added when ``relu`` is true."""
        conv = nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size, stride=stride, padding=padding, bias=bias)
        norm = nn.BatchNorm2d(out_channels)
        modules = [conv, norm, nn.ReLU()] if relu else [conv, norm]
        return nn.Sequential(*modules)

    def _space_to_depth_x2(self, x):
        """Move every non-overlapping 2x2 patch into channels: (N, C, H, W) -> (N, 4C, H//2, W//2)."""
        n, c, h, w = x.shape
        patches = F.unfold(x, 2, stride=2)
        return patches.view(n, 4 * c, h // 2, w // 2)

    def _add_stage(self, first_index, widths):
        """Register conv blocks ``layer{first_index}``.. for consecutive width pairs, then a MaxPool2d(2)."""
        index = first_index
        for c_in, c_out in zip(widths[:-1], widths[1:]):
            setattr(self, f'layer{index}', self._conv_block(c_in, c_out))
            index += 1
        setattr(self, f'layer{index}', nn.MaxPool2d(2))

    def _apply_layers(self, x, first, last):
        """Feed ``x`` through ``layer{first}`` .. ``layer{last}`` (inclusive) in order."""
        for index in range(first, last + 1):
            x = getattr(self, f'layer{index}')(x)
        return x

    def __init__(self, num_classes):
        super().__init__()

        # layer1-5 + pool layer6
        self._add_stage(1, (3, 32, 64, 128, 256, 512))
        # layer7-11 + pool layer12
        self._add_stage(7, (512, 64, 128, 256, 512, 1024))
        # layer14-18 + pool layer19; input = 1024 + 4 * 512 channels
        self._add_stage(14, (3072, 32, 128, 256, 512, 1024))

        # Head input = 1024 + 4 * 3072 channels.
        self.layer21 = self._conv_block(13312, num_classes, kernel_size=1, relu=False)
        self.layer22 = nn.AdaptiveAvgPool2d(1)

    def forward(self, x):
        """Return logits of shape ``(batch, num_classes)``."""
        stage1 = self._apply_layers(x, 1, 6)

        stage2 = self._apply_layers(stage1, 7, 12)
        merged1 = torch.cat([stage2, self._space_to_depth_x2(stage1)], dim=1)

        stage3 = self._apply_layers(merged1, 14, 19)
        merged2 = torch.cat([stage3, self._space_to_depth_x2(merged1)], dim=1)

        pooled = self.layer22(self.layer21(merged2))
        return pooled.view(pooled.size(0), -1)

In [None]:
# Number of output logits of the classifier head.
n_classes = 200
# Total number of training epochs (warm-up included).
num_epochs = 45

In [None]:
# wandb.init(project="bhw1-dl", config={"dataset": "kaggle-dataset"}, name="SuperBlockNet no validation")
# wandb.config.epochs = num_epochs
# wandb.config.optimizer = "SGD + momentum"
# wandb.config.criterion = "CrossEntropyLoss"
# wandb.config.scheduler = "Linear + CosineAnnealingLR"
# wandb.config.learning_rate = "0.1"

In [None]:
# Number of initial epochs covered by the LinearLR warm-up before cosine annealing starts.
start_epochs = 5

In [None]:
model = SuperBlockNet(n_classes).to(device)
optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.9)
# Warm-up: start at 0.1x the base learning rate and ramp up linearly over start_epochs epochs.
scheduler1 = torch.optim.lr_scheduler.LinearLR(optimizer, start_factor=0.1, total_iters=start_epochs)
# Afterwards: cosine annealing towards eta_min=0 over the remaining epochs.
scheduler2 = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=num_epochs - start_epochs, eta_min=0)
# SequentialLR hands over from scheduler1 to scheduler2 at the start_epochs milestone.
scheduler = torch.optim.lr_scheduler.SequentialLR(optimizer, schedulers=[scheduler1, scheduler2], milestones=[start_epochs])
# train() steps the scheduler once per epoch and writes the best checkpoint to best_model.pt.
train_loss_log, train_acc_log, val_loss_log, val_acc_log = train(
    model, optimizer, num_epochs, train_loader, val_loader, scheduler
)

In [None]:
@torch.no_grad()
def predict(model, test_loader):
    """Predict a class index for every item yielded by ``test_loader``.

    Runs under ``torch.no_grad()`` so no autograd graph is built during
    inference.

    Args:
        model: Module mapping a batch of images to class logits.
        test_loader: Iterable of ``(images, files)`` batches, where ``files``
            holds one identifier per image.

    Returns:
        List of ``[file, predicted_class]`` pairs in loader order.
    """
    model.eval()
    predictions = []

    for images, files in tqdm(test_loader):
        logits = model(images.to(device))
        # One transfer per batch instead of one .item() call per sample.
        labels = logits.argmax(dim=1).tolist()
        predictions.extend([file, label] for file, label in zip(files, labels))

    return predictions

In [None]:
# Unlabelled test images; the dataset yields (image, file_name) pairs and uses the
# evaluation transform (no augmentation).
test_dataset = TestImageDataset(root='/kaggle/input/hse-dl-bhw-1-dataset/bhw1/test', load_to_ram=True, transform=test_transform)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False, pin_memory=True)

In [None]:
# NOTE(review): this uses the model weights as they are after the last epoch; the best
# checkpoint written to best_model.pt by train() is not loaded anywhere in the visible code.
predictions = predict(model, test_loader)

In [None]:
# Write the submission file sorted by file name.
# NOTE(review): 'Category' holds the class index assigned in get_datasets (position of the
# category in the grouped labels), which equals the raw label only if categories are 0..N-1.
pd.DataFrame(predictions, columns=['Id', 'Category']).sort_values('Id').to_csv('labels_test.csv', index=False)