In [81]:
# Download the data
import requests #код взят от семинариста, разрешение на использование получено)

def download_file_from_google_drive(id, destination):
    """Download a Google Drive file and write it to ``destination``.

    The file is requested once; if the response carries a
    ``download_warning*`` cookie (see ``get_confirm_token``), the request is
    repeated with that cookie value passed as the ``confirm`` parameter.
    The final response body is streamed to disk.

    Args:
        id: Google Drive file identifier, sent as the ``id`` query parameter
            (the name shadows the builtin but is kept for callers).
        destination: Path of the local file to write.

    Raises:
        requests.HTTPError: If the final response has an error status code.
    """
    URL = "https://docs.google.com/uc?export=download"

    # The context manager closes the session (and its pooled connections)
    # once the body has been saved, even if saving fails.
    with requests.Session() as session:
        response = session.get(URL, params={'id': id}, stream=True)
        token = get_confirm_token(response)

        if token:
            params = {'id': id, 'confirm': token}
            response = session.get(URL, params=params, stream=True)

        # Fail loudly instead of saving an error page under the archive name.
        response.raise_for_status()
        save_response_content(response, destination)

def get_confirm_token(response):
    """Return the value of the first cookie whose name starts with
    ``download_warning``, or ``None`` when the response has no such cookie.
    """
    warning_values = (
        cookie_value
        for cookie_name, cookie_value in response.cookies.items()
        if cookie_name.startswith('download_warning')
    )
    return next(warning_values, None)

def save_response_content(response, destination):
    """Stream the body of ``response`` into the file ``destination``.

    The body is read through ``response.iter_content`` in 32768-byte chunks;
    empty chunks are skipped. The file is opened in binary write mode, so an
    existing file is overwritten.
    """
    chunk_bytes = 32768

    with open(destination, "wb") as out_file:
        # filter(None, ...) drops the empty keep-alive chunks.
        for piece in filter(None, response.iter_content(chunk_bytes)):
            out_file.write(piece)

In [71]:
# Fetch the data archive from Google Drive and store it locally as file.zip.
download_file_from_google_drive('1WkbQQuFL5dCaPGQz_YRu5Ne6-OEgj8Qs', 'file.zip')

In [74]:
import zipfile # should be part of the standard library
# Unpack the downloaded archive into the current working directory.
with zipfile.ZipFile('file.zip', 'r') as zip_ref:
    zip_ref.extractall()
    

In [356]:
#!g1.1
# Install the pinned package versions that were requested.
%pip install torch==1.9.0+cu111 torchvision==0.10.0+cu111 -f https://download.pytorch.org/whl/torch_stable.html
%pip install pandas==1.3.3

In [1009]:
#!g1.1
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.optim import lr_scheduler
# import torchvision.transforms as transforms
from torchvision import transforms
from IPython.display import clear_output
from tqdm.auto import tqdm
import pandas as pd
from PIL import Image
# import matplotlib.pyplot as plt
import numpy as np
from torch.optim.swa_utils import AveragedModel, SWALR
from random import randint
import random
# %matplotlib inline  


In [1182]:
#!g1.1 
# Load the data into memory (the labels are moved to CUDA)
def ind_to_path(a):
    """Return the path of trainval image number ``a``.

    The index is left-padded with zeros to at least five characters,
    e.g. ``7`` -> ``.../trainval_00007.jpg``; longer indices are kept as is.
    """
    padded_index = str(a).rjust(5, '0')
    return 'simple_image_classification/trainval/trainval_' + padded_index + '.jpg'

class TrainDataset(torch.utils.data.Dataset):
    """In-memory dataset over a contiguous index range of the trainval images.

    Images ``from_ .. to_ - 1`` are decoded once in ``__init__`` with
    ``transforms.ToTensor`` and kept as a list of tensors; no normalisation
    is applied here. Labels come from the ``Category`` column of
    ``labels_trainval.csv`` and are stored as a single CUDA tensor.

    Args:
        from_: Index of the first image (inclusive).
        to_: Index one past the last image (exclusive).
    """

    def __init__(self, from_=0, to_=90000):
        to_tensor = transforms.ToTensor()
        images = []
        for index in range(from_, to_):
            # Close each file right after decoding so that loading tens of
            # thousands of images does not rely on garbage collection to
            # release the file handles.
            with Image.open(ind_to_path(index)) as image:
                images.append(to_tensor(image))
        labels = pd.read_csv('simple_image_classification/labels_trainval.csv')
        # NOTE(review): the slice assumes CSV row i describes image i — confirm.
        self.y = torch.tensor(np.array(labels.Category[from_:to_])).cuda()
        self.X = images

    def __len__(self):
        """Number of images held by the dataset."""
        return len(self.X)

    def __getitem__(self, idx):
        """Return ``(image_tensor, label)`` for position ``idx``."""
        return self.X[idx], self.y[idx]

In [1102]:
#!g1.1 MODEL_1
# DIFFERENT BLOCKS - RESNET18, RESNET50, GOOGLENET

class Debug(nn.Module):
    """Pass-through layer that prints the shape of its input."""

    def forward(self, x):
        shape = x.shape
        print(shape)
        return x

class Flatten(nn.Module):
    """Collapse every dimension after the first into a single one."""

    def forward(self, x):
        batch = x.shape[0]
        return x.view(batch, -1)

class BasicBlock18(nn.Module):
    """Residual block made of ``block_depth`` 3x3 Conv -> BatchNorm -> ReLU
    (-> Dropout) stages.

    With ``twice_decrease`` the first stage uses stride 2 and doubles the
    channel count, and the shortcut goes through a strided 1x1 convolution
    followed by batch norm so that both paths can be added. Otherwise the
    input itself is the shortcut.

    Args:
        chans: Number of input channels.
        block_depth: Number of convolution stages in the main path.
        twice_decrease: Halve the resolution and double the channels.
        use_dropout: Append ``nn.Dropout(params['p'])`` to every stage.
        params: Dict with keys ``eps``, ``momentum`` (batch norm), ``bias``
            (convolutions) and ``p`` (dropout probability).
    """

    def __init__(self, chans, block_depth=2, twice_decrease=False, use_dropout=True, params={'eps': 1e-05, 'momentum': 0.1, 'bias': False, 'p': 0.2}):
        super().__init__()
        self.twice_decrease = twice_decrease
        self.block_depth = block_depth
        self.use_dropout = use_dropout

        scale = 2 if twice_decrease else 1
        out_chans = chans * scale

        def batch_norm(width):
            return nn.BatchNorm2d(width, eps=params['eps'], momentum=params['momentum'], affine=True, track_running_stats=True)

        stages = []
        for stage_idx in range(self.block_depth):
            # Only the first stage changes the channel count / resolution.
            is_first = stage_idx == 0
            in_chans = chans if is_first else out_chans
            step = scale if is_first else 1
            stages += [
                nn.Conv2d(in_chans, out_chans, kernel_size=(3, 3), stride=(step, step), padding=(1, 1), bias=params['bias']),
                batch_norm(out_chans),
                nn.ReLU(inplace=True),
            ]
            if use_dropout:
                stages.append(nn.Dropout(params['p']))
        self.layers = nn.Sequential(*stages)

        # Projection shortcut: always created, only used when twice_decrease is set.
        self.l6_ = nn.Conv2d(chans, chans * 2, kernel_size=(1, 1), stride=(2, 2), bias=params['bias'])
        self.l7_ = batch_norm(chans * 2)

    def forward(self, input):
        """Return the main path output added to the (possibly projected) input."""
        residual = self.layers(input)
        if not self.twice_decrease:
            return residual + input
        shortcut = self.l7_(self.l6_(input))
        return residual + shortcut

class BasicBlock50(nn.Module):
    """Bottleneck residual block: 1x1 -> 3x3 -> 1x1 convolutions, each followed
    by batch norm, with a ReLU applied after the residual addition.

    The bottleneck width is ``chans * k // 4`` with ``k = 2`` when
    ``twice_decrease`` is set and ``k = 1`` otherwise. With ``twice_decrease``
    the last convolution and the projection shortcut both use stride 2.

    NOTE(review): the strided 1x1 convolutions use padding 1, so a height H
    becomes floor((H + 1) / 2) + 1 rather than H / 2 — confirm this is intended.

    Args:
        chans: Number of input channels.
        twice_decrease: Double the channels and downsample.
        use_dropout: Insert ``nn.Dropout(0.2)`` layers into the main path.
    """

    def __init__(self, chans, twice_decrease=False, use_dropout=False):
        super().__init__()
        self.twice_decrease = twice_decrease
        self.use_dropout = use_dropout

        factor = 2 if twice_decrease else 1
        mid = chans * factor // 4
        wide = chans * factor

        def batch_norm(width):
            return nn.BatchNorm2d(width, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)

        def optional_dropout():
            return [nn.Dropout(0.2)] if use_dropout else []

        pipeline = [nn.Conv2d(chans, mid, kernel_size=(1, 1), stride=(1, 1), padding=(0, 0), bias=False)]
        pipeline += optional_dropout()
        pipeline += [
            batch_norm(mid),
            nn.ReLU(inplace=True),
            nn.Conv2d(mid, mid, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False),
            batch_norm(mid),
        ]
        pipeline += optional_dropout()
        pipeline += [
            nn.ReLU(inplace=True),
            nn.Conv2d(mid, wide, kernel_size=(1, 1), stride=(factor, factor), padding=(factor // 2, factor // 2), bias=False),
            batch_norm(wide),
        ]
        pipeline += optional_dropout()
        self.layers = nn.Sequential(*pipeline)

        # Projection shortcut: always created, only used when twice_decrease is set.
        self.l6_ = nn.Conv2d(chans, chans * 2, kernel_size=(1, 1), stride=(2, 2), padding=(1, 1), bias=False)
        self.l7_ = batch_norm(chans * 2)
        self.activation = nn.ReLU(inplace=True)

    def forward(self, input):
        """Return ReLU(main path + shortcut)."""
        main = self.layers(input)
        shortcut = self.l7_(self.l6_(input)) if self.twice_decrease else input
        return self.activation(main + shortcut)

class GoogleBlock(nn.Module):
    """GoogLeNet-style block: four parallel branches concatenated along the
    channel dimension.

    Branch widths are ``chans//4`` (1x1), ``chans//2`` (1x1 -> 3x3),
    ``chans//8`` (1x1 -> 5x5 through a ``chans//16`` bottleneck) and
    ``chans//8`` (3x3 max-pool -> 1x1). The kernel/padding pairs keep the
    spatial size unchanged. Dropout layers are always created but only
    applied when ``use_dropout`` is true.

    Args:
        chans: Number of input channels.
        use_dropout: Apply the ``nn.Dropout(0.2)`` layers in ``forward``.
    """

    def __init__(self, chans, use_dropout=False):
        super().__init__()
        self.inc = chans
        self.use_dropout = use_dropout

        def batch_norm(width):
            return nn.BatchNorm2d(width, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)

        quarter, half = chans // 4, chans // 2
        eighth, sixteenth = chans // 8, chans // 16

        # Branch 1: 1x1 convolution.
        self.l1 = nn.Conv2d(chans, quarter, kernel_size=1, padding=0)
        self.dr1 = nn.Dropout(0.2)
        self.bn1 = batch_norm(quarter)

        # Branch 2: 1x1 then 3x3 convolution.
        self.l2_1 = nn.Conv2d(chans, half, kernel_size=1, padding=0)
        self.dr2_1 = nn.Dropout(0.2)
        self.l2_2 = nn.Conv2d(half, half, kernel_size=3, padding=1)
        self.dr2_2 = nn.Dropout(0.2)
        self.bn2 = batch_norm(half)

        # Branch 3: 1x1 bottleneck then 5x5 convolution.
        self.l3_1 = nn.Conv2d(chans, sixteenth, kernel_size=1, padding=0)
        self.dr3_1 = nn.Dropout(0.2)
        self.l3_2 = nn.Conv2d(sixteenth, eighth, kernel_size=5, padding=2)
        self.dr3_2 = nn.Dropout(0.2)
        self.bn3 = batch_norm(eighth)

        # Branch 4: 3x3 max-pool then 1x1 convolution.
        self.l4_1 = nn.MaxPool2d(kernel_size=3, padding=1, stride=1)
        self.dr4_1 = nn.Dropout(0.2)
        self.l4_2 = nn.Conv2d(chans, eighth, kernel_size=1, padding=0)
        self.dr4_2 = nn.Dropout(0.2)
        self.bn4 = batch_norm(eighth)

    def _maybe_drop(self, dropout, x):
        """Apply ``dropout`` to ``x`` only when dropout is enabled for this block."""
        return dropout(x) if self.use_dropout else x

    def forward(self, input):
        """Run the four branches on ``input`` and concatenate them on dim 1."""
        branch1 = self._maybe_drop(self.dr1, F.relu(self.bn1(self.l1(input))))

        branch2 = self._maybe_drop(self.dr2_1, F.relu(self.l2_1(input)))
        branch2 = self._maybe_drop(self.dr2_2, F.relu(self.bn2(self.l2_2(branch2))))

        branch3 = self._maybe_drop(self.dr3_1, F.relu(self.l3_1(input)))
        branch3 = self._maybe_drop(self.dr3_2, F.relu(self.bn3(self.l3_2(branch3))))

        branch4 = self._maybe_drop(self.dr4_1, F.relu(self.l4_1(input)))
        branch4 = self._maybe_drop(self.dr4_2, F.relu(self.bn4(self.l4_2(branch4))))

        return torch.cat([branch1, branch2, branch3, branch4], dim=1)


In [1420]:
#!g1.1

# BEST MODELS OF DIFFERENT TYPES

class BestModel(nn.Module):
    """Network made of a 3x3 convolution stem, fourteen ``BasicBlock18``
    blocks in three width groups (80, 160, 320 channels), global average
    pooling and a linear layer with 200 outputs.

    The last block of each group uses ``twice_decrease``, so 640 features
    reach the linear layer.

    Args:
        params: Dict with keys ``eps``, ``momentum``, ``bias`` and ``p``;
            used by the stem and forwarded to every ``BasicBlock18``.
    """

    def __init__(self, params={'eps': 1e-05, 'momentum': 0.1, 'bias': False, 'p': 0.3}):
        super().__init__()
        stem = [
            nn.Conv2d(3, 80, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=params['bias']),
            nn.BatchNorm2d(80, eps=params['eps'], momentum=params['momentum'], affine=True, track_running_stats=True),
            nn.ReLU(),
        ]
        # (input channels, block depth, downsample at this block)
        block_spec = [
            (80, 3, False), (80, 2, False), (80, 1, False), (80, 4, False), (80, 2, True),
            (160, 1, False), (160, 2, False), (160, 3, False), (160, 4, False), (160, 2, True),
            (320, 1, False), (320, 3, False), (320, 4, False), (320, 2, True),
        ]
        blocks = [
            BasicBlock18(width, depth, twice_decrease=downsample, params=params)
            for width, depth, downsample in block_spec
        ]
        head = [
            nn.AdaptiveAvgPool2d(output_size=(1, 1)),
            Flatten(),
            nn.Linear(640, 200),
        ]
        self.model = nn.Sequential(*stem, *blocks, *head)

    def forward(self, input):
        """Return the 200 raw outputs of the network for a batch of images."""
        return self.model(input)


# SOTA 0.38 - 5 epochs
# TURN_ON_DROPOUT=True
# model = nn.Sequential(
#     nn.Conv2d(3, 80, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False), #
#     nn.BatchNorm2d(80, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True), 
#     nn.ReLU(),
#     BasicBlock18(80, 3, use_dropout=TURN_ON_DROPOUT),
#     BasicBlock18(80, 2, use_dropout=TURN_ON_DROPOUT),
#     BasicBlock18(80, 1, use_dropout=TURN_ON_DROPOUT),
#     BasicBlock18(80, 2, twice_decrease=True, use_dropout=TURN_ON_DROPOUT),
#     BasicBlock18(160, 1, use_dropout=TURN_ON_DROPOUT),
#     BasicBlock18(160, 2, use_dropout=TURN_ON_DROPOUT),
#     BasicBlock18(160, 3, use_dropout=TURN_ON_DROPOUT),
#     BasicBlock18(160, twice_decrease=True, use_dropout=TURN_ON_DROPOUT),
#     BasicBlock18(320, 1, use_dropout=TURN_ON_DROPOUT),
#     BasicBlock18(320, 3, use_dropout=TURN_ON_DROPOUT),
#     BasicBlock18(320, 2, twice_decrease=True, use_dropout=TURN_ON_DROPOUT),
#     nn.AdaptiveAvgPool2d(output_size=(1, 1)),
#     Flatten(),
#     nn.Linear(640, 200)
# )

# resnet50 0.23 - 5 epochs
# model = nn.Sequential(
#     nn.Conv2d(3, 32, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2), bias=False), #
#     nn.BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True), 
#     nn.ReLU(inplace=True),
#     BasicBlock50(32, twice_decrease=True),
#     BasicBlock50(64, twice_decrease=True),
#     BasicBlock50(128),
#     BasicBlock50(128, twice_decrease=True),
#     BasicBlock50(256),
#     BasicBlock50(256, twice_decrease=True),
#     nn.AdaptiveAvgPool2d(output_size=(1, 1)),
#     Flatten(),
#     nn.Linear(512, 200)
# )



# BASELINE FROM CHECKPOINT resnet18: 0.27 - 3 epochs; 0.33 - 5 epochs
# model = nn.Sequential(
#     nn.Conv2d(3, 32, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2), bias=False), #
#     nn.BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True), 
#     nn.ReLU(inplace=True),
#     BasicBlock18(32, use_dropout=TORN_ON_DROPOUT),
#     BasicBlock18(32, use_dropout=TORN_ON_DROPOUT),
#     BasicBlock18(32, twice_decrease=True, use_dropout=TORN_ON_DROPOUT),
#     BasicBlock18(64, use_dropout=TORN_ON_DROPOUT),
#     BasicBlock18(64, twice_decrease=True, use_dropout=TORN_ON_DROPOUT),
#     BasicBlock18(128, use_dropout=TORN_ON_DROPOUT),
#     BasicBlock18(128, twice_decrease=True, use_dropout=TORN_ON_DROPOUT),
#     BasicBlock18(256, use_dropout=TORN_ON_DROPOUT),
#     nn.AdaptiveAvgPool2d(output_size=(1, 1)),
#     Flatten(),
#     nn.Linear(256, 200)
# )

# Googlenet 0.27 - 5 epochs

# model = nn.Sequential(
#     nn.Conv2d(3, 32, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2), bias=False), #
#     nn.BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True), 
#     nn.ReLU(),
    
#     GoogleBlock(32, use_dropout=TURN_ON_DROPOUT),
#     nn.Conv2d(32, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False),
#     GoogleBlock(64, use_dropout=TURN_ON_DROPOUT),
#     nn.Conv2d(64, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False),
#     GoogleBlock(128, use_dropout=TURN_ON_DROPOUT),
#     nn.Conv2d(128, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False),
#     GoogleBlock(256, use_dropout=TURN_ON_DROPOUT),
#     nn.Conv2d(256, 512, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False),
#     GoogleBlock(512, use_dropout=TURN_ON_DROPOUT),
    
#     nn.AdaptiveAvgPool2d(output_size=(1, 1)),
#     Flatten(),
#     nn.Linear(512, 200)
# )



In [825]:
#!g1.1
# EXPERIMENTING WITH DIFFERENT MODELS

# 0.3
# model = nn.Sequential(
#     nn.Conv2d(3, 32, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2), bias=False), #
#     nn.BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True), 
#     nn.ReLU(inplace=True),
#     BasicBlock18(32, block_depth=4),
# #     nn.LayerNorm((32, 40, 40)),
#     BasicBlock18(32, block_depth=5),
#     BasicBlock18(32),
#     BasicBlock18(32, block_depth=3),
# #     nn.LayerNorm((32, 40, 40)),
#     BasicBlock18(32, block_depth=3, twice_decrease=True),
#     BasicBlock18(64, block_depth=4),
#     BasicBlock18(64),
# #     nn.LayerNorm((64, 20, 20)),
#     BasicBlock18(64, twice_decrease=True),
#     BasicBlock18(128, block_depth=3),
#     BasicBlock18(128, block_depth=4),
# #     nn.LayerNorm((128, 10, 10)),
#     BasicBlock18(128, twice_decrease=True),
#     BasicBlock18(256, block_depth=3),
#     BasicBlock18(256),
# #     nn.LayerNorm((256, 5, 5)),
#     BasicBlock18(256, twice_decrease=True),
#     BasicBlock18(512, block_depth=3),
# #     nn.LayerNorm((512, 3, 3)),
#     nn.AdaptiveAvgPool2d(output_size=(1, 1)),
#     Flatten(),
#     nn.Linear(512, 200)
# )

# SOTA at some moment
# model = nn.Sequential(
#     nn.Conv2d(3, 32, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2), bias=False), #
#     nn.BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True), 
#     nn.ReLU(inplace=True),
#     BasicBlock18(32),
#     BasicBlock18(32),
#     BasicBlock18(32, twice_decrease=True),
#     BasicBlock18(64),
#     BasicBlock18(64, twice_decrease=True),
#     BasicBlock18(128),
#     BasicBlock18(128, twice_decrease=True),
#     BasicBlock18(256),
#     nn.AdaptiveAvgPool2d(output_size=(1, 1)),
#     Flatten(),
#     nn.Linear(512, 200)
# )

# Trying to make several branches with several outputs
# branch_start = nn.Sequential(
#     nn.Conv2d(3, 32, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2), bias=False), #
#     nn.BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True), 
#     nn.ReLU(inplace=True),
#     BasicBlock18(32),
#     BasicBlock18(32),
#     BasicBlock18(32, twice_decrease=True),
#     BasicBlock18(64),
#     BasicBlock18(64, twice_decrease=True),
# )

# branch1 = nn.Sequential(
#     nn.Conv2d(128, 256, kernel_size=(5, 5), stride=(2, 2), padding=(2, 2), bias=False),
#     nn.BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True),
#     nn.ReLU(inplace=True),
#     nn.Conv2d(256, 512, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False),
#     nn.BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True),
#     nn.ReLU(inplace=True),
#     nn.AdaptiveAvgPool2d(output_size=(1, 1)),
#     Flatten(), #512
#     nn.LayerNorm((512)),
# )

# branch2 = nn.Sequential(
#     BasicBlock18(128),
#     BasicBlock18(128, twice_decrease=True),
#     BasicBlock18(256),
#     nn.AdaptiveAvgPool2d(output_size=(1, 1)),
#     Flatten(),
#     nn.LayerNorm((256)),
# )

# branch3 = nn.Sequential(
#     BasicBlock18(128, 3),
#     BasicBlock18(128, 3, twice_decrease=True),
#     BasicBlock18(256, 3, twice_decrease=True),
#     nn.AdaptiveAvgPool2d(output_size=(1, 1)),
#     Flatten(),
#     nn.LayerNorm((512)),
# )


# class MyModel(nn.Module):
#     def __init__(self):
#         super().__init__()
#         self.bs = branch_start
#         self.b1 = branch1
#         self.b2 = branch2
#         self.b3 = branch3
#         self.ll = nn.Linear(512+512+256, 200)
# #         self.b2 = branch2
#     def forward(self, input):
#         xs = self.bs(input)
#         x1 = self.b1(xs)
#         x2 = self.b2(xs)
#         x3 = self.b3(xs)
#         return self.ll(torch.cat((x1, x2, x3), 1))

# model = MyModel()

In [1076]:
#!g1.1
# RANDOMLY DEFINED MODEL FOR SEARCHING THE BEST ARCHITECTURE

class FindBlock(nn.Module):
    """Wrapper around one randomly configured building block.

    The block type is drawn from ``choice_list`` (currently only
    ``'resnet18'``); downsampling blocks are always ``'resnet18'``. For a
    ``'resnet18'`` block the depth is drawn uniformly from 1..4 and appended
    to ``self.type_`` (e.g. ``'resnet18-3'``).

    Args:
        chans: Number of input channels.
        use_dropout: Forwarded to the wrapped block.
        twice_decrease: Create a downsampling block.
    """

    def __init__(self, chans, use_dropout=False, twice_decrease=False):
        super().__init__()
        self.block = None
        choice_list = ['resnet18'] #['resnet18', 'resnet50', 'googleblock']
        # The random draw is skipped entirely for downsampling blocks.
        self.type_ = 'resnet18' if twice_decrease else random.choice(choice_list)

        if self.type_ == 'googleblock':
            self.block = GoogleBlock(chans, use_dropout=use_dropout)
        elif self.type_ == 'resnet18':
            block_depth = random.randint(1, 4)
            self.type_ = self.type_ + '-' + str(block_depth)
            self.block = BasicBlock18(
                chans,
                block_depth=block_depth,
                use_dropout=use_dropout,
                twice_decrease=twice_decrease,
            )
        # A 'resnet50' type has no branch here (it is disabled in choice_list).

    def forward(self, input):
        """Delegate to the wrapped block."""
        return self.block(input)
    

class FindModel(nn.Module):
    """Randomly generated network used for architecture search.

    A 3x3 convolution stem is followed by a random number of ``FindBlock``
    blocks, ``num_downs`` of which (at random positions) downsample and
    double the channel count, then global average pooling and a linear
    layer with 200 outputs.

    Args:
        min_depth: Smallest number of blocks (inclusive).
        max_depth: Largest number of blocks (inclusive).
        start_num_channes: Channel count produced by the stem.
        num_downs: Number of downsampling blocks.
        use_dropout: Forwarded to every ``FindBlock``.
    """

    def __init__(self, min_depth=4, max_depth=20, start_num_channes=32, num_downs=3, use_dropout=False):
        super().__init__()
        self.block_names = []
        self.start_num_channes = start_num_channes
        self.num_downs = num_downs
        self.depth = random.randint(min_depth, max_depth)
        downsample_indexes = set(random.sample(range(self.depth), num_downs))

        width = start_num_channes
        modules = [
            nn.Conv2d(3, width, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False),
            nn.BatchNorm2d(width, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True),
            nn.ReLU(),
        ]

        for position in range(self.depth):
            downsample = position in downsample_indexes
            block = FindBlock(width, use_dropout=use_dropout, twice_decrease=downsample)
            modules.append(block)
            if downsample:
                # The block doubled the channels; later blocks start from the new width.
                width *= 2
                self.block_names.append(block.type_ + ' x2')
            else:
                self.block_names.append(block.type_)

        modules.extend([
            nn.AdaptiveAvgPool2d(output_size=(1, 1)),
            Flatten(),
            nn.Linear(width, 200),
        ])

        self.layers = nn.Sequential(*modules)

    def represent(self):
        """Return the block names, one per line, in network order."""
        return '\n'.join(self.block_names)

    def forward(self, input):
        """Run the generated network on ``input``."""
        return self.layers(input)
    

# model = FindModel()

In [1389]:
#!g1.1
# Create the data loaders

batch_size = 128
# Images [0, border) are used for training, images [border, 100000) for validation.
border = 99000

train_loader = torch.utils.data.DataLoader(dataset=TrainDataset(from_=0, to_=border), 
                                           batch_size=batch_size,
                                           shuffle=True)

# NOTE(review): this loader is what test() iterates over, i.e. it serves as the
# validation split; shuffling is enabled here as well.
test_loader = torch.utils.data.DataLoader(dataset=TrainDataset(from_=border, to_=100000), 
                                          batch_size=batch_size, 
                                          shuffle=True)


In [1398]:
#!g1.1
# Training functions
# Log-softmax over the class dimension; its output is fed to F.nll_loss.
end_softmax = nn.LogSoftmax(dim=-1)

# Pool of augmentations; apply_random_aug picks one entry per image through
# transforms.RandomChoice, so entries that appear several times are chosen more often.
# NOTE(review): the size-40 crops are presumably no-ops if the images are
# already 40x40 — TODO confirm the image size.
augmentation_list = [
                    transforms.ColorJitter(brightness=.5, hue=.3), transforms.RandomRotation(degrees=(0, 90)), transforms.RandomPosterize(2), 
                    transforms.RandomAdjustSharpness(2), transforms.RandomHorizontalFlip(), 
                    transforms.RandomPerspective(distortion_scale=0.6, p=1.0),
                    transforms.RandomPerspective(distortion_scale=0.6, p=1.0), transforms.RandomRotation(degrees=(0, 60)), transforms.RandomRotation(degrees=(0, 180)),
                    transforms.RandomCrop(size=40), transforms.CenterCrop(size=40), transforms.CenterCrop(size=40), transforms.RandomCrop(size=40),
                    transforms.RandomCrop(size=40), transforms.RandomCrop(size=40), transforms.RandomCrop(size=40), transforms.RandomCrop(size=40), 
                    ]


def batch_to_tensor(batch):
    """Normalise every image tensor in ``batch`` per channel and return the
    results stacked into one tensor on the GPU.
    """
    normalize = transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    normalized = list(map(normalize, batch))
    return torch.stack(normalized).cuda()


def apply_random_aug(batch):
    """Apply one randomly chosen augmentation to every image in ``batch``.

    Each image tensor is converted to a PIL image, passed through one
    transform drawn from ``augmentation_list``, converted back to a tensor
    and normalised per channel.

    Args:
        batch: Iterable of image tensors.

    Returns:
        The augmented, normalised images stacked into one tensor on the GPU.
    """
    # The pipeline does not depend on the image, so it is built once per call
    # instead of once per image; RandomChoice still draws anew for every image.
    augment_applier = transforms.Compose([
        transforms.ToPILImage(),
        transforms.RandomChoice(augmentation_list),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ])
    return torch.stack([augment_applier(tens) for tens in batch]).cuda()


def train_epoch(model, optimizer, scheduler=None, batchsize=batch_size, use_aug=False, use_mixup=False):
    """Train ``model`` for one pass over the global ``train_loader``.

    Args:
        model: Network to train; switched to training mode.
        optimizer: Optimizer that is stepped once per processed batch.
        scheduler: Optional LR scheduler, stepped after every optimizer step.
        batchsize: Unused; kept so existing calls keep working.
        use_aug: Build the input with ``apply_random_aug`` instead of plain
            normalisation.
        use_mixup: Train on convex combinations of consecutive batch pairs.
            Even-numbered batches are only stored; an odd-numbered batch is
            mixed with the stored one.

    Returns:
        ``(loss_log, acc_log)``. ``loss_log`` holds one float per optimizer
        step. ``acc_log`` holds one entry per batch: the batch accuracy as a
        0-dim tensor, or the placeholder ``0.0`` in mixup mode.
    """
    loss_log, acc_log = [], []
    model.train()
    saved_batch = None
    saved_labels = None

    for batch_num, (x_batch, y_batch) in enumerate(tqdm(train_loader)):
        # Build the network input exactly once; the augmented path already
        # normalises and moves the images to the GPU.
        if use_aug:
            data = apply_random_aug(x_batch)
        else:
            data = batch_to_tensor(x_batch)
        target = y_batch
        optimizer.zero_grad()

        if use_mixup:
            acc_log.append(0.0)
            if batch_num % 2 == 0:
                # First half of a pair: remember it and wait for its partner.
                saved_batch, saved_labels = data, target
                continue
            if saved_batch.shape != data.shape:
                # A trailing partial batch cannot be blended with a full one.
                continue
            l = np.random.rand()
            new_data = data * l + saved_batch * (1 - l)
            output = end_softmax(model(new_data))
            loss = F.nll_loss(output, target) * l + F.nll_loss(output, saved_labels) * (1 - l)
        else:
            output = end_softmax(model(data))
            pred = torch.max(output, 1)[1]
            acc = torch.eq(pred, y_batch).float().mean()
            acc_log.append(acc)
            loss = F.nll_loss(output, target)

        loss.backward()
        optimizer.step()
        if scheduler is not None:
            scheduler.step()
        loss_log.append(loss.item())

    return loss_log, acc_log

def test(model, plato_scheduler=None):
    """Evaluate ``model`` on the global ``test_loader``.

    Args:
        model: Network to evaluate; switched to evaluation mode and left
            in that mode.
        plato_scheduler: Optional scheduler whose ``step`` is called once
            with the mean batch loss.

    Returns:
        ``(loss_log, acc_log)``: per-batch losses as floats and per-batch
        accuracies as 0-dim tensors.
    """
    loss_log, acc_log = [], []
    model.eval()
    # Evaluation needs no gradients; skipping autograd saves memory and time.
    with torch.no_grad():
        for x_batch, y_batch in tqdm(test_loader):
            data = batch_to_tensor(x_batch)
            target = y_batch

            output = end_softmax(model(data))
            loss = F.nll_loss(output, target)

            pred = torch.max(output, 1)[1]
            acc = torch.eq(pred, y_batch).float().mean()
            acc_log.append(acc)

            loss_log.append(loss.item())
    if plato_scheduler:
        plato_scheduler.step(torch.mean(torch.tensor(loss_log)))
    return loss_log, acc_log




In [1421]:
#!g1.1
# Final training run

model = BestModel().cuda()

# Per-epoch mean validation / training accuracy.
log_accs = []
log_tr_accs = []
# Stage 1: 10 epochs of Adam with a constant learning rate and augmentation.
opt2 = torch.optim.Adam(model.parameters(), lr=0.0005, weight_decay=1e-5)
for i in range(10):    
    use_aug=True
    print(i)
    train_loss, train_acc = train_epoch(model, opt2, scheduler=None, batchsize=batch_size, use_aug=use_aug)
    clear_output()
    val_loss, val_acc = test(model, plato_scheduler=None)
    acc = torch.mean(torch.tensor(val_acc)).item()
    log_accs.append(acc)
    log_tr_accs.append(torch.mean(torch.tensor(train_acc)).item())


# Stage 2: 30 epochs with a fresh Adam optimizer. OneCycleLR is stepped once per
# batch by train_epoch, so total_steps is 30 * (border//batch_size + 1).
# NOTE(review): OneCycleLR and ReduceLROnPlateau both control the learning rate
# of opt2; the per-batch schedule presumably overrides the plateau reductions — confirm.
opt2 = torch.optim.Adam(model.parameters(), lr=0.0005, weight_decay=1e-5)
cycle_scheduler = lr_scheduler.OneCycleLR(opt2, max_lr=0.0001, total_steps=30*(border//batch_size+1))
plato_scheduler = lr_scheduler.ReduceLROnPlateau(opt2, 'min', verbose=True, patience=2)
for i in range(30):
    use_aug=True
    print(10+i)
    train_loss, train_acc = train_epoch(model, opt2, scheduler=cycle_scheduler, batchsize=batch_size, use_aug=use_aug)
    clear_output()
    val_loss, val_acc = test(model, plato_scheduler=plato_scheduler)
    acc = torch.mean(torch.tensor(val_acc)).item()
    log_accs.append(acc)
    log_tr_accs.append(torch.mean(torch.tensor(train_acc)).item())
    print(acc)
    
# Stage 3: 80 epochs of SGD with momentum. StepLR is stepped per batch and
# multiplies the learning rate by 0.1 every border//batch_size*20 steps.
opt1 = torch.optim.SGD(model.parameters(), lr=1e-2, momentum=0.9)
step_scheduler = lr_scheduler.StepLR(opt1, step_size=border//batch_size*20, gamma=0.1)
plato_scheduler = lr_scheduler.ReduceLROnPlateau(opt1, 'min', verbose=True, patience=10)
for i in range(80):
    print(40+i)
    use_mixup=False
    train_loss, train_acc = train_epoch(model, opt1, scheduler=step_scheduler, batchsize=batch_size, use_aug=True, use_mixup=use_mixup)
    clear_output()
    val_loss, val_acc = test(model, plato_scheduler=plato_scheduler)
    acc = torch.mean(torch.tensor(val_acc)).item()
    log_accs.append(acc)
    # NOTE(review): tr_acc and lr are computed but never used, and log_tr_accs is
    # not extended in this stage — confirm whether that is intended.
    tr_acc = torch.mean(torch.tensor(train_acc)).item()
    lr = step_scheduler.get_last_lr()[0]
    print(acc )

In [1186]:
# #!g1.1
# # SEARCHING FOR OPTIMAL METHOD OF OPTIMIZATION

# model = BestModel().cuda()

# log_accs = []
# log_tr_accs = []
# opt2 = torch.optim.Adam(model.parameters(), lr=0.0005, weight_decay=1e-5)
# for i in range(10):    
#     use_aug=True
#     print(i)
#     train_loss, train_acc = train_epoch(model, opt2, scheduler=None, batchsize=32, use_aug=use_aug)
#     clear_output()
#     val_loss, val_acc = test(model, plato_scheduler=None)
#     acc = torch.mean(torch.tensor(val_acc)).item()
#     log_accs.append(acc)
#     log_tr_accs.append(torch.mean(torch.tensor(train_acc[-(90000//32):])).item())
# #     plt.plot(log_accs)
# #     plt.plot(log_tr_accs)
# #     plt.show()

# opt2 = torch.optim.Adam(model.parameters(), lr=0.0005, weight_decay=1e-5)
# cycle_scheduler = lr_scheduler.OneCycleLR(opt2, max_lr=0.0001, total_steps=20*90000//32)
# plato_scheduler = lr_scheduler.ReduceLROnPlateau(opt2, 'min', verbose=True, patience=2)
# for i in range(28):
#     use_aug=True
#     print(10+i)
#     train_loss, train_acc = train_epoch(model, opt2, scheduler=cycle_scheduler, batchsize=32, use_aug=use_aug)
#     clear_output()
#     val_loss, val_acc = test(model, plato_scheduler=plato_scheduler)
#     acc = torch.mean(torch.tensor(val_acc)).item()
#     log_accs.append(acc)
#     log_tr_accs.append(torch.mean(torch.tensor(train_acc[-(90000//32):])).item())
# #     plt.plot(log_accs)
# #     plt.plot(log_tr_accs)
# #     plt.show()
#     print(acc)


In [1346]:
#!g1.1
# b_model = BestModel()

# checkpoint = torch.load('model_best.pt')
# b_model.load_state_dict(checkpoint['model_state_dict'])
# # b_model2 = BestModel().load_state_dict(checkpoint['model_state_dict']).cuda()
# b_model = b_model.cuda()

In [1349]:
#!g1.1
def decr_rate(x):
    """Return a multiplicative learning-rate factor for step ``x``.

    The factor is 0.5 (and ``'decr'`` is printed) whenever ``x`` is a
    multiple of ``90000//32 // 2`` steps, and 1.0 otherwise.
    """
    half_epoch_steps = 90000//32 // 2
    if x % half_epoch_steps != 0:
        return 1.0
    print('decr')
    return 0.5

# model = b_model
# opt1 = torch.optim.SGD(model.parameters(), lr=1e-4, momentum=0.9)
# opt3 = torch.optim.RMSprop(model.parameters(), lr=1e-3, weight_decay=1e-5)
# opt2 = torch.optim.Adam(model.parameters(), lr=1e-6, weight_decay=1e-5)
# cycle_scheduler = lr_scheduler.OneCycleLR(opt2, max_lr=1e-4, steps_per_epoch=90000//32+1, epochs=3)
# step_scheduler = lr_scheduler.StepLR(opt3, step_size=90000//32//2, gamma=0.8)
# cycle_scheduler2 = lr_scheduler.CyclicLR(opt2, base_lr=0.00001, max_lr=0.001, step_size_up=2*90000//32, cycle_momentum=False)
# cos_scheduler = lr_scheduler.CosineAnnealingLR(opt2, T_max=3*90000//32)
# plato_scheduler = lr_scheduler.ReduceLROnPlateau(opt2, 'min', verbose=True, patience=2)
# exp_scheduler = lr_scheduler.MultiplicativeLR(opt2, decr_rate)
# cos_scheduler = lr_scheduler.CosineAnnealingWarmRestarts(opt3, T_0=90000//32, T_mult=2)



# log_accs = []
# log_tr_accs = []

In [1080]:
#!g1.1
# SEARCHING FOR THE BEST ARCHITECTURE

# TURN_ON_DROPOUT = True

# NUM_TO_FIND = 100
# VAL_EPOCHS = 5
# best_model = None
# best_acc = 0
# best_loss = 100
# log_models = []
# log_accs = []
# for i in range(NUM_TO_FIND):
#     num_downs = random.choice([3, 4])
#     model = FindModel(min_depth=6, max_depth=12, start_num_channes=80, num_downs=num_downs, use_dropout=TURN_ON_DROPOUT).cuda()
#     opt2 = torch.optim.Adam(model.parameters(), lr=0.0005, weight_decay=1e-5)
#     loss, acc = train(model, opt2, VAL_EPOCHS, model_no=i)
#     if loss < best_loss:
#         best_model = model
#         best_acc = acc.item()
#         best_loss = loss
#     log_models.append(model.represent())
#     log_accs.append(acc)
# print(best_acc)
# print(best_model.represent())

In [1126]:
#!g1.1
# SEARCHING FOR BEST HYPERPARAMETERS

# VAL_EPOCHS = 5
# best_model = None
# best_acc = 0
# best_loss = 100

# best_params = None

# log_accs = []
# log_params = []
# eps = 1e-05
# for momentum in [0.2, 0.5]:
#     params = {'eps':eps, 'momentum':momentum, 'bias':False, 'p':0.3}
#     model = BestModel(params=params).cuda()
#     opt2 = torch.optim.Adam(model.parameters(), lr=0.0005, weight_decay=1e-5)
#     loss, acc = train(model, opt2, VAL_EPOCHS, model_no=str(params))
#     if loss < best_loss:
#         best_model = model
#         best_acc = acc.item()
#         best_loss = loss
#         best_params = params
#     log_accs.append(acc.item())
#     log_params.append(params)
# print(best_acc)
# print(best_params)

In [1425]:
#!g1.1
# Predict on the test set

def ind_to_path_test(a):
    """Return the path of test image number ``a``.

    The index is left-padded with zeros to at least five characters,
    e.g. ``7`` -> ``.../test_00007.jpg``; longer indices are kept as is.
    """
    padded_index = str(a).rjust(5, '0')
    return 'simple_image_classification/test/test_' + padded_index + '.jpg'

model.eval()
# b_model.eval()
trans = transforms.ToTensor()
norm = transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
preds = []
# Inference only: without autograd no graph is built for the 10000 forward passes.
with torch.no_grad():
    for i in tqdm(range(10000)):
        image = ind_to_path_test(i)
        # Close each image file as soon as it has been decoded.
        with Image.open(image) as im:
            tens = norm(trans(im))
        # Add the batch dimension the model expects.
        tens = tens.unsqueeze(0)
        output = end_softmax(model(tens.cuda()))
        pred = torch.max(output, 1)[1]
        preds.append(int(pred[0].item()))

In [1426]:
#!g1.1
# Build the submission file: one row per test image with its zero-padded class label.
vf = np.vectorize(lambda x: 'test_' + str(x).rjust(5, '0') + '.jpg')
df_ans = pd.DataFrame(vf(np.arange(10000)))
df_ans[1] = pd.Series(preds)
df_ans.columns = ['Id', 'Category']
# Labels are written as strings of at least four characters, e.g. 7 -> '0007'.
df_ans['Category'] = df_ans['Category'].map(lambda label: str(label).rjust(4, '0'))
df_ans.set_index('Id').to_csv('labels_test.csv')

In [None]:
#!g1.1
