In [2]:
import torch
from torch import nn
import torchvision
from torchvision import transforms
from torch.utils.data import Dataset
from torchvision.models import inception_v3, Inception_V3_Weights

from PIL import Image
import os

from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np

import time
from tqdm import tqdm

In [3]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [4]:
!unzip "/content/drive/MyDrive/rus_art_hack/train.zip"

Archive:  /content/drive/MyDrive/rus_art_hack/train.zip
replace train/809e649d6b464b44a956c41c5f3bb5c0.jpeg? [y]es, [n]o, [A]ll, [N]one, [r]ename: 

In [5]:
#from torchsummary import summary
#mod = efficientnet_b3()
#mod = efficientnet_b2()
#summary(mod, (3, 224, 224))
mod = torchvision.models.inception_v3()
mod



Inception3(
  (Conv2d_1a_3x3): BasicConv2d(
    (conv): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), bias=False)
    (bn): BatchNorm2d(32, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  )
  (Conv2d_2a_3x3): BasicConv2d(
    (conv): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), bias=False)
    (bn): BatchNorm2d(32, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  )
  (Conv2d_2b_3x3): BasicConv2d(
    (conv): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  )
  (maxpool1): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  (Conv2d_3b_1x1): BasicConv2d(
    (conv): Conv2d(64, 80, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (bn): BatchNorm2d(80, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  )
  (Conv2d_4a_3x3): BasicConv2d(
    (conv): Conv2d(80, 192, kernel_size=(3, 3), stri

In [6]:
#EfficientNet_B3_Weights.IMAGENET1K_V1.transforms()
#EfficientNet_B2_Weights.IMAGENET1K_V1.transforms()
from torchvision.models import Inception_V3_Weights
Inception_V3_Weights.IMAGENET1K_V1.transforms()

ImageClassification(
    crop_size=[299]
    resize_size=[342]
    mean=[0.485, 0.456, 0.406]
    std=[0.229, 0.224, 0.225]
    interpolation=InterpolationMode.BILINEAR
)

In [7]:
def init_model(device, num_classes):
    pretrained_weights = torchvision.models.Inception_V3_Weights.IMAGENET1K_V1
    model = torchvision.models.inception_v3(weights=pretrained_weights)
    set_requires_grad(model, False)
    model.fc = nn.Sequential(
        nn.Linear(2048, 1000),
        nn.ReLU(),
        nn.Linear(in_features=1000, out_features=num_classes),
        nn.Softmax(dim=1)
    )
    model = model.to(device)
    return model

In [58]:
init_model(device="cpu", num_classes=35)

2
3


Inception3(
  (Conv2d_1a_3x3): BasicConv2d(
    (conv): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), bias=False)
    (bn): BatchNorm2d(32, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  )
  (Conv2d_2a_3x3): BasicConv2d(
    (conv): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), bias=False)
    (bn): BatchNorm2d(32, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  )
  (Conv2d_2b_3x3): BasicConv2d(
    (conv): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  )
  (maxpool1): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  (Conv2d_3b_1x1): BasicConv2d(
    (conv): Conv2d(64, 80, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (bn): BatchNorm2d(80, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  )
  (Conv2d_4a_3x3): BasicConv2d(
    (conv): Conv2d(80, 192, kernel_size=(3, 3), stri

In [8]:
def set_requires_grad(model, value=False):
    for param in model.parameters():
        param.requires_grad = value

In [9]:
def train_model(model, dataloaders, criterion, optimizer,
                phases, num_epochs=3):

    scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.9, verbose=True)
    start_time = time.time()

    acc_history = {k: list() for k in phases}
    loss_history = {k: list() for k in phases}

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in phases:
            if phase == 'train':
                model.train()  # Set model to training mode
            else:
                model.eval()  # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0

            # Iterate over data.
            n_batches = len(dataloaders[phase])
            for inputs, labels in tqdm(dataloaders[phase], total=n_batches):
                inputs = inputs.to(device)
                labels = labels.to(device)

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward
                # track history if only in train
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs)
                    loss = criterion(outputs, labels)

                    _, preds = torch.max(outputs, 1)

                    # backward + optimize only if in training phase
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                # statistics
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)

            if phase == 'train':
              scheduler.step()

            epoch_loss = running_loss / len(dataloaders[phase].dataset)
            epoch_acc = running_corrects.double()
            epoch_acc /= len(dataloaders[phase].dataset)

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss,
                                                       epoch_acc))
            loss_history[phase].append(epoch_loss)
            acc_history[phase].append(epoch_acc)

        print()

    time_elapsed = time.time() - start_time
    print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60,
                                                        time_elapsed % 60))

    return model, acc_history

In [10]:
class ArtDataset(Dataset):
    def __init__(self, root_dir, csv_path=None, transform=None):

        self.transform = transform
        self.files = [os.path.join(root_dir, fname) for fname in os.listdir(root_dir)]
        self.targets = None
        if csv_path:
            df = pd.read_csv(csv_path, sep="\t")
            self.targets = df["label_id"].tolist()
            self.files = [os.path.join(root_dir, fname) for fname in df["image_name"].tolist()]

    def __len__(self):
        return len(self.files)

    def __getitem__(self, idx):
        image = Image.open(self.files[idx]).convert('RGB')
        target = self.targets[idx] if self.targets else -1
        if self.transform:
            image = self.transform(image)
        return image, target

In [13]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
#device = "cpu"
print(device)

cuda:0


In [11]:
#if __name__ == "__main__":
def main():
    img_size = 224

    pretrained_weights = torchvision.models.Inception_V3_Weights.IMAGENET1K_V1
    trans = pretrained_weights.transforms()

    dset = ArtDataset(TRAIN_DATASET, TRAIN_CSV, trans)
    labels = dset.targets
    indices = list(range(len(labels)))
    ind_train, ind_test, _, _ = train_test_split(indices, labels, test_size=0.2, random_state=139, stratify=labels)

    trainset = torch.utils.data.Subset(dset, ind_train)
    testset = torch.utils.data.Subset(dset, ind_test)

    batch_size = 120
    num_workers = 2
    trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size,
                                              shuffle=True, num_workers=num_workers)

    testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size,
                                             shuffle=False, num_workers=num_workers)

    loaders = {'train': trainloader, 'val': testloader}

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    #device = "cpu"

    model = init_model(device, num_classes=35)

    # pretrain_optimizer = torch.optim.Adam(params=model.parameters(), #.classifier[3].parameters(),
    #                                      lr=0.001)

    train_optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

    criterion = nn.CrossEntropyLoss()

    # Pretrain
    # запустить предобучение модели на две эпохи
    # pretrain_results = train_model(model, loaders, criterion, pretrain_optimizer,
    #                                phases=['train', 'val'], num_epochs=3)

    # Train
    # запустить дообучение модели
    set_requires_grad(model, True)
    train_results = train_model(model, loaders, criterion, train_optimizer,
                                phases=['train', 'val'], num_epochs=15)

    torch.save(model.state_dict(), MODEL_WEIGHTS)

In [14]:
# hardcode
MODEL_WEIGHTS = "/content/baseline.pt"
TRAIN_DATASET = "/content/train"
TRAIN_CSV = "/content/train.csv"

main()

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 0/14
----------


  0%|          | 0/58 [00:09<?, ?it/s]


TypeError: cross_entropy_loss(): argument 'input' (position 1) must be Tensor, not InceptionOutputs

In [None]:
def make_submission():
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model = init_model(device, num_classes=35)
    model.load_state_dict(torch.load(MODEL_WEIGHTS))
    model.eval()

    img_size = 224
    # trans = transforms.Compose([
    #     transforms.Resize((img_size, img_size)),
    #     transforms.ToTensor(),
    #     transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])

    pretrained_vit_weights = torchvision.models.ViT_B_16_Weights.IMAGENET1K_V1
    trans = pretrained_vit_weights.transforms()

    dset = ArtDataset(TEST_DATASET, transform=trans)
    batch_size = 16
    num_workers = 4
    testloader = torch.utils.data.DataLoader(dset, batch_size=batch_size,
                                            shuffle=False, num_workers=num_workers)

    all_image_names = [item.split("/")[-1] for item in dset.files]
    all_preds = []
    model = model.eval()
    with torch.no_grad():
        for idx, (images, _) in enumerate(testloader, 0):
            images = images.to(device)
            outputs = model(images)
            _, preds = torch.max(outputs, 1)
            all_preds.extend(preds.cpu().numpy().tolist())

    with open(SUBMISSION_PATH, "w") as f:
        f.write("image_name\tlabel_id\n")
        for name, cl_id in zip(all_image_names, all_preds):
            f.write(f"{name}\t{cl_id}\n")

In [None]:
MODEL_WEIGHTS = "/content/baseline.pt"
TEST_DATASET = "/content/train/"
SUBMISSION_PATH = "/content/submission.csv"

In [None]:
make_submission()

