In [0]:
# Mount Google Drive at /content/drive; the project files used below live under
# '/content/drive/My Drive/...'. The call prompts for an authorization code.
from google.colab import drive
drive.mount('/content/drive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/drive


In [0]:
import os
import sys
import time
import pickle
import numpy as np
import pandas as pd

from tqdm.notebook import tqdm
from argparse import ArgumentParser

import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.models as models

from torch.utils import data
from torch.nn import functional as fnn
from torchvision import transforms

root_path = '/content/drive/My Drive/Colab Notebooks/made_cv/hw1/'
sys.path.append(root_path)

from hack_utils import Timer
from hack_utils import NUM_PTS
from hack_utils import ThousandLandmarksDataset
from hack_utils import restore_landmarks_batch, create_submission
from hack_utils import ScaleMinSideToSize, CropCenter, TransformByKeys

# Seed the random generators of the libraries already imported above so that
# weight initialisation and shuffling draw the same random streams on every run.
# The cuDNN flags below only constrain kernel selection; they do not seed anything.
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)

# Ask cuDNN for deterministic algorithms and switch off its autotuner, which may
# otherwise pick different kernels from run to run.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

In [0]:
import zipfile


# Unpack every member of a zip archive into a target directory.
# NOTE(review): `path_to_zip_file` and `directory_to_extract_to` are not assigned
# anywhere in the visible notebook, so this cell raises NameError on a fresh
# kernel. Define both before running it (TODO confirm the intended paths).
with zipfile.ZipFile(path_to_zip_file, 'r') as zip_ref:
    zip_ref.extractall(directory_to_extract_to)

In [0]:
!mkdir /content/drive/My\ Drive/Colab\ Notebooks/made_cv/hw1/test

In [0]:
# Recursively copy the `train` directory inside the project folder on Drive.
# NOTE(review): the destination ends in `/c`, which may be a truncated name, and the
# recorded run failed with an Input/output error on `train/images` — TODO confirm.
!cp -r /content/drive/My\ Drive/Colab\ Notebooks/made_cv/hw1/train /content/drive/My\ Drive/Colab\ Notebooks/made_cv/hw1/c

cp: cannot access '/content/drive/My Drive/Colab Notebooks/made_cv/hw1/train/images': Input/output error


In [0]:
# Recursively copy `data/test` into the `hw1/test` directory on Drive.
# NOTE(review): the recorded run failed with an Input/output error on
# `data/test/images`; also, if `hw1/test` already exists the copy presumably lands
# in `hw1/test/test` — TODO confirm this is intended.
!cp -r /content/drive/My\ Drive/Colab\ Notebooks/made_cv/hw1/data/test /content/drive/My\ Drive/Colab\ Notebooks/made_cv/hw1/test

cp: cannot access '/content/drive/My Drive/Colab Notebooks/made_cv/hw1/data/test/images': Input/output error


# Реализуем вспомогательные функции

Обучение модели

In [0]:
def train(model, loader, loss_fn, optimizer, device):
    """Run one optimisation pass over `loader` and return the mean batch loss.

    Args:
        model: network switched to training mode and called on each image batch.
        loader: iterable of dict batches exposing the "image" and "landmarks" keys.
        loss_fn: callable invoked as `loss_fn(pred, target, reduction="mean")`.
        optimizer: optimizer whose gradients are reset and stepped once per batch.
        device: device the image batch is moved to before the forward pass.

    Returns:
        `np.mean` over the scalar losses collected for every batch.
    """
    model.train()
    batch_losses = []
    progress = tqdm(loader, total=len(loader), desc="training...")
    for sample in progress:
        # Per the original annotations: images are B x 3 x CROP_SIZE x CROP_SIZE,
        # landmarks are B x (2 * NUM_PTS).
        inputs = sample["image"].to(device)
        targets = sample["landmarks"]

        # Predictions are brought back to the CPU; the targets are used exactly as
        # the loader delivered them (they are never moved to `device`).
        outputs = model(inputs).cpu()
        batch_loss = loss_fn(outputs, targets, reduction="mean")
        batch_losses.append(batch_loss.item())

        optimizer.zero_grad()
        batch_loss.backward()
        optimizer.step()

    return np.mean(batch_losses)

Валидация модели

In [0]:
def validate(model, loader, loss_fn, device):
    """Evaluate `model` on `loader` without gradient tracking; return the mean batch loss.

    Args:
        model: network switched to evaluation mode and called on each image batch.
        loader: iterable of dict batches exposing the "image" and "landmarks" keys.
        loss_fn: callable invoked as `loss_fn(pred, target, reduction="mean")`.
        device: device the image batch is moved to before the forward pass.

    Returns:
        `np.mean` over the scalar losses collected for every batch.
    """
    model.eval()
    batch_losses = []
    progress = tqdm(loader, total=len(loader), desc="validation...")
    for sample in progress:
        inputs = sample["image"].to(device)
        targets = sample["landmarks"]

        # Only the forward pass is wrapped in no_grad; the loss is computed on the
        # resulting CPU tensor afterwards.
        with torch.no_grad():
            outputs = model(inputs).cpu()
        batch_loss = loss_fn(outputs, targets, reduction="mean")
        batch_losses.append(batch_loss.item())

    return np.mean(batch_losses)

Предсказание моделью

In [0]:
def predict(model, loader, device):
    """Predict landmarks for every sample served by `loader`.

    Args:
        model: network switched to evaluation mode and called on each image batch.
        loader: DataLoader-like object exposing `.dataset` and yielding dict batches
            with the keys "image", "scale_coef", "crop_margin_x" and "crop_margin_y".
        device: device the image batch is moved to before the forward pass.

    Returns:
        A float array of shape (len(loader.dataset), NUM_PTS, 2) holding the output
        of `restore_landmarks_batch` for each sample, in loader order.
    """
    model.eval()
    predictions = np.zeros((len(loader.dataset), NUM_PTS, 2))
    # Track the write position explicitly instead of deriving it from
    # `loader.batch_size`, which is None when a batch_sampler is used and is wrong
    # whenever batches differ in size.
    offset = 0
    for batch in tqdm(loader, total=len(loader), desc="test prediction..."):
        images = batch["image"].to(device)

        with torch.no_grad():
            pred_landmarks = model(images).cpu()
        batch_len = len(pred_landmarks)
        pred_landmarks = pred_landmarks.numpy().reshape((batch_len, NUM_PTS, 2))  # B x NUM_PTS x 2

        fs = batch["scale_coef"].numpy()  # B
        margins_x = batch["crop_margin_x"].numpy()  # B
        margins_y = batch["crop_margin_y"].numpy()  # B
        # Presumably maps crop-space predictions back to original image coordinates
        # using the scale and crop margins — confirm against hack_utils.
        prediction = restore_landmarks_batch(pred_landmarks, fs, margins_x, margins_y)  # B x NUM_PTS x 2
        predictions[offset: offset + batch_len] = prediction
        offset += batch_len

    return predictions

Создание названия эксперимента

In [0]:
def get_exp_name(train_params: dict, name: str = None) -> str:
    """Build an experiment identifier from the current local time and hyperparameters.

    Args:
        train_params: mapping that must contain the keys 'batch_size', 'epochs' and 'lr'.
        name: optional label inserted between the timestamp and the hyperparameters.

    Returns:
        A string shaped like "<timestamp>__[<name>_]bs=<..>_epochs=<..>_lr=<..>".

    Raises:
        KeyError: if one of the required keys is missing from `train_params`.
    """
    timestamp = time.strftime("%b_%d_%Y_%H:%M:%S", time.localtime())
    label = "" if name is None else f"{name}_"
    hyperparams = "_".join([
        f"bs={train_params['batch_size']}",
        f"epochs={train_params['epochs']}",
        f"lr={train_params['lr']}",
    ])
    return f"{timestamp}__{label}{hyperparams}"

# Основная часть

In [0]:
# Image size used when the feature-extractor network was trained on ImageNet.
CROP_SIZE = 224
# Hyperparameters; further entries are added once the experiment is configured.
train_params = dict(batch_size=512)
# Dataset root inside the project folder.
data_dir = os.path.join(root_path, 'data')

## Подготовим данные для обучения

Пайплайн препроцессинга изображений

In [0]:
# Standard normalization parameters for networks trained on ImageNet.
IMAGENET_MEAN = [0.485, 0.456, 0.406]
IMAGENET_STD = [0.229, 0.224, 0.225]

# Preprocessing applied to every sample: rescale, centre-crop, then convert the
# "image" entry to a normalized tensor. The last three steps touch only "image".
preprocessing_steps = [
    ScaleMinSideToSize((CROP_SIZE, CROP_SIZE)),
    CropCenter(CROP_SIZE),
    TransformByKeys(transforms.ToPILImage(), ("image",)),
    TransformByKeys(transforms.ToTensor(), ("image",)),
    TransformByKeys(transforms.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD), ("image",)),
]
train_transforms = transforms.Compose(preprocessing_steps)

Загрузим данные для обучения и валидации

In [0]:
# Build the train/val datasets and their loaders, timing the whole step.
with Timer() as t:
    train_images_dir = os.path.join(data_dir, 'train')
    # Settings shared by both loaders; only shuffling and drop_last differ.
    shared_loader_kwargs = dict(
        batch_size=train_params['batch_size'],
        num_workers=4,
        pin_memory=True,
    )

    train_dataset = ThousandLandmarksDataset(train_images_dir, train_transforms, split="train")
    train_dataloader = data.DataLoader(
        train_dataset, shuffle=True, drop_last=True, **shared_loader_kwargs
    )

    # Validation reads the same directory and transforms, selecting the "val" split.
    val_dataset = ThousandLandmarksDataset(train_images_dir, train_transforms, split="val")
    val_dataloader = data.DataLoader(
        val_dataset, shuffle=False, drop_last=False, **shared_loader_kwargs
    )
print(f'Reading data took {t.interval:.03f} sec.')

Reading data took 722.456 sec.


## Эксперимент

Зададим параметры эксперимента

In [0]:
# Extend the hyperparameters with the settings of this particular experiment.
train_params.update(epochs=1, lr=1e-3, gpu=True)

# The experiment name doubles as the name of the output directory under root_path.
exp_name = get_exp_name(train_params=train_params, name='baseline')
exp_dir = os.path.join(root_path, exp_name)
print(exp_name)

Apr_18_2020_20:02:34__baseline_bs=512_epochs=1_lr=0.001


Создадим модель

In [0]:
# Select the compute device. The device string must be written without spaces
# ("cuda:0"); "cuda: 0" is rejected by recent PyTorch device-string parsing.
device = torch.device("cuda:0") if train_params['gpu'] else torch.device("cpu")

# ResNet-18 with pretrained weights; the final fully connected layer is replaced so
# the network emits 2 * NUM_PTS values per image.
model = models.resnet18(pretrained=True)
model.fc = nn.Linear(model.fc.in_features, 2 * NUM_PTS, bias=True)
model.to(device)

# Adam with the AMSGrad variant enabled; the loss is mean squared error.
optimizer = optim.Adam(model.parameters(), lr=train_params['lr'], amsgrad=True)
loss_fn = fnn.mse_loss

Downloading: "https://download.pytorch.org/models/resnet18-5c106cde.pth" to /root/.cache/torch/checkpoints/resnet18-5c106cde.pth


HBox(children=(IntProgress(value=0, max=46827520), HTML(value='')))




Обучим и провалидируем модель

In [0]:
# The experiment directory is new for every run (its name embeds a timestamp), so it
# has to be created before the first checkpoint is written; otherwise open() below
# raises FileNotFoundError.
os.makedirs(exp_dir, exist_ok=True)

# Train for the configured number of epochs, validating after each one and keeping
# the weights of the epoch with the lowest validation loss.
with Timer() as t:
    best_val_loss = np.inf
    for epoch in range(train_params['epochs']):
        train_loss = train(model, train_dataloader, loss_fn, optimizer, device=device)
        val_loss = validate(model, val_dataloader, loss_fn, device=device)
        print(f"Epoch #{epoch:2}:\ttrain loss: {train_loss:5.2}\tval loss: {val_loss:5.2}")
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            with open(os.path.join(exp_dir, "best_model.pth"), "wb") as fp:
                torch.save(model.state_dict(), fp)
print(f'Training model took {t.interval:.03f} sec.')

HBox(children=(IntProgress(value=0, description='training...', max=615, style=ProgressStyle(description_width=…

error: ignored

In [0]:
# NOTE(review): leftover scratch expression; no later cell visible here uses it.
1

1

# Предсказание

Загрузим тестовые данные

In [0]:
# Build the test dataset and its loader (same preprocessing as training), timing the step.
with Timer() as t:
    test_images_dir = os.path.join(data_dir, 'test')
    test_dataset = ThousandLandmarksDataset(test_images_dir, train_transforms, split="test")
    # Order is preserved and no sample is dropped.
    test_loader_kwargs = dict(
        batch_size=train_params['batch_size'],
        num_workers=4,
        pin_memory=True,
        shuffle=False,
        drop_last=False,
    )
    test_dataloader = data.DataLoader(test_dataset, **test_loader_kwargs)
print(f'Reading data took {t.interval:.03f} sec.')

In [0]:
# Restore the best checkpoint, predict on the test set, and persist both the raw
# predictions and the submission file inside the experiment directory.
with Timer() as t:
    checkpoint_path = os.path.join(exp_dir, "best_model.pth")
    with open(checkpoint_path, "rb") as fp:
        # Tensors are loaded onto the CPU first and then copied into the model.
        best_state_dict = torch.load(fp, map_location="cpu")
    model.load_state_dict(best_state_dict)

    test_predictions = predict(model, test_dataloader, device)

    predictions_payload = {
        "image_names": test_dataset.image_names,
        "landmarks": test_predictions,
    }
    predictions_path = os.path.join(exp_dir, "test_predictions.pkl")
    with open(predictions_path, "wb") as fp:
        pickle.dump(predictions_payload, fp)

    submission_path = os.path.join(exp_dir, "submit.csv")
    create_submission(data_dir, test_predictions, submission_path)
print(f'Test prediction took {t.interval:.03f} sec.')