# Data loading from Kaggle to Colab

In [0]:
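# Before running the next cell, upload these files to the Colab working directory:

# kaggle.json  (Kaggle API token, copied to ~/.kaggle/ below)

# dataset.py
# funcs.py
# train_validate_predict.py

# utils/train_indices.txt
# utils/val_indices.txt
# test_points.csv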

In [0]:
# !ls -alh
# print()

!pip install kaggle
# print()

!pip install pretrainedmodels
# print()

!mkdir ~/.kaggle
# !ls -alh ~/
# print()

!cp kaggle.json ~/.kaggle/
# !ls -alh ~/.kaggle
# print()

!chmod 600 ~/.kaggle/kaggle.json
# !ls -alh ~/.kaggle
# print()

!kaggle datasets list --mine
# print()
!kaggle datasets download -d nikolaygurev/made-thousand-facial-landmarks-data
# print()

!unzip made-thousand-facial-landmarks-data.zip
# print()
!rm made-thousand-facial-landmarks-data.zip
# print()

In [0]:
# Manual step: move the uploaded test_points.csv to data/test/test_points.csv

# Imports

In [0]:
import pickle
import time
from typing import NamedTuple

import numpy as np
import pretrainedmodels
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.models as models
from torch.nn import functional as fnn
from torch.utils import data
from torchvision import transforms

from dataset import (
    CROP_SIZE,
    NUM_PTS,
    CropCenter,
    ScaleMinSideToSize,
    ThousandLandmarksDataset,
    TransformByKeys,
)
from train_validate_predict import predict, train, validate
from funcs import create_submission, time_measurer

In [5]:
# Pin the NumPy and PyTorch global RNGs to one seed so that reruns draw the
# same random numbers.
SEED = 0
np.random.seed(SEED)
torch.manual_seed(SEED)

<torch._C.Generator at 0x7fbef3d57050>
























# Loading datasets

In [6]:
# Start the running stopwatch. Each later stage passes `point_time` to
# time_measurer (imported from funcs) and stores the value it returns.
point_time = time.time()
print("Setting training parameters...")


class TrainingParameters(NamedTuple):
    """Immutable bundle of the settings that describe one training run."""

    # Run identifier; used as the prefix of checkpoint, prediction and
    # submission file names written by this notebook.
    name: str
    # Batch size handed to every DataLoader.
    batch_size: int = 512
    # Passed as `data_frac` when the "train" split is built.
    train_frac: float = 1.0
    # Passed as `data_frac` when the "val" split is built.
    val_frac: float = 1.0
    # True selects the CUDA device, False the CPU.
    gpu: bool = False


# Settings of this run, spelled out by keyword so each value is self-describing.
params = TrainingParameters(
    name="resnet101_batch_64",
    batch_size=64,
    train_frac=1.0,
    val_frac=1.0,
    gpu=True,
)
print(params)

point_time = time_measurer(point_time)

Setting training parameters...
TrainingParameters(name='resnet101_batch_64', batch_size=64, train_frac=1.0, val_frac=1.0, gpu=True)
done in 0.001 seconds



In [7]:
print("Creating transforms composition...")

# The tensor-level steps are wrapped in TransformByKeys with the "image" key
# (presumably so that only the image entry of a sample is transformed - verify
# against dataset.py).
image_keys = ("image",)
to_tensor_step = TransformByKeys(transforms.ToTensor(), image_keys)
normalize_step = TransformByKeys(
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]), image_keys
)

transforms_composition = transforms.Compose(
    [
        # Input: np.ndarray, H x W x 3, dtype=uint8, values in [0, 255]
        ScaleMinSideToSize((CROP_SIZE, CROP_SIZE)),
        CropCenter(CROP_SIZE),
        # Now: np.ndarray, CROP_SIZE x CROP_SIZE x 3 (H x W x 3),
        # dtype=uint8, values in [0, 255]
        #
        # torchvision 0.5 can work without a ToPILImage step here, and
        # ToTensor gives the same result whether or not ToPILImage is used.
        to_tensor_step,
        # Now: torch.Tensor, 3 x CROP_SIZE x CROP_SIZE (3 x H x W),
        # dtype=torch.float32, values in [0, 1]
        normalize_step,
        # Now: torch.Tensor, 3 x CROP_SIZE x CROP_SIZE (3 x H x W),
        # dtype=torch.float32, values in [-1, 1]
    ]
)
point_time = time_measurer(point_time)

# Both labelled splits are read from the same directory; `split` selects the
# subset and `data_frac` comes from the run parameters.
labelled_root = "data/train"

print("Reading train dataset...")
train_dataset = ThousandLandmarksDataset(
    labelled_root,
    transforms_composition,
    split="train",
    data_frac=params.train_frac,
)
point_time = time_measurer(point_time)

print("Reading valid dataset...")
val_dataset = ThousandLandmarksDataset(
    labelled_root,
    transforms_composition,
    split="val",
    data_frac=params.val_frac,
)
point_time = time_measurer(point_time)

Creating transforms composition...
done in 3.0 seconds

Reading train dataset...


393931it [09:00, 728.41it/s]


done in 14 minutes, 13 seconds

Reading valid dataset...


393931it [02:16, 2894.86it/s]


done in 3 minutes, 37 seconds



In [8]:
# Options that are identical for the train and validation loaders.
shared_loader_options = dict(
    batch_size=params.batch_size,
    num_workers=4,
    pin_memory=True,
)

print("Creating train dataloader...")
# Training: reshuffle every epoch and drop the incomplete final batch.
train_dataloader = data.DataLoader(
    train_dataset, shuffle=True, drop_last=True, **shared_loader_options
)
point_time = time_measurer(point_time)

print("Creating valid dataloader...")
# Validation: fixed order, and every sample is kept.
val_dataloader = data.DataLoader(
    val_dataset, shuffle=False, drop_last=False, **shared_loader_options
)
point_time = time_measurer(point_time)

Creating train dataloader...
done in 0.014 seconds

Creating valid dataloader...
done in 0.0 seconds



# Model creation

In [0]:
# print(pretrainedmodels.model_names, "\n")
# print(pretrainedmodels.pretrained_settings["xception"])

In [0]:
# print("Creating model...")
# model = pretrainedmodels.__dict__["xception"](num_classes=1000, pretrained="imagenet")
# model.last_linear = nn.Linear(model.last_linear.in_features, 2 * NUM_PTS, bias=True)

# point_time = time_measurer(point_time)

In [11]:
print("Creating model...")
# Start from the pretrained torchvision ResNet-101 ...
model = models.resnet101(pretrained=True)
# ... and replace its final fully connected layer with one that has
# 2 * NUM_PTS outputs (presumably an x and a y per landmark - confirm in dataset.py).
head_in_features = model.fc.in_features
model.fc = nn.Linear(head_in_features, 2 * NUM_PTS, bias=True)

point_time = time_measurer(point_time)

Creating model...


Downloading: "https://download.pytorch.org/models/resnet101-5d3b4d8f.pth" to /root/.cache/torch/checkpoints/resnet101-5d3b4d8f.pth


HBox(children=(FloatProgress(value=0.0, max=178728960.0), HTML(value='')))


done in 4.7 seconds



In [12]:
print("Moving model to device...")
# The device string must be "cuda:0" with no space after the colon. The old
# "cuda: 0" spelling only worked where the parser tolerated whitespace;
# strict device-string parsing rejects it.
device = torch.device("cuda:0" if params.gpu else "cpu")
print(f"device: {device}")
# nn.Module.to moves the parameters in place; the returned module is not needed.
model.to(device)

point_time = time_measurer(point_time)

Moving model to device...
device: cuda:0
done in 10.0 seconds



# Model training

In [13]:
print("Creating optimizer and loss function...")

# Adam (AMSGrad variant) over all model parameters.
optimizer = optim.Adam(
    model.parameters(),
    lr=1e-3,
    amsgrad=True,
)
# Mean squared error, used for both training and validation.
loss_fn = fnn.mse_loss
# Nothing has been validated yet, so any finite loss counts as an improvement.
best_val_loss = np.inf

point_time = time_measurer(point_time)

Creating optimizer and loss function...
done in 0.01 seconds



In [0]:
def train_and_validate(epoch_range, optimizer_, best_val_loss=None):
    """Train and validate the global ``model`` once per epoch, checkpointing improvements.

    For every epoch in ``epoch_range`` the model is trained on
    ``train_dataloader`` and evaluated on ``val_dataloader``. Both losses are
    rounded to three decimals and printed. Whenever the rounded validation
    loss is lower than the best one so far, the model's state dict is written
    to ``"{params.name}_epoch_{epoch}_best.pth"``.

    Args:
        epoch_range: Iterable of epoch numbers; used for logging and in the
            checkpoint file name.
        optimizer_: Optimizer passed through to ``train``.
        best_val_loss: Best validation loss seen so far. ``None`` (the
            default) means "no previous result" and is treated as infinity,
            so the first epoch always produces a checkpoint.

    Returns:
        The best (rounded) validation loss after the last epoch.
    """
    # The declared default used to crash on the first `<` comparison
    # (float < None raises TypeError); map it to +inf instead.
    if best_val_loss is None:
        best_val_loss = float("inf")

    for epoch in epoch_range:
        train_loss = train(model, train_dataloader, loss_fn, optimizer_, device=device)
        val_loss = validate(model, val_dataloader, loss_fn, device=device)
        train_loss = round(train_loss, 3)
        val_loss = round(val_loss, 3)
        print(
            f"\nEpoch #{epoch:2}:\ttrain loss: {train_loss:5}\tval loss: {val_loss:5}\n"
        )

        # Note: the comparison uses the rounded loss, so improvements smaller
        # than 0.001 do not trigger a new checkpoint.
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            with open(f"{params.name}_epoch_{epoch}_best.pth", "wb") as fp:
                torch.save(model.state_dict(), fp)

    return best_val_loss

In [15]:
# Epochs 0-19. The call returns the best rounded validation loss, which is
# kept so that a later call can continue from it.
best_val_loss = train_and_validate(range(20), optimizer, best_val_loss)

training...: 100%|██████████| 4927/4927 [24:26<00:00,  3.36it/s]
validation...: 100%|██████████| 1228/1228 [02:24<00:00,  8.51it/s]



Epoch # 0:	train loss: 21.237	val loss: 2.923



training...: 100%|██████████| 4927/4927 [24:14<00:00,  3.39it/s]
validation...: 100%|██████████| 1228/1228 [02:13<00:00,  9.19it/s]



Epoch # 1:	train loss: 2.837	val loss: 2.496



training...: 100%|██████████| 4927/4927 [24:03<00:00,  3.41it/s]
validation...: 100%|██████████| 1228/1228 [02:07<00:00,  9.61it/s]



Epoch # 2:	train loss: 2.365	val loss: 2.172



training...: 100%|██████████| 4927/4927 [24:02<00:00,  3.42it/s]
validation...: 100%|██████████| 1228/1228 [02:07<00:00,  9.62it/s]



Epoch # 3:	train loss: 2.045	val loss: 2.057



training...: 100%|██████████| 4927/4927 [24:03<00:00,  3.41it/s]
validation...: 100%|██████████| 1228/1228 [02:07<00:00,  9.66it/s]



Epoch # 4:	train loss: 1.872	val loss: 1.883



training...: 100%|██████████| 4927/4927 [24:04<00:00,  3.41it/s]
validation...: 100%|██████████| 1228/1228 [02:07<00:00,  9.65it/s]



Epoch # 5:	train loss: 1.747	val loss: 1.811



training...: 100%|██████████| 4927/4927 [24:04<00:00,  3.41it/s]
validation...: 100%|██████████| 1228/1228 [02:07<00:00,  9.63it/s]



Epoch # 6:	train loss: 1.643	val loss:  1.71



training...: 100%|██████████| 4927/4927 [24:04<00:00,  3.41it/s]
validation...: 100%|██████████| 1228/1228 [02:07<00:00,  9.63it/s]



Epoch # 7:	train loss: 1.555	val loss: 1.699



training...: 100%|██████████| 4927/4927 [24:03<00:00,  3.41it/s]
validation...: 100%|██████████| 1228/1228 [02:07<00:00,  9.59it/s]
training...:   0%|          | 0/4927 [00:00<?, ?it/s]


Epoch # 8:	train loss: 1.476	val loss: 1.722



training...: 100%|██████████| 4927/4927 [24:04<00:00,  3.41it/s]
validation...: 100%|██████████| 1228/1228 [02:08<00:00,  9.59it/s]



Epoch # 9:	train loss:  1.41	val loss: 1.647



training...: 100%|██████████| 4927/4927 [24:03<00:00,  3.41it/s]
validation...: 100%|██████████| 1228/1228 [02:07<00:00,  9.65it/s]
training...:   0%|          | 0/4927 [00:00<?, ?it/s]


Epoch #10:	train loss: 1.354	val loss: 1.655



training...: 100%|██████████| 4927/4927 [24:03<00:00,  3.41it/s]
validation...: 100%|██████████| 1228/1228 [02:07<00:00,  9.61it/s]



Epoch #11:	train loss: 1.295	val loss: 1.631



training...: 100%|██████████| 4927/4927 [24:03<00:00,  3.41it/s]
validation...: 100%|██████████| 1228/1228 [02:07<00:00,  9.60it/s]
training...:   0%|          | 0/4927 [00:00<?, ?it/s]


Epoch #12:	train loss: 1.232	val loss: 1.697



training...: 100%|██████████| 4927/4927 [24:02<00:00,  3.41it/s]
validation...: 100%|██████████| 1228/1228 [02:07<00:00,  9.64it/s]
training...:   0%|          | 0/4927 [00:00<?, ?it/s]


Epoch #13:	train loss: 1.187	val loss: 1.662



training...: 100%|██████████| 4927/4927 [24:03<00:00,  3.41it/s]
validation...: 100%|██████████| 1228/1228 [02:08<00:00,  9.55it/s]
training...:   0%|          | 0/4927 [00:00<?, ?it/s]


Epoch #14:	train loss: 1.101	val loss: 1.642



training...: 100%|██████████| 4927/4927 [24:04<00:00,  3.41it/s]
validation...: 100%|██████████| 1228/1228 [02:07<00:00,  9.60it/s]



Epoch #15:	train loss:  1.03	val loss: 1.605



training...: 100%|██████████| 4927/4927 [24:02<00:00,  3.41it/s]
validation...: 100%|██████████| 1228/1228 [02:08<00:00,  9.59it/s]



Epoch #16:	train loss: 0.968	val loss:  1.56



training...: 100%|██████████| 4927/4927 [24:05<00:00,  3.41it/s]
validation...: 100%|██████████| 1228/1228 [02:08<00:00,  9.53it/s]



Epoch #17:	train loss: 0.894	val loss: 1.538



training...: 100%|██████████| 4927/4927 [24:05<00:00,  3.41it/s]
validation...: 100%|██████████| 1228/1228 [02:07<00:00,  9.62it/s]
training...:   0%|          | 0/4927 [00:00<?, ?it/s]


Epoch #18:	train loss: 0.837	val loss: 1.544



training...: 100%|██████████| 4927/4927 [24:03<00:00,  3.41it/s]
validation...: 100%|██████████| 1228/1228 [02:08<00:00,  9.58it/s]


Epoch #19:	train loss: 0.791	val loss:  1.54






In [0]:
# Continue with epochs 20-39, reusing the same optimizer and carrying over
# the best validation loss from the previous call.
best_val_loss = train_and_validate(range(20, 40), optimizer, best_val_loss)

# Test dataset prediction

In [0]:
def make_test_prediction(model_state_file):
    """Predict on the test split with a saved checkpoint and store the results.

    Steps:
      1. Build the "test" split from ``data/test`` and wrap it in an
         unshuffled DataLoader.
      2. Load the state dict from ``model_state_file`` into the global
         ``model`` and print what ``load_state_dict`` returns.
      3. Run ``predict`` and pickle the image names and predicted landmarks
         to ``"{params.name}_test_predictions.pkl"``.
      4. Call ``create_submission`` to write ``"{params.name}_submit.csv"``.

    Args:
        model_state_file: Path to a checkpoint written with ``torch.save``.
    """
    test_dataset = ThousandLandmarksDataset(
        "data/test", transforms_composition, split="test", data_frac=1.0
    )
    # Keep the dataset order and every sample so the predictions line up
    # with test_dataset.image_names.
    loader_settings = {
        "batch_size": params.batch_size,
        "num_workers": 4,
        "pin_memory": True,
        "shuffle": False,
        "drop_last": False,
    }
    test_dataloader = data.DataLoader(test_dataset, **loader_settings)

    # Tensors are mapped to the CPU while reading the file.
    with open(model_state_file, "rb") as checkpoint_file:
        restored_state = torch.load(checkpoint_file, map_location="cpu")
    print(model.load_state_dict(restored_state))

    test_predictions = predict(model, test_dataloader, device)

    predictions_path = f"{params.name}_test_predictions.pkl"
    with open(predictions_path, "wb") as predictions_file:
        pickle.dump(
            {"image_names": test_dataset.image_names, "landmarks": test_predictions},
            predictions_file,
        )

    create_submission("data", test_predictions, f"{params.name}_submit.csv")

In [0]:
# A checkpoint is only written for epochs that improve the validation loss.
# In the recorded training log the last improvement is epoch 17 (val loss
# 1.538); epochs 18 (1.544) and 19 (1.54) did not beat it, so no
# "*_epoch_19_best.pth" file exists. Update the epoch number if training is
# continued and a later epoch improves on it.
make_test_prediction(f"{params.name}_epoch_17_best.pth")