# Import everything


In [1]:
from typing import Tuple
from os.path import exists
import torch
import numpy as np
from tqdm.notebook import tqdm
from torch.utils.data import DataLoader
from settings import cfg
from helper import loader
import export_result


# Load everything

In [2]:
# Build the shared training configuration object. The cells below read the
# model, device, transforms, criterion and optimizer from it, and call its
# get_dataloader / get_split / save_checkpoint / load_best helpers.
# NOTE(review): the parameter/buffer size and class statistics shown in the
# cell output are presumably printed by this constructor -- confirm in
# settings/cfg.
config = cfg.TrainConfig()


Param size: 512.227MB
Buffer size: 0.032MB
Stats:
        | Number of not-punching: 34006
        | Number of punching: 6355


# Train

In [3]:
from sklearn.metrics import f1_score


def train(train_idx: np.ndarray) -> Tuple[float, float, float]:
    """Run one training pass over the samples selected by ``train_idx``.

    Switches ``config.model`` to training mode, iterates the dataloader
    returned by ``config.get_dataloader(train_idx)`` and performs one
    optimizer step per batch.

    Args:
        train_idx: Indices of the training samples. ``train_idx.shape[0]``
            is used as the sample count when normalising the metrics.

    Returns:
        A ``(loss, accuracy, f1)`` tuple: the sum of the per-batch losses
        divided by the number of samples, the number of correct predictions
        divided by the number of samples, and the macro-averaged F1 score.
    """
    config.model.train()
    train_dataloader = config.get_dataloader(train_idx)
    total_loss_train = 0

    # Per-batch predictions/labels are collected on the CPU so that the
    # epoch-level metrics can be computed once at the end.
    prediction_array = []
    label_array = []
    for image, label in tqdm(train_dataloader):
        image = image.to(config.device, dtype=torch.float)
        image = config.transforms(image)
        output = config.model(image)

        label = label.to(config.device, dtype=torch.uint8)
        batch_loss = config.criterion(output, label)
        # NOTE(review): if the criterion already averages over the batch,
        # dividing the running sum by the sample count below reports the
        # loss scaled down by roughly the batch size -- confirm the
        # criterion's reduction.
        total_loss_train += batch_loss.item()

        # Predicted class = index of the largest value along dim 1.
        prediction = output.argmax(dim=1)

        prediction_array.append(prediction.cpu().numpy())
        label_array.append(label.cpu().numpy())

        config.optimizer.zero_grad()
        batch_loss.backward()
        config.optimizer.step()
        # Kept from the original: also clears gradients on the model itself
        # after the step (covers parameters the optimizer may not own).
        config.model.zero_grad()

    prediction_array = np.concatenate(prediction_array)
    label_array = np.concatenate(label_array)

    total_accumulate_train = (prediction_array == label_array).sum().item()
    f1_score_train = f1_score(label_array, prediction_array, average='macro')

    sample_count = train_idx.shape[0]
    return (total_loss_train / sample_count,
            total_accumulate_train / sample_count,
            f1_score_train)


In [4]:
def judge(judge_idx: np.ndarray) -> Tuple[float, float, float]:
    """Evaluate the model on the samples selected by ``judge_idx``.

    Switches ``config.model`` to evaluation mode and iterates the dataloader
    returned by ``config.get_dataloader(judge_idx)`` with gradient tracking
    disabled. No optimizer step is taken.

    Args:
        judge_idx: Indices of the held-out samples. ``judge_idx.shape[0]``
            is used as the sample count when normalising the metrics.

    Returns:
        A ``(loss, accuracy, f1)`` tuple: the sum of the per-batch losses
        divided by the number of samples, the number of correct predictions
        divided by the number of samples, and the macro-averaged F1 score.
    """
    config.model.eval()
    judge_dataloader = config.get_dataloader(judge_idx)
    total_loss_judge = 0

    prediction_array = []
    label_array = []
    with torch.no_grad():
        for image, label in tqdm(judge_dataloader):
            image = image.to(config.device, dtype=torch.float)
            # NOTE(review): unlike train(), config.transforms is not applied
            # here. Fine if the transforms are augmentation only; verify
            # they do not also include normalisation the model relies on.
            output = config.model(image)
            label = label.to(config.device, dtype=torch.uint8)

            batch_loss = config.criterion(output, label)
            total_loss_judge += batch_loss.item()

            # Predicted class = index of the largest value along dim 1.
            prediction = output.argmax(dim=1)
            prediction_array.append(prediction.cpu().numpy())
            label_array.append(label.cpu().numpy())

    prediction_array = np.concatenate(prediction_array)
    label_array = np.concatenate(label_array)

    total_accumulate_judge = (prediction_array == label_array).sum().item()
    f1_score_judge = f1_score(label_array, prediction_array, average='macro')

    sample_count = judge_idx.shape[0]
    return (total_loss_judge / sample_count,
            total_accumulate_judge / sample_count,
            f1_score_judge)

In [5]:
# Phase-1 driver: for every (train, judge) split, train one epoch, evaluate,
# optionally submit, checkpoint on improvement and restore the best model
# after too many epochs without improvement.
min_judge_loss = float('inf')
last_submit = 0
# Number of consecutive epochs without a judge-loss improvement. Initialised
# here so the `else` branch below cannot hit an undefined name when the very
# first judge loss is not an improvement over `inf` (e.g. it is NaN).
under_min = 0

print("Starting phase 1")

for epoch, (train_idx, judge_idx) in enumerate(config.get_split()):
    print(f'''
Starting epoch {epoch+1}
    | Train size: {train_idx.shape[0]}
    | Judge size: {judge_idx.shape[0]}''', end="")
    avg_loss_train, avg_accumulate_train, f1_score_train = train(train_idx)
    avg_loss_judge, avg_accumulate_judge, f1_score_judge = judge(judge_idx)

    print(
        f'''
Epoch: {epoch+1} 
    | Train Loss: {avg_loss_train:.3f}
    | Train Accuracy: {avg_accumulate_train:.3f}
    | Train F1 Score: {f1_score_train:.3f}
    _____________________________________________
    | Judge Loss: {avg_loss_judge:.3f}
    | Judge Accuracy: {avg_accumulate_judge:.3f}
    | Judge F1 Score: {f1_score_judge:.3f}''', end="")

    if last_submit == 0:
        # No baseline yet: record this epoch's F1 without submitting.
        last_submit = f1_score_judge
    elif f1_score_judge - last_submit > 0.05:
        # Report the previous baseline *before* overwriting it, so the
        # message shows what the new score is being compared against.
        print(f'''
Submitting:
    | F1 Score: {f1_score_judge:.3f}
    | Last Submit: {last_submit:.3f}''', end="")
        last_submit = f1_score_judge
        export_result.submit(config)
    else:
        print(f'''
Not submitting:
    | F1 Score: {f1_score_judge:.3f}
    | Last Submit: {last_submit:.3f}''', end="")

    if min_judge_loss > avg_loss_judge:
        config.save_checkpoint()
        print(f'''
Judge loss improved:
    | From: {min_judge_loss:.3f}
    | To: {avg_loss_judge:.3f}''', end="")

        min_judge_loss = avg_loss_judge
        under_min = 0
    else:
        under_min += 1
        if under_min > cfg.early_stop:
            # Despite the message, the loop is not terminated: the best
            # checkpoint is restored, the counter is reset, and training
            # continues with the next split.
            print(f'''
Early stop. Not improved for {under_min} epochs.''', end="")
            config.load_best()
            print(f'''
Best model loaded.''', end="")
            under_min = 0


Starting phase 1

Starting epoch 1
    | Train size: 36324
    | Judge size: 4037

  0%|          | 0/1136 [00:00<?, ?it/s]