# Get the dataset

In [1]:
from typing import Tuple
import numpy as np
import config

dataset_image = np.load(config.x_path, mmap_mode="r")
dataset_label = np.load(config.y_path, mmap_mode="r")
print(dataset_image.shape, dataset_label.shape)
assert(dataset_image.shape[0] == dataset_label.shape[0])
print(f'''Stats:
    | Number of punching: {dataset_label.sum()}
    | Number of not-punching: {dataset_label.shape[0] - dataset_label.sum()}''')


  from .autonotebook import tqdm as notebook_tqdm


(73058, 3, 224, 224) (73058,)
Stats:
    | Number of not-punching: 38292
    | Number of punching: 34766


# Boilerplate Code

In [2]:
from sklearn.model_selection import RepeatedKFold
import config

kfold = RepeatedKFold(
    n_splits=config.kfold_nsplits,
    n_repeats=config.kfold_nrepeats,
    random_state=config.seed
)


In [3]:
import torch
class Data(torch.utils.data.Dataset):
    def __init__(self, image: np.ndarray, label: np.ndarray, indices: np.ndarray) -> None:
        assert(image.shape[0] == label.shape[0])
        self.image = image
        self.label = label
        self.indices = indices
    
    def __len__(self) -> int:
        return self.indices.shape[0]
    
    def __getitem__(self, idx: int) -> 'Tuple[np.ndarray, bool]':
        idx = self.indices[idx]
        return self.image[idx], self.label[idx]


# Model

In [4]:
from torch import nn


In [5]:
def print_model(model: nn.Module):
    param_size = 0
    for param in model.parameters():
        param_size += param.nelement() * param.element_size()
    buffer_size = 0
    for buffer in model.buffers():
        buffer_size += buffer.nelement() * buffer.element_size()
    
    param_size  = param_size / 1024**2
    buffer_size = buffer_size / 1024**2
    print(f'Param size: {param_size:.3f}MB')
    print(f'Buffer size: {buffer_size:.3f}MB')

def print_tensor(tensor: torch.Tensor):
    size_gb = tensor.element_size() * tensor.nelement() / (1<<30)
    print(f"{size_gb:.3f}GB")



In [6]:
model = config.get_model()

use_cuda = torch.cuda.is_available()
if not use_cuda:
    print("CUDA not used!")
device = torch.device("cuda" if use_cuda else "cpu")

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters())

model = model.to(device)
criterion = criterion.to(device)


# Train

In [7]:
from tqdm import tqdm
def train(train_idx: np.ndarray) -> 'Tuple(float, float)':
    train = Data(dataset_image, dataset_label, train_idx)
    train_dataloader = torch.utils.data.DataLoader(
        train, batch_size=config.batch_size)
    total_loss_train = 0
    total_accumulate_train = 0
    for image, label in tqdm(train_dataloader):
        image = image.to(device, dtype=torch.float)
        label = label.to(device, dtype=torch.uint8)
        
        output = model(image)
        batch_loss = criterion(output, label)

        total_loss_train += batch_loss

        accumulate = (output.argmax(dim=1) == label).sum()
        total_accumulate_train += accumulate

        optimizer.zero_grad()
        batch_loss.backward()
        optimizer.step()

    total_loss_train = total_loss_train.item()
    total_accumulate_train = total_accumulate_train.item()
    return (total_loss_train, total_accumulate_train)


In [8]:
def judge(judge_idx: np.ndarray) -> 'Tuple(float, float)':
    judge = Data(dataset_image, dataset_label, judge_idx)
    judge_dataloader = torch.utils.data.DataLoader(
        judge, batch_size=config.batch_size)

    total_loss_judge = 0
    total_accumulate_judge = 0
    with torch.no_grad():
        for image, label in tqdm(judge_dataloader):
            image = image.to(device, dtype=torch.float)
            label = label.to(device, dtype=torch.uint8)

            output = model(image)

            batch_loss = criterion(output, label)
            total_loss_judge += batch_loss

            accumulate = (output.argmax(dim=1) == label).sum()
            total_accumulate_judge += accumulate

    total_loss_judge = total_loss_judge.item()
    total_accumulate_judge = total_accumulate_judge.item()
    return total_loss_judge, total_accumulate_judge

In [9]:
min_judge_loss = float('inf')


for epoch, (train_idx, judge_idx) in enumerate(kfold.split(dataset_label)):
    print(f'''Starting epoch {epoch+1}
        | Train size: {train_idx.shape[0]}
        | Judge size: {judge_idx.shape[0]}''')
    total_loss_train, total_accumulate_train = train(train_idx)
    total_loss_judge, total_accumulate_judge = judge(judge_idx)

    print(
        f'''Epoch: {epoch+1} 
        | Train Loss: {total_loss_train / len(train_idx):.3f}
        | Train Accuracy: {total_accumulate_train/len(train_idx):.3f}
        | Val Loss: {total_loss_judge/len(judge_idx):.3f}
        | Val Accuracy: {total_accumulate_judge/len(judge_idx):.3f}'''
        
    )
    if min_judge_loss > total_loss_judge/len(judge_idx):
        min_judge_loss = total_loss_judge/len(judge_idx)
        torch.save(model.state_dict(), config.model_path)
        print(f"Save model because val loss improve loss {min_judge_loss:.3f}")

Starting epoch 1
        | Train size: 65752
        | Judge size: 7306


  return default_collate([torch.as_tensor(b) for b in batch])
100%|██████████| 257/257 [02:45<00:00,  1.55it/s]
100%|██████████| 29/29 [00:08<00:00,  3.43it/s]


Epoch: 1 
        | Train Loss: 0.001
        | Train Accuracy: 0.895
        | Val Loss: 0.193
        | Val Accuracy: 0.528
Save model because val loss improve loss 0.193
Starting epoch 2
        | Train size: 65752
        | Judge size: 7306


100%|██████████| 257/257 [04:09<00:00,  1.03it/s]
100%|██████████| 29/29 [00:09<00:00,  3.04it/s]


Epoch: 2 
        | Train Loss: 0.004
        | Train Accuracy: 0.877
        | Val Loss: 0.181
        | Val Accuracy: 0.518
Save model because val loss improve loss 0.181
Starting epoch 3
        | Train size: 65752
        | Judge size: 7306


100%|██████████| 257/257 [04:18<00:00,  1.00s/it]
100%|██████████| 29/29 [00:09<00:00,  2.99it/s]


Epoch: 3 
        | Train Loss: 0.004
        | Train Accuracy: 0.870
        | Val Loss: 0.128
        | Val Accuracy: 0.521
Save model because val loss improve loss 0.128
Starting epoch 4
        | Train size: 65752
        | Judge size: 7306


100%|██████████| 257/257 [04:16<00:00,  1.00it/s]
100%|██████████| 29/29 [00:09<00:00,  3.20it/s]


Epoch: 4 
        | Train Loss: 0.004
        | Train Accuracy: 0.867
        | Val Loss: 0.098
        | Val Accuracy: 0.530
Save model because val loss improve loss 0.098
Starting epoch 5
        | Train size: 65752
        | Judge size: 7306


100%|██████████| 257/257 [04:15<00:00,  1.01it/s]
100%|██████████| 29/29 [00:09<00:00,  3.09it/s]


Epoch: 5 
        | Train Loss: 0.004
        | Train Accuracy: 0.851
        | Val Loss: 0.081
        | Val Accuracy: 0.516
Save model because val loss improve loss 0.081
Starting epoch 6
        | Train size: 65752
        | Judge size: 7306


100%|██████████| 257/257 [04:14<00:00,  1.01it/s]
100%|██████████| 29/29 [00:09<00:00,  3.13it/s]


Epoch: 6 
        | Train Loss: 0.003
        | Train Accuracy: 0.863
        | Val Loss: 0.027
        | Val Accuracy: 0.525
Save model because val loss improve loss 0.027
Starting epoch 7
        | Train size: 65752
        | Judge size: 7306


100%|██████████| 257/257 [04:15<00:00,  1.01it/s]
100%|██████████| 29/29 [00:09<00:00,  3.13it/s]


Epoch: 7 
        | Train Loss: 0.002
        | Train Accuracy: 0.831
        | Val Loss: 0.027
        | Val Accuracy: 0.530
Starting epoch 8
        | Train size: 65752
        | Judge size: 7306


100%|██████████| 257/257 [04:16<00:00,  1.00it/s]
100%|██████████| 29/29 [00:09<00:00,  3.09it/s]


Epoch: 8 
        | Train Loss: 0.002
        | Train Accuracy: 0.842
        | Val Loss: 0.018
        | Val Accuracy: 0.529
Save model because val loss improve loss 0.018
Starting epoch 9
        | Train size: 65753
        | Judge size: 7305


100%|██████████| 257/257 [04:15<00:00,  1.00it/s]
100%|██████████| 29/29 [00:09<00:00,  3.10it/s]


Epoch: 9 
        | Train Loss: 0.002
        | Train Accuracy: 0.837
        | Val Loss: 0.038
        | Val Accuracy: 0.528
Starting epoch 10
        | Train size: 65753
        | Judge size: 7305


100%|██████████| 257/257 [03:43<00:00,  1.15it/s]
100%|██████████| 29/29 [00:09<00:00,  3.10it/s]


Epoch: 10 
        | Train Loss: 0.003
        | Train Accuracy: 0.835
        | Val Loss: 0.015
        | Val Accuracy: 0.516
Save model because val loss improve loss 0.015
Starting epoch 11
        | Train size: 65752
        | Judge size: 7306


100%|██████████| 257/257 [03:40<00:00,  1.16it/s]
100%|██████████| 29/29 [00:09<00:00,  3.09it/s]


Epoch: 11 
        | Train Loss: 0.002
        | Train Accuracy: 0.806
        | Val Loss: 0.011
        | Val Accuracy: 0.521
Save model because val loss improve loss 0.011
Starting epoch 12
        | Train size: 65752
        | Judge size: 7306


100%|██████████| 257/257 [03:40<00:00,  1.16it/s]
100%|██████████| 29/29 [00:09<00:00,  2.99it/s]


Epoch: 12 
        | Train Loss: 0.002
        | Train Accuracy: 0.759
        | Val Loss: 0.006
        | Val Accuracy: 0.525
Save model because val loss improve loss 0.006
Starting epoch 13
        | Train size: 65752
        | Judge size: 7306


100%|██████████| 257/257 [03:39<00:00,  1.17it/s]
100%|██████████| 29/29 [00:09<00:00,  3.06it/s]


Epoch: 13 
        | Train Loss: 0.003
        | Train Accuracy: 0.534
        | Val Loss: 0.003
        | Val Accuracy: 0.531
Save model because val loss improve loss 0.003
Starting epoch 14
        | Train size: 65752
        | Judge size: 7306


100%|██████████| 257/257 [03:39<00:00,  1.17it/s]
100%|██████████| 29/29 [00:09<00:00,  2.97it/s]


Epoch: 14 
        | Train Loss: 0.003
        | Train Accuracy: 0.486
        | Val Loss: 0.003
        | Val Accuracy: 0.517
Save model because val loss improve loss 0.003
Starting epoch 15
        | Train size: 65752
        | Judge size: 7306


100%|██████████| 257/257 [03:38<00:00,  1.17it/s]
100%|██████████| 29/29 [00:09<00:00,  3.14it/s]


Epoch: 15 
        | Train Loss: 0.003
        | Train Accuracy: 0.478
        | Val Loss: 0.003
        | Val Accuracy: 0.523
Save model because val loss improve loss 0.003
Starting epoch 16
        | Train size: 65752
        | Judge size: 7306


100%|██████████| 257/257 [03:39<00:00,  1.17it/s]
100%|██████████| 29/29 [00:09<00:00,  3.03it/s]


Epoch: 16 
        | Train Loss: 0.003
        | Train Accuracy: 0.477
        | Val Loss: 0.003
        | Val Accuracy: 0.512
Starting epoch 17
        | Train size: 65752
        | Judge size: 7306


100%|██████████| 257/257 [03:39<00:00,  1.17it/s]
100%|██████████| 29/29 [00:09<00:00,  3.02it/s]


Epoch: 17 
        | Train Loss: 0.003
        | Train Accuracy: 0.472
        | Val Loss: 0.003
        | Val Accuracy: 0.532
Save model because val loss improve loss 0.003
Starting epoch 18
        | Train size: 65752
        | Judge size: 7306


100%|██████████| 257/257 [03:38<00:00,  1.18it/s]
100%|██████████| 29/29 [00:09<00:00,  3.04it/s]


Epoch: 18 
        | Train Loss: 0.003
        | Train Accuracy: 0.470
        | Val Loss: 0.003
        | Val Accuracy: 0.529
Save model because val loss improve loss 0.003
Starting epoch 19
        | Train size: 65753
        | Judge size: 7305


100%|██████████| 257/257 [03:38<00:00,  1.17it/s]
100%|██████████| 29/29 [00:09<00:00,  3.13it/s]


Epoch: 19 
        | Train Loss: 0.003
        | Train Accuracy: 0.468
        | Val Loss: 0.003
        | Val Accuracy: 0.521
Starting epoch 20
        | Train size: 65753
        | Judge size: 7305


100%|██████████| 257/257 [03:40<00:00,  1.17it/s]
100%|██████████| 29/29 [00:09<00:00,  3.19it/s]


Epoch: 20 
        | Train Loss: 0.003
        | Train Accuracy: 0.465
        | Val Loss: 0.003
        | Val Accuracy: 0.530
Save model because val loss improve loss 0.003
