### データの読み込み

In [94]:
import random
import numpy as np
import pandas as pd
import torch
from torchvision import transforms
from tqdm import tqdm_notebook as tqdm
from PIL import Image
from sklearn.model_selection import train_test_split
import os
from tqdm import tqdm

from src.datprep_i import DatPreprocess
from src.datasets import ThingsMEGDataset_aug1
#from src.models2 import BasicConvClassifier  # with glu
from src.utils import set_seed, set_lr, CosineScheduler

### 自己教師あり学習の実装

In [95]:
import math
import torch.nn as nn
import torch.optim as optim
from einops.layers.torch import Rearrange
from einops import rearrange

def fix_seed(seed=1234):
    """Seed Python's `random`, NumPy's global RNG and PyTorch with the same value.

    Arguments
    ---------
    seed : int
        Seed handed to every generator (default=1234).
    """
    # Same order as before: Python stdlib, NumPy, then PyTorch.
    for seeder in (random.seed, np.random.seed, torch.manual_seed):
        seeder(seed)


fix_seed(seed=42)

Attention

In [96]:
class Attention(nn.Module):
    """Multi-head self-attention that returns both its output and its attention map."""

    def __init__(self, dim, heads, dim_head, dropout=0.):
        """
        Arguments
        ---------
        dim : int
            Size of the last axis of the input (equal to the embedding dimension).
        heads : int
            Number of attention heads.
        dim_head : int
            Width of each individual head.
        dropout : float
            Dropout probability (default=0.).
        """
        super().__init__()

        self.dim = dim
        self.heads = heads
        self.dim_head = dim_head

        # Width of Q, K and V before they are split into heads; it may differ from `dim`.
        inner_dim = heads * dim_head
        # One head whose width equals `dim` is plain self-attention: no output projection needed.
        plain_self_attention = heads == 1 and dim_head == dim

        # Divisor sqrt(d_k) applied to the scores before the softmax.
        self.scale = math.sqrt(dim_head)

        self.attend = nn.Softmax(dim=-1)  # turns scaled scores into attention weights
        self.dropout = nn.Dropout(dropout)

        # Linear projections producing Q, K and V.
        self.to_q = nn.Linear(dim, inner_dim)
        self.to_k = nn.Linear(dim, inner_dim)
        self.to_v = nn.Linear(dim, inner_dim)

        # Project back to `dim` unless this is the plain single-head case.
        if plain_self_attention:
            self.to_out = nn.Identity()
        else:
            self.to_out = nn.Sequential(
                nn.Linear(inner_dim, dim),
                nn.Dropout(dropout),
            )

    def forward(self, x):
        """
        Apply self-attention to `x` of shape (B, N, D).

        B: batch size
        N: sequence length
        D: feature dimension (dim)

        Returns a tuple `(out, attn)`: `out` has shape (B, N, dim) and `attn`
        is the (B, heads, N, N) attention map after dropout.
        """
        B, N, D = x.size()  # the unpacking also rejects inputs that are not rank 3

        # (B, N, dim) -> (B, N, inner_dim) -> (B, heads, N, dim_head) for each of Q, K, V
        split_heads = "b n (h d) -> b h n d"
        q, k, v = (
            rearrange(proj(x), split_heads, h=self.heads, d=self.dim_head)
            for proj in (self.to_q, self.to_k, self.to_v)
        )

        # QK^T / sqrt(d_k): (B, heads, N, dim_head) x (B, heads, dim_head, N) -> (B, heads, N, N)
        scores = torch.matmul(q, k.transpose(-2, -1)) / self.scale

        # Softmax over the key axis, then dropout on the weights.
        attn = self.dropout(self.attend(scores))

        # Weighted sum of values: (B, heads, N, N) x (B, heads, N, dim_head) -> (B, heads, N, dim_head)
        mixed = torch.matmul(attn, v)

        # Merge the heads again: (B, heads, N, dim_head) -> (B, N, inner_dim)
        merged = rearrange(mixed, "b h n d -> b n (h d)", h=self.heads, d=self.dim_head)

        # The attention map is returned alongside the output so it can be visualised.
        return self.to_out(merged), attn

Feed-forward network (= multi-layer perceptron)

In [97]:
class FFN(nn.Module):
    """Two-layer feed-forward network: Linear -> GELU -> Dropout -> Linear -> Dropout."""

    def __init__(self, dim, hidden_dim, dropout=0.):
        """
        Arguments
        ---------
        dim : int
            Input (and output) feature dimension.
        hidden_dim : int
            Width of the hidden layer.
        dropout : float
            Dropout probability applied after each linear layer (default=0.).
        """
        super().__init__()

        layers = [
            nn.Linear(dim, hidden_dim),
            nn.GELU(),  # Gaussian Error Linear Unit: ReLU-like shape, differentiable at x=0
            nn.Dropout(dropout),
            nn.Linear(hidden_dim, dim),
            nn.Dropout(dropout),
        ]
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        """
        Run the network on `x`; the last axis must have size `dim` and the
        output has the same shape as the input.
        """
        return self.net(x)

Transformer block

In [98]:
class Block(nn.Module):
    """Pre-norm Transformer encoder block: attention and FFN, each with a residual connection."""

    def __init__(self, dim, heads, dim_head, mlp_dim, dropout):
        """
        Arguments
        ---------
        dim : int
            Embedding dimension (same value as the patch embedding's embed_dim).
        heads : int
            Number of heads in the multi-head attention.
        dim_head : int
            Width of each attention head.
        mlp_dim : int
            Hidden width of the feed-forward network.
        dropout : float
            Probability p used by the dropout layers.
        """
        super().__init__()

        # LayerNorm is applied before each sub-layer.
        self.attn_ln = nn.LayerNorm(dim)
        self.attn = Attention(dim, heads, dim_head, dropout)
        self.ffn_ln = nn.LayerNorm(dim)
        self.ffn = FFN(dim, mlp_dim, dropout)

    def forward(self, x, return_attn=False):
        """
        x: (B, N, dim)
        B: batch size
        N: sequence length
        dim: embedding dimension

        With `return_attn=True` only the attention map of this block is
        returned (used for visualisation); otherwise the block output is returned.
        """
        attended, attn_map = self.attn(self.attn_ln(x))
        if return_attn:
            return attn_map

        hidden = attended + x  # residual around the attention sub-layer
        return self.ffn(self.ffn_ln(hidden)) + hidden  # residual around the FFN sub-layer

Patch embedding

In [99]:
class PatchEmbedding(nn.Module):
    """Cut an image into non-overlapping patches and linearly embed each flattened patch."""

    def __init__(self, image_size, patch_size, in_channels, embed_dim):
        """
        Arguments
        ---------
        image_size : Tuple[int]
            (height, width) of the input image.
        patch_size : Tuple[int]
            (height, width) of one patch.
        in_channels : int
            Number of channels of the input image.
        embed_dim : int
            Dimension of each patch embedding.
        """
        super().__init__()

        image_height, image_width = image_size
        patch_height, patch_width = patch_size

        fits_evenly = image_height % patch_height == 0 and image_width % patch_width == 0
        assert fits_evenly, "パッチサイズは，入力画像のサイズを割り切れる必要があります．"

        num_patches = (image_height // patch_height) * (image_width // patch_width)  # patch count (not used below)
        patch_dim = in_channels * patch_height * patch_width  # length of one flattened patch

        # Step 1 splits the image into patches and flattens them, step 2 embeds them.
        split_and_flatten = Rearrange("b c (h p1) (w p2) -> b (h w) (p1 p2 c)", p1=patch_height, p2=patch_width)
        embed = nn.Linear(patch_dim, embed_dim)
        self.to_patch_embedding = nn.Sequential(split_and_flatten, embed)

    def forward(self, x):
        """
        (B, C, H, W) -> (B, num_patches, embed_dim)

        B: batch size
        C: number of input channels
        H: image height
        W: image width
        """
        return self.to_patch_embedding(x)


Masked autoencoder

In [100]:
def random_indexes(size):
    """
    Draw a random permutation of `size` patches together with its inverse.

    Argument
    --------
    size : int
        Number of patches (equal to the sequence length N).

    Returns
    -------
    (forward_indexes, backward_indexes) : the shuffled positions 0..size-1 and
    the indexes that put a sequence shuffled with them back into its original order.
    """
    permutation = np.arange(size)  # 0, 1, ..., size-1
    np.random.shuffle(permutation)  # in-place shuffle driven by NumPy's global RNG
    inverse = np.argsort(permutation)  # undoes `permutation`

    return permutation, inverse

In [101]:
def take_indexes(sequences, indexes):
    """
    Reorder the patches of every sample along the sequence axis.

    Argument
    --------
    sequences : torch.Tensor
        Patch data of shape (B, N, dim).
    indexes : torch.Tensor
        Integer tensor of shape (B, M) with the patch positions to pick for
        each sample, e.g. the forward or backward indexes derived from
        random_indexes after conversion to a tensor.

    Returns
    -------
    torch.Tensor of shape (B, M, dim) where out[b, i] = sequences[b, indexes[b, i]].
    """
    feature_dim = sequences.shape[-1]
    # torch.gather needs one index per output element, so the per-patch
    # index is copied across the whole feature axis.
    gather_index = indexes.unsqueeze(2).repeat(1, 1, feature_dim)
    return torch.gather(sequences, dim=1, index=gather_index)

In [102]:
class PatchShuffle(nn.Module):
    """Shuffle the patches of every sample independently and keep only the leading part."""

    def __init__(self, ratio):
        """
        ratio : fraction of the patches that is NOT passed on to the encoder.
        """
        super().__init__()
        self.ratio = ratio

    def forward(self, patches):
        """
        B: batch size
        N: sequence length (= number of patches)
        dim: feature dimension (= embedding dimension)

        Returns the kept patches (B, remain_N, dim) plus the forward and
        backward index tensors, both of shape (B, N).
        """
        B, N, dim = patches.shape
        remain_N = int(N * (1 - self.ratio))  # number of patches handed to the encoder

        # One independent permutation (and its inverse) per sample in the batch.
        per_sample = [random_indexes(N) for _ in range(B)]

        def as_index_tensor(which):
            # Stack along the last axis -> (N, B), then transpose -> (B, N).
            stacked = np.stack([pair[which] for pair in per_sample], axis=-1)
            return torch.as_tensor(stacked, dtype=torch.long).T.to(patches.device)

        forward_indexes = as_index_tensor(0)  # shuffles the patches
        backward_indexes = as_index_tensor(1)  # restores the original order

        shuffled = take_indexes(patches, forward_indexes)
        kept = shuffled[:, :remain_N, :]

        return kept, forward_indexes, backward_indexes

In [103]:
class MAE_Encoder(nn.Module):
    """MAE encoder: patch embedding, random masking, class token and a stack of Blocks."""

    def __init__(self, image_size=[290, 290], patch_size=[29, 29], emb_dim=192, num_layer=12,
                 heads=3, dim_head=64, mlp_dim=192, mask_ratio=0.75, dropout=0.):
        """
        Arguments
        ---------

        image_size : List[int]
            Size of the input image.
        patch_size : List[int]
            Size of one patch.
        emb_dim : int
            Embedding dimension.
        num_layer : int
            Number of Blocks in the encoder.
        heads : int
            Number of heads in the multi-head attention.
        dim_head : int
            Width of each attention head.
        mlp_dim : int
            Hidden width of the feed-forward network.
        mask_ratio : float
            Fraction of the input patches that is masked out.
        dropout : float
            Dropout probability.
        """
        super().__init__()
        img_height, img_width = image_size
        patch_height, patch_width = patch_size
        rows = img_height // patch_height
        cols = img_width // patch_width
        num_patches = rows * cols

        # Class token and learnable positional embedding (re-initialised in init_weight).
        self.cls_token = torch.nn.Parameter(torch.randn(1, 1, emb_dim))
        self.pos_embedding = torch.nn.Parameter(torch.randn(1, num_patches, emb_dim))
        self.shuffle = PatchShuffle(mask_ratio)

        # Patch embedding for single-channel input (in_channels=1).
        self.patchify = PatchEmbedding(image_size, patch_size, 1, emb_dim)

        # The encoder itself: `num_layer` Blocks applied in sequence.
        blocks = (Block(emb_dim, heads, dim_head, mlp_dim, dropout) for _ in range(num_layer))
        self.transformer = torch.nn.Sequential(*blocks)

        self.layer_norm = nn.LayerNorm(emb_dim)

        self.init_weight()

    def init_weight(self):
        """Draw the class token and the positional embedding from N(0, 0.02^2)."""
        for param in (self.cls_token, self.pos_embedding):
            torch.nn.init.normal_(param, std=0.02)

    def forward(self, img):
        """
        Encode the visible patches of `img`.

        Returns `(features, backward_indexes)`: the layer-normalised encoder
        output for the class token plus the kept patches, and the indexes
        that restore the original patch order.
        """
        # 1. Split into patches, embed them and add the positional embedding.
        embedded = self.patchify(img) + self.pos_embedding

        # 2. Shuffle the patches randomly and keep only the ones that stay visible.
        visible, forward_indexes, backward_indexes = self.shuffle(embedded)

        # Prepend the class token to every sample.
        cls_tokens = self.cls_token.repeat(visible.shape[0], 1, 1)
        tokens = torch.cat([cls_tokens, visible], dim=1)

        # 3. Run the encoder stack and normalise its output.
        features = self.layer_norm(self.transformer(tokens))

        return features, backward_indexes

In [104]:
class MAE_Decoder(nn.Module):
    """MAE decoder: fills in mask tokens, restores the patch order and reconstructs the image."""

    def __init__(self, image_size=[290, 290], patch_size=[29, 29], emb_dim=192, num_layer=4,
                 heads=3, dim_head=64, mlp_dim=192, dropout=0.):
        """
        Arguments
        ---------

        image_size : List[int]
            Size of the input image.
        patch_size : List[int]
            Size of one patch.
        emb_dim : int
            Embedding dimension.
        num_layer : int
            Number of Blocks in the decoder.
        heads : int
            Number of heads in the multi-head attention.
        dim_head : int
            Width of each attention head.
        mlp_dim : int
            Hidden width of the feed-forward network.
        dropout : float
            Dropout probability.
        """
        super().__init__()
        img_height, img_width = image_size
        patch_height, patch_width = patch_size
        rows = img_height // patch_height
        num_patches = rows * (img_width // patch_width)

        # Learnable mask token and positional embedding (+1 slot for the class token);
        # both are re-initialised in init_weight.
        self.mask_token = torch.nn.Parameter(torch.rand(1, 1, emb_dim))
        self.pos_embedding = torch.nn.Parameter(torch.rand(1, num_patches+1, emb_dim))

        # The decoder itself: `num_layer` Blocks applied in sequence.
        blocks = (Block(emb_dim, heads, dim_head, mlp_dim, dropout) for _ in range(num_layer))
        self.transformer = torch.nn.Sequential(*blocks)

        # Head mapping every embedding back to the pixels of one single-channel patch.
        self.head = torch.nn.Linear(emb_dim, 1 * patch_height * patch_width)
        # Reassembles (B, N, patch pixels) into an image tensor (B, C, H, W).
        self.patch2img = Rearrange("b (h w) (c p1 p2) -> b c (h p1) (w p2)", p1=patch_height, p2=patch_width, h=rows)

        self.init_weight()

    def init_weight(self):
        """Draw the mask token and the positional embedding from N(0, 0.02^2)."""
        for param in (self.mask_token, self.pos_embedding):
            torch.nn.init.normal_(param, std=0.02)

    def forward(self, features, backward_indexes):
        """
        Reconstruct the image from the encoder output.

        Returns `(img, mask)`, both reshaped by `patch2img`; `mask` is 1 on
        the patches that were hidden from the encoder and 0 elsewhere.
        """
        # Encoder sequence length: class token + visible patches.
        T = features.shape[1]
        batch = features.shape[0]

        # The class token occupies slot 0, so shift every index by one and put 0 in front.
        # `.to(tensor)` copies dtype and device from the given tensor.
        cls_slot = torch.zeros(backward_indexes.shape[0], 1).to(backward_indexes)
        backward_indexes = torch.cat([cls_slot, backward_indexes+1], dim=1)

        # 1. Append mask tokens and restore the original order.
        # (B, N*(1-mask_ratio)+1, dim) -> (B, N+1, dim)
        num_masked = backward_indexes.shape[1] - T
        mask_tokens = self.mask_token.repeat(batch, num_masked, 1)
        tokens = torch.cat([features, mask_tokens], dim=1)
        tokens = take_indexes(tokens, backward_indexes)
        tokens = tokens + self.pos_embedding

        tokens = self.transformer(tokens)

        # Drop the class token: (B, N+1, dim) -> (B, N, dim)
        tokens = tokens[:, 1:, :]

        # 2. Reconstruct the pixels: (B, N, dim) -> (B, N, patch_height * patch_width)
        patches = self.head(tokens)

        # MAE evaluates its loss on the masked patches only, so a mask is returned too.
        # In shuffled order the first T-1 patches are the visible ones (T counts the class token).
        mask = torch.zeros_like(patches)
        mask[:, T-1:] = 1
        mask = take_indexes(mask, backward_indexes[:, 1:] - 1)

        return self.patch2img(patches), self.patch2img(mask)

In [105]:
class MAE_ViT(nn.Module):
    """Masked autoencoder that couples MAE_Encoder with MAE_Decoder."""

    def __init__(self, image_size=[490, 490], patch_size=[49, 49], emb_dim=192,
                 enc_layers=12, enc_heads=3, enc_dim_head=64, enc_mlp_dim=768,
                 dec_layers=4, dec_heads=3, dec_dim_head=64, dec_mlp_dim=768,
                 mask_ratio=0.75, dropout=0.):
        """
        Arguments
        ---------
        image_size : List[int]
            Size of the input image.
        patch_size : List[int]
            Size of one patch.
        emb_dim : int
            Embedding dimension.
        {enc/dec}_layers : int
            Number of Blocks in the encoder / decoder.
        {enc/dec}_heads : int
            Number of attention heads in the encoder / decoder.
        {enc/dec}_dim_head : int
            Width of each attention head in the encoder / decoder.
        {enc/dec}_mlp_dim : int
            Hidden width of the feed-forward network in the encoder / decoder.
        mask_ratio : float
            Fraction of the input patches that is masked out.
        dropout : float
            Dropout probability.
        """
        super().__init__()

        self.encoder = MAE_Encoder(
            image_size, patch_size, emb_dim, enc_layers,
            enc_heads, enc_dim_head, enc_mlp_dim, mask_ratio, dropout,
        )
        self.decoder = MAE_Decoder(
            image_size, patch_size, emb_dim, dec_layers,
            dec_heads, dec_dim_head, dec_mlp_dim, dropout,
        )

    def forward(self, img):
        """Encode the visible patches of `img` and return `(reconstruction, mask)` from the decoder."""
        features, backward_indexes = self.encoder(img)
        return self.decoder(features, backward_indexes)

    def get_last_selfattention(self, x):
        """
        Return the attention map of the last encoder Block for `x`.

        No masking is applied here: every patch is embedded and passed
        through the encoder. Returns None when the encoder has no Blocks.
        """
        enc = self.encoder
        tokens = enc.patchify(x) + enc.pos_embedding

        # Prepend the class token to every sample.
        cls_tokens = enc.cls_token.repeat(tokens.shape[0], 1, 1)
        tokens = torch.cat([cls_tokens, tokens], dim=1)

        blocks = list(enc.transformer)
        if not blocks:
            return None
        for block in blocks[:-1]:
            tokens = block(tokens)
        return blocks[-1](tokens, return_attn=True)

学習率スケジューラ

In [106]:
# cosine scheduler
class CosineScheduler:
    """Cosine learning-rate decay preceded by a linear warmup.

    Note: this definition replaces the `CosineScheduler` name imported from
    `src.utils` at the top of the notebook.
    """

    def __init__(self, epochs, lr, warmup_length=5):
        """
        Arguments
        ---------
        epochs : int
            Total number of training epochs.
        lr : float
            Peak learning rate.
        warmup_length : int
            Number of epochs over which the warmup is applied.
        """
        self.epochs = epochs
        self.lr = lr
        self.warmup = warmup_length

    def __call__(self, epoch):
        """
        Return the learning rate for `epoch`.

        Arguments
        ---------
        epoch : int
            Current epoch (0-based).
        """
        decay_span = self.epochs - self.warmup
        if decay_span > 0:
            # Position inside the decay phase, clamped to [0, 1].
            progress = np.clip((epoch - self.warmup) / decay_span, 0.0, 1.0)
        else:
            # The warmup covers the whole run, so there is no decay phase.
            # Without this guard `epochs == warmup_length` divides by zero and
            # `epochs < warmup_length` drives the learning rate to 0.
            progress = 0.0
        lr = self.lr * 0.5 * (1. + np.cos(np.pi * progress))

        if self.warmup:
            # Linear ramp: (epoch + 1) / warmup, capped at 1 once the warmup is over.
            lr = lr * min(1., (epoch+1) / self.warmup)

        return lr

学習率変更関数

In [107]:
def set_lr(lr, optimizer):
    """Write `lr` into the "lr" entry of every parameter group of `optimizer`."""
    groups = optimizer.param_groups
    for position in range(len(groups)):
        groups[position]["lr"] = lr

### Linear probing

In [108]:
# Preprocessing variant; the same tag is appended to the submission file name later on.
aug = '_baseline_resize'
transform_train = DatPreprocess(aug_sel=aug)

# ------------------
#    Dataloader
# ------------------
loader_args = dict(batch_size=64, num_workers=4, pin_memory=True)

# All three splits are read from 'data' and share one preprocessing transform.
train_set, val_set, test_set = (
    ThingsMEGDataset_aug1(split, 'data', transform=transform_train)
    for split in ("train", "val", "test")
)

# Only the training loader shuffles its samples.
train_loader = torch.utils.data.DataLoader(train_set, shuffle=True, **loader_args)
val_loader = torch.utils.data.DataLoader(val_set, shuffle=False, **loader_args)
test_loader = torch.utils.data.DataLoader(test_set, shuffle=False, **loader_args)


クラス分類器

In [169]:
class Classifier(nn.Module):
    """Classification model that reuses a (pre-trained) MAE encoder and adds a linear head."""

    def __init__(self, encoder: MAE_Encoder, num_classes=1854):
        """
        Arguments
        ---------
        encoder : MAE_Encoder
            Encoder whose class token, positional embedding, patch embedding,
            Blocks and final LayerNorm are shared with this model (not copied).
        num_classes : int
            Number of output classes (default=1854).
        """
        super().__init__()
        self.cls_token = encoder.cls_token
        self.pos_embedding = encoder.pos_embedding
        self.patchify = encoder.patchify
        self.transformer = encoder.transformer
        self.layer_norm = encoder.layer_norm

        embed_dim = self.pos_embedding.shape[-1]
        self.head = nn.Linear(embed_dim, num_classes)

    def _embed(self, img):
        """Patch-embed `img`, add the positional embedding and prepend the class token."""
        tokens = self.patchify(img) + self.pos_embedding
        cls_tokens = self.cls_token.repeat(tokens.shape[0], 1, 1)
        return torch.cat([cls_tokens, tokens], dim=1)

    def forward(self, img):
        """Return class logits computed from the class-token feature; no patch masking is applied."""
        features = self.layer_norm(self.transformer(self._embed(img)))
        return self.head(features[:, 0])  # only the class token is fed to the head

    def get_last_selfattention(self, x):
        """Return the attention map of the last Block for `x` (None when there are no Blocks)."""
        tokens = self._embed(x)
        blocks = list(self.transformer)
        if not blocks:
            return None
        for block in blocks[:-1]:
            tokens = block(tokens)
        return blocks[-1](tokens, return_attn=True)

In [170]:
# Hyperparameters; the keys match the keyword arguments of MAE_ViT.
config = dict(
    image_size=[290, 290],
    patch_size=[29, 29],
    emb_dim=128,
    enc_layers=12,
    enc_heads=4,
    enc_dim_head=128,
    enc_mlp_dim=128,
    dec_layers=4,
    dec_heads=4,
    dec_dim_head=64,
    dec_mlp_dim=64,
    mask_ratio=0.75,
    dropout=0.,
)

In [171]:
# Checkpoint whose state dict is loaded into MAE_ViT before the classifier is built.
model_path_meg='./model/pretrain_meg_01.pth'
#model_path_img='./model/pretrain_01.pth'

In [172]:
device = "cuda" if torch.cuda.is_available() else "cpu"

# Rebuild the MAE with the pre-training hyperparameters and restore its weights.
# NOTE(review): torch.load unpickles the file, so only load checkpoints from a trusted source.
pretrained_model = MAE_ViT(**config).to(device)
pretrained_model.load_state_dict(torch.load(model_path_meg, map_location=device))


encoder = pretrained_model.encoder

# Linear probing: the optimizer below only receives the head's parameters, so
# the encoder weights are never updated. Freezing them stops autograd from
# computing encoder gradients that would be thrown away on every step.
for encoder_param in encoder.parameters():
    encoder_param.requires_grad_(False)

# Model definition: the encoder modules are shared with the classifier, the head is new.
model = Classifier(encoder).to(device)

epochs = 50
lr = 0.0005
warmup_length = int(epochs*0.05)  # 5% of the epochs are used as warmup
#batch_size = 128
optimizer = optim.AdamW(model.head.parameters(), lr=lr, betas=(0.9, 0.999), weight_decay=0.05)  # train the classification head only
scheduler = CosineScheduler(epochs, lr, warmup_length)
criterion = nn.CrossEntropyLoss()

分類器の学習

In [173]:
# Classifier checkpoints written by the training loop:
# "best" is saved whenever the validation accuracy improves, "last" after every epoch.
class_path_best="./model/classifier_ens_best_01.pth"
class_path_last="./model/classifier_ens_last_01.pth"

In [174]:
from torchmetrics import Accuracy
from termcolor import cprint

max_val_acc = 0
# Top-10 accuracy over all classes of the training set.
accuracy = Accuracy(
    task="multiclass", num_classes=train_set.num_classes, top_k=10
).to(device)

# Mixed precision is only meaningful on CUDA; on CPU both helpers are disabled.
use_amp = str(device).startswith("cuda")
# One scaler for the whole run: creating it inside the epoch loop would reset
# the loss-scale state at the start of every epoch.
scaler = torch.cuda.amp.GradScaler(enabled=use_amp)

for epoch in range(epochs):
    # The learning rate is set once per epoch from the scheduler.
    new_lr = scheduler(epoch)
    set_lr(new_lr, optimizer)

    train_loss, train_acc, val_loss, val_acc = [], [], [], []

    model.train()
    for x, t in tqdm(train_loader, desc="Train"):
        x, t = x.to(device), t.to(device)
        with torch.cuda.amp.autocast(enabled=use_amp):
            pred = model(x)
            loss = criterion(pred, t)

        train_loss.append(loss.item())

        # Scaled backward pass; the scaler unscales the gradients inside step().
        optimizer.zero_grad()
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        acc = accuracy(pred, t)
        train_acc.append(acc.item())

    # Validation runs in eval mode and without building an autograd graph.
    model.eval()
    with torch.no_grad():
        for x, t in val_loader:
            x, t = x.to(device), t.to(device)
            pred = model(x)

            val_loss.append(criterion(pred, t).item())
            val_acc.append(accuracy(pred, t).item())

    print(f"Epoch {epoch+1}/{epochs} | train loss: {np.mean(train_loss):.3f} | train acc: {np.mean(train_acc):.3f} | val loss: {np.mean(val_loss):.3f} | val acc: {np.mean(val_acc):.3f}")
    torch.save(model.state_dict(), class_path_last)

    # Keep a separate copy of the weights with the best mean validation accuracy so far.
    epoch_val_acc = np.mean(val_acc)
    if epoch_val_acc > max_val_acc:
        cprint("New best.", "cyan")
        torch.save(model.state_dict(), class_path_best)
        max_val_acc = epoch_val_acc

# Final save; this is the only save that happens when `epochs` is 0.
torch.save(model.state_dict(), class_path_last)

Train: 100%|██████████| 1027/1027 [01:37<00:00, 10.55it/s]


Epoch 1/50 | train loss: 7.528 | train acc: 0.008 | val loss: 7.510 | val acc: 0.010
[36mNew best.[0m


Train: 100%|██████████| 1027/1027 [01:43<00:00,  9.97it/s]


Epoch 2/50 | train loss: 7.489 | train acc: 0.015 | val loss: 7.487 | val acc: 0.014
[36mNew best.[0m


Train: 100%|██████████| 1027/1027 [01:36<00:00, 10.61it/s]


Epoch 3/50 | train loss: 7.437 | train acc: 0.026 | val loss: 7.476 | val acc: 0.018
[36mNew best.[0m


Train: 100%|██████████| 1027/1027 [01:31<00:00, 11.26it/s]


Epoch 4/50 | train loss: 7.393 | train acc: 0.037 | val loss: 7.469 | val acc: 0.021
[36mNew best.[0m


Train: 100%|██████████| 1027/1027 [01:31<00:00, 11.18it/s]


Epoch 5/50 | train loss: 7.354 | train acc: 0.044 | val loss: 7.464 | val acc: 0.021
[36mNew best.[0m


Train: 100%|██████████| 1027/1027 [01:34<00:00, 10.88it/s]


Epoch 6/50 | train loss: 7.317 | train acc: 0.050 | val loss: 7.460 | val acc: 0.023
[36mNew best.[0m


Train: 100%|██████████| 1027/1027 [01:38<00:00, 10.41it/s]


Epoch 7/50 | train loss: 7.283 | train acc: 0.054 | val loss: 7.456 | val acc: 0.023
[36mNew best.[0m


Train: 100%|██████████| 1027/1027 [01:35<00:00, 10.80it/s]


Epoch 8/50 | train loss: 7.251 | train acc: 0.057 | val loss: 7.453 | val acc: 0.023


Train: 100%|██████████| 1027/1027 [01:33<00:00, 10.97it/s]


Epoch 9/50 | train loss: 7.221 | train acc: 0.060 | val loss: 7.451 | val acc: 0.023


Train: 100%|██████████| 1027/1027 [01:31<00:00, 11.23it/s]


Epoch 10/50 | train loss: 7.193 | train acc: 0.062 | val loss: 7.450 | val acc: 0.024
[36mNew best.[0m


Train: 100%|██████████| 1027/1027 [01:33<00:00, 11.03it/s]


Epoch 11/50 | train loss: 7.167 | train acc: 0.064 | val loss: 7.449 | val acc: 0.024
[36mNew best.[0m


Train: 100%|██████████| 1027/1027 [01:32<00:00, 11.05it/s]


Epoch 12/50 | train loss: 7.143 | train acc: 0.066 | val loss: 7.448 | val acc: 0.024
[36mNew best.[0m


Train: 100%|██████████| 1027/1027 [01:32<00:00, 11.15it/s]


Epoch 13/50 | train loss: 7.120 | train acc: 0.070 | val loss: 7.448 | val acc: 0.024


Train: 100%|██████████| 1027/1027 [01:32<00:00, 11.15it/s]


Epoch 14/50 | train loss: 7.099 | train acc: 0.072 | val loss: 7.447 | val acc: 0.024


Train: 100%|██████████| 1027/1027 [01:32<00:00, 11.09it/s]


Epoch 15/50 | train loss: 7.079 | train acc: 0.073 | val loss: 7.447 | val acc: 0.024


Train: 100%|██████████| 1027/1027 [01:32<00:00, 11.06it/s]


Epoch 16/50 | train loss: 7.060 | train acc: 0.076 | val loss: 7.448 | val acc: 0.023


Train: 100%|██████████| 1027/1027 [01:30<00:00, 11.29it/s]


Epoch 17/50 | train loss: 7.043 | train acc: 0.079 | val loss: 7.448 | val acc: 0.024


Train: 100%|██████████| 1027/1027 [01:34<00:00, 10.88it/s]


Epoch 18/50 | train loss: 7.027 | train acc: 0.082 | val loss: 7.449 | val acc: 0.023


Train: 100%|██████████| 1027/1027 [01:31<00:00, 11.19it/s]


Epoch 19/50 | train loss: 7.012 | train acc: 0.083 | val loss: 7.449 | val acc: 0.024


Train: 100%|██████████| 1027/1027 [01:31<00:00, 11.24it/s]


Epoch 20/50 | train loss: 6.998 | train acc: 0.086 | val loss: 7.450 | val acc: 0.024


Train: 100%|██████████| 1027/1027 [01:32<00:00, 11.08it/s]


Epoch 21/50 | train loss: 6.985 | train acc: 0.088 | val loss: 7.450 | val acc: 0.024


Train: 100%|██████████| 1027/1027 [01:32<00:00, 11.16it/s]


Epoch 22/50 | train loss: 6.973 | train acc: 0.090 | val loss: 7.451 | val acc: 0.024
[36mNew best.[0m


Train: 100%|██████████| 1027/1027 [01:32<00:00, 11.16it/s]


Epoch 23/50 | train loss: 6.962 | train acc: 0.092 | val loss: 7.452 | val acc: 0.024


Train: 100%|██████████| 1027/1027 [01:32<00:00, 11.11it/s]


Epoch 24/50 | train loss: 6.952 | train acc: 0.094 | val loss: 7.452 | val acc: 0.024


Train: 100%|██████████| 1027/1027 [01:33<00:00, 11.00it/s]


Epoch 25/50 | train loss: 6.942 | train acc: 0.096 | val loss: 7.453 | val acc: 0.023


Train: 100%|██████████| 1027/1027 [01:32<00:00, 11.16it/s]


Epoch 26/50 | train loss: 6.933 | train acc: 0.098 | val loss: 7.454 | val acc: 0.024


Train: 100%|██████████| 1027/1027 [01:32<00:00, 11.05it/s]


Epoch 27/50 | train loss: 6.925 | train acc: 0.100 | val loss: 7.454 | val acc: 0.023


Train: 100%|██████████| 1027/1027 [01:34<00:00, 10.90it/s]


Epoch 28/50 | train loss: 6.917 | train acc: 0.101 | val loss: 7.455 | val acc: 0.023


Train: 100%|██████████| 1027/1027 [01:32<00:00, 11.16it/s]


Epoch 29/50 | train loss: 6.910 | train acc: 0.103 | val loss: 7.456 | val acc: 0.023


Train: 100%|██████████| 1027/1027 [01:32<00:00, 11.12it/s]


Epoch 30/50 | train loss: 6.903 | train acc: 0.105 | val loss: 7.456 | val acc: 0.023


Train: 100%|██████████| 1027/1027 [01:33<00:00, 11.03it/s]


Epoch 31/50 | train loss: 6.897 | train acc: 0.105 | val loss: 7.457 | val acc: 0.023


Train: 100%|██████████| 1027/1027 [01:32<00:00, 11.12it/s]


Epoch 32/50 | train loss: 6.892 | train acc: 0.107 | val loss: 7.457 | val acc: 0.023


Train: 100%|██████████| 1027/1027 [01:32<00:00, 11.07it/s]


Epoch 33/50 | train loss: 6.887 | train acc: 0.108 | val loss: 7.457 | val acc: 0.023


Train: 100%|██████████| 1027/1027 [01:33<00:00, 10.96it/s]


Epoch 34/50 | train loss: 6.882 | train acc: 0.109 | val loss: 7.458 | val acc: 0.023


Train: 100%|██████████| 1027/1027 [01:36<00:00, 10.64it/s]


Epoch 35/50 | train loss: 6.878 | train acc: 0.110 | val loss: 7.458 | val acc: 0.023


Train: 100%|██████████| 1027/1027 [02:14<00:00,  7.66it/s]


Epoch 36/50 | train loss: 6.875 | train acc: 0.111 | val loss: 7.458 | val acc: 0.023


Train: 100%|██████████| 1027/1027 [01:34<00:00, 10.83it/s]


Epoch 37/50 | train loss: 6.871 | train acc: 0.112 | val loss: 7.459 | val acc: 0.023


Train: 100%|██████████| 1027/1027 [01:32<00:00, 11.13it/s]


Epoch 38/50 | train loss: 6.868 | train acc: 0.113 | val loss: 7.459 | val acc: 0.023


Train: 100%|██████████| 1027/1027 [01:34<00:00, 10.92it/s]


Epoch 39/50 | train loss: 6.866 | train acc: 0.113 | val loss: 7.459 | val acc: 0.023


Train: 100%|██████████| 1027/1027 [01:35<00:00, 10.78it/s]


Epoch 40/50 | train loss: 6.863 | train acc: 0.114 | val loss: 7.459 | val acc: 0.023


Train: 100%|██████████| 1027/1027 [01:33<00:00, 11.00it/s]


Epoch 41/50 | train loss: 6.861 | train acc: 0.114 | val loss: 7.460 | val acc: 0.023


Train: 100%|██████████| 1027/1027 [01:31<00:00, 11.25it/s]


Epoch 42/50 | train loss: 6.860 | train acc: 0.114 | val loss: 7.460 | val acc: 0.023


Train: 100%|██████████| 1027/1027 [01:32<00:00, 11.05it/s]


Epoch 43/50 | train loss: 6.858 | train acc: 0.115 | val loss: 7.460 | val acc: 0.023


Train: 100%|██████████| 1027/1027 [01:32<00:00, 11.14it/s]


Epoch 44/50 | train loss: 6.857 | train acc: 0.115 | val loss: 7.460 | val acc: 0.023


Train: 100%|██████████| 1027/1027 [01:33<00:00, 10.97it/s]


Epoch 45/50 | train loss: 6.856 | train acc: 0.116 | val loss: 7.460 | val acc: 0.023


Train: 100%|██████████| 1027/1027 [01:32<00:00, 11.11it/s]


Epoch 46/50 | train loss: 6.856 | train acc: 0.116 | val loss: 7.460 | val acc: 0.023


Train: 100%|██████████| 1027/1027 [01:33<00:00, 10.93it/s]


Epoch 47/50 | train loss: 6.855 | train acc: 0.116 | val loss: 7.460 | val acc: 0.023


Train: 100%|██████████| 1027/1027 [01:33<00:00, 10.97it/s]


Epoch 48/50 | train loss: 6.855 | train acc: 0.116 | val loss: 7.460 | val acc: 0.023


Train: 100%|██████████| 1027/1027 [01:32<00:00, 11.11it/s]


Epoch 49/50 | train loss: 6.854 | train acc: 0.116 | val loss: 7.460 | val acc: 0.023


Train: 100%|██████████| 1027/1027 [01:33<00:00, 10.94it/s]


Epoch 50/50 | train loss: 6.854 | train acc: 0.116 | val loss: 7.460 | val acc: 0.023


In [175]:
# Output directory for the submission array saved by the next cell.
savedir="./outputs/2024-07-17/22-06-00/"

In [176]:
# Restore the weights with the best validation accuracy before predicting.
# NOTE(review): torch.load unpickles the file, so only load checkpoints from a trusted source.
model.load_state_dict(torch.load(class_path_best, map_location=device))

# np.save does not create missing directories, so make sure the target exists.
os.makedirs(savedir, exist_ok=True)

preds = []
model.eval()
# Inference only: no_grad avoids building an autograd graph for every batch.
with torch.no_grad():
    # The progress bar is labelled "Validation" but the loop runs over the test loader.
    for X in tqdm(test_loader, desc="Validation"):
        preds.append(model(X.to(device)).cpu())

# One row of class scores per test sample.
preds = torch.cat(preds, dim=0).numpy()
np.save(os.path.join(savedir, "submission"+aug), preds)
cprint(f"Submission {preds.shape} saved at {savedir}", "cyan")

Validation: 100%|██████████| 257/257 [00:26<00:00,  9.80it/s]


[36mSubmission (16432, 1854) saved at ./outputs/2024-07-17/22-06-00/[0m
