In [2]:
from datetime import datetime
from pathlib import Path

import albumentations as A
import cv2
import numpy as np
import polars as pl
import pytz
import timm
import torch
from albumentations.pytorch import ToTensorV2
from omegaconf import OmegaConf
from sklearn.model_selection import GroupKFold
from torch import nn
from torch.utils.data import DataLoader, Dataset
from tqdm.auto import tqdm
from transformers import get_cosine_schedule_with_warmup

from src.config import cfg
from src.dir import create_dir
from src.seed import seed_everything

# Use the name of the current working directory as the experiment number,
# then print the fully resolved config for the record.
cfg.exp_number = Path().resolve().name
print(OmegaConf.to_yaml(cfg, resolve=True))

# Seed through the project helper and let polars display strings up to 1000 characters.
seed_everything(cfg.seed)
pl.Config.set_fmt_str_lengths(1000)

# Run on CUDA when torch can see a GPU, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")


exp_number: '002'
run_time: base
data:
  input_root: ../../data/input
  train_path: ../../data/input/train_features.csv
  test_path: ../../data/input/test_features.csv
  sample_submission_path: ../../data/input/sample_submission.csv
  img_root: ../../data/input/images
  output_root: ../../data/output
  results_root: ../../results
  results_path: ../../results/002/base
seed: 319
n_splits: 5
target_cols:
- x_0
- y_0
- z_0
- x_1
- y_1
- z_1
- x_2
- y_2
- z_2
- x_3
- y_3
- z_3
- x_4
- y_4
- z_4
- x_5
- y_5
- z_5
cnn:
  model_name: tf_efficientnet_b0_ns
  size: 224
  pretrained: true
  in_chans: 9
  target_size: 18
  lr: 0.001
  num_epochs: 20
  batch_size: 64

Using device: cuda


### exp002

- NNモデルを作ってみるnotebook
- とりあえずCNNで特徴抽出 → 全結合層で回帰というシンプルなアーキテクチャを採用

### データの読み込み

In [3]:
# Read the three input CSVs, letting polars parse date-like columns.
train, test, sample_submission = (
    pl.read_csv(csv_path, try_parse_dates=True)
    for csv_path in (
        cfg.data.train_path,
        cfg.data.test_path,
        cfg.data.sample_submission_path,
    )
)

# Stack train and test into one frame (original note: intended for label encoding).
# This is built before the `scene` column is added below.
train_test = pl.concat([train, test], how="diagonal")

# `scene` is the part of ID before the first "_"; it is the GroupKFold group key.
scene_expr = pl.col("ID").str.split("_").list[0].alias("scene")
train = train.with_columns(scene_expr)
test = test.with_columns(scene_expr)

# Cross-validation splitter.
gkf = GroupKFold(n_splits=cfg.n_splits)


In [4]:
train.head()


ID,vEgo,aEgo,steeringAngleDeg,steeringTorque,brake,brakePressed,gas,gasPressed,gearShifter,leftBlinker,rightBlinker,x_0,y_0,z_0,x_1,y_1,z_1,x_2,y_2,z_2,x_3,y_3,z_3,x_4,y_4,z_4,x_5,y_5,z_5,scene
str,f64,f64,f64,f64,f64,bool,f64,bool,str,bool,bool,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,str
"""00066be8e20318869c38c66be466631a_320""",5.701526,1.538456,-2.165777,-139.0,0.0,False,0.25,True,"""drive""",False,False,2.82959,0.032226,0.045187,6.231999,0.065895,0.107974,9.785009,0.124972,0.203649,13.485472,0.163448,0.302818,17.574227,0.174289,0.406331,21.951269,0.199503,0.485079,"""00066be8e20318869c38c66be466631a"""
"""00066be8e20318869c38c66be466631a_420""",11.176292,0.279881,-11.625697,-44.0,0.0,False,0.0,False,"""drive""",False,True,4.970268,-0.007936,0.005028,10.350489,-0.032374,-0.020701,15.770054,0.084073,0.008645,21.132415,0.391343,0.036335,26.316489,0.843124,0.065,31.383814,1.42507,0.073083,"""00066be8e20318869c38c66be466631a"""
"""00066be8e20318869c38c66be466631a_520""",10.472548,0.231099,-2.985105,-132.0,0.0,False,0.18,True,"""drive""",False,False,4.815701,-0.000813,0.017577,10.153522,-0.0278,0.026165,15.446539,-0.155987,0.040397,20.61816,-0.356932,0.058765,25.677387,-0.576985,0.102859,30.460033,-0.841894,0.152889,"""00066be8e20318869c38c66be466631a"""
"""000fb056f97572d384bae4f5fc1e0f28_120""",6.055565,-0.117775,7.632668,173.0,0.0,False,0.0,False,"""drive""",False,False,2.812608,0.033731,0.0059,5.975378,0.137848,0.01621,9.186793,0.322997,0.031626,12.37311,0.603145,0.031858,15.703514,0.960717,0.043479,19.311182,1.374655,0.058754,"""000fb056f97572d384bae4f5fc1e0f28"""
"""000fb056f97572d384bae4f5fc1e0f28_20""",3.316744,1.276733,-31.725477,-114.0,0.0,False,0.255,True,"""drive""",False,False,1.55186,-0.041849,-0.008847,3.675162,-0.125189,-0.013725,6.113567,-0.239161,-0.012887,8.770783,-0.381813,-0.003898,11.619313,-0.554488,0.011393,14.657048,-0.7788,0.044243,"""000fb056f97572d384bae4f5fc1e0f28"""


### データ拡張


In [5]:
def get_train_transform():
    """Build the training-time albumentations pipeline.

    Steps, in order: resize to ``cfg.cnn.size`` square; with probability 0.4 apply
    one of Gaussian noise / Gaussian blur / motion blur; normalise with the
    ImageNet mean and std; convert to a tensor.

    Returns:
        A.Compose: the composed transform, called as ``transform(image=img)``.
    """
    side = cfg.cnn.size
    degradations = [
        A.GaussNoise(var_limit=[10, 50]),
        A.GaussianBlur(),
        A.MotionBlur(),
    ]
    steps = [
        A.Resize(side, side),
        A.OneOf(degradations, p=0.4),
        A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ToTensorV2(),
    ]
    return A.Compose(steps)


def get_valid_transform():
    """Build the deterministic validation/test albumentations pipeline.

    Resize to ``cfg.cnn.size`` square, normalise with the ImageNet mean and std,
    and convert to a tensor. No random augmentation is applied.

    Returns:
        A.Compose: the composed transform, called as ``transform(image=img)``.
    """
    side = cfg.cnn.size
    steps = [
        A.Resize(side, side),
        A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ToTensorV2(),
    ]
    return A.Compose(steps)


### データセット

In [6]:
class CustomDataset(Dataset):
    """Dataset that stacks three frames per sample along the channel axis.

    Each row's ``ID`` names a folder under ``img_dir`` that must contain the files
    listed in ``IMG_NAMES``. Every frame is read with OpenCV, converted from BGR to
    RGB, passed through ``transform`` on its own, and the three results are
    concatenated on dim 0.

    Args:
        df: Table with an ``ID`` column (plus ``cfg.target_cols`` when ``is_train``).
            It is indexed as ``df[idx]`` and the result is used like a one-row
            polars frame (``row["ID"].item()``) -- TODO confirm if another frame
            type is ever passed.
        img_dir: Root directory holding one sub-folder per ``ID``.
        transform: Callable invoked as ``transform(image=img)`` that returns a
            mapping with an ``"image"`` tensor. When None, a resize + ImageNet
            normalise + ``ToTensorV2`` pipeline is used.
        is_train: When True, ``__getitem__`` returns ``(image, target)``;
            otherwise it returns only ``image``.
    """

    # File names read from every sample folder, in channel-stacking order.
    IMG_NAMES = ("image_t-1.0.png", "image_t-0.5.png", "image_t.png")

    def __init__(self, df, img_dir, transform=None, is_train=True):
        self.df = df
        self.img_dir = Path(img_dir)
        self.is_train = is_train

        # Default preprocessing when the caller does not supply a transform.
        if transform is None:
            self.transform = A.Compose(
                [
                    A.Resize(cfg.cnn.size, cfg.cnn.size),
                    A.Normalize(
                        mean=[0.485, 0.456, 0.406],  # standard ImageNet mean
                        std=[0.229, 0.224, 0.225],  # standard ImageNet std
                    ),
                    ToTensorV2(),
                ]
            )
        else:
            self.transform = transform

        self.target_cols = cfg.target_cols

    def __len__(self):
        """Return the number of rows in the backing table."""
        return len(self.df)

    def __getitem__(self, idx):
        """Load, transform and stack the three frames of sample ``idx``.

        Returns:
            ``(image, target)`` when ``is_train`` is True, else ``image``. ``image``
            is the channel-wise concatenation of the three transformed frames;
            ``target`` is a float32 tensor of the ``target_cols`` values.

        Raises:
            FileNotFoundError: if any expected frame is missing or cannot be decoded.
        """
        row = self.df[idx]
        img_folder = self.img_dir / row["ID"].item()

        transformed_imgs = []
        for img_name in self.IMG_NAMES:
            img_path = img_folder / img_name
            img = cv2.imread(str(img_path))
            # cv2.imread signals failure by returning None instead of raising;
            # fail here with the offending path rather than inside cvtColor.
            if img is None:
                raise FileNotFoundError(f"Could not read image (missing or unreadable): {img_path}")
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

            # NOTE(review): the transform runs once per frame, so any random
            # augmentation is drawn independently for each of the three frames.
            transformed_imgs.append(self.transform(image=img)["image"])

        # Concatenate along the channel axis -> (C*3, H, W).
        img_tensor = torch.cat(transformed_imgs, dim=0)

        if not self.is_train:
            return img_tensor

        # The one-row selection converts to a (1, n_targets) array; drop the row axis.
        target = torch.tensor(row[self.target_cols].to_numpy(), dtype=torch.float32).squeeze(0)
        return img_tensor, target


### モデル

In [7]:
class CustomModel(nn.Module):
    """timm encoder followed by a single linear regression head.

    Args:
        cfg: Model config exposing ``model_name``, ``pretrained``, ``in_chans`` and
            ``target_size``.
        pretrained: Optional override for ``cfg.pretrained``. ``None`` (the default)
            keeps the config value, so ``CustomModel(cfg)`` behaves as before.
            Pass ``False`` to skip loading pretrained weights, e.g. when a
            checkpoint is loaded immediately afterwards.
        target_size: Optional override for ``cfg.target_size`` (number of outputs).
        model_name: Optional override for ``cfg.model_name``.

    Attributes:
        encoder: timm backbone created with ``num_classes=0``; its output is fed
            straight into ``fc``.
        n_features: ``encoder.num_features``, the input width of ``fc``.
        fc: Linear head mapping ``n_features`` -> ``target_size``.
    """

    def __init__(self, cfg, pretrained=None, target_size=None, model_name=None):
        super().__init__()

        # Explicit arguments take precedence over the config; previously
        # `pretrained` and `model_name` were accepted but silently ignored.
        encoder_name = cfg.model_name if model_name is None else model_name
        use_pretrained = cfg.pretrained if pretrained is None else pretrained

        self.encoder = timm.create_model(
            encoder_name, pretrained=use_pretrained, num_classes=0, in_chans=cfg.in_chans
        )

        self.n_features = self.encoder.num_features

        self.target_size = cfg.target_size if target_size is None else target_size

        self.fc = nn.Sequential(nn.Linear(self.n_features, self.target_size))

    def get_embedding(self, image):
        """Return the encoder output for ``image`` without tracking gradients.

        The caller is responsible for putting the model in eval mode first.
        """
        with torch.no_grad():
            feature = self.encoder(image)
        return feature

    def forward(self, image):
        """Encode ``image`` and apply the linear head; returns the head's output."""
        feature = self.encoder(image)
        output = self.fc(feature)
        return output


In [8]:
# Instantiate the model once from the CNN config; `model` is rebound by the
# training and inference cells further down.
model = CustomModel(cfg.cnn)


  model = create_fn(


### 学習

In [8]:
# Debug switch: when True, train for a single epoch on the first 100 rows only.
DEBUG = False

if DEBUG:
    cfg.cnn.num_epochs = 1
    train = train.head(100)


In [9]:
# Create the directory that holds this run's artifacts. cfg.run_time is stamped with
# the current Tokyo time first, so cfg.data.results_path resolves to a fresh folder.
japan_tz = pytz.timezone("Asia/Tokyo")
cfg.run_time = datetime.now(japan_tz).strftime("%Y%m%d_%H%M%S")
create_dir(cfg.data.results_path)

# Out-of-fold prediction buffer, filled one validation fold at a time.
oof_predictions = np.zeros((len(train), len(cfg.target_cols)))
models = {}

for fold, (train_idx, valid_idx) in enumerate(gkf.split(train, groups=train["scene"])):
    print(f"Fold {fold + 1}")
    checkpoint_path = f"{cfg.data.results_path}/model_fold{fold}.pth"

    # Datasets / loaders: random augmentation for training, deterministic for validation.
    train_dataset = CustomDataset(train[train_idx], cfg.data.img_root, transform=get_train_transform())
    valid_dataset = CustomDataset(train[valid_idx], cfg.data.img_root, transform=get_valid_transform())

    train_loader = DataLoader(train_dataset, batch_size=cfg.cnn.batch_size, shuffle=True)
    valid_loader = DataLoader(valid_dataset, batch_size=cfg.cnn.batch_size, shuffle=False)

    # Fresh model, loss, optimizer and LR schedule for every fold.
    model = CustomModel(cfg.cnn).to(device)
    criterion = nn.HuberLoss()
    optimizer = torch.optim.AdamW(model.parameters(), lr=cfg.cnn.lr)
    total_steps = len(train_loader) * cfg.cnn.num_epochs
    scheduler = get_cosine_schedule_with_warmup(
        optimizer=optimizer,
        # The scheduler documents an integer step count; warm up over the first 10%.
        num_warmup_steps=int(total_steps * 0.1),
        num_training_steps=total_steps,
    )

    best_loss = float("inf")

    # Training loop; the scheduler is stepped once per batch, matching total_steps.
    for epoch in range(cfg.cnn.num_epochs):
        model.train()
        for images, targets in tqdm(train_loader):
            images = images.to(device)
            targets = targets.to(device)

            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()
            scheduler.step()

        # Validation: mean of the per-batch losses.
        model.eval()
        valid_losses = []
        with torch.no_grad():
            for images, targets in valid_loader:
                images = images.to(device)
                targets = targets.to(device)
                outputs = model(images)
                loss = criterion(outputs, targets)
                valid_losses.append(loss.item())

        valid_loss = np.mean(valid_losses)
        print(f"Epoch {epoch + 1}, Valid Loss: {valid_loss:.4f}")

        # Keep only the checkpoint with the lowest validation loss so far.
        if valid_loss < best_loss:
            best_loss = valid_loss
            torch.save(model.state_dict(), checkpoint_path)

    # Reload the best checkpoint for the OOF pass. weights_only=True restricts
    # unpickling to tensors/containers (and silences the torch.load FutureWarning);
    # map_location keeps the load working when the device differs from the saver's.
    model.load_state_dict(torch.load(checkpoint_path, map_location=device, weights_only=True))
    model.eval()

    # valid_loader is unshuffled and keeps the last partial batch, so batch i covers
    # positions [i * batch_size, i * batch_size + len(batch)) of valid_idx.
    with torch.no_grad():
        for i, (images, _) in enumerate(valid_loader):
            images = images.to(device)
            outputs = model(images)
            start_idx = i * cfg.cnn.batch_size
            end_idx = start_idx + outputs.shape[0]
            oof_predictions[valid_idx[start_idx:end_idx]] = outputs.cpu().numpy()

# CV score: MAE per target column, then the mean over columns.
mae_scores = []
for i in range(len(cfg.target_cols)):
    mae = np.mean(np.abs(oof_predictions[:, i] - train[cfg.target_cols[i]].to_numpy()))
    mae_scores.append(mae)

cv_score = np.mean(mae_scores)
print(f"CV Score: {cv_score:.4f}")

# Persist the OOF predictions next to the checkpoints.
np.save(f"{cfg.data.results_path}/oof_predictions.npy", oof_predictions)


Directory created: ../../results/002/20241119_220952
Fold 1


  model = create_fn(
100%|██████████| 543/543 [04:07<00:00,  2.19it/s]


Epoch 1, Valid Loss: 0.9335


100%|██████████| 543/543 [03:42<00:00,  2.44it/s]


Epoch 2, Valid Loss: 1.0115


100%|██████████| 543/543 [03:42<00:00,  2.44it/s]


Epoch 3, Valid Loss: 0.7310


100%|██████████| 543/543 [03:41<00:00,  2.45it/s]


Epoch 4, Valid Loss: 0.7233


100%|██████████| 543/543 [03:42<00:00,  2.44it/s]


Epoch 5, Valid Loss: 0.8505


100%|██████████| 543/543 [03:41<00:00,  2.45it/s]


Epoch 6, Valid Loss: 0.6786


100%|██████████| 543/543 [03:43<00:00,  2.43it/s]


Epoch 7, Valid Loss: 0.6326


100%|██████████| 543/543 [03:44<00:00,  2.42it/s]


Epoch 8, Valid Loss: 0.6184


100%|██████████| 543/543 [03:40<00:00,  2.46it/s]


Epoch 9, Valid Loss: 0.6228


100%|██████████| 543/543 [03:46<00:00,  2.40it/s]


Epoch 10, Valid Loss: 0.5927


100%|██████████| 543/543 [03:53<00:00,  2.33it/s]


Epoch 11, Valid Loss: 0.5634


100%|██████████| 543/543 [03:44<00:00,  2.42it/s]


Epoch 12, Valid Loss: 0.5613


100%|██████████| 543/543 [03:44<00:00,  2.42it/s]


Epoch 13, Valid Loss: 0.5627


100%|██████████| 543/543 [03:39<00:00,  2.47it/s]


Epoch 14, Valid Loss: 0.5551


100%|██████████| 543/543 [03:40<00:00,  2.46it/s]


Epoch 15, Valid Loss: 0.5464


100%|██████████| 543/543 [03:39<00:00,  2.47it/s]


Epoch 16, Valid Loss: 0.5414


100%|██████████| 543/543 [03:37<00:00,  2.50it/s]


Epoch 17, Valid Loss: 0.5378


100%|██████████| 543/543 [03:36<00:00,  2.51it/s]


Epoch 18, Valid Loss: 0.5386


100%|██████████| 543/543 [03:36<00:00,  2.50it/s]


Epoch 19, Valid Loss: 0.5372


100%|██████████| 543/543 [03:37<00:00,  2.50it/s]


Epoch 20, Valid Loss: 0.5370


  model.load_state_dict(torch.load(f"{cfg.data.results_path}/model_fold{fold}.pth"))


Fold 2


100%|██████████| 543/543 [03:39<00:00,  2.48it/s]


Epoch 1, Valid Loss: 0.9964


100%|██████████| 543/543 [03:40<00:00,  2.47it/s]


Epoch 2, Valid Loss: 0.8410


100%|██████████| 543/543 [03:37<00:00,  2.50it/s]


Epoch 3, Valid Loss: 0.8140


100%|██████████| 543/543 [03:38<00:00,  2.48it/s]


Epoch 4, Valid Loss: 0.7425


100%|██████████| 543/543 [03:38<00:00,  2.49it/s]


Epoch 5, Valid Loss: 0.9016


100%|██████████| 543/543 [03:40<00:00,  2.46it/s]


Epoch 6, Valid Loss: 0.6894


100%|██████████| 543/543 [03:41<00:00,  2.45it/s]


Epoch 7, Valid Loss: 0.6780


100%|██████████| 543/543 [03:40<00:00,  2.46it/s]


Epoch 8, Valid Loss: 0.6301


100%|██████████| 543/543 [03:40<00:00,  2.46it/s]


Epoch 9, Valid Loss: 0.6096


100%|██████████| 543/543 [03:41<00:00,  2.46it/s]


Epoch 10, Valid Loss: 0.5877


100%|██████████| 543/543 [03:40<00:00,  2.46it/s]


Epoch 11, Valid Loss: 0.5868


100%|██████████| 543/543 [03:40<00:00,  2.47it/s]


Epoch 12, Valid Loss: 0.5659


100%|██████████| 543/543 [03:37<00:00,  2.50it/s]


Epoch 13, Valid Loss: 0.5656


100%|██████████| 543/543 [03:41<00:00,  2.45it/s]


Epoch 14, Valid Loss: 0.5594


100%|██████████| 543/543 [03:41<00:00,  2.45it/s]


Epoch 15, Valid Loss: 0.5525


100%|██████████| 543/543 [03:41<00:00,  2.46it/s]


Epoch 16, Valid Loss: 0.5539


100%|██████████| 543/543 [03:42<00:00,  2.44it/s]


Epoch 17, Valid Loss: 0.5522


100%|██████████| 543/543 [03:41<00:00,  2.45it/s]


Epoch 18, Valid Loss: 0.5504


100%|██████████| 543/543 [03:40<00:00,  2.46it/s]


Epoch 19, Valid Loss: 0.5513


100%|██████████| 543/543 [03:41<00:00,  2.45it/s]


Epoch 20, Valid Loss: 0.5490
Fold 3


100%|██████████| 543/543 [03:41<00:00,  2.45it/s]


Epoch 1, Valid Loss: 0.8847


100%|██████████| 543/543 [03:42<00:00,  2.44it/s]


Epoch 2, Valid Loss: 0.9658


100%|██████████| 543/543 [03:39<00:00,  2.47it/s]


Epoch 3, Valid Loss: 0.7464


100%|██████████| 543/543 [03:41<00:00,  2.46it/s]


Epoch 4, Valid Loss: 0.7115


100%|██████████| 543/543 [03:41<00:00,  2.45it/s]


Epoch 5, Valid Loss: 0.6969


100%|██████████| 543/543 [03:42<00:00,  2.45it/s]


Epoch 6, Valid Loss: 0.7258


100%|██████████| 543/543 [03:41<00:00,  2.45it/s]


Epoch 7, Valid Loss: 0.6694


100%|██████████| 543/543 [03:40<00:00,  2.46it/s]


Epoch 8, Valid Loss: 0.6075


100%|██████████| 543/543 [03:40<00:00,  2.47it/s]


Epoch 9, Valid Loss: 0.6188


100%|██████████| 543/543 [03:40<00:00,  2.46it/s]


Epoch 10, Valid Loss: 0.6349


100%|██████████| 543/543 [03:42<00:00,  2.44it/s]


Epoch 11, Valid Loss: 0.5814


100%|██████████| 543/543 [03:40<00:00,  2.47it/s]


Epoch 12, Valid Loss: 0.5821


100%|██████████| 543/543 [03:43<00:00,  2.43it/s]


Epoch 13, Valid Loss: 0.5526


100%|██████████| 543/543 [03:41<00:00,  2.45it/s]


Epoch 14, Valid Loss: 0.5521


100%|██████████| 543/543 [03:41<00:00,  2.45it/s]


Epoch 15, Valid Loss: 0.5476


100%|██████████| 543/543 [03:40<00:00,  2.46it/s]


Epoch 16, Valid Loss: 0.5441


100%|██████████| 543/543 [03:41<00:00,  2.45it/s]


Epoch 17, Valid Loss: 0.5468


100%|██████████| 543/543 [03:39<00:00,  2.47it/s]


Epoch 18, Valid Loss: 0.5438


100%|██████████| 543/543 [03:41<00:00,  2.46it/s]


Epoch 19, Valid Loss: 0.5433


100%|██████████| 543/543 [03:40<00:00,  2.46it/s]


Epoch 20, Valid Loss: 0.5422
Fold 4


100%|██████████| 543/543 [03:38<00:00,  2.48it/s]


Epoch 1, Valid Loss: 1.0557


100%|██████████| 543/543 [03:38<00:00,  2.49it/s]


Epoch 2, Valid Loss: 0.7965


100%|██████████| 543/543 [03:39<00:00,  2.47it/s]


Epoch 3, Valid Loss: 0.7717


100%|██████████| 543/543 [03:39<00:00,  2.47it/s]


Epoch 4, Valid Loss: 0.7169


100%|██████████| 543/543 [03:38<00:00,  2.48it/s]


Epoch 5, Valid Loss: 0.7616


100%|██████████| 543/543 [03:40<00:00,  2.46it/s]


Epoch 6, Valid Loss: 0.6428


100%|██████████| 543/543 [03:39<00:00,  2.48it/s]


Epoch 7, Valid Loss: 0.6410


100%|██████████| 543/543 [03:40<00:00,  2.46it/s]


Epoch 8, Valid Loss: 0.8363


100%|██████████| 543/543 [03:39<00:00,  2.48it/s]


Epoch 9, Valid Loss: 0.6354


100%|██████████| 543/543 [03:39<00:00,  2.47it/s]


Epoch 10, Valid Loss: 0.5758


100%|██████████| 543/543 [03:39<00:00,  2.47it/s]


Epoch 11, Valid Loss: 0.5855


100%|██████████| 543/543 [03:38<00:00,  2.49it/s]


Epoch 12, Valid Loss: 0.5695


100%|██████████| 543/543 [03:39<00:00,  2.48it/s]


Epoch 13, Valid Loss: 0.5663


100%|██████████| 543/543 [03:40<00:00,  2.46it/s]


Epoch 14, Valid Loss: 0.5532


100%|██████████| 543/543 [03:42<00:00,  2.44it/s]


Epoch 15, Valid Loss: 0.5526


100%|██████████| 543/543 [03:40<00:00,  2.46it/s]


Epoch 16, Valid Loss: 0.5470


100%|██████████| 543/543 [03:40<00:00,  2.47it/s]


Epoch 17, Valid Loss: 0.5465


100%|██████████| 543/543 [03:40<00:00,  2.47it/s]


Epoch 18, Valid Loss: 0.5453


100%|██████████| 543/543 [03:39<00:00,  2.48it/s]


Epoch 19, Valid Loss: 0.5470


100%|██████████| 543/543 [03:39<00:00,  2.48it/s]


Epoch 20, Valid Loss: 0.5464
Fold 5


100%|██████████| 543/543 [03:37<00:00,  2.50it/s]


Epoch 1, Valid Loss: 1.0397


100%|██████████| 543/543 [03:34<00:00,  2.53it/s]


Epoch 2, Valid Loss: 0.9606


100%|██████████| 543/543 [03:35<00:00,  2.52it/s]


Epoch 3, Valid Loss: 0.7289


100%|██████████| 543/543 [03:35<00:00,  2.52it/s]


Epoch 4, Valid Loss: 0.7697


100%|██████████| 543/543 [03:39<00:00,  2.48it/s]


Epoch 5, Valid Loss: 0.6824


100%|██████████| 543/543 [03:39<00:00,  2.47it/s]


Epoch 6, Valid Loss: 0.6806


100%|██████████| 543/543 [03:40<00:00,  2.46it/s]


Epoch 7, Valid Loss: 0.6827


100%|██████████| 543/543 [03:40<00:00,  2.46it/s]


Epoch 8, Valid Loss: 0.6352


100%|██████████| 543/543 [03:40<00:00,  2.46it/s]


Epoch 9, Valid Loss: 0.6104


100%|██████████| 543/543 [03:41<00:00,  2.46it/s]


Epoch 10, Valid Loss: 0.6043


100%|██████████| 543/543 [03:40<00:00,  2.46it/s]


Epoch 11, Valid Loss: 0.5900


100%|██████████| 543/543 [03:39<00:00,  2.47it/s]


Epoch 12, Valid Loss: 0.5768


100%|██████████| 543/543 [03:40<00:00,  2.46it/s]


Epoch 13, Valid Loss: 0.5753


100%|██████████| 543/543 [03:40<00:00,  2.46it/s]


Epoch 14, Valid Loss: 0.5668


100%|██████████| 543/543 [03:41<00:00,  2.45it/s]


Epoch 15, Valid Loss: 0.5760


100%|██████████| 543/543 [03:41<00:00,  2.45it/s]


Epoch 16, Valid Loss: 0.5699


100%|██████████| 543/543 [03:41<00:00,  2.45it/s]


Epoch 17, Valid Loss: 0.5648


100%|██████████| 543/543 [03:41<00:00,  2.45it/s]


Epoch 18, Valid Loss: 0.5587


100%|██████████| 543/543 [03:41<00:00,  2.45it/s]


Epoch 19, Valid Loss: 0.5583


100%|██████████| 543/543 [03:40<00:00,  2.46it/s]


Epoch 20, Valid Loss: 0.5590
CV Score: 0.7547


### 推論

In [30]:
# Test-set inference: every fold's checkpoint predicts the whole test set.
test_dataset = CustomDataset(test, cfg.data.img_root, transform=get_valid_transform(), is_train=False)
test_loader = DataLoader(test_dataset, batch_size=cfg.cnn.batch_size, shuffle=False)

# Per-fold predictions, shape (n_test, n_targets, n_folds).
test_predictions = np.zeros((len(test), len(cfg.target_cols), cfg.n_splits))

for fold in range(cfg.n_splits):
    print(f"Predicting using fold {fold + 1} model")

    # Rebuild the architecture and load this fold's best checkpoint.
    # weights_only=True restricts unpickling to tensors/containers (and silences the
    # torch.load FutureWarning); map_location keeps CPU-only inference working.
    model = CustomModel(cfg.cnn).to(device)
    state_dict = torch.load(
        f"{cfg.data.results_path}/model_fold{fold}.pth",
        map_location=device,
        weights_only=True,
    )
    model.load_state_dict(state_dict)
    model.eval()

    fold_predictions = []
    with torch.no_grad():
        for images in tqdm(test_loader):
            images = images.to(device)
            outputs = model(images)
            fold_predictions.append(outputs.cpu().numpy())

    # Join the per-batch outputs; the loader is unshuffled, so row order matches `test`.
    fold_predictions = np.concatenate(fold_predictions, axis=0)
    test_predictions[:, :, fold] = fold_predictions

# Average over the fold axis.
final_predictions = test_predictions.mean(axis=2)

# Write the averaged predictions into the sample-submission frame, one column per target.
exprs = [pl.Series(final_predictions[:, i]).alias(cfg.target_cols[i]) for i in range(len(cfg.target_cols))]
submission = sample_submission.with_columns(exprs)
submission.write_csv(f"{cfg.data.results_path}/submission.csv")
print("Submission file created!")

# Quick look at the result.
submission.head()


Predicting using fold 1 model


  model.load_state_dict(torch.load(f"{cfg.data.results_path}/model_fold{fold}.pth"))
100%|██████████| 27/27 [00:09<00:00,  2.70it/s]


Predicting using fold 2 model


100%|██████████| 27/27 [00:06<00:00,  3.95it/s]


Predicting using fold 3 model


100%|██████████| 27/27 [00:06<00:00,  3.96it/s]


Predicting using fold 4 model


100%|██████████| 27/27 [00:06<00:00,  4.07it/s]


Predicting using fold 5 model


100%|██████████| 27/27 [00:06<00:00,  4.06it/s]

Submission file created!





x_0,y_0,z_0,x_1,y_1,z_1,x_2,y_2,z_2,x_3,y_3,z_3,x_4,y_4,z_4,x_5,y_5,z_5
f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64
1.564748,0.030296,-0.005134,3.22256,0.098319,-0.012699,4.787374,0.208132,-0.018557,6.273776,0.353713,-0.025955,7.681679,0.528883,-0.033389,9.024183,0.721998,-0.041702
1.138851,0.34751,-0.00604,2.388481,1.015732,0.000645,3.603331,1.932546,0.007776,4.801756,3.061652,0.01193,6.028886,4.369015,0.024479,7.260505,5.83338,0.036508
2.064555,0.008399,-0.005522,4.242183,0.022166,-0.012603,6.30258,0.045728,-0.021937,8.255037,0.076212,-0.035858,10.09707,0.110275,-0.048623,11.855469,0.142373,-0.060925
0.697062,-0.004701,-0.004122,1.429205,-0.012335,-0.004754,2.132749,-0.014095,-0.007812,2.808174,-0.019438,-0.009792,3.475157,-0.02116,-0.011917,4.123117,-0.030823,-0.011751
0.988816,0.001616,-0.007507,2.001525,0.009161,-0.016364,2.940998,0.027944,-0.028809,3.820994,0.053867,-0.040315,4.648424,0.086246,-0.052734,5.431638,0.117115,-0.067859


In [None]:
# Save the fold-averaged test predictions (final_predictions) as a .npy file.
np.save(f"{cfg.data.results_path}/final_predictions.npy", final_predictions)


In [32]:
# Sanity check: (number of test rows, number of target columns).
final_predictions.shape


(1727, 18)

### embedding取得

In [10]:
# Extract encoder embeddings for the training set, out-of-fold: each row is embedded
# by the model of the fold in which that row was a validation sample.

# Point cfg at the run whose checkpoints should be loaded.
# NOTE(review): this run id is hard-coded; update it when re-running training.
cfg.run_time = "20241119_220952"

# Allocated once the first fold model exists, so this cell does not depend on a
# `model` variable left over from an earlier cell.
train_embeddings = None

for fold, (_, valid_idx) in enumerate(gkf.split(train, groups=train["scene"])):
    print(f"Fold {fold + 1}")
    model = CustomModel(cfg.cnn).to(device)
    if train_embeddings is None:
        train_embeddings = np.zeros((len(train), model.n_features))

    # Load this fold's best checkpoint. weights_only=True restricts unpickling to
    # tensors/containers (and silences the torch.load FutureWarning); map_location
    # keeps the load working when the device differs from the one used for saving.
    model.load_state_dict(
        torch.load(
            f"{cfg.data.results_path}/model_fold{fold}.pth",
            map_location=device,
            weights_only=True,
        )
    )
    model.eval()

    valid_dataset = CustomDataset(train[valid_idx], cfg.data.img_root, transform=get_valid_transform(), is_train=False)
    valid_loader = DataLoader(valid_dataset, batch_size=cfg.cnn.batch_size, shuffle=False)

    # The loader is unshuffled, so batch i covers positions
    # [i * batch_size, i * batch_size + len(batch)) of valid_idx.
    with torch.no_grad():
        for i, images in enumerate(tqdm(valid_loader)):
            images = images.to(device)
            start_idx = i * cfg.cnn.batch_size
            end_idx = start_idx + images.shape[0]
            train_embeddings[valid_idx[start_idx:end_idx]] = model.get_embedding(images).cpu().numpy()

print(f"train_embeddings.shape: {train_embeddings.shape}")

# Persist the training embeddings.
np.save(f"{cfg.data.results_path}/train_embeddings.npy", train_embeddings)


Fold 1


  model = create_fn(
  model.load_state_dict(torch.load(f"{cfg.data.results_path}/model_fold{fold}.pth"))
100%|██████████| 136/136 [00:33<00:00,  4.05it/s]


Fold 2


100%|██████████| 136/136 [00:38<00:00,  3.55it/s]


Fold 3


100%|██████████| 136/136 [00:38<00:00,  3.50it/s]


Fold 4


100%|██████████| 136/136 [00:38<00:00,  3.50it/s]


Fold 5


100%|██████████| 136/136 [00:38<00:00,  3.49it/s]


train_embeddings.shape: (43371, 1280)


In [16]:
# Extract encoder embeddings for the test set with every fold model, then average them.

test_dataset = CustomDataset(test, cfg.data.img_root, transform=get_valid_transform(), is_train=False)
test_loader = DataLoader(test_dataset, batch_size=cfg.cnn.batch_size, shuffle=False)

# Per-fold embeddings, shape (n_test, n_features, n_folds). Allocated once the first
# fold model exists, so this cell does not depend on a `model` variable left over
# from an earlier cell.
test_embeddings = None

for fold in range(cfg.n_splits):
    print(f"Embedding using fold {fold + 1} model")

    # Rebuild the architecture and load this fold's best checkpoint.
    # weights_only=True restricts unpickling to tensors/containers (and silences the
    # torch.load FutureWarning); map_location keeps CPU-only runs working.
    model = CustomModel(cfg.cnn).to(device)
    if test_embeddings is None:
        test_embeddings = np.zeros((len(test), model.n_features, cfg.n_splits))
    model.load_state_dict(
        torch.load(
            f"{cfg.data.results_path}/model_fold{fold}.pth",
            map_location=device,
            weights_only=True,
        )
    )
    model.eval()

    fold_embeddings = []
    with torch.no_grad():
        for images in tqdm(test_loader):
            images = images.to(device)
            fold_embeddings.append(model.get_embedding(images).cpu().numpy())

    # Join the per-batch embeddings; the loader is unshuffled, so row order matches `test`.
    fold_embeddings = np.concatenate(fold_embeddings, axis=0)
    test_embeddings[:, :, fold] = fold_embeddings

# Average over the fold axis.
# NOTE(review): the fold models are trained independently, so their feature axes are
# not guaranteed to be aligned, while each training-set embedding comes from a single
# fold model. Confirm that averaging is what the downstream consumer expects.
final_embeddings = test_embeddings.mean(axis=2)

print(f"final_embeddings.shape: {final_embeddings.shape}")

# Persist the averaged test embeddings.
np.save(f"{cfg.data.results_path}/final_embeddings.npy", final_embeddings)


Embedding using fold 1 model


  model = create_fn(
  model.load_state_dict(torch.load(f"{cfg.data.results_path}/model_fold{fold}.pth"))
100%|██████████| 27/27 [00:06<00:00,  4.05it/s]


Embedding using fold 2 model


100%|██████████| 27/27 [00:06<00:00,  4.11it/s]


Embedding using fold 3 model


100%|██████████| 27/27 [00:06<00:00,  4.02it/s]


Embedding using fold 4 model


100%|██████████| 27/27 [00:06<00:00,  4.10it/s]


Embedding using fold 5 model


100%|██████████| 27/27 [00:06<00:00,  4.11it/s]

final_embeddings.shape: (1727, 1280)



