In [1]:
import json
from datetime import datetime
from pathlib import Path

import albumentations as A
import cv2
import numpy as np
import polars as pl
import pytz
import timm
import torch
import torch.nn.functional as F
from albumentations.pytorch import ToTensorV2
from omegaconf import OmegaConf
from sklearn.model_selection import GroupKFold
from sklearn.preprocessing import StandardScaler
from torch import nn
from torch.utils.data import DataLoader, Dataset
from tqdm.auto import tqdm
from transformers import get_cosine_schedule_with_warmup

from src.config import cfg
from src.dir import create_dir
from src.seed import seed_everything

# Name the experiment after the directory this notebook is executed from,
# then print the fully resolved configuration for the record.
cfg.exp_number = Path().resolve().name
print(OmegaConf.to_yaml(cfg, resolve=True))

# seed_everything is a project helper (src.seed); presumably it seeds the
# Python/NumPy/torch RNGs -- confirm in src/seed.py.
seed_everything(cfg.seed)
# Let polars display strings up to 1000 characters before truncating them.
pl.Config.set_fmt_str_lengths(1000)

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")


  from .autonotebook import tqdm as notebook_tqdm


exp_number: '009'
run_time: base
data:
  input_root: ../../data/input
  train_path: ../../data/input/train_features.csv
  test_path: ../../data/input/test_features.csv
  sample_submission_path: ../../data/input/sample_submission.csv
  img_root: ../../data/input/images
  json_root: ../../data/input/traffic_lights
  depth_root: ../../data/input/depth
  output_root: ../../data/output
  results_root: ../../results
  results_path: ../../results/009/base
seed: 319
n_splits: 5
target_cols:
- x_0
- y_0
- z_0
- x_1
- y_1
- z_1
- x_2
- y_2
- z_2
- x_3
- y_3
- z_3
- x_4
- y_4
- z_4
- x_5
- y_5
- z_5
cnn:
  model_name: convnext_base.fb_in22k_ft_in1k
  size: 128
  pretrained: true
  in_chans: 20
  target_size: 18
  lr: 0.0001
  num_epochs: 20
  batch_size: 64

Using device: cuda


### exp009

- late sub
- RGB画像・depth画像（各3フレーム）と信号機のマスク情報（8クラス）をチャネル方向に結合（計20ch）して入力とするNN
- 数値特徴量も入力とする
- backbone：convnext_base

### データの読み込み

In [2]:
# The scene identifier is the part of "ID" before the first underscore;
# it is the grouping key for GroupKFold below.
scene_from_id = pl.col("ID").str.split("_").list[0].alias("scene")

# Read the CSVs, attach a positional "index" column, and derive "scene".
train = (
    pl.read_csv(cfg.data.train_path, try_parse_dates=True)
    .with_row_index()
    .with_columns(scene_from_id)
)
test = (
    pl.read_csv(cfg.data.test_path, try_parse_dates=True)
    .with_row_index()
    .with_columns(scene_from_id)
)
sample_submission = pl.read_csv(cfg.data.sample_submission_path, try_parse_dates=True).with_row_index()

# Stack train and test so that preprocessing can be applied to both at once
# ("diagonal" fills columns missing on one side, e.g. the targets, with nulls).
train_test = pl.concat([train, test], how="diagonal")

# Cross-validation splitter (grouped by scene in the training cell).
gkf = GroupKFold(n_splits=cfg.n_splits)


In [3]:
# Continuous vehicle-state columns that get standardised.
numeric_cols = ["vEgo", "aEgo", "steeringAngleDeg", "steeringTorque", "gas"]

# Missing values become 0 before scaling.
train_test = train_test.with_columns(pl.col(numeric_cols).fill_null(0))

# Fit the scaler on train and test together and transform in one go.
scaler = StandardScaler()
scaled_features = scaler.fit_transform(train_test.select(numeric_cols).to_numpy())

# Overwrite each original column with its standardised counterpart.
train_test = train_test.with_columns(
    [pl.Series(name, scaled_features[:, pos]) for pos, name in enumerate(numeric_cols)]
)


In [4]:
# Split the combined frame back into train and test: test rows are the ones
# whose first target ("x_0") is null. Each part is restored to its original
# row order via the "index" column.
target_missing = pl.col("x_0").is_null()
train = train_test.filter(~target_missing).sort("index")
test = train_test.filter(target_missing).sort("index")


### データ拡張


In [5]:
def get_train_transform():
    """Build the RGB-frame pipeline used for training samples.

    The pipeline resizes to a ``cfg.cnn.size`` square, normalises with the
    ImageNet channel statistics and converts to a CHW tensor. It is wrapped in
    ``A.ReplayCompose`` so the parameters applied to one frame can be replayed
    on the other frames of the same sample.
    """
    side = cfg.cnn.size
    imagenet_mean = [0.485, 0.456, 0.406]
    imagenet_std = [0.229, 0.224, 0.225]

    steps = [
        A.Resize(side, side),
        A.Normalize(mean=imagenet_mean, std=imagenet_std),
        ToTensorV2(),
    ]
    return A.ReplayCompose(steps)


def get_valid_transform():
    """Build the RGB-frame pipeline used for validation and test samples.

    Steps: resize to a ``cfg.cnn.size`` square, normalise with the ImageNet
    channel statistics, convert to a CHW tensor. Returned as an
    ``A.ReplayCompose`` so the same parameters can be replayed across the
    frames of one sample.
    """
    side = cfg.cnn.size
    resize = A.Resize(side, side)
    normalize = A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    to_tensor = ToTensorV2()
    return A.ReplayCompose([resize, normalize, to_tensor])


### 信号機マスク

In [6]:
class TrafficLightMaskGenerator:
    TRAFFIC_LIGHT_CLASSES = ["green", "yellow", "red", "straight", "left", "right", "empty", "other"]

    def __init__(self, image_size: list[int, int] | int | None = None):
        self.original_size = (128, 64)  # (width, height)

        if isinstance(image_size, list):
            self.image_size = (image_size[1], image_size[0])
        elif isinstance(image_size, int):
            self.image_size = (image_size, image_size)
        else:
            self.image_size = self.original_size

        self.scale = (self.image_size[0] / self.original_size[0], self.image_size[1] / self.original_size[1])

        self.class_to_index = {cls: idx for idx, cls in enumerate(self.TRAFFIC_LIGHT_CLASSES)}

    def scale_bbox(self, bbox):
        x1, y1, x2, y2 = bbox
        return [x1 * self.scale[0], y1 * self.scale[1], x2 * self.scale[0], y2 * self.scale[1]]

    def _convert_coordinate(self, coord, max_size):
        """座標を適切な範囲に収める"""
        return max(0, min(int(round(coord)), max_size - 1))

    def generate_masks(self, traffic_lights_json):
        # PosixPathオブジェクトを文字列に変換
        if isinstance(traffic_lights_json, Path):
            traffic_lights_json = str(traffic_lights_json)

        traffic_lights = (
            json.load(open(traffic_lights_json)) if isinstance(traffic_lights_json, str) else traffic_lights_json
        )

        masks = np.zeros((self.image_size[1], self.image_size[0], len(self.TRAFFIC_LIGHT_CLASSES)), dtype=np.float32)

        for light in traffic_lights:
            class_idx = self.class_to_index[light["class"]]
            scaled_bbox = self.scale_bbox(light["bbox"])

            # 座標を適切な範囲に収める
            x1 = self._convert_coordinate(scaled_bbox[0], self.image_size[0])
            y1 = self._convert_coordinate(scaled_bbox[1], self.image_size[1])
            x2 = self._convert_coordinate(scaled_bbox[2], self.image_size[0])
            y2 = self._convert_coordinate(scaled_bbox[3], self.image_size[1])

            masks[y1 : y2 + 1, x1 : x2 + 1, class_idx] = 1.0

        return masks


### データセット

In [7]:
class CustomDataset(Dataset):
    """Dataset yielding a stacked image tensor plus numeric features per sample.

    For every row of ``df`` the three frames ``image_t-1.0.png``,
    ``image_t-0.5.png`` and ``image_t.png`` are loaded from ``img_dir/<ID>/``
    together with the same-named depth maps from ``depth_dir/<ID>/``. The
    traffic-light masks are rasterised from ``json_dir/<ID>.json``. All of it
    is concatenated along the channel axis: 3 RGB frames (9) + 3 depth maps
    (3) + 8 mask channels = 20 channels.

    Args:
        df: Frame with an ``ID`` column, the ``feature_cols`` listed below and,
            when ``is_train`` is true, the ``cfg.target_cols`` columns. It is
            indexed as ``df[idx]`` and must return a one-row frame.
        img_dir: Root directory of the RGB frames.
        json_dir: Root directory of the traffic-light JSON files.
        depth_dir: Root directory of the depth maps.
        transform: ``A.ReplayCompose`` applied to the RGB frames. Defaults to
            resize + ImageNet normalisation + tensor conversion.
        is_train: When true, ``__getitem__`` also returns the target tensor.
    """

    def __init__(self, df, img_dir, json_dir, depth_dir, transform=None, is_train=True):
        self.df = df
        self.img_dir = Path(img_dir)
        self.json_dir = Path(json_dir)
        self.depth_dir = Path(depth_dir)
        self.is_train = is_train
        # 8 tabular inputs; must stay in sync with the model's numeric input width.
        self.feature_cols = [
            "vEgo",
            "aEgo",
            "steeringAngleDeg",
            "steeringTorque",
            "gas",
            "leftBlinker",
            "rightBlinker",
            "brakePressed",
        ]

        # Transform for the RGB frames.
        if transform is None:
            self.transform = A.ReplayCompose(
                [
                    A.Resize(cfg.cnn.size, cfg.cnn.size),
                    A.Normalize(
                        mean=[0.485, 0.456, 0.406],  # ImageNet channel means
                        std=[0.229, 0.224, 0.225],  # ImageNet channel stds
                    ),
                    ToTensorV2(),
                ]
            )
        else:
            self.transform = transform

        # Transform for the single-channel depth maps.
        self.depth_transform = A.ReplayCompose(
            [
                A.Resize(cfg.cnn.size, cfg.cnn.size),
                A.Normalize(mean=[0.5], std=[0.5]),
                ToTensorV2(),
            ]
        )

        self.target_cols = cfg.target_cols
        # Masks are generated directly at the network input resolution.
        self.mask_generator = TrafficLightMaskGenerator(cfg.cnn.size)

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        """Return ``(image, features[, target])`` for the sample at ``idx``.

        Raises:
            FileNotFoundError: If an RGB frame or depth map cannot be read.
        """
        row = self.df[idx]
        sample_id = row["ID"].item()
        img_folder = self.img_dir / sample_id
        depth_folder = self.depth_dir / sample_id
        json_path = self.json_dir / f"{sample_id}.json"

        # Load the three frames and their depth maps.
        img_names = ["image_t-1.0.png", "image_t-0.5.png", "image_t.png"]
        imgs = []
        depths = []

        for img_name in img_names:
            # cv2.imread signals failure by returning None instead of raising,
            # so check explicitly to get an actionable error message.
            img_path = img_folder / img_name
            img = cv2.imread(str(img_path))
            if img is None:
                raise FileNotFoundError(f"Could not read image: {img_path}")
            imgs.append(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))

            depth_path = depth_folder / img_name
            depth = cv2.imread(str(depth_path), cv2.IMREAD_GRAYSCALE)
            if depth is None:
                raise FileNotFoundError(f"Could not read depth map: {depth_path}")
            depths.append(depth)

        # Transform the RGB frames. The first frame runs the pipeline and
        # records its parameters; the remaining frames replay them so all
        # three frames receive exactly the same transform.
        replay = None
        transformed_imgs = []
        for img in imgs:
            if replay is None:
                transformed = self.transform(image=img)
                replay = transformed["replay"]
            else:
                transformed = A.ReplayCompose.replay(replay, image=img)
            transformed_imgs.append(transformed["image"])

        # Transform the depth maps.
        transformed_depths = [self.depth_transform(image=depth)["image"] for depth in depths]

        # RGB frames stacked along channels: (3 * 3, H, W).
        img_tensor = torch.cat(transformed_imgs, dim=0)

        # Depth maps stacked along channels: (3, H, W).
        depth_tensor = torch.cat(transformed_depths, dim=0)

        # Traffic-light masks, moved from HWC to CHW: (8, H, W).
        mask = self.mask_generator.generate_masks(json_path)
        mask_tensor = torch.from_numpy(mask).permute(2, 0, 1)

        # Final network input: (9 + 3 + 8 = 20, H, W).
        combined_tensor = torch.cat([img_tensor, depth_tensor, mask_tensor], dim=0)

        # Numeric features of this row as a 1-D float tensor.
        feature_tensor = torch.tensor(row[self.feature_cols].to_numpy(), dtype=torch.float32).squeeze(0)

        if self.is_train:
            target = torch.tensor(row[self.target_cols].to_numpy(), dtype=torch.float32).squeeze(0)
            return combined_tensor, feature_tensor, target
        return combined_tensor, feature_tensor


### モデル

In [8]:
class GeM(nn.Module):
    """Generalized-mean (GeM) pooling over the two spatial dimensions.

    The input is clamped from below at ``eps``, raised to the power ``p``,
    average-pooled down to 1x1 and finally raised to ``1 / p``.

    Args:
        p: Pooling exponent (initial value when trainable).
        eps: Lower clamp applied before exponentiation.
        p_trainable: If true, ``p`` is registered as a learnable parameter;
            otherwise it is kept as a plain number.
    """

    def __init__(self, p=3, eps=1e-6, p_trainable=True):
        super().__init__()
        self.eps = eps
        self.p = nn.Parameter(torch.ones(1) * p) if p_trainable else p

    def forward(self, x):
        """Pool ``x`` of shape (N, C, H, W) down to (N, C, 1, 1)."""
        powered = x.clamp(min=self.eps).pow(self.p)
        pooled = F.adaptive_avg_pool2d(powered, 1)
        return pooled.pow(1.0 / self.p)


In [9]:
class CustomModel(nn.Module):
    """timm encoder + GeM pooling + MLP head fed with extra numeric features.

    Args:
        cfg: Model config providing ``model_name``, ``pretrained``,
            ``in_chans`` and ``target_size``.
        pretrained: Currently ignored -- the flag is always read from
            ``cfg.pretrained``. Kept only so the signature stays compatible
            with existing callers.
        target_size: Optional override for ``cfg.target_size`` (output width).
        model_name: Optional override for ``cfg.model_name``.
    """

    def __init__(self, cfg, pretrained=False, target_size=None, model_name=None):
        super().__init__()

        # Headless encoder: no classifier and no pooling, so it returns the
        # spatial feature map that is pooled by GeM below.
        self.encoder = timm.create_model(
            cfg.model_name if model_name is None else model_name,
            pretrained=cfg.pretrained,
            num_classes=0,
            in_chans=cfg.in_chans,
            global_pool="",
        )

        self.n_features = self.encoder.num_features
        self.pool = GeM(p=3, eps=1e-6, p_trainable=True)
        self.n_numeric_features = 8  # width of the numeric feature vector

        self.target_size = cfg.target_size if target_size is None else target_size

        # Head operating on [pooled image features | numeric features].
        self.fc = nn.Sequential(
            nn.Linear(self.n_features + self.n_numeric_features, 512),
            nn.ReLU(),
            nn.Linear(512, self.target_size),
        )

    def forward(self, image, numeric_features):
        """Predict targets from an image batch and its numeric features.

        Args:
            image: Batch passed to the encoder; its channel count must equal
                the configured ``in_chans``.
            numeric_features: Batch of numeric features, one row per image,
                ``n_numeric_features`` wide.

        Returns:
            Tensor with ``target_size`` outputs per sample.
        """
        feature = self.encoder(image)
        # Pool to 1x1 and flatten to (batch, n_features).
        feature = self.pool(feature).flatten(1)
        combined_features = torch.cat([feature, numeric_features], dim=1)
        return self.fc(combined_features)


In [10]:
# Build the model once outside the training loop. Note that the per-fold
# training loop rebinds `model`, so this instance is not the one that is trained.
model = CustomModel(cfg.cnn)


### 学習

In [11]:
# Debug switch: when True, shrink the run to a quick smoke test
# (a single epoch on the first 100 training rows).
DEBUG = False

if DEBUG:
    cfg.cnn.num_epochs = 1
    train = train.head(100)


In [12]:
# Create the directory that receives this run's artefacts. The run is named
# after the current time in Japan.
japan_tz = pytz.timezone("Asia/Tokyo")
cfg.run_time = datetime.now(japan_tz).strftime("%Y%m%d_%H%M%S")
create_dir(cfg.data.results_path)

# Out-of-fold predictions, one row per training sample.
oof_predictions = np.zeros((len(train), len(cfg.target_cols)))
models = {}

for fold, (train_idx, valid_idx) in enumerate(gkf.split(train, groups=train["scene"])):
    print(f"Fold {fold + 1}")
    model_path = f"{cfg.data.results_path}/model_fold{fold}.pth"

    # Datasets and loaders for this fold.
    train_dataset = CustomDataset(
        train[train_idx],
        cfg.data.img_root,
        cfg.data.json_root,
        cfg.data.depth_root,
        transform=get_train_transform(),
    )
    valid_dataset = CustomDataset(
        train[valid_idx],
        cfg.data.img_root,
        cfg.data.json_root,
        cfg.data.depth_root,
        transform=get_valid_transform(),
    )

    train_loader = DataLoader(train_dataset, batch_size=cfg.cnn.batch_size, shuffle=True)
    # shuffle=False keeps the loader order aligned with valid_idx, which the
    # OOF assignment below relies on.
    valid_loader = DataLoader(valid_dataset, batch_size=cfg.cnn.batch_size, shuffle=False)

    # Model, loss, optimiser and LR schedule (10% warm-up, then cosine decay).
    model = CustomModel(cfg.cnn).to(device)
    criterion = nn.HuberLoss()
    optimizer = torch.optim.AdamW(model.parameters(), lr=cfg.cnn.lr)
    total_steps = len(train_loader) * cfg.cnn.num_epochs
    scheduler = get_cosine_schedule_with_warmup(
        optimizer=optimizer,
        num_warmup_steps=int(total_steps * 0.1),  # the scheduler expects an int
        num_training_steps=total_steps,
    )

    best_loss = float("inf")

    for epoch in range(cfg.cnn.num_epochs):
        # Training pass; the scheduler is stepped once per batch.
        model.train()
        for images, numeric_features, targets in tqdm(train_loader):
            images = images.to(device)
            numeric_features = numeric_features.to(device)
            targets = targets.to(device)

            optimizer.zero_grad()
            outputs = model(images, numeric_features)
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()
            scheduler.step()

        # Validation pass.
        model.eval()
        valid_losses = []
        with torch.no_grad():
            for images, numeric_features, targets in valid_loader:
                images = images.to(device)
                numeric_features = numeric_features.to(device)
                targets = targets.to(device)
                outputs = model(images, numeric_features)
                loss = criterion(outputs, targets)
                valid_losses.append(loss.item())

        valid_loss = np.mean(valid_losses)
        print(f"Epoch {epoch + 1}, Valid Loss: {valid_loss:.4f}")

        # Keep only the checkpoint with the lowest validation loss.
        if valid_loss < best_loss:
            best_loss = valid_loss
            torch.save(model.state_dict(), model_path)

    # Reload the best checkpoint of this fold for the OOF predictions.
    # weights_only=True restricts unpickling to tensors/containers, and
    # map_location places the weights on the active device.
    model.load_state_dict(torch.load(model_path, map_location=device, weights_only=True))
    model.eval()

    # The validation loader is reused as is (same data, same order).
    fold_outputs = []
    with torch.no_grad():
        for images, numeric_features, _ in valid_loader:
            images = images.to(device)
            numeric_features = numeric_features.to(device)
            outputs = model(images, numeric_features)
            fold_outputs.append(outputs.cpu().numpy())
    oof_predictions[valid_idx] = np.concatenate(fold_outputs, axis=0)

# CV score: MAE per target column, averaged over all columns.
mae_scores = []
for i in range(len(cfg.target_cols)):
    mae = np.mean(np.abs(oof_predictions[:, i] - train[cfg.target_cols[i]].to_numpy()))
    mae_scores.append(mae)

cv_score = np.mean(mae_scores)
print(f"CV Score: {cv_score:.4f}")

# Persist the OOF predictions.
np.save(f"{cfg.data.results_path}/oof_predictions.npy", oof_predictions)


Directory created: ../../results/009/20241124_234741
Fold 1


100%|██████████| 543/543 [06:36<00:00,  1.37it/s]


Epoch 1, Valid Loss: 0.8821


100%|██████████| 543/543 [06:02<00:00,  1.50it/s]


Epoch 2, Valid Loss: 0.7403


100%|██████████| 543/543 [06:10<00:00,  1.47it/s]


Epoch 3, Valid Loss: 0.5543


100%|██████████| 543/543 [06:08<00:00,  1.47it/s]


Epoch 4, Valid Loss: 0.4870


100%|██████████| 543/543 [06:07<00:00,  1.48it/s]


Epoch 5, Valid Loss: 0.4608


100%|██████████| 543/543 [06:09<00:00,  1.47it/s]


Epoch 6, Valid Loss: 0.4247


100%|██████████| 543/543 [06:10<00:00,  1.47it/s]


Epoch 7, Valid Loss: 0.4045


100%|██████████| 543/543 [06:04<00:00,  1.49it/s]


Epoch 8, Valid Loss: 0.4056


100%|██████████| 543/543 [06:07<00:00,  1.48it/s]


Epoch 9, Valid Loss: 0.3866


100%|██████████| 543/543 [06:09<00:00,  1.47it/s]


Epoch 10, Valid Loss: 0.3850


100%|██████████| 543/543 [06:09<00:00,  1.47it/s]


Epoch 11, Valid Loss: 0.3734


100%|██████████| 543/543 [06:08<00:00,  1.47it/s]


Epoch 12, Valid Loss: 0.3698


100%|██████████| 543/543 [06:09<00:00,  1.47it/s]


Epoch 13, Valid Loss: 0.3684


100%|██████████| 543/543 [06:08<00:00,  1.47it/s]


Epoch 14, Valid Loss: 0.3658


100%|██████████| 543/543 [06:08<00:00,  1.47it/s]


Epoch 15, Valid Loss: 0.3647


100%|██████████| 543/543 [06:09<00:00,  1.47it/s]


Epoch 16, Valid Loss: 0.3656


100%|██████████| 543/543 [06:08<00:00,  1.47it/s]


Epoch 17, Valid Loss: 0.3652


100%|██████████| 543/543 [06:08<00:00,  1.47it/s]


Epoch 18, Valid Loss: 0.3654


100%|██████████| 543/543 [06:08<00:00,  1.48it/s]


Epoch 19, Valid Loss: 0.3654


100%|██████████| 543/543 [06:09<00:00,  1.47it/s]


Epoch 20, Valid Loss: 0.3655


  model.load_state_dict(torch.load(f"{cfg.data.results_path}/model_fold{fold}.pth"))


Fold 2


100%|██████████| 543/543 [05:59<00:00,  1.51it/s]


Epoch 1, Valid Loss: 0.9270


100%|██████████| 543/543 [06:01<00:00,  1.50it/s]


Epoch 2, Valid Loss: 0.6811


100%|██████████| 543/543 [06:00<00:00,  1.51it/s]


Epoch 3, Valid Loss: 0.6018


100%|██████████| 543/543 [06:01<00:00,  1.50it/s]


Epoch 4, Valid Loss: 0.4927


100%|██████████| 543/543 [06:00<00:00,  1.50it/s]


Epoch 5, Valid Loss: 0.4492


100%|██████████| 543/543 [06:01<00:00,  1.50it/s]


Epoch 6, Valid Loss: 0.4342


100%|██████████| 543/543 [06:00<00:00,  1.51it/s]


Epoch 7, Valid Loss: 0.4194


100%|██████████| 543/543 [06:00<00:00,  1.50it/s]


Epoch 8, Valid Loss: 0.3979


100%|██████████| 543/543 [06:01<00:00,  1.50it/s]


Epoch 9, Valid Loss: 0.3807


100%|██████████| 543/543 [06:00<00:00,  1.51it/s]


Epoch 10, Valid Loss: 0.3746


100%|██████████| 543/543 [06:01<00:00,  1.50it/s]


Epoch 11, Valid Loss: 0.3758


100%|██████████| 543/543 [06:00<00:00,  1.51it/s]


Epoch 12, Valid Loss: 0.3697


100%|██████████| 543/543 [05:59<00:00,  1.51it/s]


Epoch 13, Valid Loss: 0.3669


100%|██████████| 543/543 [05:59<00:00,  1.51it/s]


Epoch 14, Valid Loss: 0.3663


100%|██████████| 543/543 [06:00<00:00,  1.51it/s]


Epoch 15, Valid Loss: 0.3638


100%|██████████| 543/543 [06:00<00:00,  1.51it/s]


Epoch 16, Valid Loss: 0.3651


100%|██████████| 543/543 [06:00<00:00,  1.51it/s]


Epoch 17, Valid Loss: 0.3644


100%|██████████| 543/543 [06:00<00:00,  1.51it/s]


Epoch 18, Valid Loss: 0.3647


100%|██████████| 543/543 [05:59<00:00,  1.51it/s]


Epoch 19, Valid Loss: 0.3644


100%|██████████| 543/543 [05:59<00:00,  1.51it/s]


Epoch 20, Valid Loss: 0.3644
Fold 3


100%|██████████| 543/543 [06:08<00:00,  1.48it/s]


Epoch 1, Valid Loss: 0.8860


100%|██████████| 543/543 [06:09<00:00,  1.47it/s]


Epoch 2, Valid Loss: 0.7957


100%|██████████| 543/543 [06:08<00:00,  1.47it/s]


Epoch 3, Valid Loss: 0.5270


100%|██████████| 543/543 [06:09<00:00,  1.47it/s]


Epoch 4, Valid Loss: 0.5066


100%|██████████| 543/543 [06:08<00:00,  1.48it/s]


Epoch 5, Valid Loss: 0.4371


100%|██████████| 543/543 [06:09<00:00,  1.47it/s]


Epoch 6, Valid Loss: 0.4270


100%|██████████| 543/543 [06:09<00:00,  1.47it/s]


Epoch 7, Valid Loss: 0.4039


100%|██████████| 543/543 [06:09<00:00,  1.47it/s]


Epoch 8, Valid Loss: 0.4012


100%|██████████| 543/543 [06:06<00:00,  1.48it/s]


Epoch 9, Valid Loss: 0.3888


100%|██████████| 543/543 [06:07<00:00,  1.48it/s]


Epoch 10, Valid Loss: 0.3793


100%|██████████| 543/543 [06:07<00:00,  1.48it/s]


Epoch 11, Valid Loss: 0.3766


100%|██████████| 543/543 [06:07<00:00,  1.48it/s]


Epoch 12, Valid Loss: 0.3666


100%|██████████| 543/543 [06:06<00:00,  1.48it/s]


Epoch 13, Valid Loss: 0.3644


100%|██████████| 543/543 [06:07<00:00,  1.48it/s]


Epoch 14, Valid Loss: 0.3644


100%|██████████| 543/543 [06:06<00:00,  1.48it/s]


Epoch 15, Valid Loss: 0.3619


100%|██████████| 543/543 [06:07<00:00,  1.48it/s]


Epoch 16, Valid Loss: 0.3620


100%|██████████| 543/543 [06:07<00:00,  1.48it/s]


Epoch 17, Valid Loss: 0.3620


100%|██████████| 543/543 [06:08<00:00,  1.47it/s]


Epoch 18, Valid Loss: 0.3620


100%|██████████| 543/543 [06:07<00:00,  1.48it/s]


Epoch 19, Valid Loss: 0.3620


100%|██████████| 543/543 [06:07<00:00,  1.48it/s]


Epoch 20, Valid Loss: 0.3621
Fold 4


100%|██████████| 543/543 [06:08<00:00,  1.47it/s]


Epoch 1, Valid Loss: 0.9826


100%|██████████| 543/543 [06:11<00:00,  1.46it/s]


Epoch 2, Valid Loss: 0.7052


100%|██████████| 543/543 [06:09<00:00,  1.47it/s]


Epoch 3, Valid Loss: 0.5640


100%|██████████| 543/543 [06:10<00:00,  1.46it/s]


Epoch 4, Valid Loss: 0.5288


100%|██████████| 543/543 [06:09<00:00,  1.47it/s]


Epoch 5, Valid Loss: 0.4603


100%|██████████| 543/543 [06:09<00:00,  1.47it/s]


Epoch 6, Valid Loss: 0.4354


100%|██████████| 543/543 [06:11<00:00,  1.46it/s]


Epoch 7, Valid Loss: 0.4103


100%|██████████| 543/543 [06:08<00:00,  1.47it/s]


Epoch 8, Valid Loss: 0.4123


100%|██████████| 543/543 [06:09<00:00,  1.47it/s]


Epoch 9, Valid Loss: 0.4053


100%|██████████| 543/543 [06:10<00:00,  1.47it/s]


Epoch 10, Valid Loss: 0.3894


100%|██████████| 543/543 [06:09<00:00,  1.47it/s]


Epoch 11, Valid Loss: 0.3891


100%|██████████| 543/543 [06:09<00:00,  1.47it/s]


Epoch 12, Valid Loss: 0.3746


100%|██████████| 543/543 [06:07<00:00,  1.48it/s]


Epoch 13, Valid Loss: 0.3760


100%|██████████| 543/543 [06:09<00:00,  1.47it/s]


Epoch 14, Valid Loss: 0.3730


100%|██████████| 543/543 [06:09<00:00,  1.47it/s]


Epoch 15, Valid Loss: 0.3714


100%|██████████| 543/543 [06:10<00:00,  1.47it/s]


Epoch 16, Valid Loss: 0.3706


100%|██████████| 543/543 [06:06<00:00,  1.48it/s]


Epoch 17, Valid Loss: 0.3711


100%|██████████| 543/543 [06:04<00:00,  1.49it/s]


Epoch 18, Valid Loss: 0.3710


100%|██████████| 543/543 [06:03<00:00,  1.49it/s]


Epoch 19, Valid Loss: 0.3711


100%|██████████| 543/543 [06:09<00:00,  1.47it/s]


Epoch 20, Valid Loss: 0.3711
Fold 5


100%|██████████| 543/543 [06:01<00:00,  1.50it/s]


Epoch 1, Valid Loss: 1.0076


100%|██████████| 543/543 [05:59<00:00,  1.51it/s]


Epoch 2, Valid Loss: 0.6819


100%|██████████| 543/543 [05:59<00:00,  1.51it/s]


Epoch 3, Valid Loss: 0.5623


100%|██████████| 543/543 [05:58<00:00,  1.51it/s]


Epoch 4, Valid Loss: 0.5245


100%|██████████| 543/543 [05:59<00:00,  1.51it/s]


Epoch 5, Valid Loss: 0.5069


100%|██████████| 543/543 [06:01<00:00,  1.50it/s]


Epoch 6, Valid Loss: 0.4380


100%|██████████| 543/543 [06:00<00:00,  1.51it/s]


Epoch 7, Valid Loss: 0.4249


100%|██████████| 543/543 [06:00<00:00,  1.51it/s]


Epoch 8, Valid Loss: 0.4126


100%|██████████| 543/543 [06:00<00:00,  1.51it/s]


Epoch 9, Valid Loss: 0.4140


100%|██████████| 543/543 [06:00<00:00,  1.51it/s]


Epoch 10, Valid Loss: 0.3942


100%|██████████| 543/543 [05:58<00:00,  1.51it/s]


Epoch 11, Valid Loss: 0.3937


100%|██████████| 543/543 [05:58<00:00,  1.52it/s]


Epoch 12, Valid Loss: 0.3903


100%|██████████| 543/543 [05:58<00:00,  1.51it/s]


Epoch 13, Valid Loss: 0.3835


100%|██████████| 543/543 [05:58<00:00,  1.51it/s]


Epoch 14, Valid Loss: 0.3837


100%|██████████| 543/543 [05:58<00:00,  1.51it/s]


Epoch 15, Valid Loss: 0.3819


100%|██████████| 543/543 [05:57<00:00,  1.52it/s]


Epoch 16, Valid Loss: 0.3811


100%|██████████| 543/543 [05:58<00:00,  1.51it/s]


Epoch 17, Valid Loss: 0.3816


100%|██████████| 543/543 [05:58<00:00,  1.52it/s]


Epoch 18, Valid Loss: 0.3812


100%|██████████| 543/543 [05:58<00:00,  1.52it/s]


Epoch 19, Valid Loss: 0.3815


100%|██████████| 543/543 [05:57<00:00,  1.52it/s]


Epoch 20, Valid Loss: 0.3814
CV Score: 0.5671


### 推論

In [13]:
# Test-set inference.
test_dataset = CustomDataset(
    test, cfg.data.img_root, cfg.data.json_root, cfg.data.depth_root, transform=get_valid_transform(), is_train=False
)
test_loader = DataLoader(test_dataset, batch_size=cfg.cnn.batch_size, shuffle=False)

# Predictions of every fold model: (n_test, n_targets, n_folds).
test_predictions = np.zeros((len(test), len(cfg.target_cols), cfg.n_splits))

for fold in range(cfg.n_splits):
    print(f"Predicting using fold {fold + 1} model")

    # Load this fold's best checkpoint. weights_only=True restricts unpickling
    # to tensors/containers, and map_location places the weights on the
    # active device.
    model = CustomModel(cfg.cnn).to(device)
    model.load_state_dict(
        torch.load(f"{cfg.data.results_path}/model_fold{fold}.pth", map_location=device, weights_only=True)
    )
    model.eval()

    fold_predictions = []
    with torch.no_grad():
        for images, numeric_features in tqdm(test_loader):
            images = images.to(device)
            numeric_features = numeric_features.to(device)
            outputs = model(images, numeric_features)
            fold_predictions.append(outputs.cpu().numpy())

    # Stitch the batches back together.
    fold_predictions = np.concatenate(fold_predictions, axis=0)
    test_predictions[:, :, fold] = fold_predictions

# Ensemble: average over the fold axis.
final_predictions = test_predictions.mean(axis=2)

# Build the submission frame from the sample submission.
exprs = [pl.Series(final_predictions[:, i]).alias(cfg.target_cols[i]) for i in range(len(cfg.target_cols))]
submission = sample_submission.with_columns(exprs)
# "index" is only a helper column added by with_row_index() when the sample
# submission was loaded; leave it out of the written file.
submission.drop("index").write_csv(f"{cfg.data.results_path}/submission.csv")
print("Submission file created!")

# Quick look at the result.
submission.head()


Predicting using fold 1 model


  model.load_state_dict(torch.load(f"{cfg.data.results_path}/model_fold{fold}.pth"))
100%|██████████| 27/27 [00:11<00:00,  2.41it/s]


Predicting using fold 2 model


100%|██████████| 27/27 [00:11<00:00,  2.43it/s]


Predicting using fold 3 model


100%|██████████| 27/27 [00:10<00:00,  2.49it/s]


Predicting using fold 4 model


100%|██████████| 27/27 [00:10<00:00,  2.55it/s]


Predicting using fold 5 model


100%|██████████| 27/27 [00:10<00:00,  2.48it/s]

Submission file created!





index,x_0,y_0,z_0,x_1,y_1,z_1,x_2,y_2,z_2,x_3,y_3,z_3,x_4,y_4,z_4,x_5,y_5,z_5
u32,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64
0,1.762016,-0.049537,0.005101,3.704252,-0.115749,-0.00895,5.674801,-0.194381,-0.033764,7.642698,-0.309448,0.003367,9.445806,-0.447494,-0.023732,11.164513,-0.633084,-0.053004
1,1.066159,0.444887,0.000867,2.175993,1.230253,-0.007904,3.207965,2.298493,0.007959,4.167549,3.639967,0.028065,5.196145,5.131732,0.015617,6.191131,6.822976,0.03896
2,1.502963,0.040286,-0.009258,3.096368,-0.008223,-0.0092,4.650632,-0.042397,-0.021783,6.23341,-0.122242,-0.036655,7.807363,-0.212073,-0.028546,9.317002,-0.329819,-0.069742
3,0.973698,0.126782,0.012872,1.961286,0.351206,-0.049191,2.878072,0.761303,-0.05933,3.681935,1.283892,-0.083329,4.421638,2.010871,-0.078767,5.053724,2.797615,-0.112799
4,0.609454,0.015052,-0.016358,1.044376,0.03595,-0.020092,1.220239,0.028087,-0.011077,1.285821,-0.006175,-0.042235,1.20851,0.040344,-0.049794,1.145105,0.048068,-0.032216


In [18]:
# Write submission.csv without the helper "index" column
# (added by with_row_index() when the sample submission was loaded).
submission.drop("index").write_csv(f"{cfg.data.results_path}/submission.csv")


In [14]:
# Save the fold-averaged test predictions as a NumPy array.
np.save(f"{cfg.data.results_path}/final_predictions.npy", final_predictions)


In [15]:
# Sanity check: expected shape is (number of test rows, number of target columns).
final_predictions.shape


(1727, 18)