In [1]:
import gc
import importlib
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import polars as pl
import seaborn as sns
from atmacup_18 import constants

import utils

importlib.reload(utils)

<module 'utils' from '/home/tatsuya/projects/atmacup/atmacup_18/experiments/main/v00/v00_24_02/utils.py'>

In [2]:
# Fix the seed once, before any data loading or training, so runs are repeatable.
# NOTE(review): seed_everything is defined in utils; which RNGs it seeds is not
# visible from this notebook — confirm it covers every library used in training.
RANDOM_STATE = 2024
utils.seed_everything(RANDOM_STATE)

## データ読み込み

In [3]:
# All dataset paths are derived from the current working directory, which is
# expected to be the folder containing this notebook.
notebook_dir = Path().resolve()
DATA_DIR = notebook_dir.parents[3].joinpath("data")
DATASET_DIR = DATA_DIR.joinpath("atmaCup#18_dataset")
TR_FEATURES_CSV = DATASET_DIR.joinpath("train_features.csv")
TS_FEATURES_CSV = DATASET_DIR.joinpath("test_features.csv")
IMAGES_DIR = DATASET_DIR.joinpath("images")
TRAFFIC_LIGHTS_CSV = DATASET_DIR.joinpath("traffic_lights.csv")

# Camera frames available per sample (presumably the current frame and the
# frames 0.5 s / 1.0 s earlier — confirm against the dataset description).
IMAGE_NAMES = ["image_t.png", "image_t-0.5.png", "image_t-1.0.png"]
TRAFFIC_LIGHTS_BBOX_IMAGE_NAME = constants.TRAFFIC_LIGHT_BBOX_IMAGE_NAME
DEPTH_IMAGE_FILE_PREFIX = constants.DEPTH_IMAGE_FILE_PREFIX
# One .npy depth file per camera frame, named "<prefix><image name>.npy".
DEPTH_IMAGE_NAMES = [
    f"{DEPTH_IMAGE_FILE_PREFIX}{image_name}.npy" for image_name in IMAGE_NAMES
]

# Directory with the predictions of the base model whose output is subtracted
# from the targets. Later cells are guarded by `BASE_PRED_DIR is not None`, so
# the derived paths must tolerate None as well instead of failing on
# `.joinpath` in this cell.
BASE_PRED_DIR = Path("..", "..", "..", "main2", "v00", "v00_05_00")
BASE_OOF_PRED_CSV = (
    BASE_PRED_DIR.joinpath("oof_preds.csv") if BASE_PRED_DIR is not None else None
)
BASE_SUBMISSION_CSV = (
    BASE_PRED_DIR.joinpath("submission.csv") if BASE_PRED_DIR is not None else None
)

In [4]:
# Target column names in waypoint-major order: x_0, y_0, z_0, ..., x_5, y_5, z_5.
TARGET_COLS = [f"{axis}_{step}" for step in range(6) for axis in ("x", "y", "z")]
# Names under which the base model's prediction for each target is stored.
BASE_PRED_COLS = ["base_pred_" + name for name in TARGET_COLS]

In [5]:
# Load the training features. The preview shows the vehicle signals, the
# x_i / y_i / z_i target columns, and the scene_id / scene_dsec / origin_idx
# columns that later cells use for sorting and for restoring the row order.
tr_df = utils.read_feature_csv(TR_FEATURES_CSV)
tr_df.head(2)

ID,vEgo,aEgo,steeringAngleDeg,steeringTorque,brake,brakePressed,gas,gasPressed,gearShifter,leftBlinker,rightBlinker,x_0,y_0,z_0,x_1,y_1,z_1,x_2,y_2,z_2,x_3,y_3,z_3,x_4,y_4,z_4,x_5,y_5,z_5,scene_id,scene_dsec,origin_idx
str,f64,f64,f64,f64,f64,bool,f64,bool,str,bool,bool,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,str,i32,i64
"""00066be8e20318869c38c66be46663…",5.701526,1.538456,-2.165777,-139.0,0.0,False,0.25,True,"""drive""",False,False,2.82959,0.032226,0.045187,6.231999,0.065895,0.107974,9.785009,0.124972,0.203649,13.485472,0.163448,0.302818,17.574227,0.174289,0.406331,21.951269,0.199503,0.485079,"""00066be8e20318869c38c66be46663…",320,0
"""00066be8e20318869c38c66be46663…",11.176292,0.279881,-11.625697,-44.0,0.0,False,0.0,False,"""drive""",False,True,4.970268,-0.007936,0.005028,10.350489,-0.032374,-0.020701,15.770054,0.084073,0.008645,21.132415,0.391343,0.036335,26.316489,0.843124,0.065,31.383814,1.42507,0.073083,"""00066be8e20318869c38c66be46663…",420,1


In [6]:
# Load the test features. The preview shows the same columns as the training
# frame except for the x_i / y_i / z_i targets.
ts_df = utils.read_feature_csv(TS_FEATURES_CSV)
ts_df.head(2)

ID,vEgo,aEgo,steeringAngleDeg,steeringTorque,brake,brakePressed,gas,gasPressed,gearShifter,leftBlinker,rightBlinker,scene_id,scene_dsec,origin_idx
str,f64,f64,f64,f64,f64,bool,f64,bool,str,bool,bool,str,i32,i64
"""012baccc145d400c896cb82065a93d…",3.374273,-0.01936,-34.008415,17.0,0.0,False,0.0,False,"""drive""",False,False,"""012baccc145d400c896cb82065a93d…",120,0
"""012baccc145d400c896cb82065a93d…",2.441048,-0.022754,307.860077,295.0,0.0,True,0.0,False,"""drive""",False,False,"""012baccc145d400c896cb82065a93d…",220,1


In [7]:
def reduce_base_pred(
    df: pl.DataFrame, base_pred_df: pl.DataFrame, has_target: bool
) -> pl.DataFrame:
    """
    Attach the base predictions to ``df`` and optionally turn targets into residuals.

    The ``TARGET_COLS`` columns of ``base_pred_df`` are renamed to
    ``BASE_PRED_COLS`` and appended to ``df`` column-wise. Rows are paired
    purely by position, so both frames must be in the same row order.

    Args:
        df (pl.DataFrame): Feature frame; must contain ``TARGET_COLS`` when
            ``has_target`` is True.
        base_pred_df (pl.DataFrame): Base predictions stored under the
            ``TARGET_COLS`` column names.
        has_target (bool): If True, each target column is replaced by
            ``target - base prediction``.

    Returns:
        pl.DataFrame: ``df`` plus the ``BASE_PRED_COLS`` columns, with the
        target columns converted to residuals when ``has_target`` is True.

    Raises:
        ValueError: If the two frames do not have the same number of rows.
    """
    target_cols = TARGET_COLS
    base_pred_cols = BASE_PRED_COLS

    # A horizontal concat matches rows by position only; a height mismatch
    # would otherwise surface as an obscure shape error or as silently
    # null-padded columns, depending on the polars version.
    if df.height != base_pred_df.height:
        raise ValueError(
            "df and base_pred_df must have the same number of rows, "
            f"but got {df.height} and {base_pred_df.height}"
        )

    df = pl.concat(
        [
            df,
            base_pred_df.select(target_cols).rename(
                dict(zip(target_cols, base_pred_cols))
            ),
        ],
        how="horizontal",
    )

    if has_target:
        # Overwrite each target with its residual against the base prediction.
        df = df.with_columns(
            [
                (pl.col(tg_col) - pl.col(base_pred_col)).alias(tg_col)
                for tg_col, base_pred_col in zip(target_cols, base_pred_cols)
            ]
        )
    return df


def add_base_pred_to_target(df: pl.DataFrame, target_cols: list[str]) -> pl.DataFrame:
    """
    Return ``df`` with the base predictions added onto the given columns.

    The i-th entry of ``target_cols`` is paired with the i-th entry of
    ``BASE_PRED_COLS`` and overwritten with the sum of the two columns.
    Pairing is positional and stops at the shorter of the two lists.

    Args:
        df (pl.DataFrame): Frame containing both ``target_cols`` and the
            ``BASE_PRED_COLS`` columns.
        target_cols (list[str]): Columns to which the base predictions are
            added (target columns or prediction columns).
    """
    restored = [
        (pl.col(name) + pl.col(base_name)).alias(name)
        for name, base_name in zip(target_cols, BASE_PRED_COLS)
    ]
    return df.with_columns(restored)


if BASE_PRED_DIR is not None:
    # columns: "x_0", "y_0", "z_0", ..., "x_5", "y_5", "z_5"
    base_oof_pred_df = pl.read_csv(BASE_OOF_PRED_CSV)
    base_submission_df = pl.read_csv(BASE_SUBMISSION_CSV)

    # The new targets are the original targets minus the base predictions.
    # The test frame has no targets, so it only receives the base prediction columns.
    # Rows are matched by position, so this must run before the frames are re-sorted.
    tr_df = reduce_base_pred(tr_df, base_oof_pred_df, has_target=True)
    ts_df = reduce_base_pred(ts_df, base_submission_df, has_target=False)

    # The raw prediction frames are no longer needed once merged.
    del base_oof_pred_df, base_submission_df
    gc.collect()

In [8]:
# Load the traffic-light bounding-box arrays (.npy) for every train and test ID.
# The printed shapes are (n_rows, 1, 64, 128, 8): one file per ID with 8 values
# in the last axis (presumably one channel per traffic-light class — confirm in utils).
tr_tl_bbox_images = utils.load_npy_images(
    IMAGES_DIR,
    ids=tr_df.get_column("ID").to_list(),
    image_names=[TRAFFIC_LIGHTS_BBOX_IMAGE_NAME],
)
print(tr_tl_bbox_images.shape)
ts_tl_bbox_images = utils.load_npy_images(
    IMAGES_DIR,
    ids=ts_df.get_column("ID").to_list(),
    image_names=[TRAFFIC_LIGHTS_BBOX_IMAGE_NAME],
)
print(ts_tl_bbox_images.shape)

(43371, 1, 64, 128, 8)
(1727, 1, 64, 128, 8)


In [9]:
# Load the depth arrays (.npy), one per camera frame listed in DEPTH_IMAGE_NAMES.
# The printed shapes are (n_rows, 3, 64, 128, 1): three frames, single channel.
tr_depth_images = utils.load_npy_images(
    IMAGES_DIR,
    ids=tr_df.get_column("ID").to_list(),
    image_names=DEPTH_IMAGE_NAMES,
)
print(tr_depth_images.shape)
ts_depth_images = utils.load_npy_images(
    IMAGES_DIR,
    ids=ts_df.get_column("ID").to_list(),
    image_names=DEPTH_IMAGE_NAMES,
)
print(ts_depth_images.shape)

(43371, 3, 64, 128, 1)
(1727, 3, 64, 128, 1)


In [10]:
# Load the camera frames listed in IMAGE_NAMES for every train and test ID.
# The printed shapes are (n_rows, 3, 64, 128, 3): three frames with 3 channels each.
tr_images = utils.load_images(
    IMAGES_DIR, ids=tr_df.get_column("ID").to_list(), image_names=IMAGE_NAMES
)
print(tr_images.shape)
ts_images = utils.load_images(
    IMAGES_DIR, ids=ts_df.get_column("ID").to_list(), image_names=IMAGE_NAMES
)
print(ts_images.shape)

(43371, 3, 64, 128, 3)
(1727, 3, 64, 128, 3)


In [11]:
# Merge the camera frames, traffic-light boxes and depth maps into one array per
# split. The printed result has 20 planes per sample, which is consistent with
# 3 frames x 3 channels + 1 x 8 box channels + 3 x 1 depth channels.
# NOTE(review): tr_images / ts_images are overwritten here, so re-running this
# cell without re-running the loading cell feeds already-preprocessed arrays back in.
tr_images = utils.preprocess_images(
    [tr_images, tr_tl_bbox_images, tr_depth_images]
)
ts_images = utils.preprocess_images(
    [ts_images, ts_tl_bbox_images, ts_depth_images]
)

print(tr_images.shape)
print(ts_images.shape)

(43371, 20, 64, 128)
(1727, 20, 64, 128)


In [12]:
# The per-source arrays were already merged into tr_images / ts_images, so drop
# all of them first and reclaim the memory with a single collection pass.
del tr_tl_bbox_images, ts_tl_bbox_images, tr_depth_images, ts_depth_images
gc.collect()

0

## scene_dsec順に並び替える

In [13]:
# Order the rows by scene_id and, within each scene, by scene_dsec.
tr_df = tr_df.sort(["scene_id", "scene_dsec"])
ts_df = ts_df.sort(["scene_id", "scene_dsec"])

# Apply the same permutation to the image arrays through origin_idx
# (presumably each row's position in the original CSV — confirm in utils.read_feature_csv).
# NOTE(review): this cell is not idempotent. Running it a second time indexes the
# already-reordered arrays with origin_idx again and misaligns images and rows.
tr_images = tr_images[tr_df.get_column("origin_idx").to_numpy()]
ts_images = ts_images[ts_df.get_column("origin_idx").to_numpy()]

## Target

In [14]:
# Fit the target transformer on the training frame and build the "tg_"-prefixed
# training targets from it.
target = utils.CoordinateTarget(prefix="tg_")
target.fit(tr_df)

tg_df = target.transform(tr_df)
print(tg_df.columns)
# glimpse() prints its summary itself and returns None by default, so it is
# called directly; wrapping it in print() would also emit a stray "None".
tg_df.describe().glimpse()
tr_df = pl.concat([tr_df, tg_df], how="horizontal")

# The transformed targets now live in tr_df; release the temporary frame.
del tg_df
gc.collect()

['tg_cood_x_0', 'tg_cood_y_0', 'tg_cood_z_0', 'tg_cood_x_1', 'tg_cood_y_1', 'tg_cood_z_1', 'tg_cood_x_2', 'tg_cood_y_2', 'tg_cood_z_2', 'tg_cood_x_3', 'tg_cood_y_3', 'tg_cood_z_3', 'tg_cood_x_4', 'tg_cood_y_4', 'tg_cood_z_4', 'tg_cood_x_5', 'tg_cood_y_5', 'tg_cood_z_5']
Rows: 9
Columns: 19
$ statistic   <str> 'count', 'null_count', 'mean', 'std', 'min', '25%', '50%', '75%', 'max'
$ tg_cood_x_0 <f64> 43371.0, 0.0, 3.915807866824304e-06, 0.09755740299606562, -2.068491360867153, -0.0399921260776388, 0.0001685152283451563, 0.041051261445355536, 1.205429121318332
$ tg_cood_y_0 <f64> 43371.0, 0.0, 3.3850710814478617e-05, 0.062100404071971656, -2.5727879141483267, -0.021748391426786483, -0.0004883458396699972, 0.021632620544553012, 3.813253350830932
$ tg_cood_z_0 <f64> 43371.0, 0.0, 1.0609752174614824e-05, 0.04015807466563177, -0.9997029502068319, -0.018513304908815843, 0.00038892404923006546, 0.017899114943309744, 1.4469071368673965
$ tg_cood_x_1 <f64> 43371.0, 0.0, -6.5520745548367315e-06, 

0

## 特徴量

In [15]:
# Fit the feature transformer on the training frame only, then apply the same
# fitted transformer to both train and test to produce "ft_"-prefixed columns.
feature = utils.Feature(prefix="ft_")
feature.fit(tr_df)

ft_df = feature.transform(tr_df)
print(ft_df.columns)
# glimpse() prints its summary itself and returns None by default, so it is
# called directly; wrapping it in print() would also emit a stray "None".
ft_df.describe().glimpse()
tr_df = pl.concat([tr_df, ft_df], how="horizontal")

ft_df = feature.transform(ts_df)
print(ft_df.columns)
ft_df.describe().glimpse()
ts_df = pl.concat([ts_df, ft_df], how="horizontal")

# The feature columns now live in tr_df / ts_df; release the temporary frame.
del ft_df
gc.collect()

['ft_vEgo', 'ft_aEgo', 'ft_steeringAngleDeg', 'ft_steeringTorque', 'ft_brake', 'ft_brakePressed', 'ft_gas', 'ft_gasPressed', 'ft_is_gearShifter_drive', 'ft_is_gearShifter_neutral', 'ft_is_gearShifter_park', 'ft_is_gearShifter_reverse', 'ft_leftBlinker', 'ft_rightBlinker', 'ft_base_pred_x0', 'ft_base_pred_y0', 'ft_base_pred_z0', 'ft_base_pred_x1', 'ft_base_pred_y1', 'ft_base_pred_z1', 'ft_base_pred_x2', 'ft_base_pred_y2', 'ft_base_pred_z2', 'ft_base_pred_x3', 'ft_base_pred_y3', 'ft_base_pred_z3', 'ft_base_pred_x4', 'ft_base_pred_y4', 'ft_base_pred_z4', 'ft_base_pred_x5', 'ft_base_pred_y5', 'ft_base_pred_z5']
Rows: 9
Columns: 33
$ statistic                 <str> 'count', 'null_count', 'mean', 'std', 'min', '25%', '50%', '75%', 'max'
$ ft_vEgo                   <f64> 43371.0, 0.0, 9.172175823216334, 7.226919878374694, -0.1619189828634262, 2.5786657333374023, 8.518790245056152, 14.286815643310547, 27.55126190185547
$ ft_aEgo                   <f64> 43371.0, 0.0, -0.015654028629347255, 0.63

0

## モデリング

In [16]:
# Number of folds handed to utils.train as n_splits.
N_SPLITS = 2

In [17]:
# Multiplier applied to both the image-channel count and the feature count below
# (presumably the number of consecutive samples of a scene fed to the model at
# once — confirm in utils).
n_sample_in_scene = 3

# Architecture settings consumed by utils.train.
model_params = {
    "dnn": {
        "n_sample_in_scene": n_sample_in_scene,
        # Per-sample image planes times the number of samples per scene.
        "n_img_channels": tr_images.shape[1] * n_sample_in_scene,
        "n_features": len(feature.columns) * n_sample_in_scene,
        "n_targets": len(target.columns),
        "dropout": 0.0,
        "embed_dim": 128,
        "n_layers": 1,
    },
    "dnn_pretrained_model": {
        # list[str]: len(list) == n_splits
        "weight_path": None,
        "load_only_backbone": None,
    },
    "dev": "cuda",
}

# One learning rate shared by the optimizer, the backbone optimizer and the
# scheduler's max_lr.
lr = 5e-5
# Training-loop settings consumed by utils.train.
fit_params = {
    "dnn": {
        "tr_batch_size": 32,
        "vl_batch_size": 256,
        "trainer_params": {
            "criterion_params": {},
            "opt": "adamw",
            "opt_params": {"lr": lr, "weight_decay": 1e-4},
            "backbone_opt_params": {"lr": lr, "weight_decay": 1e-4},
            # NOTE(review): these keys match the arguments of a one-cycle LR
            # schedule; which scheduler utils actually builds is not visible here.
            "sch_params": {
                "max_lr": lr,
                "pct_start": 0.1,
                "div_factor": 25,
                "final_div_factor": 1000,
            },
            "epochs": 5,
            "dev": "cuda",
            "val_freq": 1,
            "prefix": "",
            "save_best": False,
            "save_epochs": [],
            "maximize_score": False,
            "grad_max_norm": None,
        },
    },
}

In [18]:
# Train one model per fold. Folds are grouped by scene_id (group_col), and the
# scene columns are passed through so utils.train can work per scene.
# NOTE(review): the return values are presumably the fitted per-fold models and
# the out-of-fold predictions in tr_df row order — confirm in utils.train.
# This is the most expensive cell of the notebook (minutes per epoch per fold).
models, oof_preds = utils.train(
    model_params=model_params,
    fit_params=fit_params,
    df=tr_df,
    images=tr_images,
    target_cols=target.columns,
    feature_cols=feature.columns,
    group_col="scene_id",
    scene_id_col="scene_id",
    scene_dsec_col="scene_dsec",
    n_splits=N_SPLITS,
)

-----------------
-----------------
Training fold 0...
train samples: 21685, valid samples: 21686
Save model : fold0_model.pth

epoch  0
lr  2.000000000000001e-06
lr  2.000000000000001e-06
lr  2.000000000000001e-06
lr  2.000000000000001e-06


100%|██████████| 677/677 [01:24<00:00,  8.00it/s]
100%|██████████| 85/85 [01:12<00:00,  1.17it/s]



Train Loss: 6.0408
{'loss': 6.0408154527052815, 'loss_mse_0': 0.11026944824256432, 'loss_mse_1': 0.06299239109673344, 'loss_mse_2': 0.04329439945673044, 'loss_mse_3': 0.21309417006910464, 'loss_mse_4': 0.12072014838644197, 'loss_mse_5': 0.08894799489038205, 'loss_mse_6': 0.3598104804270024, 'loss_mse_7': 0.1973779031035643, 'loss_mse_8': 0.1358409515379803, 'loss_mse_9': 0.5551288488762594, 'loss_mse_10': 0.3206690606527702, 'loss_mse_11': 0.18705850380144057, 'loss_mse_12': 0.7837526119001859, 'loss_mse_13': 0.5179700219956952, 'loss_mse_14': 0.22984473698358415, 'loss_mse_15': 1.095367812093463, 'loss_mse_16': 0.7383098087027294, 'loss_mse_17': 0.2803661440178598}
Valid Loss: 5.8587
{'loss': 5.858684985777911, 'loss_mse_0': 0.10680826201158411, 'loss_mse_1': 0.07483275206211736, 'loss_mse_2': 0.03930815278607256, 'loss_mse_3': 0.21842070493627996, 'loss_mse_4': 0.1391427036593942, 'loss_mse_5': 0.07985027099356931, 'loss_mse_6': 0.34741809683687547, 'loss_mse_7': 0.21052618543891347

100%|██████████| 677/677 [01:24<00:00,  8.00it/s]
100%|██████████| 85/85 [01:12<00:00,  1.18it/s]



Train Loss: 5.6046
{'loss': 5.604601525133434, 'loss_mse_0': 0.09621704124090326, 'loss_mse_1': 0.05916356146087983, 'loss_mse_2': 0.0412228793350146, 'loss_mse_3': 0.19510416557222737, 'loss_mse_4': 0.1138422235466837, 'loss_mse_5': 0.08363924459437323, 'loss_mse_6': 0.32535113813561917, 'loss_mse_7': 0.1882648933871458, 'loss_mse_8': 0.12687786521068176, 'loss_mse_9': 0.508168265905831, 'loss_mse_10': 0.30410149197150543, 'loss_mse_11': 0.17076548263760755, 'loss_mse_12': 0.7394063123880144, 'loss_mse_13': 0.4691178172694703, 'loss_mse_14': 0.21622693874877347, 'loss_mse_15': 1.016438611170918, 'loss_mse_16': 0.6881783557299145, 'loss_mse_17': 0.26251524447998587}
Valid Loss: 5.7179
{'loss': 5.717880184510175, 'loss_mse_0': 0.10539588340941597, 'loss_mse_1': 0.07369164009304607, 'loss_mse_2': 0.03864932189531186, 'loss_mse_3': 0.2149671378381112, 'loss_mse_4': 0.13585203846587854, 'loss_mse_5': 0.07836172195041881, 'loss_mse_6': 0.3410232323057511, 'loss_mse_7': 0.20870520016726327,

100%|██████████| 677/677 [01:24<00:00,  8.01it/s]
100%|██████████| 85/85 [01:12<00:00,  1.18it/s]



Train Loss: 5.5371
{'loss': 5.537118649553161, 'loss_mse_0': 0.09547942734847568, 'loss_mse_1': 0.05869590868056371, 'loss_mse_2': 0.040977524153260926, 'loss_mse_3': 0.19300675024023964, 'loss_mse_4': 0.11274056097539348, 'loss_mse_5': 0.08315849842632979, 'loss_mse_6': 0.3220850757297068, 'loss_mse_7': 0.18639324380957042, 'loss_mse_8': 0.12589369808905587, 'loss_mse_9': 0.5028383977920301, 'loss_mse_10': 0.29996822129500317, 'loss_mse_11': 0.16911525243791467, 'loss_mse_12': 0.7304493068900383, 'loss_mse_13': 0.4634890445292084, 'loss_mse_14': 0.21406480419054694, 'loss_mse_15': 1.0006712892118903, 'loss_mse_16': 0.678286867587658, 'loss_mse_17': 0.25980477501669874}
Valid Loss: 5.7410
{'loss': 5.740952256146599, 'loss_mse_0': 0.10578542759313303, 'loss_mse_1': 0.07454472094335977, 'loss_mse_2': 0.039033823942436895, 'loss_mse_3': 0.21508658230304717, 'loss_mse_4': 0.1376013820662218, 'loss_mse_5': 0.07850356908405529, 'loss_mse_6': 0.34089502867530375, 'loss_mse_7': 0.209756163989

100%|██████████| 677/677 [01:24<00:00,  7.99it/s]
100%|██████████| 85/85 [01:12<00:00,  1.18it/s]



Train Loss: 5.4712
{'loss': 5.47120660059209, 'loss_mse_0': 0.09508066142371345, 'loss_mse_1': 0.05845728248599889, 'loss_mse_2': 0.040756044701633194, 'loss_mse_3': 0.19180207229336307, 'loss_mse_4': 0.11194702205103732, 'loss_mse_5': 0.08272882069198297, 'loss_mse_6': 0.31905571364713736, 'loss_mse_7': 0.18483265648916516, 'loss_mse_8': 0.1250353715598495, 'loss_mse_9': 0.49762743249249425, 'loss_mse_10': 0.2972305185839732, 'loss_mse_11': 0.1677268318160598, 'loss_mse_12': 0.7193419610864127, 'loss_mse_13': 0.4577453149540386, 'loss_mse_14': 0.21226617247086332, 'loss_mse_15': 0.9829567796722649, 'loss_mse_16': 0.6695976689061703, 'loss_mse_17': 0.25701827365503355}
Valid Loss: 5.7363
{'loss': 5.736336918438182, 'loss_mse_0': 0.10575173212324872, 'loss_mse_1': 0.07393329820650466, 'loss_mse_2': 0.03887528904220637, 'loss_mse_3': 0.215255583998035, 'loss_mse_4': 0.1369646170998321, 'loss_mse_5': 0.0785412328208194, 'loss_mse_6': 0.3416637986898422, 'loss_mse_7': 0.20983420557835522,

100%|██████████| 677/677 [01:24<00:00,  7.99it/s]
100%|██████████| 85/85 [01:12<00:00,  1.18it/s]
  model.load_state_dict(torch.load(model_path))



Train Loss: 5.4239
{'loss': 5.423857583781289, 'loss_mse_0': 0.0946954113666513, 'loss_mse_1': 0.05832251783685987, 'loss_mse_2': 0.040684430181782034, 'loss_mse_3': 0.19062296673105245, 'loss_mse_4': 0.11144302443682355, 'loss_mse_5': 0.08240266544317917, 'loss_mse_6': 0.3166628204495199, 'loss_mse_7': 0.18385886563266046, 'loss_mse_8': 0.12432132631353209, 'loss_mse_9': 0.4936256625825757, 'loss_mse_10': 0.29515266273737484, 'loss_mse_11': 0.16675469054781353, 'loss_mse_12': 0.7117784586755704, 'loss_mse_13': 0.453892378711542, 'loss_mse_14': 0.21083386334834106, 'loss_mse_15': 0.9706435619772273, 'loss_mse_16': 0.6629542993213788, 'loss_mse_17': 0.2552079806211604}
Valid Loss: 5.7332
{'loss': 5.7332484357497275, 'loss_mse_0': 0.10574196213308502, 'loss_mse_1': 0.07372293246581274, 'loss_mse_2': 0.03872184234068674, 'loss_mse_3': 0.2151086260290707, 'loss_mse_4': 0.13630204454940908, 'loss_mse_5': 0.07815888576647814, 'loss_mse_6': 0.3418752601918052, 'loss_mse_7': 0.209343363169361

KeyboardInterrupt: 

In [None]:
# Prefix every out-of-fold prediction column with "pred_" and remember the
# resulting names; later cells use pred_cols for scoring, saving and inference.
# NOTE(review): oof_preds is overwritten, so re-running this cell prefixes the
# columns a second time ("pred_pred_...").
oof_preds = oof_preds.select(pl.all().name.prefix("pred_"))
pred_cols = oof_preds.columns

# Rows are paired by position with tr_df (already sorted by scene).
tr_df = pl.concat([tr_df, oof_preds], how="horizontal")
tr_df

## 評価

In [None]:
def calc_score(df: pl.DataFrame, pred_cols: list[str]):
    """
    Compute the mean absolute error of the predictions against the targets.

    The i-th entry of ``pred_cols`` is compared with the i-th target column in
    the order x_0, y_0, z_0, ..., x_5, y_5, z_5.

    Args:
        df (pl.DataFrame): Frame containing the target columns and ``pred_cols``.
        pred_cols (list[str]): Prediction columns, one per target column.

    Returns:
        dict[str, float]: ``"score_<pred_col>"`` with the per-column MAE for
        every prediction column, plus ``"avg"`` with the MAE over all values.

    Raises:
        ValueError: If ``pred_cols`` does not have one entry per target column.
    """
    tg_cols = [f"{axis}_{i}" for i in range(6) for axis in ("x", "y", "z")]

    # Without this check a single prediction column would be broadcast against
    # all targets by numpy and yield a plausible-looking but meaningless score.
    if len(pred_cols) != len(tg_cols):
        raise ValueError(
            f"pred_cols must have {len(tg_cols)} columns, but got {len(pred_cols)}"
        )

    # Compute the absolute error once and reuse it for both aggregations.
    abs_err = np.abs(df.select(tg_cols).to_numpy() - df.select(pred_cols).to_numpy())

    scores = {
        f"score_{col}": float(err) for col, err in zip(pred_cols, abs_err.mean(axis=0))
    }
    scores["avg"] = float(abs_err.mean())
    return scores


# Score the out-of-fold predictions against the current target columns
# (still residuals at this point when a base prediction was subtracted).
scores = calc_score(tr_df, pred_cols)
scores

In [None]:
# Plot predictions against targets in 40 bins, before the base prediction is added back.
utils.plot_calibration_curve(tr_df, pred_cols, n_bins=40)

In [None]:
if BASE_PRED_DIR is not None:
    # Add back the base predictions that were subtracted earlier so that the
    # target and prediction columns return to their original scale.
    # NOTE(review): not idempotent — re-running this cell adds the base
    # predictions a second time.
    tr_df = add_base_pred_to_target(tr_df, TARGET_COLS)
    tr_df = add_base_pred_to_target(tr_df, pred_cols)

In [None]:
if BASE_PRED_DIR is not None:
    # Re-score after the base predictions have been added back to both targets
    # and predictions.
    scores = calc_score(tr_df, pred_cols)
    display(scores)

In [None]:
if BASE_PRED_DIR is not None:
    # Same calibration plot as before, now on the restored (original-scale) values.
    utils.plot_calibration_curve(tr_df, pred_cols, n_bins=40)

## oofを保存

In [None]:
def create_submission_csv(preds: pl.DataFrame, filename: str = "submission.csv"):
    """
    Write predictions to a CSV file using the submission column names.

    The columns of ``preds`` are renamed positionally to ``TARGET_COLS``
    (x_0, y_0, z_0, ..., x_5, y_5, z_5). The caller's frame is left untouched.

    Args:
        preds (pl.DataFrame): Predictions with exactly one column per target,
            in target order.
        filename (str): Path of the CSV file to write.

    Raises:
        ValueError: If ``preds`` does not have one column per target.
    """
    submission_cols = TARGET_COLS

    # validate preds columns
    if len(preds.columns) != len(submission_cols):
        raise ValueError(
            f"preds columns must be {len(submission_cols)}, but got {len(preds.columns)}"
        )

    # Build a renamed copy instead of assigning to preds.columns, which would
    # rename the columns of the frame the caller passed in.
    renamed = preds.select(
        [pl.col(src).alias(dst) for src, dst in zip(preds.columns, submission_cols)]
    )
    renamed.write_csv(filename)
    print(f"Submission file is created: {filename}")


# Restore the original row order (origin_idx) and save the out-of-fold predictions.
create_submission_csv(tr_df.sort("origin_idx").select(pred_cols), "oof_preds.csv")

## Submission

In [None]:
# Predict on the test set with the trained fold models, reusing the prediction
# column names defined for the out-of-fold frame.
# NOTE(review): how utils.predict combines the per-fold models is not visible here.
preds = utils.predict(
    models,
    ts_images,
    ts_df,
    feature.columns,
    scene_id_col="scene_id",
    scene_dsec_col="scene_dsec",
    pred_cols=pred_cols,
)
pred_cols = preds.columns
# Rows are paired by position with ts_df (sorted by scene at this point).
ts_df = pl.concat([ts_df, preds], how="horizontal")

preds

In [None]:
if BASE_PRED_DIR is not None:
    # Add back the base predictions that were subtracted earlier so that the
    # prediction columns return to the original scale.
    # NOTE(review): not idempotent — re-running this cell adds the base
    # predictions a second time.
    ts_df = add_base_pred_to_target(ts_df, pred_cols)
    display(ts_df)

In [None]:
# Restore the original row order before writing the submission.
ts_df = ts_df.sort("origin_idx")

In [None]:
# Write the test predictions under the submission column names.
create_submission_csv(ts_df.select(pred_cols), "submission.csv")