In [1]:
!rm -r /kaggle/working/*
%cd /kaggle/working

/kaggle/working


In [2]:
import os
import sys

PACKAGE_DIR = "/kaggle/src"
sys.path.append(PACKAGE_DIR)
sys.path.append(os.path.join(PACKAGE_DIR, "Penguin-ML-Library"))

In [3]:
import yaml
from penguinml.utils.logger import get_logger, init_logger
from penguinml.utils.set_seed import seed_base_torch

MODEL_NAME = "deberta"
CFG = yaml.safe_load(open(os.path.join(PACKAGE_DIR, "config.yaml"), "r"))
print(CFG[MODEL_NAME]["execution"]["exp_id"])
CFG["output_dir"] = f"/kaggle/output/{CFG[MODEL_NAME]['execution']['exp_id']}"
!rm -r {CFG["output_dir"]}
os.makedirs(CFG["output_dir"], exist_ok=True)

init_logger(f"{ CFG[MODEL_NAME]['execution']['exp_id']}.log")
logger = get_logger("main")
seed_base_torch(CFG[MODEL_NAME]["execution"]["seed"])

2024-11-21 10:10:29.057830: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-11-21 10:10:29.091398: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


exp_018


  pid, fd = os.forkpty()
set seed: 46


In [4]:
import warnings

import numpy as np
import polars as pl
from tqdm import tqdm

warnings.filterwarnings("ignore")

In [5]:
train = pl.read_csv(os.path.join(CFG["dataset"]["competition_dir"], "train_features.csv"))
train = (
    train.with_columns(
        pl.col("ID").str.split_exact("_", n=1).struct.rename_fields(["sceneID", "offset"]).alias("fields")
    )
    .unnest("fields")
    .with_columns(
        pl.col("offset").cast(pl.Float32),
        pl.lit(False).alias("submit"),
    )
)
test = pl.read_csv(os.path.join(CFG["dataset"]["competition_dir"], "test_features.csv"))
test = (
    test.with_columns(
        pl.col("ID").str.split_exact("_", n=1).struct.rename_fields(["sceneID", "offset"]).alias("fields")
    )
    .unnest("fields")
    .with_columns(
        pl.col("offset").cast(pl.Float32),
        pl.lit(True).alias("submit"),
    )
)
train = pl.concat([train, test], how="diagonal")
print(train.shape)
train.head(1)

(45098, 33)


ID,vEgo,aEgo,steeringAngleDeg,steeringTorque,brake,brakePressed,gas,gasPressed,gearShifter,leftBlinker,rightBlinker,x_0,y_0,z_0,x_1,y_1,z_1,x_2,y_2,z_2,x_3,y_3,z_3,x_4,y_4,z_4,x_5,y_5,z_5,sceneID,offset,submit
str,f64,f64,f64,f64,f64,bool,f64,bool,str,bool,bool,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,str,f32,bool
"""00066be8e20318869c38c66be46663…",5.701526,1.538456,-2.165777,-139.0,0.0,False,0.25,True,"""drive""",False,False,2.82959,0.032226,0.045187,6.231999,0.065895,0.107974,9.785009,0.124972,0.203649,13.485472,0.163448,0.302818,17.574227,0.174289,0.406331,21.951269,0.199503,0.485079,"""00066be8e20318869c38c66be46663…",320.0,False


In [6]:
# normalize
train = train.with_columns(
    pl.col("vEgo") / 30,
    pl.col("aEgo"),
    pl.col("steeringAngleDeg") / 400,
    pl.col("steeringTorque") / 600,
    pl.col("offset") / pl.col("offset").max(),
)

In [7]:
from penguinml.utils.contena import FeatureContena

from feature_engineering import add_basic_features, add_scene_lag_features

features = FeatureContena()
train, features = add_basic_features(train, features)
train, features = add_scene_lag_features(train, features)
print(len(features.num_features()))
train[features.num_features()].describe()

55


statistic,aEgo,aEgo_diff_-1,aEgo_diff_1,aEgo_shift_-1,aEgo_shift_1,brake,brakePressed,brakePressed_diff_-1,brakePressed_diff_1,brakePressed_shift_-1,brakePressed_shift_1,brake_diff_-1,brake_diff_1,brake_shift_-1,brake_shift_1,gas,gasPressed,gasPressed_diff_-1,gasPressed_diff_1,gasPressed_shift_-1,gasPressed_shift_1,gas_diff_-1,gas_diff_1,gas_shift_-1,gas_shift_1,leftBlinker,leftBlinker_diff_-1,leftBlinker_diff_1,leftBlinker_shift_-1,leftBlinker_shift_1,offset,offset_diff_-1,offset_diff_1,offset_shift_-1,offset_shift_1,rightBlinker,rightBlinker_diff_-1,rightBlinker_diff_1,rightBlinker_shift_-1,rightBlinker_shift_1,steeringAngleDeg,steeringAngleDeg_diff_-1,steeringAngleDeg_diff_1,steeringAngleDeg_shift_-1,steeringAngleDeg_shift_1,steeringTorque,steeringTorque_diff_-1,steeringTorque_diff_1,steeringTorque_shift_-1,steeringTorque_shift_1,vEgo,vEgo_diff_-1,vEgo_diff_1,vEgo_shift_-1,vEgo_shift_1
str,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64
"""count""",45098.0,35577.0,35577.0,35577.0,35577.0,45098.0,45098.0,35577.0,35577.0,35577.0,35577.0,35577.0,35577.0,35577.0,35577.0,45098.0,45098.0,35577.0,35577.0,35577.0,35577.0,35577.0,35577.0,35577.0,35577.0,45098.0,35577.0,35577.0,35577.0,35577.0,45098.0,35577.0,35577.0,35577.0,35577.0,45098.0,35577.0,35577.0,35577.0,35577.0,45098.0,35577.0,35577.0,35577.0,35577.0,45098.0,35577.0,35577.0,35577.0,35577.0,45098.0,35577.0,35577.0,35577.0,35577.0
"""null_count""",0.0,9521.0,9521.0,9521.0,9521.0,0.0,0.0,9521.0,9521.0,9521.0,9521.0,9521.0,9521.0,9521.0,9521.0,0.0,0.0,9521.0,9521.0,9521.0,9521.0,9521.0,9521.0,9521.0,9521.0,0.0,9521.0,9521.0,9521.0,9521.0,0.0,9521.0,9521.0,9521.0,9521.0,0.0,9521.0,9521.0,9521.0,9521.0,0.0,9521.0,9521.0,9521.0,9521.0,0.0,9521.0,9521.0,9521.0,9521.0,0.0,9521.0,9521.0,9521.0,9521.0
"""mean""",-0.016168,-0.005095,0.005095,-0.005095,0.005095,0.0,0.307131,0.001883,-0.001883,0.001883,-0.001883,0.0,0.0,0.0,0.0,0.083971,0.483791,-0.001939,0.001939,-0.001939,0.001939,-0.000771,0.000771,-0.000771,0.000771,0.077764,-0.005228,0.005228,-0.005228,0.005228,0.515167,-0.206897,0.206897,-0.206897,0.206897,0.102067,-0.001068,0.001068,-0.001068,0.001068,-0.005274,2.5e-05,-2.5e-05,2.5e-05,-2.5e-05,-0.031763,-0.000984,0.000984,-0.000984,0.000984,0.306156,0.001376,-0.001376,0.001376,-0.001376
"""std""",0.631639,0.860365,0.860365,0.860365,0.860365,0.0,0.461309,0.487097,0.487097,0.487097,0.487097,0.0,0.0,0.0,0.0,0.109826,0.499743,0.564206,0.564206,0.564206,0.564206,0.126945,0.126945,0.126945,0.126945,0.267803,0.255094,0.255094,0.255094,0.255094,0.32926,0.075381,0.075381,0.075381,0.075381,0.302739,0.238165,0.238165,0.238165,0.238165,0.163457,0.186424,0.186424,0.186424,0.186424,0.184867,0.249753,0.249753,0.249753,0.249753,0.240915,0.086836,0.086836,0.086836,0.086836
"""min""",-4.936206,-5.84588,-5.796605,-5.84588,-5.796605,0.0,0.0,-1.0,-1.0,-1.0,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.0,-1.0,-1.0,-1.0,-0.8,-0.66,-0.8,-0.66,0.0,-1.0,-1.0,-1.0,-1.0,0.038462,-0.961538,0.192308,-0.961538,0.192308,0.0,-1.0,-1.0,-1.0,-1.0,-1.203486,-1.962355,-2.337468,-1.962355,-2.337468,-1.166667,-1.481667,-1.371667,-1.481667,-1.371667,-0.005569,-0.604864,-0.57511,-0.604864,-0.57511
"""25%""",-0.237048,-0.323888,-0.368738,-0.323888,-0.368738,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.04,-0.05,-0.04,-0.05,0.0,0.0,0.0,0.0,0.0,0.230769,-0.192308,0.192308,-0.192308,0.192308,0.0,0.0,0.0,0.0,0.0,-0.008665,-0.008017,-0.007615,-0.008017,-0.007615,-0.156667,-0.146667,-0.143333,-0.146667,-0.143333,0.086478,-0.033128,-0.031659,-0.033128,-0.031659
"""50%""",-2.105e-15,-8.0168e-37,8.0168e-37,-8.0168e-37,8.0168e-37,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.423077,-0.192308,0.192308,-0.192308,0.192308,0.0,0.0,0.0,0.0,0.0,-0.0009,0.0,0.0,0.0,0.0,-0.021667,0.0,0.0,0.0,0.0,0.284483,-1.5867e-13,1.5867e-13,-1.5867e-13,1.5867e-13
"""75%""",0.222053,0.368738,0.323888,0.368738,0.323888,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.165,1.0,0.0,0.0,0.0,0.0,0.05,0.04,0.05,0.04,0.0,0.0,0.0,0.0,0.0,0.807692,-0.192308,0.192308,-0.192308,0.192308,0.0,0.0,0.0,0.0,0.0,0.00653,0.007615,0.008017,0.007615,0.008017,0.085,0.143333,0.146667,0.143333,0.146667,0.47714,0.031659,0.033128,0.031659,0.033128
"""max""",3.14007,5.796605,5.84588,5.796605,5.84588,0.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.915,1.0,1.0,1.0,1.0,1.0,0.66,0.8,0.66,0.8,1.0,1.0,1.0,1.0,1.0,1.0,-0.192308,0.961538,-0.192308,0.961538,1.0,1.0,1.0,1.0,1.0,1.211729,2.337468,1.962355,2.337468,1.962355,1.166667,1.371667,1.481667,1.371667,1.481667,0.918375,0.57511,0.604864,0.57511,0.604864


In [8]:
# fillna
for col in features.num_features():
    train = train.with_columns(pl.col(col).fill_null(-1))

In [9]:
# vlm
sentence_df = pl.read_csv("/kaggle/input/vlm_sentence/future_position_descriptions.csv")
train = train.join(sentence_df, on="ID", how="left")
sentence_df.head(1)

ID,Description
str,str
"""00066be8e20318869c38c66be46663…","""The center and right portions …"


In [10]:
import pytorch_lightning as lightning
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
from pytorch_lightning.callbacks import EarlyStopping, ModelCheckpoint
from torch.autograd import Variable
from torch.nn import functional as F
from torch.optim import lr_scheduler
from torch.optim.lr_scheduler import ReduceLROnPlateau
from torchmetrics import (
    AUROC,
    Accuracy,
    F1Score,
    MeanAbsoluteError,
    MeanSquaredError,
    MetricCollection,
    Precision,
    Recall,
)
from torchvision import datasets, models, transforms

In [11]:
from transformers import AutoModel, AutoTokenizer

from const import TARGET_COLS

os.environ["TOKENIZERS_PARALLELISM"] = "false"


BERT_MODEL_NAME = "roberta-base"

tokenizer = AutoTokenizer.from_pretrained(BERT_MODEL_NAME)


class MyDataset(torch.utils.data.Dataset):
    def __init__(self, df: pl.DataFrame, tokenizer):
        self.X_token = tokenizer(
            df["Description"].to_list(),
            padding=True,
            truncation=True,
            return_tensors="pt",
        )
        self.meta_features = df[features.num_features()].to_numpy().astype(np.float32)
        self.y = df[TARGET_COLS].to_numpy().astype(np.float32)

    def __len__(self):
        return self.y.shape[0]

    def __getitem__(self, index):
        return {
            "token_ids": torch.LongTensor(self.X_token["input_ids"][index]),
            "meta": torch.FloatTensor(self.meta_features[index]),
            "token_mask": torch.LongTensor(self.X_token["attention_mask"][index]),
            "target": self.y[index],
        }


class LightningDataModule(lightning.LightningDataModule):
    def __init__(self, train_dataset=None, valid_dataset=None, batch_size=32):
        super().__init__()
        self.train_dataset = train_dataset
        self.valid_dataset = valid_dataset
        self.batch_size = batch_size

    def train_dataloader(self):
        return torch.utils.data.DataLoader(
            self.train_dataset,
            batch_size=self.batch_size,
            shuffle=True,
            pin_memory=True,
            worker_init_fn=self.worker_init_fn,
            num_workers=2,
        )

    def val_dataloader(self):
        return torch.utils.data.DataLoader(
            self.valid_dataset,
            batch_size=self.batch_size,
            shuffle=False,
            pin_memory=True,
            worker_init_fn=self.worker_init_fn,
            num_workers=2,
        )

    def worker_init_fn(self, worker_id):
        # dataloaderでnum_workers>1の時の乱数設定
        # これを指定しないと各workerのrandom_stateが同じになり、データも同じになる。
        np.random.seed(np.random.get_state()[1][0] + worker_id)


MyDataset(train.head(10), tokenizer)[0]

{'token_ids': tensor([    0,   133,  1312,     8,   235, 14566,     9,     5,  2274,  6364,
             5,  2621,     9,   921, 35480,     6,   217,  7993,     6,    10,
          9128,     6,     8,    10, 11038,  1656,  1970,    15,     5,   314,
           526,     4,    20,  1881,     9,     5,  1703,   797,  4666,    16,
           699,     6, 31843,     6,  5542,     5,  1155,    16,    15,    41,
          2171,  2014,     4,   374,     5,   235,   526,     6,     5,   921,
          2092,     7,    28,  2342,  7933,     6,  3544, 22206,     9,    10,
          2353,   906,     6,  3228,    12, 21765,   921,    19,    97,  1734,
          1455,     4, 50118, 50118, 20930,    15,     5,   699, 35480,     8,
             5,  1703,  8724,     6,     5,  1155,    18,   595,  2698,    16,
          1706,     5,  9128,    15,     5,   314,   526,     4,   318,     5,
          1155,  1388,    31,     5,   595,  6625,     6,    24,    74,   533,
          1004,     7,     5,   235,   

In [12]:
import gc


class fc_bn_relu(nn.Module):
    def __init__(self, d_in, d_out):
        super(fc_bn_relu, self).__init__()
        self.fc = nn.Linear(d_in, d_out)
        self.bn = nn.BatchNorm1d(d_out)
        self.relu = nn.ReLU()

    def forward(self, x):
        x = self.fc(x)
        x = self.bn(x)
        x = self.relu(x)
        return x


class LightningModel(lightning.LightningModule):
    def __init__(
        self,
        loss_fn,
        lr=0.001,
        weight_decay=0,
    ):
        super().__init__()

        self.sentence_encoder = AutoModel.from_pretrained(BERT_MODEL_NAME)
        self.bert_fc = torch.nn.Linear(self.sentence_encoder.config.hidden_size, len(TARGET_COLS))

        self.loss_fn = loss_fn
        self.lr = lr
        self.weight_decay = weight_decay

        self.train_metrics = MetricCollection([MeanAbsoluteError()], prefix="")
        self.valid_metrics = MetricCollection([MeanAbsoluteError()], prefix="val_")

        self.val_step_outputs = []
        self.val_step_labels = []

    def forward(self, x_token_ids, x_mask):
        x = self.sentence_encoder(x_token_ids, attention_mask=x_mask)[1]
        x = self.bert_fc(x)
        # x = x.squeeze(1)
        return x

    def training_step(self, batch, batch_idx):
        x_token_ids = batch["token_ids"]
        x_mask = batch["token_mask"]
        y = batch["target"]
        preds = self.forward(x_token_ids, x_mask)
        loss = self.loss_fn(preds, y)

        # self.train_metrics(preds, y.to(int))
        self.log(
            "loss",
            loss,
            prog_bar=True,
            logger=True,
            on_epoch=True,
            on_step=True,
        )
        self.log_dict(
            self.train_metrics,
            prog_bar=True,
            logger=True,
            on_epoch=True,
            on_step=True,
        )
        return {"loss": loss}

    def validation_step(self, batch, batch_idx):
        x_token_ids = batch["token_ids"]
        x_mask = batch["token_mask"]
        y = batch["target"]
        preds = self.forward(x_token_ids, x_mask)

        self.val_step_outputs.append(preds)
        self.val_step_labels.append(y)

    def on_validation_epoch_end(self):
        preds = torch.cat(self.val_step_outputs)
        labels = torch.cat(self.val_step_labels)
        self.val_step_outputs.clear()
        self.val_step_labels.clear()
        gc.collect()
        loss = self.loss_fn(preds, labels)

        print(f"[epoch {self.trainer.current_epoch}]")

        self.log(
            "val_loss",
            loss,
            prog_bar=False,
            logger=True,
            on_epoch=True,
            on_step=False,
        )
        self.log_dict(
            self.valid_metrics,
            prog_bar=True,
            logger=True,
            on_epoch=True,
            on_step=False,
        )

        # ログをprint
        self.print_metric(preds, labels, "valid")

    def configure_optimizers(self):
        optimizer = optim.Adam(self.parameters(), lr=self.lr, weight_decay=self.weight_decay)
        scheduler = ReduceLROnPlateau(optimizer, factor=0.1, patience=2, verbose=True)
        # scheduler = LinearWarmupCosineAnnealingLR(optimizer, eta_min=1e-8, max_epochs=1e10,
        #                                           warmup_epochs=10, warmup_start_lr=1e-8)
        return {"optimizer": optimizer, "lr_scheduler": scheduler, "monitor": "val_loss"}

    def print_metric(self, y_hat, y, train_or_valid="train"):
        """
        ログをprintする。次のepochが終わると上書きされてしまうので。
        TODO: たぶんもっとマシな方法があるので探す。
        """
        if train_or_valid == "train":
            metrics = self.train_metrics
        else:
            metrics = self.valid_metrics
        loss = self.loss_fn(y_hat, y)

        print(f"[epoch {self.trainer.current_epoch}] {train_or_valid}: ", end="")
        print(f"{type(self.loss_fn).__name__}={loss:.4f}", end=", ")
        for name, metric in metrics.items():
            v = metric(y_hat, y.to(int))
            print(f"{name}={v:.4f}", end=", ")
        print()

## CV Split


In [13]:
train_folds = pl.read_csv(CFG["dataset"]["train_fold_path"])
train = train.join(train_folds, on="sceneID", how="left")
# assert train["fold"].null_count() == 0

## Training


In [14]:
import torch
import torch.nn as nn
from penguinml.nn.utils.dataloader import get_dataloaders
from penguinml.nn.utils.trainer import Trainer
from transformers import get_cosine_schedule_with_warmup

In [15]:
!rm -r lightning_logs
!rm -r logs

rm: cannot remove 'lightning_logs': No such file or directory
rm: cannot remove 'logs': No such file or directory


In [16]:
# naive
scores = []
for c in TARGET_COLS:
    mae = (train[c] - train[c].median()).abs().mean()
    scores.append(mae)
np.mean(scores)

3.6760581381351227

In [None]:
oof_dfs = []
sub_preds = []
for fold in range(5):
    train_df = train.filter(pl.col("fold") != fold).filter(~pl.col("submit"))
    valid_df = train.filter(pl.col("fold") == fold).filter(~pl.col("submit"))
    test_df = train.filter(pl.col("submit")).sort("ID")

    train_dataset = MyDataset(train_df, tokenizer)
    valid_dataset = MyDataset(valid_df, tokenizer)
    test_dataset = MyDataset(test_df, tokenizer)

    bs = 24
    data_module = LightningDataModule(train_dataset, valid_dataset, batch_size=bs)
    model = LightningModel(lr=0.00001, weight_decay=0, loss_fn=nn.L1Loss())

    # コールバック
    cp_callback = ModelCheckpoint(
        "logs/",
        filename="best_model",
        monitor="val_MeanAbsoluteError",
        mode="min",
        save_top_k=1,
        save_last=False,
    )
    es_callback = EarlyStopping(
        monitor="val_MeanAbsoluteError",
        mode="min",
        patience=3,
    )

    # 学習
    trainer = lightning.Trainer(
        callbacks=[cp_callback, es_callback],
        max_epochs=5,
        num_sanity_val_steps=0,
    )
    trainer.fit(model, datamodule=data_module)

    # 推論
    model = LightningModel.load_from_checkpoint("logs/best_model.ckpt", loss_fn=nn.L1Loss()).to("cuda")
    pred = []
    for i in tqdm(range(0, len(valid_dataset), bs)):
        pred.append(
            nn.Sigmoid()(
                model(
                    torch.stack([valid_dataset[i]["token_ids"] for i in range(i, min(i + bs, len(valid_dataset)))]).to(
                        "cuda"
                    ),
                    torch.stack(
                        [valid_dataset[i]["token_mask"] for i in range(i, min(i + bs, len(valid_dataset)))]
                    ).to("cuda"),
                ).detach()
            )
            .cpu()
            .numpy()
        )
    oof = np.concatenate(pred)
    valid_df = valid_df.with_columns(*[pl.Series(TARGET_COLS[i], oof[:, i]) for i in range(len(TARGET_COLS))])
    oof_dfs.append(valid_df.select(["ID"] + TARGET_COLS))

    # test
    pred = []
    for i in tqdm(range(0, len(test_dataset), bs)):
        pred.append(
            nn.Sigmoid()(
                model(
                    torch.stack([test_dataset[i]["token_ids"] for i in range(i, min(i + bs, len(test_dataset)))]).to(
                        "cuda"
                    ),
                    torch.stack([test_dataset[i]["token_mask"] for i in range(i, min(i + bs, len(test_dataset)))]).to(
                        "cuda"
                    ),
                ).detach()
            )
            .cpu()
            .numpy()
        )
    sub = np.concatenate(pred)
    sub_preds.append(sub)

    del model, trainer
    gc.collect()
    torch.cuda.empty_cache()

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

[epoch 0]
[epoch 0] valid: L1Loss=3.5935, val_MeanAbsoluteError=3.4535, 


Validation: |          | 0/? [00:00<?, ?it/s]

[epoch 1]
[epoch 1] valid: L1Loss=3.2286, val_MeanAbsoluteError=3.1046, 


Validation: |          | 0/? [00:00<?, ?it/s]

[epoch 2]
[epoch 2] valid: L1Loss=3.0849, val_MeanAbsoluteError=2.9595, 


Validation: |          | 0/? [00:00<?, ?it/s]

[epoch 3]
[epoch 3] valid: L1Loss=3.1180, val_MeanAbsoluteError=2.9649, 


Validation: |          | 0/? [00:00<?, ?it/s]

[epoch 4]
[epoch 4] valid: L1Loss=3.0404, val_MeanAbsoluteError=2.9120, 


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
100%|██████████| 362/362 [00:35<00:00, 10.12it/s]
100%|██████████| 72/72 [00:07<00:00, 10.18it/s]
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

[epoch 0]
[epoch 0] valid: L1Loss=3.5827, val_MeanAbsoluteError=3.4509, 


Validation: |          | 0/? [00:00<?, ?it/s]

[epoch 1]
[epoch 1] valid: L1Loss=3.1997, val_MeanAbsoluteError=3.0691, 


Validation: |          | 0/? [00:00<?, ?it/s]

[epoch 2]
[epoch 2] valid: L1Loss=3.1050, val_MeanAbsoluteError=2.9974, 


Validation: |          | 0/? [00:00<?, ?it/s]

[epoch 3]
[epoch 3] valid: L1Loss=3.0495, val_MeanAbsoluteError=2.9304, 


Validation: |          | 0/? [00:00<?, ?it/s]

[epoch 4]
[epoch 4] valid: L1Loss=3.0415, val_MeanAbsoluteError=2.9273, 


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
100%|██████████| 362/362 [00:35<00:00, 10.23it/s]
100%|██████████| 72/72 [00:07<00:00, 10.25it/s]
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

[epoch 0]
[epoch 0] valid: L1Loss=3.6579, val_MeanAbsoluteError=3.5125, 


Validation: |          | 0/? [00:00<?, ?it/s]

[epoch 1]
[epoch 1] valid: L1Loss=3.2648, val_MeanAbsoluteError=3.1247, 


Validation: |          | 0/? [00:00<?, ?it/s]

[epoch 2]
[epoch 2] valid: L1Loss=3.1533, val_MeanAbsoluteError=3.0367, 


Validation: |          | 0/? [00:00<?, ?it/s]

[epoch 3]
[epoch 3] valid: L1Loss=3.0877, val_MeanAbsoluteError=2.9692, 


Validation: |          | 0/? [00:00<?, ?it/s]

[epoch 4]
[epoch 4] valid: L1Loss=3.0957, val_MeanAbsoluteError=2.9563, 


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
100%|██████████| 362/362 [00:35<00:00, 10.20it/s]
100%|██████████| 72/72 [00:07<00:00, 10.25it/s]
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

[epoch 0]
[epoch 0] valid: L1Loss=3.7819, val_MeanAbsoluteError=3.6445, 


Validation: |          | 0/? [00:00<?, ?it/s]

[epoch 1]
[epoch 1] valid: L1Loss=3.3697, val_MeanAbsoluteError=3.2310, 


Validation: |          | 0/? [00:00<?, ?it/s]

[epoch 2]
[epoch 2] valid: L1Loss=3.1875, val_MeanAbsoluteError=3.0556, 


Validation: |          | 0/? [00:00<?, ?it/s]

[epoch 3]
[epoch 3] valid: L1Loss=3.0692, val_MeanAbsoluteError=2.9482, 


Validation: |          | 0/? [00:00<?, ?it/s]

[epoch 4]
[epoch 4] valid: L1Loss=3.0743, val_MeanAbsoluteError=2.9610, 


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
100%|██████████| 362/362 [00:35<00:00, 10.14it/s]
100%|██████████| 72/72 [00:07<00:00, 10.20it/s]
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

[epoch 0]
[epoch 0] valid: L1Loss=3.5429, val_MeanAbsoluteError=3.4076, 


Validation: |          | 0/? [00:00<?, ?it/s]

[epoch 1]
[epoch 1] valid: L1Loss=3.2079, val_MeanAbsoluteError=3.0895, 


Validation: |          | 0/? [00:00<?, ?it/s]

[epoch 2]
[epoch 2] valid: L1Loss=3.0523, val_MeanAbsoluteError=2.9289, 


Validation: |          | 0/? [00:00<?, ?it/s]

[epoch 3]
[epoch 3] valid: L1Loss=3.0312, val_MeanAbsoluteError=2.8982, 


In [None]:
oof_df = pl.concat(oof_dfs).sort("ID")
train = train.sort("ID")

mae = np.mean(np.abs(oof_df[TARGET_COLS].to_numpy() - train.filter(~pl.col("submit"))[TARGET_COLS].to_numpy()))
print(f"MAE: {mae}")

oof_df.write_csv(os.path.join(CFG["output_dir"], "oof.csv"))
oof_df.head(1)

ValueError: operands could not be broadcast together with shapes (50,18) (43371,18) 

In [None]:
sub_preds = np.stack(sub_preds).mean(axis=0)  # medがいいかも
sub_df = (
    test_df.with_columns(*[pl.Series(TARGET_COLS[i], sub_preds[:, i]) for i in range(len(TARGET_COLS))])
    .sort("ID")
    .select(["ID"] + TARGET_COLS)
)

sub_df.write_csv(os.path.join(CFG["output_dir"], "submission.csv"))
sub_df.head(1)

IndexError: too many indices for array: array is 1-dimensional, but 2 were indexed