In [1]:
# Show the GPU, driver and CUDA version visible to this kernel.
!nvidia-smi


Sat Nov 25 12:07:27 2023       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 530.30.02              Driver Version: 530.30.02    CUDA Version: 12.1     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                  Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf            Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  NVIDIA GeForce RTX 3090 Ti      On | 00000000:01:00.0 Off |                  Off |
|  0%   32C    P8               18W / 480W|    679MiB / 24564MiB |      0%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
                                                                    

In [2]:
import sys
sys.path.append('../')

import os
import re
import gc
import pdb
import json
import time
import yaml
import pickle
import shutil
import joblib
import random
import datetime
import requests
import warnings
warnings.filterwarnings('ignore')
from ast import literal_eval
from tqdm.auto import tqdm
from pathlib import Path
from glob import glob

import numpy as np
import pandas as pd
import polars as pl
import scipy
import itertools
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
from sklearn.model_selection import (
    StratifiedKFold,
    KFold,
    GroupKFold,
    StratifiedGroupKFold
)
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler, RobustScaler


import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader, Subset
from torch.utils.checkpoint import checkpoint
from torch.cuda.amp import autocast, GradScaler
import torch.nn.functional as F
from torch import Tensor

import transformers
from transformers import AutoConfig, AutoModel, AutoTokenizer
from transformers import AdamW, get_linear_schedule_with_warmup, get_cosine_schedule_with_warmup
import tokenizers
import sentencepiece
from transformers import logging

from pandarallel import pandarallel

from kaggle.api.kaggle_api_extended import KaggleApi

from kagglib.nn import EarlyStopping, AverageMeter, MCRMSELoss, MeanPooling, AttentionPooling, GeMPooling, LSTMPooling
from kagglib.utils import get_logger, setup, seed_everything, dataset_create_new
from kagglib.utils.exp_manage import set_wandb
from kagglib.nlp import freeze, get_scheduler, get_optimizer_grouped_parameters, collate
from kagglib.nlp import collate
from kagglib.nn import get_n_params

from src.metrics import event_detection_ap
from src.utils import get_event, get_score
from src.feature_engineering import find_similar_steps
from src.models import (
    DoubleConv,
    Up,
    Down,
    MultiResidualBiGRU,
    WaveNetBlock,
    )



# Plot styling: seaborn defaults first, then matplotlib's ggplot style sheet and a wide figure size.
sns.set()
plt.style.use('ggplot')
plt.rcParams['figure.figsize'] = (20, 5)
# Initialise pandarallel with a progress bar.
pandarallel.initialize(progress_bar=True)
# `logging` is transformers.logging here.
# NOTE(review): the second call overrides the first, so the effective verbosity is "error".
logging.set_verbosity_warning()
logging.set_verbosity_error()
%env TOKENIZERS_PARALLELISM=true
# Reload edited project modules automatically before each cell runs.
%load_ext autoreload
%autoreload 2


INFO: Pandarallel will run on 12 workers.
INFO: Pandarallel will use Memory file system to transfer data between the main process and workers.
env: TOKENIZERS_PARALLELISM=true


In [3]:
class Config:
    """Experiment settings; passed to ``setup()`` to build the runtime ``cfg`` object."""

    AUTHOR = "shu421"

    # Experiment name: used for the log file name and as the wandb run name.
    EXP = "exp101"
    # NOTE(review): absolute, machine-specific paths — presumably consumed by setup(); confirm.
    BASE_PATH = "/home/working/"
    API_PATH = "/root/.kaggle/kaggle.json"
    COMPETITION = "child-mind-institute-detect-sleep-states"

    seed = 42
    # When True, enables autocast in train_fn and the GradScaler in the training cell.
    apex = False
    # When True, a wandb run is created right after setup().
    wandb = False
    # Not referenced in the cells shown here.
    upload = False

    # training
    n_splits = 5
    batch_size = 32
    n_epochs = 30
    es_patience = 10
    num_workers = 4
    # Channel order of the label tensor: index 0 is the segmentation target,
    # indices 1 and 2 are the event heatmaps.
    target_cols = ["asleep", "onset", "wakeup"]
    n_targets = len(target_cols)
    # Not referenced in the cells shown here.
    eval_steps = np.inf

    # Window length and hop (in steps) used by CMIDataset. 17280 steps is 24 h at the
    # 12 steps/minute implied by `size` below; stride == max_length gives non-overlapping windows.
    train_max_length = 17280
    train_stride = 17280
    valid_max_length = 17280
    valid_stride = 17280

    # heatmap label
    # Standard deviation (in steps) of the Gaussian drawn around each onset/wakeup.
    sigma = 12
    # Positive-class weight of the heatmap BCE loss.
    pos_weight = 100

    # optimizer
    weight_decay = 0.01
    betas = (0.9, 0.999)
    lr = 8e-4
    # The next two values and the two after `eps` are not referenced in the cells shown here.
    lr_weight_decay = 0.95
    min_lr = 1e-6
    eps = 1e-6
    clip_grad_norm = 1000
    gradient_accumulation_steps = 1

    # scheduler
    # presumably read by get_scheduler(cfg, ...) — confirm against kagglib.
    scheduler = "cosine"
    num_cycles = 0.5
    num_warmup_steps_rate = 0

    # metrics
    # Both values are forwarded to get_score() as `thr` and `size`.
    thr = 0.1
    size = 12 * 10  # 10 min


# Build the runtime config from the Config class.
# NOTE(review): later cells read attributes that Config does not declare (log_path,
# INPUT_PATH, OUTPUT_EXP_MODEL_PATH, OUTPUT_EXP_PREDS_PATH, device); presumably setup()
# adds them — confirm against kagglib.utils.
cfg = setup(Config)


# set log functions
LOGGER = get_logger(Path(cfg.log_path) / f"{cfg.EXP}.log")

# Optional experiment tracking.
# NOTE(review): cfg.model_name is not declared on Config and the credentials path is
# hard-coded; enabling wandb presumably requires both to exist — confirm.
if cfg.wandb:
    run = set_wandb(
        cfg, name=cfg.EXP, group=cfg.model_name, config_path="/root/.kaggle/wandb.json"
    )


# Data Load

In [4]:
# Raw inputs: the per-step sensor series and the annotated events are loaded with polars,
# the sample submission with pandas.
train_series_df = pl.read_parquet(cfg.INPUT_PATH / "train_series.parquet")
train_events_df = pl.read_csv(cfg.INPUT_PATH / "train_events.csv")
sample_submission = pd.read_csv(cfg.INPUT_PATH / "sample_submission.csv")


In [5]:
# Preview the raw series frame.
train_series_df


series_id,step,timestamp,anglez,enmo
str,u32,str,f32,f32
"""038441c925bb""",0,"""2018-08-14T15:…",2.6367,0.0217
"""038441c925bb""",1,"""2018-08-14T15:…",2.6368,0.0215
"""038441c925bb""",2,"""2018-08-14T15:…",2.637,0.0216
"""038441c925bb""",3,"""2018-08-14T15:…",2.6368,0.0213
"""038441c925bb""",4,"""2018-08-14T15:…",2.6368,0.0215
"""038441c925bb""",5,"""2018-08-14T15:…",2.6367,0.0217
"""038441c925bb""",6,"""2018-08-14T15:…",2.6367,0.0217
"""038441c925bb""",7,"""2018-08-14T15:…",2.6367,0.0218
"""038441c925bb""",8,"""2018-08-14T15:…",2.798,0.0223
"""038441c925bb""",9,"""2018-08-14T15:…",3.0847,0.0217


In [6]:
# Attach the annotated night number to the samples whose (series_id, timestamp)
# coincides with an event; every other sample gets a null night.
_night_lookup = train_events_df.select(["series_id", "timestamp", "night"])
train_series_df = train_series_df.join(
    _night_lookup, how="left", on=["series_id", "timestamp"]
)


In [7]:
# Preview the series frame after the `night` column was joined in.
train_series_df


series_id,step,timestamp,anglez,enmo,night
str,u32,str,f32,f32,i64
"""038441c925bb""",0,"""2018-08-14T15:…",2.6367,0.0217,
"""038441c925bb""",1,"""2018-08-14T15:…",2.6368,0.0215,
"""038441c925bb""",2,"""2018-08-14T15:…",2.637,0.0216,
"""038441c925bb""",3,"""2018-08-14T15:…",2.6368,0.0213,
"""038441c925bb""",4,"""2018-08-14T15:…",2.6368,0.0215,
"""038441c925bb""",5,"""2018-08-14T15:…",2.6367,0.0217,
"""038441c925bb""",6,"""2018-08-14T15:…",2.6367,0.0217,
"""038441c925bb""",7,"""2018-08-14T15:…",2.6367,0.0218,
"""038441c925bb""",8,"""2018-08-14T15:…",2.798,0.0223,
"""038441c925bb""",9,"""2018-08-14T15:…",3.0847,0.0217,


In [8]:
# Preview the event annotations (some rows have a null step/timestamp).
train_events_df


series_id,night,event,step,timestamp
str,i64,str,i64,str
"""038441c925bb""",1,"""onset""",4992,"""2018-08-14T22:…"
"""038441c925bb""",1,"""wakeup""",10932,"""2018-08-15T06:…"
"""038441c925bb""",2,"""onset""",20244,"""2018-08-15T19:…"
"""038441c925bb""",2,"""wakeup""",27492,"""2018-08-16T05:…"
"""038441c925bb""",3,"""onset""",39996,"""2018-08-16T23:…"
"""038441c925bb""",3,"""wakeup""",44400,"""2018-08-17T05:…"
"""038441c925bb""",4,"""onset""",57240,"""2018-08-17T23:…"
"""038441c925bb""",4,"""wakeup""",62856,"""2018-08-18T06:…"
"""038441c925bb""",5,"""onset""",,
"""038441c925bb""",5,"""wakeup""",,


In [9]:
# Discard events that have any null field, then count the remaining events per
# (series_id, night).
train_events_df = train_events_df.drop_nulls()
bad_nights = (
    train_events_df.select(["series_id", "night", "event"])
    .group_by(["series_id", "night"], maintain_order=True)
    .count()
)
# Nights that do not have exactly two events left.
bad_nights.filter(pl.col("count") != 2)


series_id,night,count
str,i64,u32
"""0ce74d6d2106""",20,1
"""154fe824ed87""",30,1
"""44a41bba1ee7""",10,1
"""efbfc4526d58""",7,1
"""f8a8da8bdd00""",17,1


In [10]:
# Keep only the nights that have exactly two remaining events.
# The count is computed per (series_id, night) directly on the frame instead of
# hand-copying the offending ids from the previous cell's output, so the filter stays
# correct if the event file changes. `filter` preserves row order, and re-running the
# cell is a no-op.
train_events_df = train_events_df.filter(
    pl.col("event").count().over(["series_id", "night"]) == 2
)


In [11]:
# Preview the series frame (the two preceding cells only modified train_events_df).
train_series_df


series_id,step,timestamp,anglez,enmo,night
str,u32,str,f32,f32,i64
"""038441c925bb""",0,"""2018-08-14T15:…",2.6367,0.0217,
"""038441c925bb""",1,"""2018-08-14T15:…",2.6368,0.0215,
"""038441c925bb""",2,"""2018-08-14T15:…",2.637,0.0216,
"""038441c925bb""",3,"""2018-08-14T15:…",2.6368,0.0213,
"""038441c925bb""",4,"""2018-08-14T15:…",2.6368,0.0215,
"""038441c925bb""",5,"""2018-08-14T15:…",2.6367,0.0217,
"""038441c925bb""",6,"""2018-08-14T15:…",2.6367,0.0217,
"""038441c925bb""",7,"""2018-08-14T15:…",2.6367,0.0218,
"""038441c925bb""",8,"""2018-08-14T15:…",2.798,0.0223,
"""038441c925bb""",9,"""2018-08-14T15:…",3.0847,0.0217,


In [12]:
# Per-event label columns:
#   asleep - marker that is 0 on an onset row and 1 on a wakeup row (expanded below),
#   onset / wakeup - 1 on the corresponding event row, 0 otherwise.
train_events_df = train_events_df.with_columns(
    pl.when(pl.col("event") == "onset").then(0).otherwise(1).alias("asleep"),
    pl.when(pl.col("event") == "onset").then(1).otherwise(0).alias("onset"),
    pl.when(pl.col("event") == "wakeup").then(1).otherwise(0).alias("wakeup"),
)
# Put the labels on the samples that coincide with an event; all other samples are null.
train_series_df = train_series_df.join(
    train_events_df.select(["series_id", "timestamp"] + cfg.target_cols),
    on=["series_id", "timestamp"],
    how="left",
)

# Expand the markers into dense per-step labels.
# Back-filling `asleep` gives every step the marker of the next event: steps after an onset
# up to and including the wakeup become 1, steps leading up to an onset become 0.
# The fill is partitioned by series so that the tail of one recording is never labelled
# from the first event of the next recording; what is still null afterwards (no later
# event in the same series) is treated as awake.
train_series_df = train_series_df.with_columns(
    pl.col("asleep").backward_fill().over("series_id").fill_null(0),
    pl.col("onset").fill_null(0),
    pl.col("wakeup").fill_null(0),
)


In [13]:
# Preview the series frame with the dense asleep/onset/wakeup labels.
train_series_df


series_id,step,timestamp,anglez,enmo,night,asleep,onset,wakeup
str,u32,str,f32,f32,i64,i32,i32,i32
"""038441c925bb""",0,"""2018-08-14T15:…",2.6367,0.0217,,0,0,0
"""038441c925bb""",1,"""2018-08-14T15:…",2.6368,0.0215,,0,0,0
"""038441c925bb""",2,"""2018-08-14T15:…",2.637,0.0216,,0,0,0
"""038441c925bb""",3,"""2018-08-14T15:…",2.6368,0.0213,,0,0,0
"""038441c925bb""",4,"""2018-08-14T15:…",2.6368,0.0215,,0,0,0
"""038441c925bb""",5,"""2018-08-14T15:…",2.6367,0.0217,,0,0,0
"""038441c925bb""",6,"""2018-08-14T15:…",2.6367,0.0217,,0,0,0
"""038441c925bb""",7,"""2018-08-14T15:…",2.6367,0.0218,,0,0,0
"""038441c925bb""",8,"""2018-08-14T15:…",2.798,0.0223,,0,0,0
"""038441c925bb""",9,"""2018-08-14T15:…",3.0847,0.0217,,0,0,0


In [14]:
def encode_hour(hour):
    """Encode an hour of day cyclically.

    Args:
        hour: hour value (scalar or numpy array) on a 24-hour clock.

    Returns:
        Tuple ``(sin, cos)`` of the angle ``2 * pi * hour / 24``.
    """
    angle = 2 * np.pi * hour / 24.0
    return np.sin(angle), np.cos(angle)


# Hour of day, parsed from characters 11-12 of the timestamp string.
train_series_df = train_series_df.with_columns(
    pl.col("timestamp").str.slice(11, 2).cast(pl.Int64).alias("hour"),
)

# 24-row lookup table with the cyclic encoding of every hour.
# encode_hour is plain numpy, so it is evaluated once on the whole hour array instead of
# going through the element-wise (and deprecated) Series.apply and a pl.lit(ndarray).
hour_df = pl.DataFrame(
    {
        "hour": np.arange(24),
    }
)
# (24, 2) array: column 0 is sin, column 1 is cos.
encoded_hour = np.column_stack(encode_hour(hour_df["hour"].to_numpy()))
hour_df = hour_df.with_columns(
    pl.Series("hour_sin", encoded_hour[:, 0]),
    pl.Series("hour_cos", encoded_hour[:, 1]),
)

# Broadcast the encoding onto every sample via its hour.
train_series_df = train_series_df.join(
    hour_df,
    on=["hour"],
    how="left",
)


In [15]:
def get_similar_wave(_df, window_size=17280, threshold=1.0):
    """Flag the steps of one series that lie in a window reported by ``find_similar_steps``.

    ``find_similar_steps`` is run on ``anglez`` in recording order and again on the
    reversed signal; every start step it returns marks ``window_size`` steps from that
    start. The backward flags are flipped back into recording order and the two passes
    are merged.

    Args:
        _df: frame of a single series with an ``anglez`` column.
        window_size: window length in steps, forwarded to ``find_similar_steps`` and used
            as the length of each marked span (default 17280, the previous fixed value).
        threshold: forwarded to ``find_similar_steps`` (default 1.0, the previous fixed value).

    Returns:
        Single-column ``pl.DataFrame`` of int8 flags (1 = inside a marked window), one row
        per row of ``_df``.
    """
    # NOTE(review): find_similar_steps is a project helper; it is assumed to return an
    # iterable of integer start indices — confirm.
    n_steps = len(_df)

    def _mark_windows(signal):
        # 1 for every step covered by a window starting at a reported step.
        flags = np.zeros(n_steps)
        for start_step in find_similar_steps(
            signal, window_size=window_size, threshold=threshold
        ):
            flags[start_step : start_step + window_size] = 1
        return flags

    forward_flags = _mark_windows(_df["anglez"])
    # Flags of the reversed pass are indexed on the reversed signal; flip them back.
    backward_flags = _mark_windows(_df["anglez"][::-1])[::-1]

    similar_wave = np.clip(forward_flags + backward_flags, 0, 1)

    return pl.DataFrame(similar_wave.astype(np.int8))


# One-off computation of the similar-wave flag per series. It is kept commented out
# because the result is cached as parquet and read back below.
# similar_wave = train_series_df.group_by("series_id", maintain_order=True).apply(
#     get_similar_wave
# )
# similar_wave.write_parquet(cfg.input_path / "similar_wave.parquet")
# NOTE(review): the write above targets cfg.input_path while the read below uses
# cfg.INPUT_PATH — confirm both resolve to the same directory.

# NOTE(review): the cached column is attached positionally; this assumes the parquet rows
# are in the same order as train_series_df — confirm.
similar_wave = pl.read_parquet(cfg.INPUT_PATH / "similar_wave.parquet")
train_series_df = train_series_df.with_columns(
    similar_wave.to_series().alias("similar_wave")
)

# Release the temporary frame.
del similar_wave
gc.collect()


54

In [16]:
# Model inputs, in the order they are stacked into channels.
feat_cols = [
    "anglez", "enmo",
    "anglez_diff_pos", "enmo_diff_pos",
    "hour_sin", "hour_cos",
    "similar_wave",
]

# Subset of the inputs that is robust-scaled per fold.
scale_cols = ["anglez", "enmo", "anglez_diff_pos", "enmo_diff_pos"]


# Bookkeeping / target columns carried through unchanged.
util_cols_expr = [pl.col(name) for name in ("step", "asleep", "onset", "wakeup")]
# Feature expressions; the first difference of each sensor signal is 0 on the first row
# of a group (diff() yields null there).
feat_cols_expr = [
    pl.col("anglez"),
    pl.col("enmo"),
    pl.col("anglez").diff().fill_null(0).alias("anglez_diff_pos"),
    pl.col("enmo").diff().fill_null(0).alias("enmo_diff_pos"),
    pl.col("hour_sin"),
    pl.col("hour_cos"),
    pl.col("similar_wave"),
]

use_cols_expr = util_cols_expr + feat_cols_expr

use_col_names = [expr.meta.output_name() for expr in use_cols_expr]

# Evaluate the expressions inside each series (so diff() never crosses a series boundary),
# then explode the per-series lists back to one row per step. Only series_id and the
# selected columns survive this step.
train_series_df = (
    train_series_df
    .group_by("series_id", maintain_order=True)
    .agg(use_cols_expr)
    .explode(use_col_names)
)


In [17]:
class CMIDataset(Dataset):
    """Windows of at most ``max_length`` steps cut from each series, with event heatmaps.

    The frame is cut per series into windows that start every ``stride`` steps. Each item
    is a dict of per-feature float tensors plus an ``attention_mask`` (1 = real step,
    0 = padding) and a label tensor of shape ``(max_length, n_targets)`` whose onset and
    wakeup channels are Gaussian heatmaps around the annotated steps.

    Reads the notebook-level ``cfg`` (``target_cols``, ``sigma``) and ``feat_cols``.
    """

    def __init__(self, df: "pl.DataFrame", max_length: int, stride: int):
        self.max_length = max_length
        self.stride = stride

        # One row per window; every non-key column holds the list of per-step values.
        self.df = self._convert_df_to_segments(df)
        # Row i holds the raw per-step target sequences of window i (never modified).
        self.label = self.df[cfg.target_cols].to_numpy()

    def __len__(self):
        return len(self.df)

    # Unnormalised Gaussian: equals 1 at x == mu.
    def _gaussian(self, x, mu, sigma):
        return np.exp(-0.5 * ((x - mu) / sigma) ** 2)

    # Build a heatmap with one Gaussian bump per position where `series` equals 1.
    def _create_heatmap(self, series, sigma=1.0):
        indices = np.where(series == 1)[0]
        heatmap = np.zeros_like(series, dtype=float)

        # Positions are the same for every event, so compute them once.
        x = np.arange(len(series))
        for index in indices:
            heatmap += self._gaussian(x, index, sigma)

        return np.clip(heatmap, 0, 1)  # overlapping bumps are capped at 1

    def __getitem__(self, idx):
        feat = np.array([self.df[idx][col].to_numpy()[0] for col in feat_cols])

        # Work on a fresh list so the stored binary labels are not overwritten by the
        # heatmaps (the heatmap must be rebuilt from the raw 0/1 sequence on every access).
        labels = [np.asarray(target) for target in self.label[idx]]
        labels[1] = self._create_heatmap(labels[1], sigma=cfg.sigma)  # onset
        labels[2] = self._create_heatmap(labels[2], sigma=cfg.sigma)  # wakeup
        labels = np.stack(labels)  # (n_targets, length)

        original_length = feat.shape[1]
        attention_mask = np.ones(original_length, dtype=np.int8)

        # Padding or truncating
        diff_length = self.max_length - original_length
        # padding
        if diff_length > 0:
            feat = np.append(feat, np.zeros((feat.shape[0], diff_length)), axis=1)
            attention_mask = np.concatenate(
                [attention_mask, np.zeros(diff_length, dtype=np.int8)]
            )
            labels = np.append(
                labels, np.zeros((len(cfg.target_cols), diff_length)), axis=1
            )
        # truncating
        else:
            feat = feat[:, : self.max_length]
            attention_mask = attention_mask[: self.max_length]
            # Cut along the time axis (axis 1); axis 0 is the target channel.
            labels = labels[:, : self.max_length]

        inputs = {
            "attention_mask": torch.tensor(attention_mask, dtype=torch.float32),
        }
        for k, v in zip(feat_cols, feat):
            inputs[k] = torch.tensor(v.astype(np.float32), dtype=torch.float32)

        labels = torch.tensor(labels.T, dtype=torch.float32)  # (max_length, n_targets)
        return inputs, labels

    def _convert_df_to_segments(self, df: "pl.DataFrame"):
        segments = []

        # Walk over the data one series_id at a time and cut it into windows.
        group_id = 0
        for _series_id, group in tqdm(df.group_by("series_id", maintain_order=True)):
            start = 0
            while start < len(group):
                end = start + self.max_length
                _df = group[start:end]
                # Running window id, unique across all series.
                _df = _df.with_columns(pl.lit(group_id).alias("group"))
                segments.append(_df)
                start = start + self.stride
                group_id += 1

        segment_df = pl.concat(segments)
        del segments, group, _df
        gc.collect()

        # Collapse every window into a single row of list-valued columns.
        segment_df = segment_df.group_by(
            ["series_id", "group"], maintain_order=True
        ).agg(pl.col(["step", "group"] + cfg.target_cols + feat_cols).explode())

        return segment_df


In [18]:
class FeatureExtractorBlock(nn.Module):
    """Per-step feature stem: a DoubleConv followed by three parallel WaveNet branches.

    The three branch outputs are concatenated on the channel axis and projected back to
    ``hidden_size`` channels by a linear layer applied at every time step.
    Input is presumably ``(batch, len(feat_cols), length)`` — as stacked in CMIModel.forward.
    """

    def __init__(self, feat_cols, hidden_size=32):
        super().__init__()

        self.feat_cols = feat_cols
        self.inc = DoubleConv(
            len(feat_cols), hidden_size, kernel_size=1, padding="same"
        )

        # The three branches share one configuration but have independent weights.
        wave_kwargs = dict(hidden_size=hidden_size, kernel_size=2, dilations=6)
        self.wave_block1 = WaveNetBlock(**wave_kwargs)
        self.wave_block2 = WaveNetBlock(**wave_kwargs)
        self.wave_block3 = WaveNetBlock(**wave_kwargs)

        self.fc = nn.Linear(hidden_size * 3, hidden_size)

    def forward(self, x):
        stem = self.inc(x)
        branches = [
            branch(stem)
            for branch in (self.wave_block1, self.wave_block2, self.wave_block3)
        ]

        # Channels-last for the linear projection, then back to channels-first.
        merged = torch.cat(branches, dim=1).permute(0, 2, 1)
        projected = self.fc(merged)
        return projected.permute(0, 2, 1)


class PositionalEncoding(nn.Module):
    """Fixed sinusoidal positional encoding added to a ``(batch, seq_len, d_model)`` tensor.

    Even feature indices carry ``sin(pos * w_k)``, odd indices ``cos(pos * w_k)`` with
    ``w_k = exp(-2k * ln(10000) / d_model)``. The table is a non-trainable buffer of shape
    ``(1, max_len, d_model)``.

    Args:
        d_model: feature dimension of the input (even or odd).
        dropout: dropout probability applied after adding the encoding.
        max_len: largest sequence length the table covers.
    """

    def __init__(self, d_model, dropout=0.0, max_len=1000):
        super().__init__()

        self.dropout = nn.Dropout(p=dropout)

        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(
            torch.arange(0, d_model, 2).float()
            * (-torch.log(torch.tensor(10000.0)))
            / d_model
        )
        angles = position * div_term  # (max_len, ceil(d_model / 2))

        pe = torch.zeros(max_len, d_model)
        pe[:, 0::2] = torch.sin(angles)
        # For odd d_model there is one cosine column fewer than sine columns.
        pe[:, 1::2] = torch.cos(angles[:, : d_model // 2])

        self.register_buffer("pe", pe.unsqueeze(0))

    def forward(self, x):
        # Buffers carry no gradient, so the table slice can be added directly.
        x = x + self.pe[:, : x.size(1)]
        return self.dropout(x)


class TransformerGRUEncoderBlock(nn.Module):
    """Encoder stage: ``Down`` -> positional encoding -> transformer -> GRU stack -> linear.

    Takes and returns channels-first tensors; the sequence modules run channels-last
    internally. ``max_len`` sizes the positional-encoding table, so it has to cover the
    sequence length produced by ``Down``.
    """

    def __init__(self, in_channels, out_channels, stride=12, max_len=17280):
        super().__init__()

        self.down = Down(
            in_channels=in_channels, out_channels=out_channels, stride=stride
        )
        self.pos_encoder = PositionalEncoding(
            out_channels, dropout=0.2, max_len=max_len
        )

        # Four identical transformer encoder layers.
        layer = nn.TransformerEncoderLayer(
            d_model=out_channels,
            nhead=4,
            dim_feedforward=out_channels * 2,
            dropout=0.2,
            activation="gelu",
            batch_first=True,
        )
        self.transformer = nn.TransformerEncoder(layer, num_layers=4)

        self.gru = MultiResidualBiGRU(
            out_channels, out_channels, out_channels, n_layers=4, bidir=True
        )
        # fully connected
        self.fc = nn.Linear(out_channels, out_channels)

    def forward(self, x):
        downsampled = self.down(x)

        # Sequence modules expect (batch, seq_len, channels).
        seq = downsampled.permute(0, 2, 1)
        seq = self.transformer(self.pos_encoder(seq))
        seq, _ = self.gru(seq)  # second return value is not used
        seq = self.fc(seq)

        return seq.permute(0, 2, 1)


class DecoderBlock(nn.Module):
    """Decoder head: upsample the encoder output, fuse it with the stem features, emit 3 channels.

    ``forward`` concatenates ``x_original`` with the upsampled ``x1`` on the channel axis,
    runs a per-step MLP and finishes with a 1x1 convolution to 3 output channels.
    """

    def __init__(self, hidden_size):
        super().__init__()

        fused_width = hidden_size * 2

        self.up1 = Up(in_channels=fused_width, out_channels=hidden_size, stride=12)

        self.mlp = nn.Sequential(
            nn.Linear(fused_width, fused_width),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(fused_width, hidden_size),
            nn.ReLU(),
        )

        self.outc = nn.Conv1d(hidden_size, 3, kernel_size=1, padding="same")

    def forward(self, x_original, x1):
        upsampled = self.up1(x1)
        fused = torch.cat([x_original, upsampled], dim=1)

        # The MLP works on the last axis, so go channels-last and back.
        hidden = self.mlp(fused.permute(0, 2, 1)).permute(0, 2, 1)

        return self.outc(hidden)


class CMIModel(nn.Module):
    """Full network: feature extractor -> one transformer/GRU encoder stage -> decoder.

    ``forward`` takes the dict of per-feature tensors produced by the dataset and returns
    logits shaped ``(batch_size, seq_len, n_targets)``.
    """

    def __init__(self, feat_cols, hidden_size=32):
        super().__init__()

        self.feat_cols = feat_cols

        self.feature_extractor = FeatureExtractorBlock(
            feat_cols, hidden_size=hidden_size
        )
        # The encoder works on the sequence downsampled by 12.
        self.encoder1 = TransformerGRUEncoderBlock(
            hidden_size, hidden_size * 2, stride=12, max_len=17280 // 12
        )
        self.decoder = DecoderBlock(hidden_size)

        self._reinitialize()

    def _reinitialize(self):
        """
        Tensorflow/Keras-like initialization

        Parameters whose name contains "gru" get Xavier (input weights), orthogonal
        (recurrent weights) and zero (biases) initialisation; other parameters whose name
        contains "fc" get Xavier weights and zero biases. Everything else is left as is.
        """
        for param_name, param in self.named_parameters():
            if "gru" in param_name:
                if "weight_ih" in param_name:
                    nn.init.xavier_uniform_(param.data)
                elif "weight_hh" in param_name:
                    nn.init.orthogonal_(param.data)
                elif "bias_ih" in param_name:
                    param.data.fill_(0)
                    # Set forget-gate bias to 1
                    # NOTE(review): the [n/4, n/2) slice is the forget-gate position of a
                    # 4-gate LSTM bias; for a 3-gate GRU bias it covers a different span —
                    # confirm this is intended.
                    size = param.size(0)
                    param.data[(size // 4) : (size // 2)].fill_(1)
                elif "bias_hh" in param_name:
                    param.data.fill_(0)
                continue

            if "fc" not in param_name:
                continue

            if "weight" in param_name:
                nn.init.xavier_uniform_(param.data)
            elif "bias" in param_name:
                param.data.fill_(0)

    def forward(self, inputs):
        # One channel per feature, in feat_cols order.
        stacked = torch.stack([inputs[col] for col in self.feat_cols], dim=1)

        x_original = self.feature_extractor(stacked)
        encoded = self.encoder1(x_original)
        logits = self.decoder(x_original, encoded)

        # (batch_size, seq_len, n_targets)
        return logits.squeeze(1).permute(0, 2, 1)


In [19]:
def train_fn(
    model,
    train_data_loader,
    criterion1,
    optimizer,
    scheduler=None,
    scaler=None,
):
    """Train ``model`` for one pass over ``train_data_loader``.

    The loss is the sum of ``criterion1`` on output channel 0 (segmentation) and a
    positively-weighted BCE-with-logits on the remaining channels (event heatmaps);
    both are computed only on steps where ``attention_mask`` is set.

    Args:
        model: network called with the dict of input tensors.
        train_data_loader: yields ``(inputs, labels)`` batches.
        criterion1: loss for the segmentation channel.
        optimizer: optimizer stepped once per batch.
        scheduler: optional LR scheduler stepped once per batch.
        scaler: optional ``GradScaler``; without it a plain backward/step is performed.

    Returns:
        Batch-size-weighted average loss as tracked by ``AverageMeter``.
    """
    model.train()
    loss_meter = AverageMeter()
    pbar = tqdm(enumerate(train_data_loader), total=len(train_data_loader))
    for step, (inputs, labels) in pbar:
        inputs, labels = collate(inputs, labels)
        inputs = {k: v.to(cfg.device) for k, v in inputs.items()}
        labels = labels.to(cfg.device)
        with autocast(enabled=cfg.apex):
            output = model(inputs)

        # segmentations
        mask = inputs["attention_mask"].bool()
        output1 = output[:, :, 0][mask].view(-1)
        labels1 = labels[:, :, 0][mask].view(-1)
        loss1 = criterion1(output1, labels1)

        # heatmap
        output2 = output[:, :, 1:]
        labels2 = labels[:, :, 1:]
        mask2 = mask.unsqueeze(-1).expand_as(labels2)
        output2 = output2[mask2].view(-1)
        labels2 = labels2[mask2].view(-1)

        # A one-element pos_weight broadcasts over every target; this avoids building
        # a Python list with one entry per label on each step.
        pos_weight = torch.tensor(
            [cfg.pos_weight], dtype=labels2.dtype, device=labels2.device
        )
        criterion2 = nn.BCEWithLogitsLoss(pos_weight=pos_weight)

        loss2 = criterion2(output2, labels2)

        loss = loss1 + loss2

        if scaler is not None:
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()
        else:
            loss.backward()
            optimizer.step()
        optimizer.zero_grad()
        if scheduler is not None:
            scheduler.step()
        loss_meter.update(loss.item(), output.size(0))
    return loss_meter.avg


def valid_fn(
    model,
    valid_true_df,
    valid_pred_df,
    valid_data_loader,
    criterion1,
    epoch=None,
):
    """Evaluate ``model`` on ``valid_data_loader``.

    Uses the same loss as training (``criterion1`` on channel 0 plus a positively-weighted
    BCE-with-logits on the heatmap channels, masked by ``attention_mask``) and collects the
    sigmoid of the masked heatmap logits.

    Args:
        model: network called with the dict of input tensors.
        valid_true_df: ground-truth events, forwarded to ``get_score``.
        valid_pred_df: per-step frame the predictions belong to, forwarded to ``get_score``.
        valid_data_loader: yields ``(inputs, labels)`` batches.
        criterion1: loss for the segmentation channel.
        epoch: accepted for call-site compatibility; not used.

    Returns:
        ``(average loss, score, y_pred)`` where ``y_pred`` is a flat array that interleaves
        the two heatmap probabilities of every unmasked step, in loader order.
    """
    model.eval()
    loss_meter = AverageMeter()
    y_pred = []
    for step, (inputs, labels) in enumerate(valid_data_loader):
        inputs, labels = collate(inputs, labels)
        inputs = {k: v.to(cfg.device) for k, v in inputs.items()}
        labels = labels.to(cfg.device)
        with torch.no_grad():
            output = model(inputs)

        # segmentations
        mask = inputs["attention_mask"].bool()
        output1 = output[:, :, 0][mask].view(-1)
        labels1 = labels[:, :, 0][mask].view(-1)
        loss1 = criterion1(output1, labels1)

        # heatmap
        output2 = output[:, :, 1:]
        labels2 = labels[:, :, 1:]
        mask2 = mask.unsqueeze(-1).expand_as(labels2)
        output2 = output2[mask2].view(-1)
        labels2 = labels2[mask2].view(-1)

        # A one-element pos_weight broadcasts over every target; this avoids building
        # a Python list with one entry per label on each batch.
        pos_weight = torch.tensor(
            [cfg.pos_weight], dtype=labels2.dtype, device=labels2.device
        )
        criterion2 = nn.BCEWithLogitsLoss(pos_weight=pos_weight)

        loss2 = criterion2(output2, labels2)

        loss = loss1 + loss2

        loss_meter.update(loss.item(), output.size(0))
        y_pred.append(output2.detach().cpu().sigmoid().numpy())
    y_pred = np.concatenate(y_pred)

    score = get_score(valid_true_df, valid_pred_df, y_pred, thr=cfg.thr, size=cfg.size)
    return loss_meter.avg, score, y_pred


In [20]:
def scaling(input_df, scale_cols, i_fold, is_train=True):
    """Robust-scale ``scale_cols`` of ``input_df`` with a per-fold scaler.

    Args:
        input_df: polars frame holding the columns to scale; it is not modified.
        scale_cols: names of the columns to scale.
        i_fold: fold index, used in the scaler file name.
        is_train: when True a new ``RobustScaler`` is fitted and pickled to
            ``{cfg.OUTPUT_EXP_MODEL_PATH}/scaler_fold_{i_fold}.pkl``; when False the
            previously pickled scaler of that fold is loaded and applied.

    Returns:
        A clone of ``input_df`` whose ``scale_cols`` are replaced by the scaled values.
    """
    output_df = input_df.clone()
    scaler_path = f"{cfg.OUTPUT_EXP_MODEL_PATH}/scaler_fold_{i_fold}.pkl"

    if is_train:
        scaler = RobustScaler()
        result = scaler.fit_transform(output_df.select(scale_cols))
        # Context manager so the file is flushed and closed before it is read back.
        with open(scaler_path, "wb") as scaler_file:
            pickle.dump(scaler, scaler_file)
    else:
        # NOTE: pickle.load executes arbitrary code; only load scaler files written by
        # this notebook.
        with open(scaler_path, "rb") as scaler_file:
            scaler = pickle.load(scaler_file)
        result = scaler.transform(output_df.select(scale_cols))

    # result is (n_rows, n_cols); transpose to pair each column name with its values.
    result_dict = dict(zip(scale_cols, result.T))
    output_df = output_df.with_columns(pl.DataFrame(result_dict))

    return output_df


In [21]:
# One row per series with its total number of asleep steps (used only for the balance
# check at the end of the cell).
fold_df = (
    train_series_df.group_by("series_id")
    .agg(pl.sum("asleep"))
    .sort("series_id")
    .to_pandas()
)
# -1 marks a series that appears in no fold file.
fold_df["fold"] = -1

# Fold membership comes from pre-computed YAML files, one per fold; the number of files
# read follows cfg.n_splits, matching the range used by the training loop.
for i in range(cfg.n_splits):
    with open(cfg.INPUT_PATH / "tomo" / f"fold_{i}.yaml", "r") as fold_file:
        _fold_dict = yaml.safe_load(fold_file)
    fold_df.loc[fold_df["series_id"].isin(_fold_dict["valid_series_ids"]), "fold"] = i


# Asleep steps per fold.
fold_df.groupby("fold")["asleep"].sum()


fold
0    6176856
1    6340608
2    5494764
3    6294204
4    5531772
Name: asleep, dtype: int32

In [22]:
# Seed the RNGs before any dataset or model is built (seed_everything is a project helper).
seed_everything(cfg.seed)

# Wall-clock start; the elapsed time is logged after cross-validation.
s = time.time()
# NOTE(review): `cv` is not used in the cells shown here — fold membership comes from fold_df.
cv = StratifiedGroupKFold(n_splits=cfg.n_splits, shuffle=True, random_state=cfg.seed)

# Segment the full (unscaled) frame once, only to obtain the per-step
# (series_id, step, targets) rows that the out-of-fold predictions are aligned to.
dataset_tmp = CMIDataset(
    train_series_df,
    max_length=cfg.valid_max_length,
    stride=cfg.valid_stride,
)
oof_df = dataset_tmp.df.select(["series_id", "step"] + cfg.target_cols).explode(
    ["step"] + cfg.target_cols
)
# Out-of-fold prediction table (pandas), one row per step, filled fold by fold.
oof = pl.DataFrame(
    {
        "series_id": oof_df["series_id"],
        "step": oof_df["step"],
        "onset": 0.0,
        "wakeup": 0.0,
    }
).to_pandas()

# Per-fold results collected by the loop below.
best_scores = []
best_epochs = []

# Train and validate one model per fold; the best validation predictions of each fold are
# written into `oof`.
for i_fold in range(cfg.n_splits):
    LOGGER.info(f"{'='*50} Fold {i_fold} {'='*50}")

    # Series with fold == i_fold are held out; all others are used for training.
    train_id = fold_df[fold_df["fold"] != i_fold]["series_id"].to_list()
    valid_id = fold_df[fold_df["fold"] == i_fold]["series_id"].to_list()

    X_train = train_series_df.filter(pl.col("series_id").is_in(train_id))
    X_valid = train_series_df.filter(pl.col("series_id").is_in(valid_id))

    valid_true_df = train_events_df.filter(pl.col("series_id").is_in(valid_id))

    # The scaler is fitted (and pickled) on the training series; the validation series
    # reuse the pickled scaler of the same fold.
    X_train = scaling(X_train, scale_cols, i_fold, is_train=True)
    X_valid = scaling(X_valid, scale_cols, i_fold, is_train=False)

    display(X_train.head())

    train_dataset = CMIDataset(
        X_train, max_length=cfg.train_max_length, stride=cfg.train_stride
    )
    valid_dataset = CMIDataset(
        X_valid,
        max_length=cfg.valid_max_length,
        stride=cfg.valid_stride,
    )
    # Per-step rows of the validation windows, passed to get_score with the predictions.
    valid_pred_df = valid_dataset.df.select(
        ["series_id", "step"] + cfg.target_cols
    ).explode(["step"] + cfg.target_cols)

    train_loader = DataLoader(
        train_dataset,
        batch_size=cfg.batch_size,
        num_workers=cfg.num_workers,
        shuffle=True,
        drop_last=True,
        pin_memory=True,
    )
    # Validation keeps the dataset order and every window.
    valid_loader = DataLoader(
        valid_dataset,
        batch_size=cfg.batch_size,
        num_workers=cfg.num_workers,
        shuffle=False,
        drop_last=False,
        pin_memory=True,
    )

    model = CMIModel(feat_cols, hidden_size=32)
    model.to(cfg.device)

    n_params = get_n_params(model)
    LOGGER.info(f"n_params: {n_params}")

    # Loss of the segmentation channel; the heatmap loss is built inside train_fn/valid_fn.
    criterion1 = nn.BCEWithLogitsLoss()

    optimizer = torch.optim.AdamW(
        model.parameters(),
        lr=cfg.lr,
        betas=cfg.betas,
        eps=cfg.eps,
        weight_decay=cfg.weight_decay,
    )

    # The scheduler is stepped once per batch in train_fn, so its horizon is counted in batches.
    num_train_steps = int(len(train_loader) * cfg.n_epochs)
    scheduler = get_scheduler(cfg, optimizer, num_train_steps)

    scaler = GradScaler(enabled=cfg.apex)

    early_stopping = EarlyStopping(
        patience=cfg.es_patience,
        mode="min",
        model_path=f"{cfg.OUTPUT_EXP_MODEL_PATH}/fold{i_fold}.pth",
        verbose=False,
    )

    pbar = tqdm(range(cfg.n_epochs))
    for epoch in pbar:
        train_loss = train_fn(
            model,
            train_loader,
            criterion1,
            optimizer,
            scheduler,
            scaler,
        )
        valid_loss, valid_score, valid_pred = valid_fn(
            model,
            valid_true_df,
            valid_pred_df,
            valid_loader,
            criterion1,
            epoch,
        )

        # NOTE(review): EarlyStopping is a project helper; valid_loss is passed both as the
        # first and as the third argument, so the monitored quantity is presumably the
        # validation loss rather than the score — confirm against kagglib.
        early_stopping(valid_loss, valid_pred, valid_loss, model, epoch)
        if early_stopping.early_stop:
            LOGGER.info("Early stopping")
            break

        pbar.set_postfix(
            train_loss=train_loss,
            valid_loss=valid_loss,
            lr=optimizer.param_groups[0]["lr"],
            es=early_stopping.counter,
        )

        LOGGER.info(
            f"epoch:{epoch}/{cfg.n_epochs} | train_loss:{train_loss: .5f} valid_loss:{valid_loss: .5f} valid_score:{valid_score: .5f}"
        )

    # Score the predictions that the early-stopping helper kept as best.
    best_pred = early_stopping.best_pred
    best_score = get_score(
        valid_true_df, valid_pred_df, best_pred, thr=cfg.thr, size=cfg.size
    )
    best_loss = early_stopping.best_loss

    LOGGER.info("=" * 30)
    LOGGER.info(
        f"fold {i_fold} | best_score:{best_score: .5f} best_loss:{best_loss: .5f}"
    )
    LOGGER.info("=" * 30)
    best_scores.append(best_score)
    best_epochs.append(early_stopping.best_epoch)

    # best_pred is flat with the two heatmap probabilities interleaved per step.
    # NOTE(review): assumes the validation rows of `oof` are in the same order and number
    # as the unmasked steps produced by valid_fn — confirm.
    oof.loc[oof["series_id"].isin(valid_id), ["onset", "wakeup"]] = best_pred.reshape(
        -1, 2
    )

# Average predictions of steps that occur more than once (possible when windows overlap),
# then persist the out-of-fold predictions.
oof = pl.from_pandas(oof)
oof = oof.group_by(["series_id", "step"], maintain_order=True).agg(
    (pl.col("onset").mean().alias("onset")),
    (pl.col("wakeup").mean().alias("wakeup")),
)
# The saved file keeps series_id and the two probabilities; `step` is dropped here.
oof = oof.select(["series_id", "onset", "wakeup"])
oof.write_parquet(Path(cfg.OUTPUT_EXP_PREDS_PATH) / "oof.parquet")
# Overall CV score on all events, using the same flat interleaved layout as valid_fn.
# NOTE(review): assumes the rows of `oof` still line up one-to-one with `oof_df` — confirm.
cv_score = get_score(
    train_events_df,
    oof_df,
    oof[["onset", "wakeup"]].to_numpy().flatten(),
    thr=cfg.thr,
    size=cfg.size,
)

# The reported spread is the standard deviation of the per-fold best scores.
LOGGER.info("=" * 20)
LOGGER.info(f"cv: {cv_score: .5f} ±{np.std(best_scores): .5f}")
LOGGER.info("=" * 20)

LOGGER.info(f"elapsed time: {time.time()-s:.2f}sec")


0it [00:00, ?it/s]



series_id,step,asleep,onset,wakeup,anglez,enmo,anglez_diff_pos,enmo_diff_pos,hour_sin,hour_cos,similar_wave
str,u32,i32,i32,i32,f64,f64,f64,f64,f64,f64,i8
"""03d92c9f6f8a""",0,0,0,0,1.12417,1.482436,0.0,0.0,1.2246e-16,-1.0,0
"""03d92c9f6f8a""",1,0,0,0,0.902991,1.362998,-4.360479,-1.085107,1.2246e-16,-1.0,0
"""03d92c9f6f8a""",2,0,0,0,1.085416,3.796253,3.596451,22.106383,1.2246e-16,-1.0,0
"""03d92c9f6f8a""",3,0,0,0,1.31109,1.761124,4.449076,-18.489361,1.2246e-16,-1.0,0
"""03d92c9f6f8a""",4,0,0,0,1.625941,0.40281,6.207194,-12.340425,1.2246e-16,-1.0,0


0it [00:00, ?it/s]

0it [00:00, ?it/s]

n_params: 730915


  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/188 [00:00<?, ?it/s]

epoch:0/30 | train_loss: 0.46646 valid_loss: 0.18815 valid_score: 0.54584


  0%|          | 0/188 [00:00<?, ?it/s]

epoch:1/30 | train_loss: 0.17394 valid_loss: 0.14074 valid_score: 0.70354


  0%|          | 0/188 [00:00<?, ?it/s]

epoch:2/30 | train_loss: 0.14582 valid_loss: 0.12620 valid_score: 0.74508


  0%|          | 0/188 [00:00<?, ?it/s]

epoch:3/30 | train_loss: 0.13582 valid_loss: 0.11457 valid_score: 0.77880


  0%|          | 0/188 [00:00<?, ?it/s]

epoch:4/30 | train_loss: 0.12738 valid_loss: 0.12003 valid_score: 0.76493


  0%|          | 0/188 [00:00<?, ?it/s]

epoch:5/30 | train_loss: 0.12408 valid_loss: 0.11166 valid_score: 0.80275


  0%|          | 0/188 [00:00<?, ?it/s]

epoch:6/30 | train_loss: 0.12024 valid_loss: 0.11093 valid_score: 0.79352


  0%|          | 0/188 [00:00<?, ?it/s]

epoch:7/30 | train_loss: 0.11865 valid_loss: 0.11334 valid_score: 0.80169


  0%|          | 0/188 [00:00<?, ?it/s]

epoch:8/30 | train_loss: 0.11377 valid_loss: 0.11330 valid_score: 0.80309


  0%|          | 0/188 [00:00<?, ?it/s]

epoch:9/30 | train_loss: 0.11282 valid_loss: 0.11267 valid_score: 0.80464


  0%|          | 0/188 [00:00<?, ?it/s]

epoch:10/30 | train_loss: 0.11013 valid_loss: 0.11584 valid_score: 0.80075


  0%|          | 0/188 [00:00<?, ?it/s]

epoch:11/30 | train_loss: 0.10797 valid_loss: 0.10731 valid_score: 0.80358


  0%|          | 0/188 [00:00<?, ?it/s]

epoch:12/30 | train_loss: 0.10575 valid_loss: 0.10932 valid_score: 0.79714


  0%|          | 0/188 [00:00<?, ?it/s]

epoch:13/30 | train_loss: 0.10694 valid_loss: 0.10402 valid_score: 0.80186


  0%|          | 0/188 [00:00<?, ?it/s]

epoch:14/30 | train_loss: 0.10110 valid_loss: 0.11666 valid_score: 0.79619


  0%|          | 0/188 [00:00<?, ?it/s]

epoch:15/30 | train_loss: 0.09881 valid_loss: 0.11022 valid_score: 0.80349


  0%|          | 0/188 [00:00<?, ?it/s]

epoch:16/30 | train_loss: 0.09659 valid_loss: 0.10874 valid_score: 0.80271


  0%|          | 0/188 [00:00<?, ?it/s]

epoch:17/30 | train_loss: 0.09395 valid_loss: 0.10696 valid_score: 0.80800


  0%|          | 0/188 [00:00<?, ?it/s]

epoch:18/30 | train_loss: 0.09118 valid_loss: 0.10997 valid_score: 0.80564


  0%|          | 0/188 [00:00<?, ?it/s]

epoch:19/30 | train_loss: 0.08860 valid_loss: 0.11576 valid_score: 0.80249


  0%|          | 0/188 [00:00<?, ?it/s]

epoch:20/30 | train_loss: 0.08758 valid_loss: 0.10951 valid_score: 0.80573


  0%|          | 0/188 [00:00<?, ?it/s]

epoch:21/30 | train_loss: 0.08375 valid_loss: 0.11114 valid_score: 0.80474


  0%|          | 0/188 [00:00<?, ?it/s]

epoch:22/30 | train_loss: 0.08175 valid_loss: 0.11126 valid_score: 0.80291


  0%|          | 0/188 [00:00<?, ?it/s]

Early stopping
fold 0 | best_score: 0.80186 best_loss: 0.10402


series_id,step,asleep,onset,wakeup,anglez,enmo,anglez_diff_pos,enmo_diff_pos,hour_sin,hour_cos,similar_wave
str,u32,i32,i32,i32,f64,f64,f64,f64,f64,f64,i8
"""038441c925bb""",0,0,0,0,0.271642,0.102138,0.0,0.0,-0.707107,-0.707107,0
"""038441c925bb""",1,0,0,0,0.271644,0.097387,4.5e-05,-0.042553,-0.707107,-0.707107,0
"""038441c925bb""",2,0,0,0,0.271649,0.099762,9e-05,0.021277,-0.707107,-0.707107,0
"""038441c925bb""",3,0,0,0,0.271644,0.092637,-9e-05,-0.06383,-0.707107,-0.707107,0
"""038441c925bb""",4,0,0,0,0.271644,0.097387,0.0,0.042554,-0.707107,-0.707107,0


0it [00:00, ?it/s]

0it [00:00, ?it/s]

n_params: 730915


  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/184 [00:00<?, ?it/s]

epoch:0/30 | train_loss: 0.36552 valid_loss: 0.35395 valid_score: 0.51598


  0%|          | 0/184 [00:00<?, ?it/s]

epoch:1/30 | train_loss: 0.17167 valid_loss: 0.18072 valid_score: 0.66720


  0%|          | 0/184 [00:00<?, ?it/s]

epoch:2/30 | train_loss: 0.14248 valid_loss: 0.15934 valid_score: 0.71388


  0%|          | 0/184 [00:00<?, ?it/s]

epoch:3/30 | train_loss: 0.12858 valid_loss: 0.18759 valid_score: 0.72036


  0%|          | 0/184 [00:00<?, ?it/s]

epoch:4/30 | train_loss: 0.12621 valid_loss: 0.16226 valid_score: 0.72623


  0%|          | 0/184 [00:00<?, ?it/s]

epoch:5/30 | train_loss: 0.12061 valid_loss: 0.16035 valid_score: 0.73343


  0%|          | 0/184 [00:00<?, ?it/s]

epoch:6/30 | train_loss: 0.11529 valid_loss: 0.18720 valid_score: 0.73905


  0%|          | 0/184 [00:00<?, ?it/s]

epoch:7/30 | train_loss: 0.11594 valid_loss: 0.16663 valid_score: 0.73860


  0%|          | 0/184 [00:00<?, ?it/s]

epoch:8/30 | train_loss: 0.11180 valid_loss: 0.18077 valid_score: 0.73093


  0%|          | 0/184 [00:00<?, ?it/s]

epoch:9/30 | train_loss: 0.10682 valid_loss: 0.15326 valid_score: 0.74554


  0%|          | 0/184 [00:00<?, ?it/s]

epoch:10/30 | train_loss: 0.10684 valid_loss: 0.15703 valid_score: 0.74403


  0%|          | 0/184 [00:00<?, ?it/s]

epoch:11/30 | train_loss: 0.10179 valid_loss: 0.16863 valid_score: 0.75207


  0%|          | 0/184 [00:00<?, ?it/s]

epoch:12/30 | train_loss: 0.10087 valid_loss: 0.15842 valid_score: 0.74986


  0%|          | 0/184 [00:00<?, ?it/s]

epoch:13/30 | train_loss: 0.09941 valid_loss: 0.17884 valid_score: 0.73244


  0%|          | 0/184 [00:00<?, ?it/s]

epoch:14/30 | train_loss: 0.09574 valid_loss: 0.15521 valid_score: 0.74301


  0%|          | 0/184 [00:00<?, ?it/s]

epoch:15/30 | train_loss: 0.09497 valid_loss: 0.15782 valid_score: 0.74958


  0%|          | 0/184 [00:00<?, ?it/s]

epoch:16/30 | train_loss: 0.09221 valid_loss: 0.15574 valid_score: 0.75337


  0%|          | 0/184 [00:00<?, ?it/s]

epoch:17/30 | train_loss: 0.08989 valid_loss: 0.14235 valid_score: 0.75412


  0%|          | 0/184 [00:00<?, ?it/s]

epoch:18/30 | train_loss: 0.08921 valid_loss: 0.15686 valid_score: 0.75095


  0%|          | 0/184 [00:00<?, ?it/s]

epoch:19/30 | train_loss: 0.08468 valid_loss: 0.14373 valid_score: 0.75200


  0%|          | 0/184 [00:00<?, ?it/s]

epoch:20/30 | train_loss: 0.08305 valid_loss: 0.14886 valid_score: 0.75173


  0%|          | 0/184 [00:00<?, ?it/s]

epoch:21/30 | train_loss: 0.08125 valid_loss: 0.15257 valid_score: 0.74695


  0%|          | 0/184 [00:00<?, ?it/s]

epoch:22/30 | train_loss: 0.07799 valid_loss: 0.14910 valid_score: 0.74668


  0%|          | 0/184 [00:00<?, ?it/s]

epoch:23/30 | train_loss: 0.07506 valid_loss: 0.15118 valid_score: 0.75062


  0%|          | 0/184 [00:00<?, ?it/s]

epoch:24/30 | train_loss: 0.07336 valid_loss: 0.14339 valid_score: 0.75442


  0%|          | 0/184 [00:00<?, ?it/s]

epoch:25/30 | train_loss: 0.07179 valid_loss: 0.14233 valid_score: 0.75524


  0%|          | 0/184 [00:00<?, ?it/s]

epoch:26/30 | train_loss: 0.06996 valid_loss: 0.15248 valid_score: 0.75277


  0%|          | 0/184 [00:00<?, ?it/s]

epoch:27/30 | train_loss: 0.06907 valid_loss: 0.15147 valid_score: 0.75291


  0%|          | 0/184 [00:00<?, ?it/s]

epoch:28/30 | train_loss: 0.06830 valid_loss: 0.15067 valid_score: 0.75253


  0%|          | 0/184 [00:00<?, ?it/s]

epoch:29/30 | train_loss: 0.06819 valid_loss: 0.15021 valid_score: 0.75341
fold 1 | best_score: 0.75524 best_loss: 0.14233


series_id,step,asleep,onset,wakeup,anglez,enmo,anglez_diff_pos,enmo_diff_pos,hour_sin,hour_cos,similar_wave
str,u32,i32,i32,i32,f64,f64,f64,f64,f64,f64,i8
"""038441c925bb""",0,0,0,0,0.278515,0.108235,0.0,0.0,-0.707107,-0.707107,0
"""038441c925bb""",1,0,0,0,0.278517,0.103529,4.5e-05,-0.042553,-0.707107,-0.707107,0
"""038441c925bb""",2,0,0,0,0.278522,0.105882,9e-05,0.021277,-0.707107,-0.707107,0
"""038441c925bb""",3,0,0,0,0.278517,0.098823,-9e-05,-0.06383,-0.707107,-0.707107,0
"""038441c925bb""",4,0,0,0,0.278517,0.103529,0.0,0.042554,-0.707107,-0.707107,0


0it [00:00, ?it/s]

0it [00:00, ?it/s]

n_params: 730915


  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/191 [00:00<?, ?it/s]

epoch:0/30 | train_loss: 0.42276 valid_loss: 0.18251 valid_score: 0.62989


  0%|          | 0/191 [00:00<?, ?it/s]

epoch:1/30 | train_loss: 0.17367 valid_loss: 0.17921 valid_score: 0.73883


  0%|          | 0/191 [00:00<?, ?it/s]

epoch:2/30 | train_loss: 0.14577 valid_loss: 0.12485 valid_score: 0.76615


  0%|          | 0/191 [00:00<?, ?it/s]

epoch:3/30 | train_loss: 0.13571 valid_loss: 0.10569 valid_score: 0.79121


  0%|          | 0/191 [00:00<?, ?it/s]

epoch:4/30 | train_loss: 0.12801 valid_loss: 0.11272 valid_score: 0.78878


  0%|          | 0/191 [00:00<?, ?it/s]

epoch:5/30 | train_loss: 0.12353 valid_loss: 0.11800 valid_score: 0.80142


  0%|          | 0/191 [00:00<?, ?it/s]

epoch:6/30 | train_loss: 0.13732 valid_loss: 0.12104 valid_score: 0.76576


  0%|          | 0/191 [00:00<?, ?it/s]

epoch:7/30 | train_loss: 0.12377 valid_loss: 0.10411 valid_score: 0.80085


  0%|          | 0/191 [00:00<?, ?it/s]

epoch:8/30 | train_loss: 0.11880 valid_loss: 0.10311 valid_score: 0.80507


  0%|          | 0/191 [00:00<?, ?it/s]

epoch:9/30 | train_loss: 0.11618 valid_loss: 0.10512 valid_score: 0.80627


  0%|          | 0/191 [00:00<?, ?it/s]

epoch:10/30 | train_loss: 0.11393 valid_loss: 0.10613 valid_score: 0.80460


  0%|          | 0/191 [00:00<?, ?it/s]

epoch:11/30 | train_loss: 0.11017 valid_loss: 0.11179 valid_score: 0.80017


  0%|          | 0/191 [00:00<?, ?it/s]

epoch:12/30 | train_loss: 0.10857 valid_loss: 0.10213 valid_score: 0.80614


  0%|          | 0/191 [00:00<?, ?it/s]

epoch:13/30 | train_loss: 0.10659 valid_loss: 0.10439 valid_score: 0.80824


  0%|          | 0/191 [00:00<?, ?it/s]

epoch:14/30 | train_loss: 0.10382 valid_loss: 0.09826 valid_score: 0.80956


  0%|          | 0/191 [00:00<?, ?it/s]

epoch:15/30 | train_loss: 0.10164 valid_loss: 0.10443 valid_score: 0.81201


  0%|          | 0/191 [00:00<?, ?it/s]

epoch:16/30 | train_loss: 0.09836 valid_loss: 0.09949 valid_score: 0.81197


  0%|          | 0/191 [00:00<?, ?it/s]

epoch:17/30 | train_loss: 0.09802 valid_loss: 0.10068 valid_score: 0.81902


  0%|          | 0/191 [00:00<?, ?it/s]

epoch:18/30 | train_loss: 0.09567 valid_loss: 0.10433 valid_score: 0.81730


  0%|          | 0/191 [00:00<?, ?it/s]

epoch:19/30 | train_loss: 0.09241 valid_loss: 0.10145 valid_score: 0.81870


  0%|          | 0/191 [00:00<?, ?it/s]

epoch:20/30 | train_loss: 0.08989 valid_loss: 0.10655 valid_score: 0.81308


  0%|          | 0/191 [00:00<?, ?it/s]

epoch:21/30 | train_loss: 0.08717 valid_loss: 0.10864 valid_score: 0.81301


  0%|          | 0/191 [00:00<?, ?it/s]

epoch:22/30 | train_loss: 0.08542 valid_loss: 0.10460 valid_score: 0.81431


  0%|          | 0/191 [00:00<?, ?it/s]

epoch:23/30 | train_loss: 0.08216 valid_loss: 0.11198 valid_score: 0.81440


  0%|          | 0/191 [00:00<?, ?it/s]

Early stopping
fold 2 | best_score: 0.80956 best_loss: 0.09826


series_id,step,asleep,onset,wakeup,anglez,enmo,anglez_diff_pos,enmo_diff_pos,hour_sin,hour_cos,similar_wave
str,u32,i32,i32,i32,f64,f64,f64,f64,f64,f64,i8
"""038441c925bb""",0,0,0,0,0.295716,0.108747,0.0,0.0,-0.707107,-0.707107,0
"""038441c925bb""",1,0,0,0,0.295719,0.104019,4.6e-05,-0.043478,-0.707107,-0.707107,0
"""038441c925bb""",2,0,0,0,0.295723,0.106383,9.2e-05,0.021739,-0.707107,-0.707107,0
"""038441c925bb""",3,0,0,0,0.295719,0.099291,-9.2e-05,-0.065218,-0.707107,-0.707107,0
"""038441c925bb""",4,0,0,0,0.295719,0.104019,0.0,0.043479,-0.707107,-0.707107,0


0it [00:00, ?it/s]

0it [00:00, ?it/s]

n_params: 730915


  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/187 [00:00<?, ?it/s]

epoch:0/30 | train_loss: 0.45281 valid_loss: 0.17562 valid_score: 0.64504


  0%|          | 0/187 [00:00<?, ?it/s]

epoch:1/30 | train_loss: 0.18126 valid_loss: 0.13322 valid_score: 0.74087


  0%|          | 0/187 [00:00<?, ?it/s]

epoch:2/30 | train_loss: 0.13950 valid_loss: 0.12953 valid_score: 0.75215


  0%|          | 0/187 [00:00<?, ?it/s]

epoch:3/30 | train_loss: 0.12961 valid_loss: 0.12505 valid_score: 0.76093


  0%|          | 0/187 [00:00<?, ?it/s]

epoch:4/30 | train_loss: 0.12483 valid_loss: 0.11844 valid_score: 0.77654


  0%|          | 0/187 [00:00<?, ?it/s]

epoch:5/30 | train_loss: 0.12082 valid_loss: 0.12696 valid_score: 0.76429


  0%|          | 0/187 [00:00<?, ?it/s]

epoch:6/30 | train_loss: 0.11864 valid_loss: 0.12392 valid_score: 0.74992


  0%|          | 0/187 [00:00<?, ?it/s]

epoch:7/30 | train_loss: 0.11932 valid_loss: 0.11466 valid_score: 0.78041


  0%|          | 0/187 [00:00<?, ?it/s]

epoch:8/30 | train_loss: 0.11374 valid_loss: 0.11148 valid_score: 0.78711


  0%|          | 0/187 [00:00<?, ?it/s]

epoch:9/30 | train_loss: 0.11335 valid_loss: 0.11018 valid_score: 0.78823


  0%|          | 0/187 [00:00<?, ?it/s]

epoch:10/30 | train_loss: 0.11051 valid_loss: 0.11394 valid_score: 0.77382


  0%|          | 0/187 [00:00<?, ?it/s]

epoch:11/30 | train_loss: 0.10683 valid_loss: 0.11524 valid_score: 0.78166


  0%|          | 0/187 [00:00<?, ?it/s]

epoch:12/30 | train_loss: 0.10726 valid_loss: 0.11098 valid_score: 0.78099


  0%|          | 0/187 [00:00<?, ?it/s]

epoch:13/30 | train_loss: 0.10482 valid_loss: 0.11192 valid_score: 0.78502


  0%|          | 0/187 [00:00<?, ?it/s]

epoch:14/30 | train_loss: 0.10148 valid_loss: 0.10706 valid_score: 0.79215


  0%|          | 0/187 [00:00<?, ?it/s]

epoch:15/30 | train_loss: 0.10015 valid_loss: 0.10804 valid_score: 0.79522


  0%|          | 0/187 [00:00<?, ?it/s]

epoch:16/30 | train_loss: 0.09985 valid_loss: 0.10811 valid_score: 0.79238


  0%|          | 0/187 [00:00<?, ?it/s]

epoch:17/30 | train_loss: 0.09695 valid_loss: 0.10694 valid_score: 0.79776


  0%|          | 0/187 [00:00<?, ?it/s]

epoch:18/30 | train_loss: 0.09342 valid_loss: 0.10584 valid_score: 0.80061


  0%|          | 0/187 [00:00<?, ?it/s]

epoch:19/30 | train_loss: 0.09199 valid_loss: 0.10532 valid_score: 0.80203


  0%|          | 0/187 [00:00<?, ?it/s]

epoch:20/30 | train_loss: 0.09003 valid_loss: 0.10639 valid_score: 0.80509


  0%|          | 0/187 [00:00<?, ?it/s]

epoch:21/30 | train_loss: 0.08699 valid_loss: 0.10696 valid_score: 0.79761


  0%|          | 0/187 [00:00<?, ?it/s]

epoch:22/30 | train_loss: 0.08555 valid_loss: 0.10979 valid_score: 0.80163


  0%|          | 0/187 [00:00<?, ?it/s]

epoch:23/30 | train_loss: 0.08361 valid_loss: 0.10724 valid_score: 0.79712


  0%|          | 0/187 [00:00<?, ?it/s]

epoch:24/30 | train_loss: 0.08199 valid_loss: 0.10872 valid_score: 0.79798


  0%|          | 0/187 [00:00<?, ?it/s]

epoch:25/30 | train_loss: 0.08053 valid_loss: 0.11062 valid_score: 0.79637


  0%|          | 0/187 [00:00<?, ?it/s]

epoch:26/30 | train_loss: 0.07934 valid_loss: 0.11157 valid_score: 0.79542


  0%|          | 0/187 [00:00<?, ?it/s]

epoch:27/30 | train_loss: 0.07880 valid_loss: 0.11116 valid_score: 0.79584


  0%|          | 0/187 [00:00<?, ?it/s]

epoch:28/30 | train_loss: 0.07820 valid_loss: 0.11132 valid_score: 0.79698


  0%|          | 0/187 [00:00<?, ?it/s]

Early stopping
fold 3 | best_score: 0.80203 best_loss: 0.10532


series_id,step,asleep,onset,wakeup,anglez,enmo,anglez_diff_pos,enmo_diff_pos,hour_sin,hour_cos,similar_wave
str,u32,i32,i32,i32,f64,f64,f64,f64,f64,f64,i8
"""038441c925bb""",0,0,0,0,0.289854,0.101176,0.0,0.0,-0.707107,-0.707107,0
"""038441c925bb""",1,0,0,0,0.289856,0.096471,4.5e-05,-0.042553,-0.707107,-0.707107,0
"""038441c925bb""",2,0,0,0,0.289861,0.098824,9e-05,0.021277,-0.707107,-0.707107,0
"""038441c925bb""",3,0,0,0,0.289856,0.091765,-9e-05,-0.06383,-0.707107,-0.707107,0
"""038441c925bb""",4,0,0,0,0.289856,0.096471,0.0,0.042554,-0.707107,-0.707107,0


0it [00:00, ?it/s]

0it [00:00, ?it/s]

n_params: 730915


  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/189 [00:00<?, ?it/s]

epoch:0/30 | train_loss: 0.38417 valid_loss: 0.23483 valid_score: 0.60796


  0%|          | 0/189 [00:00<?, ?it/s]

epoch:1/30 | train_loss: 0.15920 valid_loss: 0.18742 valid_score: 0.67856


  0%|          | 0/189 [00:00<?, ?it/s]

epoch:2/30 | train_loss: 0.13711 valid_loss: 0.18638 valid_score: 0.69629


  0%|          | 0/189 [00:00<?, ?it/s]

epoch:3/30 | train_loss: 0.12827 valid_loss: 0.19500 valid_score: 0.65865


  0%|          | 0/189 [00:00<?, ?it/s]

epoch:4/30 | train_loss: 0.12450 valid_loss: 0.16799 valid_score: 0.72520


  0%|          | 0/189 [00:00<?, ?it/s]

epoch:5/30 | train_loss: 0.11332 valid_loss: 0.20435 valid_score: 0.72101


  0%|          | 0/189 [00:00<?, ?it/s]

epoch:6/30 | train_loss: 0.11622 valid_loss: 0.15647 valid_score: 0.72318


  0%|          | 0/189 [00:00<?, ?it/s]

epoch:7/30 | train_loss: 0.10951 valid_loss: 0.17549 valid_score: 0.71162


  0%|          | 0/189 [00:00<?, ?it/s]

epoch:8/30 | train_loss: 0.10678 valid_loss: 0.16163 valid_score: 0.73228


  0%|          | 0/189 [00:00<?, ?it/s]

epoch:9/30 | train_loss: 0.10342 valid_loss: 0.16145 valid_score: 0.74138


  0%|          | 0/189 [00:00<?, ?it/s]

epoch:10/30 | train_loss: 0.10456 valid_loss: 0.16382 valid_score: 0.72889


  0%|          | 0/189 [00:00<?, ?it/s]

epoch:11/30 | train_loss: 0.10194 valid_loss: 0.16673 valid_score: 0.74661


  0%|          | 0/189 [00:00<?, ?it/s]

epoch:12/30 | train_loss: 0.09993 valid_loss: 0.16118 valid_score: 0.74076


  0%|          | 0/189 [00:00<?, ?it/s]

epoch:13/30 | train_loss: 0.09771 valid_loss: 0.14708 valid_score: 0.74820


  0%|          | 0/189 [00:00<?, ?it/s]

epoch:14/30 | train_loss: 0.09494 valid_loss: 0.17360 valid_score: 0.73616


  0%|          | 0/189 [00:00<?, ?it/s]

epoch:15/30 | train_loss: 0.09365 valid_loss: 0.15831 valid_score: 0.74654


  0%|          | 0/189 [00:00<?, ?it/s]

epoch:16/30 | train_loss: 0.09144 valid_loss: 0.17494 valid_score: 0.73846


  0%|          | 0/189 [00:00<?, ?it/s]

epoch:17/30 | train_loss: 0.08818 valid_loss: 0.17085 valid_score: 0.74533


  0%|          | 0/189 [00:00<?, ?it/s]

epoch:18/30 | train_loss: 0.08599 valid_loss: 0.15558 valid_score: 0.75576


  0%|          | 0/189 [00:00<?, ?it/s]

epoch:19/30 | train_loss: 0.08449 valid_loss: 0.16045 valid_score: 0.74956


  0%|          | 0/189 [00:00<?, ?it/s]

epoch:20/30 | train_loss: 0.08158 valid_loss: 0.16699 valid_score: 0.74735


  0%|          | 0/189 [00:00<?, ?it/s]

epoch:21/30 | train_loss: 0.07948 valid_loss: 0.17540 valid_score: 0.74452


  0%|          | 0/189 [00:00<?, ?it/s]

epoch:22/30 | train_loss: 0.07775 valid_loss: 0.17927 valid_score: 0.74577


  0%|          | 0/189 [00:00<?, ?it/s]

Early stopping
fold 4 | best_score: 0.74820 best_loss: 0.14708
cv:  0.77623 ± 0.02609
elapsed time: 11797.97sec


In [4]:
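# Upload step, currently disabled (commented out). The output recorded below
# (preds.tar / model.tar uploads) appears to come from an earlier run in which
# this call was active; uncomment to upload the experiment outputs again.
# dataset_create_new(dataset_name=f"cmi-{cfg.EXP}", upload_dir=cfg.OUTPUT_EXP_PATH)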


Starting upload for file preds.tar


100%|██████████| 922M/922M [01:31<00:00, 10.5MB/s]   


Upload successful: preds.tar (922MB)
Starting upload for file model.tar


100%|██████████| 16.1M/16.1M [00:03<00:00, 5.04MB/s]


Upload successful: model.tar (16MB)
