In [1]:
from pathlib import Path

import jupyter_black
import numpy as np
import optuna
import pandas as pd
import polars as pl
import seaborn as sns
from hydra import compose
from hydra import initialize
from hydra.core.global_hydra import GlobalHydra

from src.utils.metrics import event_detection_ap
from src.utils.post_process import post_process_for_asleep
from src.utils.post_process import post_process_for_seg

jupyter_black.load()
# plt.style.use("ggplot")
sns.set()

%load_ext autoreload
%autoreload 2

EXP_NAME = "exp077"
RUN_NAME = "run2"
RESULT_DIR = Path("../output/train") / EXP_NAME / RUN_NAME


def load_config(result_dir: Path):
    # clear previous initialization
    GlobalHydra.instance().clear()

    # initialize hydra
    config_path = result_dir / ".hydra"
    initialize(config_path=config_path.as_posix())
    # load the config
    cfg = compose(config_name="config")

    return cfg

In [2]:
cfg = load_config(RESULT_DIR)
# cfg.dir.data_dir = (
#     "/home/kuto/kaggle/kaggle-sleep-v2/data/child-mind-institute-detect-sleep-states"
# )
preds = np.load(RESULT_DIR / "preds.npy")
labels = np.load(RESULT_DIR / "labels.npy")
keys = np.load(RESULT_DIR / "keys.npy")

gt_df = pd.read_csv(Path(cfg.dir.data_dir) / "train_events.csv")
gt_df = gt_df[gt_df["series_id"].isin(cfg.split.valid_series_ids)].dropna().reset_index(drop=True)

The version_base parameter is not specified.
Please specify a compatability version level, or None.
Will assume defaults for version 1.1
  initialize(config_path=config_path.as_posix())


In [3]:
%cd {RESULT_DIR}

/home/kuto/kaggle/kaggle-sleep-v2/output/train/exp077/run2


In [20]:
cfg.post_process.score_th = 0.005
cfg.post_process.distance = 40
cfg.post_process.low_pass_filter_hour = 3

In [21]:
preds = np.load("preds.npy")
# labels = np.load("labels.npy")
keys = np.load("keys.npy")

gt_df = pd.read_csv(Path(cfg.dir.data_dir) / "train_events.csv")
gt_df = gt_df[gt_df["series_id"].isin(cfg.split.valid_series_ids)].dropna().reset_index(drop=True)

### ピーク検出による評価

In [22]:
pred_df1: pl.DataFrame = post_process_for_seg(
    keys,
    preds[:, :, [1, 2]],
    score_th=cfg.post_process.score_th,
    distance=cfg.post_process.distance,
    low_pass_filter_hour=cfg.post_process.low_pass_filter_hour,
)
score1 = event_detection_ap(gt_df, pred_df1.to_pandas())
score1

0.7651407082864914

### 睡眠検出による評価

In [7]:
pred_df2: pl.DataFrame = post_process_for_asleep(
    keys,
    preds[:, :, [0]],
    score_thr=0.5,
)
score2 = event_detection_ap(gt_df, pred_df2.to_pandas())
score2

0.6867250304362296

### 2つを組み合わせる

In [7]:
def min_max_normalize(_df, col="pred"):
    df = _df.clone()
    df = df.with_columns(
        (pl.col(col) - pl.col(col).min()) / (pl.col(col).max() - pl.col(col).min())
    )
    return df

In [9]:
# pred_df_norm1 = min_max_normalize(pred_df1, "score")
# pred_df_norm2 = min_max_normalize(pred_df2, "score")
# pred_df_norm2 = pred_df_norm2.with_columns(pl.col("score") * 0.1)

# pred_df = pl.concat([pred_df_norm1, pred_df_norm2])
# pred_df = (
#     pred_df.group_by(["series_id", "step", "event"])
#     .agg(pl.mean("score"))
#     .sort(["series_id", "step"])
# )
# event_detection_ap(gt_df, pred_df.to_pandas())

In [10]:
def objective(trial: optuna.Trial):
    score_th = trial.suggest_float("score_th", 0.1, 0.5, step=0.05)  # 0.005
    distance = 70  # trial.suggest_int("distance", 1, 200)
    hour = 3  # trial.suggest_int("hour", 1, 5)

    pred_df: pl.DataFrame = post_process_for_seg(
        keys, preds[:, :, [1, 2]], score_th=score_th, distance=distance, low_pass_filter_hour=hour
    )
    score = event_detection_ap(gt_df, pred_df.to_pandas())

    return score


study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=20)

print(study.best_params)

[I 2023-11-10 21:16:27,179] A new study created in memory with name: no-name-c04e272d-47b9-4419-bb3e-2f9c7fa5bbbd
[I 2023-11-10 21:16:30,322] Trial 0 finished with value: 0.6946748626578277 and parameters: {'score_th': 0.25}. Best is trial 0 with value: 0.6946748626578277.
[I 2023-11-10 21:16:33,262] Trial 1 finished with value: 0.6448498624812358 and parameters: {'score_th': 0.35}. Best is trial 0 with value: 0.6946748626578277.
[I 2023-11-10 21:16:36,378] Trial 2 finished with value: 0.7468694646371383 and parameters: {'score_th': 0.1}. Best is trial 2 with value: 0.7468694646371383.
[I 2023-11-10 21:16:39,415] Trial 3 finished with value: 0.6448498624812358 and parameters: {'score_th': 0.35}. Best is trial 2 with value: 0.7468694646371383.
[I 2023-11-10 21:16:42,463] Trial 4 finished with value: 0.6723935981088003 and parameters: {'score_th': 0.30000000000000004}. Best is trial 2 with value: 0.7468694646371383.
[I 2023-11-10 21:16:45,562] Trial 5 finished with value: 0.7323118398189

{'score_th': 0.1}


### 睡眠ピークと真値はどちらにずれているか？