In [1]:
from pathlib import Path

import jupyter_black
import numpy as np
import optuna
import pandas as pd
import polars as pl
import seaborn as sns
from hydra import compose
from hydra import initialize
from hydra.core.global_hydra import GlobalHydra

from src.utils.metrics import event_detection_ap
from src.utils.post_process import post_process_for_asleep_and_event
from src.utils.post_process import post_process_for_seg

jupyter_black.load()
# plt.style.use("ggplot")
sns.set()

%load_ext autoreload
%autoreload 2


def load_config(result_dir: Path):
    # clear previous initialization
    GlobalHydra.instance().clear()

    # initialize hydra
    config_path = result_dir / ".hydra"
    initialize(config_path=config_path.as_posix())
    # load the config
    cfg = compose(config_name="config")

    return cfg

In [2]:
EXP_NAME = "exp125"
RUN_NAMES = ["run0", "run1", "run2", "run3", "run4"]
all_preds = []
all_keys = []
all_labels = []
for run_name in RUN_NAMES:
    RESULT_DIR = Path("../output/train") / EXP_NAME / run_name
    cfg = load_config(RESULT_DIR)
    preds = np.load(RESULT_DIR / "preds.npy")
    labels = np.load(RESULT_DIR / "labels.npy")
    keys = np.load(RESULT_DIR / "keys.npy")
    all_preds.append(preds)
    all_keys.append(keys)
    all_labels.append(labels)

all_preds = np.concatenate(all_preds)
all_keys = np.concatenate(all_keys)
all_labels = np.concatenate(all_labels)
gt_df = pd.read_csv(Path(cfg.dir.data_dir) / "train_events.csv").dropna().reset_index(drop=True)

The version_base parameter is not specified.
Please specify a compatability version level, or None.
Will assume defaults for version 1.1
  initialize(config_path=config_path.as_posix())
The version_base parameter is not specified.
Please specify a compatability version level, or None.
Will assume defaults for version 1.1
  initialize(config_path=config_path.as_posix())


In [29]:
cfg.post_process.score_th = 0.001
cfg.post_process.distance = 70
cfg.post_process.low_pass_filter_hour = 5

### ピーク検出による評価

In [31]:
pred_df1: pl.DataFrame = post_process_for_seg(
    all_keys,
    all_preds[:, :, [1, 2]],
    score_th=cfg.post_process.score_th,
    distance=cfg.post_process.distance,
    low_pass_filter_hour=cfg.post_process.low_pass_filter_hour,
)
score1 = event_detection_ap(
    gt_df.query(f'series_id in {pred_df1["series_id"].unique().to_list()}'), pred_df1.to_pandas()
)
score1

0.7862887979283817

In [None]:
0.7862887979283817

0.785036

### 睡眠検出による評価

In [5]:
# cfg.post_process.score_th = 0.005
# cfg.post_process.distance = 40
# cfg.post_process.low_pass_filter_hour = 3

In [7]:
# pred_df2: pl.DataFrame = post_process_for_asleep(
#     all_keys,
#     all_preds[:, :, [0]],
#     score_th=cfg.post_process.score_th,
#     distance=cfg.post_process.distance,
#     window_size=500,
#     low_pass_filter_hour=cfg.post_process.low_pass_filter_hour,
# )
# score2 = event_detection_ap(gt_df, pred_df2.to_pandas())
# score2

### sleepをeventに変換して評価

In [6]:
from src.utils.post_process import post_process_asleep_to_event_v2

window_size = 500
all_preds2 = post_process_asleep_to_event_v2(all_preds, all_keys, window_size)
pred_df1: pl.DataFrame = post_process_for_seg(
    all_keys,
    all_preds2[:, :, [1, 2]],
    score_th=cfg.post_process.score_th,
    distance=cfg.post_process.distance,
    low_pass_filter_hour=cfg.post_process.low_pass_filter_hour,
)
score3 = event_detection_ap(gt_df, pred_df1.to_pandas())
score3

0.7156882860995255

### 2つを組み合わせる

In [6]:
# pred_df3: pl.DataFrame = post_process_for_asleep_and_event(
#     all_keys,
#     all_preds,
#     score_th=cfg.post_process.score_th,
#     distance=cfg.post_process.distance,
#     window_size=500,
#     low_pass_filter_hour=cfg.post_process.low_pass_filter_hour,
#     event_weight=0.8,
# )
# score3 = event_detection_ap(gt_df, pred_df3.to_pandas())
# score3

### 予測値のみ統合して通常の後処理をかける（アンサンブルしやすくするため)

In [7]:
from src.utils.post_process import post_process_asleep_to_event

window_size = 500
event_weight = 0.7
all_preds2 = post_process_asleep_to_event(all_preds, all_keys, window_size, event_weight)

In [8]:
pred_df4: pl.DataFrame = post_process_for_seg(
    all_keys,
    all_preds2[:, :, [1, 2]],
    score_th=cfg.post_process.score_th,
    distance=cfg.post_process.distance,
    low_pass_filter_hour=cfg.post_process.low_pass_filter_hour,
)
score4 = event_detection_ap(gt_df, pred_df4.to_pandas())
score4

0.7695211646297653

### チューニング

In [None]:
def objective(trial: optuna.Trial):
    score_th = trial.suggest_float("score_th", 0.001, 0.01, step=0.001)  # 0.005
    # distance = trial.suggest_int("distance", 10, 100, step=10)
    hour = trial.suggest_int("hour", 1, 5)
    # window_size = trial.suggest_int("window_size", 100, 500, step=50)
    # weight = trial.suggest_float("weight", 0.1, 0.9, step=0.1)
   #  weights = [weight, 1 - weight]
    pred_df: pl.DataFrame = post_process_for_asleep_and_event(
        all_keys,
        all_preds,
        score_th=score_th,
        distance=40,
        window_size=500,
        low_pass_filter_hour=hour,
        event_weight=0.7,
    )
    score = event_detection_ap(gt_df, pred_df.to_pandas())

    return score


study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=100)

print(study.best_params)

In [None]:
print(study.best_params)

{'score_th': 0.001, 'distance': 60, 'hour': 5, 'window_size': 500, 'weight': 0.9}. Best is trial 49 with value: 0.7476289447484639.