In [58]:
from pathlib import Path

import jupyter_black
import numpy as np
import optuna
import pandas as pd
import polars as pl
import seaborn as sns
from hydra import compose
from hydra import initialize
from hydra.core.global_hydra import GlobalHydra

from src.utils.metrics import event_detection_ap
from src.utils.post_process import post_process_for_asleep
from src.utils.post_process import post_process_for_asleep_and_event
from src.utils.post_process import post_process_for_seg

jupyter_black.load()
# plt.style.use("ggplot")
sns.set()

%load_ext autoreload
%autoreload 2


def load_config(result_dir: Path):
    # clear previous initialization
    GlobalHydra.instance().clear()

    # initialize hydra
    config_path = result_dir / ".hydra"
    initialize(config_path=config_path.as_posix())
    # load the config
    cfg = compose(config_name="config")

    return cfg

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [141]:
EXP_NAME = "exp090"
RUN_NAMES = ["run0", "run1", "run2", "run3", "run4"]
all_preds = []
all_keys = []
all_labels = []
for run_name in RUN_NAMES:
    RESULT_DIR = Path("../output/train") / EXP_NAME / run_name
    cfg = load_config(RESULT_DIR)
    preds = np.load(RESULT_DIR / "preds.npy")
    labels = np.load(RESULT_DIR / "labels.npy")
    keys = np.load(RESULT_DIR / "keys.npy")
    all_preds.append(preds)
    all_keys.append(keys)
    all_labels.append(labels)

all_preds = np.concatenate(all_preds)
all_keys = np.concatenate(all_keys)
all_labels = np.concatenate(all_labels)
gt_df = pd.read_csv(Path(cfg.dir.data_dir) / "train_events.csv").dropna().reset_index(drop=True)
# gt_df = gt_df[gt_df["series_id"].isin(cfg.split.valid_series_ids)].dropna().reset_index(drop=True)

The version_base parameter is not specified.
Please specify a compatability version level, or None.
Will assume defaults for version 1.1
  initialize(config_path=config_path.as_posix())


In [136]:
cfg.post_process.score_th = 0.005
cfg.post_process.distance = 40
cfg.post_process.low_pass_filter_hour = 3

### ピーク検出による評価

In [137]:
pred_df1: pl.DataFrame = post_process_for_seg(
    all_keys,
    all_preds[:, :, [1, 2]],
    score_th=cfg.post_process.score_th,
    distance=cfg.post_process.distance,
    low_pass_filter_hour=cfg.post_process.low_pass_filter_hour,
)
score1 = event_detection_ap(gt_df, pred_df1.to_pandas())
score1

0.7532104941183857

### 睡眠検出による評価

In [138]:
cfg.post_process.score_th = 0.005
cfg.post_process.distance = 40
cfg.post_process.low_pass_filter_hour = 3

In [139]:
# exp088: 0.7321 window=100
# exp090: 0.6915 window=300

pred_df2: pl.DataFrame = post_process_for_asleep(
    all_keys,
    all_preds[:, :, [0]],
    score_th=cfg.post_process.score_th,
    distance=cfg.post_process.distance,
    window_size=500,
    low_pass_filter_hour=cfg.post_process.low_pass_filter_hour,
)
score2 = event_detection_ap(gt_df, pred_df2.to_pandas())
score2

0.692863085117281

### 2つを組み合わせる

In [140]:
# 0.5: 0.7405259650258358

pred_df3: pl.DataFrame = post_process_for_asleep_and_event(
    all_keys,
    all_preds,
    score_th=cfg.post_process.score_th,
    distance=cfg.post_process.distance,
    window_size=500,
    low_pass_filter_hour=cfg.post_process.low_pass_filter_hour,
    event_weight=0.7,
)
score3 = event_detection_ap(gt_df, pred_df3.to_pandas())
score3

0.759687383885687

In [67]:
def objective(trial: optuna.Trial):
    score_th = trial.suggest_float("score_th", 0.001, 0.01, step=0.001)  # 0.005
    # distance = trial.suggest_int("distance", 10, 100, step=10)
    hour = trial.suggest_int("hour", 1, 5)
    # window_size = trial.suggest_int("window_size", 100, 500, step=50)
    # weight = trial.suggest_float("weight", 0.1, 0.9, step=0.1)
   #  weights = [weight, 1 - weight]
    pred_df: pl.DataFrame = post_process_for_asleep_and_event(
        all_keys,
        all_preds,
        score_th=score_th,
        distance=40,
        window_size=500,
        low_pass_filter_hour=hour,
        event_weight=0.7,
    )
    score = event_detection_ap(gt_df, pred_df.to_pandas())

    return score


study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=100)

print(study.best_params)

[I 2023-11-19 21:50:09,197] A new study created in memory with name: no-name-82fdd53b-c75e-4c13-b620-5512b05bbf02
[I 2023-11-19 21:50:50,766] Trial 0 finished with value: 0.7297852837260924 and parameters: {'score_th': 0.009000000000000001, 'distance': 90, 'hour': 3, 'window_size': 400, 'weight': 0.2}. Best is trial 0 with value: 0.7297852837260924.
[I 2023-11-19 21:51:32,603] Trial 1 finished with value: 0.7342988769483512 and parameters: {'score_th': 0.005, 'distance': 60, 'hour': 4, 'window_size': 350, 'weight': 0.30000000000000004}. Best is trial 1 with value: 0.7342988769483512.
[I 2023-11-19 21:52:15,900] Trial 2 finished with value: 0.7356610451333877 and parameters: {'score_th': 0.002, 'distance': 20, 'hour': 2, 'window_size': 250, 'weight': 0.30000000000000004}. Best is trial 2 with value: 0.7356610451333877.
[I 2023-11-19 21:52:56,869] Trial 3 finished with value: 0.7370692417569704 and parameters: {'score_th': 0.01, 'distance': 70, 'hour': 5, 'window_size': 300, 'weight': 0.

KeyboardInterrupt: 

In [68]:
print(study.best_params)

{'score_th': 0.001, 'distance': 60, 'hour': 5, 'window_size': 450, 'weight': 0.9}


{'score_th': 0.001, 'distance': 60, 'hour': 5, 'window_size': 500, 'weight': 0.9}. Best is trial 49 with value: 0.7476289447484639.