In [1]:
import xgboost as xgb
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import yaml
import datetime
import sys
from tqdm import tqdm
import os
from tqdm import tqdm
import random
import pickle
from glob import glob
import gc
from multiprocessing import Pool, cpu_count

import warnings
warnings.filterwarnings('ignore')

sys.path.append('/kaggle/src')
from utils.xgb import fit_xgb, inference_xgb
from utils.metric import compute_comptetition_metric
from utils.feature_contena import Features
from utils.pandas_utils import reduce_mem_usage



In [2]:
# Experiment settings, kept inline as YAML text and parsed into a plain dict.
config = """
execution:
    exp_id: exp_ensemble
    debug: False
    submit: False
    multiprocessing: True

dataset: 
    competition_dir: /kaggle/input/child-mind-institute-detect-sleep-states
    cv_split_path: /kaggle/input/cv_split/train_folds.csv
    train_base_path: /kaggle/input/train_base/train_base.csv
    step_csv_dir: /kaggle/input/save_series_csv/csvs

feature:
    agg_freq: 24 # [step]

seed: 46
"""

# safe_load is the shorthand for yaml.load(..., Loader=yaml.SafeLoader).
CFG = yaml.safe_load(config)

# Artifacts of this experiment are written to /kaggle/output/<exp_id>;
# the directory is created here if it does not exist yet.
experiment_id = CFG["execution"]["exp_id"]
CFG["output_dir"] = "/kaggle/output/" + experiment_id
os.makedirs(CFG["output_dir"], exist_ok=True)

In [3]:
# Load the out-of-fold predictions and the submission file from the experiment's
# output directory (an earlier revision read oof.parquet instead of oof.csv).
exp_output_dir = os.path.join("/kaggle/output", CFG["execution"]["exp_id"])
oof_path = os.path.join(exp_output_dir, "oof.csv")
sub_path = os.path.join(exp_output_dir, "submission.csv")

oof = pd.read_csv(oof_path)
sub = pd.read_csv(sub_path)

# Quick look at the first rows of the OOF table.
display(oof.head())

Unnamed: 0,series_id,step,wakeup_oof,onset_oof
0,038441c925bb,6,0.006173,0.015106
1,038441c925bb,18,0.008835,0.016535
2,038441c925bb,30,0.010534,0.016725
3,038441c925bb,42,0.00584,0.015388
4,038441c925bb,54,0.003448,0.017686


In [6]:
# Annotated events from the competition data; rows containing any missing
# value are dropped before use.
events_csv = f"{CFG['dataset']['competition_dir']}/train_events.csv"
labels = pd.read_csv(events_csv)
labels = labels.dropna()
labels.head()

Unnamed: 0,series_id,night,event,step,timestamp
0,038441c925bb,1,onset,4992.0,2018-08-14T22:26:00-0400
1,038441c925bb,1,wakeup,10932.0,2018-08-15T06:41:00-0400
2,038441c925bb,2,onset,20244.0,2018-08-15T19:37:00-0400
3,038441c925bb,2,wakeup,27492.0,2018-08-16T05:41:00-0400
4,038441c925bb,3,onset,39996.0,2018-08-16T23:03:00-0400


In [8]:
def _shade_nights(ax, label_df):
    """Shade every labelled night on ``ax`` from its first to its last event step.

    Parameters
    ----------
    ax : matplotlib Axes to draw on.
    label_df : event rows of a single series; must have "night" and "step" columns.
    """
    for _, night_df in label_df.groupby("night"):
        # axvspan's vertical extent is given in axes-fraction units (0..1 by
        # default), so the band always fills the panel height. No twin y-axis
        # is needed, which also avoids a meaningless 0-1 scale on the right.
        ax.axvspan(night_df["step"].min(), night_df["step"].max(), color="red", alpha=0.1)


# One PNG per series: OOF predictions + submitted events on top, raw anglez and
# enmo signals below, with labelled nights shaded red in every panel.

# The output directory is identical for all series, so create it once up front.
plot_dir = f"/kaggle/output/{CFG['execution']['exp_id']}/all_days"
os.makedirs(plot_dir, exist_ok=True)

for sid, oof_df in tqdm(oof.groupby("series_id")):
    sub_df = sub[sub["series_id"] == sid].sort_values("step").reset_index(drop=True)
    label_df = labels[labels["series_id"] == sid].sort_values("step").reset_index(drop=True)
    sensor_df = pd.read_csv(os.path.join(CFG["dataset"]["step_csv_dir"], f"{sid}.csv"))

    fig, axs = plt.subplots(3, 1, figsize=(20, 10))

    # Panel 0: OOF predictions and submitted events.
    axs[0].plot(oof_df["step"], oof_df["wakeup_oof"], label="wakeup")
    axs[0].plot(oof_df["step"], oof_df["onset_oof"], label="onset")
    _shade_nights(axs[0], label_df)
    axs[0].scatter(sub_df["step"], sub_df["score"], color="red", s=7, label="submission")
    # Dashed vertical line at every sample whose timestamp reads 12:00:00.
    # NOTE(review): assumes "timestamp" is an ISO-8601 string (YYYY-MM-DDTHH:MM:SS...),
    # so characters 11..18 hold the local time of day - confirm against the CSVs.
    switch_time = sensor_df[sensor_df["timestamp"].str[11:19] == "12:00:00"]["step"].values
    for switch in switch_time:
        axs[0].axvline(switch, color="black", alpha=0.5, ls="--")
    # Without a legend the line labels are never rendered. A fixed location is
    # used because loc="best" can be slow on plots with many points.
    axs[0].legend(loc="upper right")
    axs[0].grid()

    # Panels 1 and 2: raw sensor signals with the same night shading.
    for ax, column in zip(axs[1:], ("anglez", "enmo")):
        ax.plot(sensor_df["step"], sensor_df[column])
        ax.set_ylabel(column)
        _shade_nights(ax, label_df)
        ax.grid()

    fig.suptitle(f"sid: {sid}")
    fig.tight_layout()
    fig.savefig(f"{plot_dir}/{sid}.png")
    # Close this figure explicitly so figures do not accumulate across the loop.
    plt.close(fig)

100%|██████████| 277/277 [06:23<00:00,  1.38s/it]
