In [1]:
import xgboost as xgb
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import yaml
import datetime
from cuml import ForestInference
import sys
from tqdm import tqdm
import os
import random

import warnings
# Install a blanket 'ignore' filter: every warning category is silenced from here on.
warnings.filterwarnings('ignore')

# Put /kaggle on the import path before importing `utils` (presumably the
# project-local `utils` package lives there — confirm against the environment).
sys.path.append('/kaggle')
from utils.xgb import fit_xgb, inference_xgb
# NOTE(review): "comptetition" looks like a misspelling of "competition"; it
# presumably matches the name defined in utils/metric.py — confirm before renaming.
from utils.metric import compute_comptetition_metric

In [3]:
# Notebook-wide settings, kept as inline YAML so the whole run is configured
# from this one cell. CFG is the parsed result (nested dicts / scalars).
#
# English gloss of the tuning notes embedded in the YAML text below:
#   learning_rate    - fixed at 0.01; trade-off against training time
#   reg_alpha        - L1 regularization; 0.1 is the recommended value
#   reg_lambda       - L2 regularization; 0.1 is the recommended value
#   max_depth        - 3-8; around 7 is fine
#   colsample_bytree - use a smaller value (e.g. 0.4) when there are many columns
#
# The literal is a plain string, not an f-string: there is nothing to
# interpolate, and an f-string would misinterpret any `{`/`}` later added to
# the YAML (e.g. a flow mapping).
config = """
execution:
    debug: False
    submit: False

dataset: 
    competition_dir: /kaggle/input/child-mind-institute-detect-sleep-states
    cv_split_path: /kaggle/input/cv_split/train_folds.csv
    train_base_path: /kaggle/input/train_base/train_base.csv
    day_csv_dir: /kaggle/input/save_day_csv/day_csvs

xgboost:    
    objective: "reg:squarederror"
    learning_rate: 0.01  # 0.01で固定。学習時間とのトレードオフ
    reg_alpha: 0.02  # L1正則化。0.1が推奨。
    reg_lambda: 0.2  # L2正則化。0.1が推奨
    random_state: 42
    max_depth: 5  # 3-8。7くらいでいい。
    colsample_bytree: 0.7  # カラムが多い時は少なめ(0.4とか)にする。
    
seed: 46
"""

# safe_load is shorthand for yaml.load(..., Loader=yaml.SafeLoader).
CFG = yaml.safe_load(config)

In [4]:
# Base dataframe: read the CSV named in the config, then replace the
# start_time / end_time columns with UTC-aware datetimes.
train_base = pd.read_csv(CFG['dataset']['train_base_path']).assign(
    start_time=lambda frame: pd.to_datetime(frame["start_time"], utc=True),
    end_time=lambda frame: pd.to_datetime(frame["end_time"], utc=True),
)

In [7]:
# Preview the first ten distinct series ids, in order of first appearance.
pd.unique(train_base["series_id"])[:10]

array(['038441c925bb', '03d92c9f6f8a', '0402a003dae9', '04f547b8017d',
       '05e1944c3818', '062cae666e2a', '062dbd4c95e6', '08db4255286f',
       '0a96f4993bd7', '0cd1e3d0ed95'], dtype=object)

In [11]:
# Show every row belonging to one series, selected with a boolean mask.
train_base.loc[train_base["series_id"].eq("05e1944c3818")]

Unnamed: 0,series_id,start_time,end_time,target_type,target_step,target_timestamp,sample_id,target
129,05e1944c3818,2018-11-15 23:00:00+00:00,2018-11-16 22:59:59+00:00,wakeup,,,129,
130,05e1944c3818,2018-11-16 23:00:00+00:00,2018-11-17 22:59:59+00:00,wakeup,,,130,
131,05e1944c3818,2018-11-17 23:00:00+00:00,2018-11-18 22:59:59+00:00,wakeup,27360.0,2018-11-18 13:00:00+00:00,131,10080.0
132,05e1944c3818,2018-11-18 23:00:00+00:00,2018-11-19 22:59:59+00:00,wakeup,45408.0,2018-11-19 14:04:00+00:00,132,10848.0
133,05e1944c3818,2018-11-19 23:00:00+00:00,2018-11-20 22:59:59+00:00,wakeup,61272.0,2018-11-20 12:06:00+00:00,133,9432.0
134,05e1944c3818,2018-11-20 23:00:00+00:00,2018-11-21 22:59:59+00:00,wakeup,78480.0,2018-11-21 12:00:00+00:00,134,9360.0
135,05e1944c3818,2018-11-21 23:00:00+00:00,2018-11-22 22:59:59+00:00,wakeup,95760.0,2018-11-22 12:00:00+00:00,135,9360.0
136,05e1944c3818,2018-11-22 23:00:00+00:00,2018-11-23 22:59:59+00:00,wakeup,113220.0,2018-11-23 12:15:00+00:00,136,9540.0
137,05e1944c3818,2018-11-23 23:00:00+00:00,2018-11-24 22:59:59+00:00,wakeup,131352.0,2018-11-24 13:26:00+00:00,137,10392.0
138,05e1944c3818,2018-11-24 23:00:00+00:00,2018-11-25 22:59:59+00:00,wakeup,,,138,
