In [1]:
### Run once per session: installs/upgrades the packages this notebook needs

!pip install --upgrade -q numpy numba polars lightgbm tensorflow-addons


[0m

In [2]:
from joblib import Parallel, delayed
import polars as pl
import numpy as np
from tqdm import tqdm
import gc
import gc
import os
import json
import sys
from sklearn.model_selection import KFold
sys.path.append('..')
import tensorflow as tf
import tensorflow_addons as tfa
import numba as nb
import lightgbm as lgb
import sklearn as sk
import pandas as pd

2023-12-16 06:03:00.761977: I tensorflow/core/platform/cpu_feature_guard.cc:194] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE3 SSE4.1 SSE4.2 AVX
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.

TensorFlow Addons (TFA) has ended development and introduction of new features.
TFA has entered a minimal maintenance and release mode until a planned end of life in May 2024.
Please modify downstream libraries to take dependencies from other repositories in our TensorFlow community (e.g. Keras, Keras-CV, and Keras-NLP). 

For more information see: https://github.com/tensorflow/addons/issues/2807 

 The versions of TensorFlow you are currently using is 2.10.0 and is not supported. 
Some things might work, some things might not.
If you were to encounter a bug, do not file an issue.
If you want to make sure you're using a tested and supp

In [3]:
# Print the version of each key dependency in pip's "name==version" form.
# `packages` and `versions` are parallel lists: keep their order in sync.
packages = ['numpy', 'pandas', 'numba', 'polars', 'lightgbm', 'tensorflow', 'tensorflow_addons', 'scikit-learn']
versions = [np.__version__, pd.__version__, nb.__version__, pl.__version__, lgb.__version__, tf.__version__, tfa.__version__, sk.__version__]

for package, version in zip(packages, versions):
    print(f'{package}=={version}')

numpy==1.24.4
pandas==1.4.3
numba==0.58.1
polars==0.19.19
lightgbm==4.1.0
tensorflow==2.10.0
tensorflow_addons==0.21.0
scikit-learn==0.24.2


In [4]:
import sys
print(sys.version)

3.8.10 (default, Jun 22 2022, 20:18:18) 
[GCC 9.4.0]


In [5]:
!nvidia-smi

Sat Dec 16 06:03:03 2023       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.54.03              Driver Version: 535.54.03    CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  NVIDIA RTX 6000 Ada Gene...    On  | 00000000:23:00.0 Off |                  Off |
| 30%   39C    P8              26W / 300W |      3MiB / 49140MiB |      0%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
                                                                    

In [6]:
# Only the last bare expression of a cell is displayed, so this cell shows
# tf.__version__ only; the first three lines produce no visible output.
np.__version__
nb.__version__
pl.__version__
tf.__version__

'2.10.0'

In [7]:
# Notebook-wide configuration namespace; path settings from settings.json are
# attached to this class as extra attributes by a later cell.
class cfg:
    # Number of consecutive steps pooled into one sample by resample_arr_1d
    # (1 keeps every step). Also divides std_dev_num to obtain the label sigma.
    samp_freq=1
    # Width, in steps, of the Gaussian label window placed around each event.
    gaussian_overlay_len = 60
    # Numerator of the Gaussian sigma: sigma = std_dev_num / samp_freq.
    std_dev_num = 2400

In [8]:
# Load the project-level settings (data/output paths) and expose every entry
# as an attribute of `cfg`. The file is opened in a `with` block so the handle
# is closed deterministically instead of being left to the garbage collector.
with open('../settings.json', 'r') as settings_file:
    settings_json = json.load(settings_file)
print(settings_json)

# Each top-level key of the settings file becomes `cfg.<key>`.
for k, v in settings_json.items():
    setattr(cfg, k, v)

print(cfg.__dict__)

{'train_events_path': '../data/train_events.csv', 'train_series_path': '../data/train_series.parquet', 'processed_data_path': '../data_processed_models', 'output_dir': '../outputs'}
{'__module__': '__main__', 'samp_freq': 1, 'gaussian_overlay_len': 60, 'std_dev_num': 2400, '__dict__': <attribute '__dict__' of 'cfg' objects>, '__weakref__': <attribute '__weakref__' of 'cfg' objects>, '__doc__': None, 'train_events_path': '../data/train_events.csv', 'train_series_path': '../data/train_series.parquet', 'processed_data_path': '../data_processed_models', 'output_dir': '../outputs'}


In [9]:
cfg.__dict__

mappingproxy({'__module__': '__main__',
              'samp_freq': 1,
              'gaussian_overlay_len': 60,
              'std_dev_num': 2400,
              '__dict__': <attribute '__dict__' of 'cfg' objects>,
              '__weakref__': <attribute '__weakref__' of 'cfg' objects>,
              '__doc__': None,
              'train_events_path': '../data/train_events.csv',
              'train_series_path': '../data/train_series.parquet',
              'processed_data_path': '../data_processed_models',
              'output_dir': '../outputs'})

In [10]:
# Polars column expressions applied to the raw tables after loading.
# Parse the timestamp strings into Datetime values using the format below.
dt_transforms = [
    pl.col('timestamp').str.strptime(pl.Datetime, format='%Y-%m-%dT%H:%M:%S%Z'), 
]

# Compact integer encodings for the two sensor columns.
# NOTE(review): this list is not applied in any cell visible here — confirm it is still needed.
data_transforms = [
    pl.col('anglez').cast(pl.Int16), # Cast anglez to a 16-bit signed integer
    (pl.col('enmo')*1000).cast(pl.UInt16), # Scale enmo by 1000, then cast to a 16-bit unsigned integer
]

In [11]:
def transform(df, night_offset=20):
    """Parse timestamps and add calendar columns plus a per-night group id.

    Args:
        df: polars DataFrame with a string ``timestamp`` column and a
            ``series_id`` column.
        night_offset: hour of day from which a row is attributed to the
            following calendar day when building the night grouping.

    Returns:
        The frame with ``timestamp`` parsed to Datetime and these columns added:
        ``year`` (year minus 2000), ``month``, ``day``, ``hour``, ``group_id``
        (``series_id`` + "_" + night date as YYYYMMDD) and ``norm_step``
        (cumulative count within each ``group_id``). The temporary
        ``night_group`` column is dropped before returning.
    """
    return (
        df.with_columns(
            [
                pl.col("timestamp")
                .str.strptime(pl.Datetime, format="%Y-%m-%dT%H:%M:%S%Z")
                .alias("timestamp"),
            ]
        )
        .with_columns(
            [
                (pl.col("timestamp").dt.year() - 2000).cast(pl.Int8).alias("year"),
                pl.col("timestamp").dt.month().cast(pl.Int8).alias("month"),
                pl.col("timestamp").dt.day().cast(pl.Int8).alias("day"),
                pl.col("timestamp").dt.hour().cast(pl.Int8).alias("hour"),
            ]
        )
        # Adjust the date so one night stays in one group: rows with
        # hour >= night_offset are counted toward the next calendar day.
        # (Translated from the original note, which said "adjust the date across noon".)
        .with_columns(
            pl.when(pl.col("hour") < night_offset)
            .then(pl.col("timestamp"))
            .otherwise(pl.col("timestamp") + pl.duration(days=1))
            .dt.date()
            .alias("night_group"),
        )
        .with_columns(
            [
                (
                    pl.col("series_id")
                    + pl.lit("_")
                    + pl.col("night_group").cast(pl.Datetime).dt.strftime("%Y%m%d")
                ).alias("group_id"),
            ]
        )
        .with_columns(
            [
                # NOTE(review): the recorded run output shows a warning pointing at this
                # line — presumably a deprecation of `cumcount`; confirm before upgrading polars.
                pl.col("timestamp").cumcount().over("group_id").alias("norm_step"),
            ]
        )
        .drop(["night_group"])
    )

def transform_series(df):
    """Apply `transform` and flag the rows whose `enmo` value is exactly zero.

    Returns the transformed frame with an extra boolean column
    ``is_enmo_clipped``.
    """
    transformed = transform(df)
    zero_enmo_flag = (pl.col("enmo") == 0).alias("is_enmo_clipped")
    return transformed.with_columns([zero_enmo_flag])

In [12]:
def add_feature(
    df,
    group_col="series_id",
    day_group_col="group_id",
    term1=(5 * 60) // 5,
    term2=(30 * 60) // 5,
    term3=(60 * 60) // 5,
    min_threshold=0.005,
    max_threshold=0.04,
    center=True,
):
    """Add heuristic sleep-detection flags derived from `anglez` movement.

    Pipeline (each step adds columns to the frame):
      1. ``anglez_diff`` / ``enmo_diff``: absolute first difference per ``group_col``.
      2. ``*_diff_median_5min``: rolling median of those differences over ``term1`` rows.
      3. ``critical_threshold``: 10th percentile of ``anglez_diff_median_5min`` per
         ``day_group_col``, clipped to ``[min_threshold, max_threshold]``.
      4. ``is_static``: rolling median below 15x the critical threshold.
      5. ``is_sleep_block``: row lies next to a window of ``term2`` rows that are all static.
      6. ``is_gap`` / ``gap_length`` / ``is_large_gap``: complement of sleep blocks, and
         gaps spanning a full ``term3`` window.
      7. ``is_sleep_episode``: complement of large gaps; episodes are numbered per
         ``day_group_col`` and the longest one is flagged as ``is_longest_sleep_episode``.

    Args:
        df: polars DataFrame containing ``anglez``, ``enmo``, ``group_col`` and
            ``day_group_col`` columns.
        group_col: column identifying one continuous recording.
        day_group_col: column identifying one night within a recording.
        term1: window length in rows for the rolling medians (default 60).
        term2: window length in rows for the static-block test (default 360).
        term3: window length in rows for the large-gap test (default 720).
        min_threshold: lower clip bound for ``critical_threshold``.
        max_threshold: upper clip bound for ``critical_threshold``.
        center: whether the rolling windows are centred.

    Returns:
        The input frame with the columns listed above added.

    Note:
        The full-window test now compares against ``term2`` and the episode
        bookkeeping groups by ``day_group_col``. Both were hard-coded to their
        default values before, so results with default arguments are unchanged.
    """
    return (
        df.with_columns(
            [
                pl.col("anglez").diff(1).abs().over(group_col).alias("anglez_diff"),
                pl.col("enmo").diff(1).abs().over(group_col).alias("enmo_diff"),
            ]
        )
        .with_columns(
            [
                pl.col("anglez_diff")
                .rolling_median(term1, center=center)  # term1-row window (5 min at the default)
                .over(group_col)
                .alias("anglez_diff_median_5min"),
                pl.col("enmo_diff")
                .rolling_median(term1, center=center)  # term1-row window (5 min at the default)
                .over(group_col)
                .alias("enmo_diff_median_5min"),
            ]
        )
        .with_columns(
            [
                pl.col("anglez_diff_median_5min")
                .quantile(0.1)
                .clip(min_threshold, max_threshold)
                .over(day_group_col)
                .alias("critical_threshold")
            ]
        )
        .with_columns(
            [
                (pl.col("anglez_diff_median_5min") < pl.col("critical_threshold") * 15)
                .over(group_col)
                .alias("is_static")
            ]
        )
        .with_columns(
            [
                pl.col("is_static")
                .cast(pl.Int32)
                .rolling_sum(term2, center=center)
                .over(group_col)
                .alias("is_static_sum_30min"),
            ]
        )
        .with_columns(
            # A window is fully static when every one of its term2 rows is static.
            [(pl.col("is_static_sum_30min") == term2).over(group_col).alias("tmp")]
        )
        .with_columns(
            [
                # Spread the "full window" flag half a window in both directions.
                pl.col("tmp").shift(term2 // 2).over(group_col).alias("tmp_left"),
                pl.col("tmp").shift(-(term2 // 2)).over(group_col).alias("tmp_right"),
            ]
        )
        .with_columns(
            [
                (pl.col("tmp_left") | pl.col("tmp_right")).alias("is_sleep_block"),
            ]
        )
        .drop(["tmp", "tmp_left", "tmp_right"])
        .with_columns([pl.col("is_sleep_block").not_().alias("is_gap")])
        .with_columns(
            [
                pl.col("is_gap")
                .cast(pl.Int32)
                .rolling_sum(term3, center=center)
                .over(group_col)
                .alias("gap_length")
            ]
        )
        .with_columns([(pl.col("gap_length") == term3).over(group_col).alias("tmp")])
        .with_columns(
            [
                pl.col("tmp").shift(term3 // 2).over(group_col).alias("tmp_left"),
                pl.col("tmp").shift(-(term3 // 2)).over(group_col).alias("tmp_right"),
            ]
        )
        .with_columns(
            [
                (pl.col("tmp_left") | pl.col("tmp_right")).alias("is_large_gap"),
            ]
        )
        .drop(["tmp", "tmp_left", "tmp_right"])
        .with_columns([pl.col("is_large_gap").not_().alias("is_sleep_episode")])
        #
        # extract longest sleep episode
        #
        .with_columns(
            [
                # A false->true transition starts a new episode; the running count
                # of such transitions is the episode id within the night.
                (
                    (
                        pl.col("is_sleep_episode")
                        & pl.col("is_sleep_episode")
                        .shift_and_fill(pl.lit(False), periods=1)
                        .not_()
                    )
                    .cumsum()
                    .over(day_group_col)
                ).alias("sleep_episode_id")
            ]
        )
        .with_columns(
            [
                pl.col("is_sleep_episode")
                .sum()
                .over([day_group_col, "sleep_episode_id"])
                .alias("sleep_episode_length")
            ]
        )
        .with_columns(
            [
                pl.col("sleep_episode_length")
                .max()
                .over([day_group_col])
                .alias("max_sleep_episode_length")
            ]
        )
        .with_columns(
            [
                (
                    pl.col("is_sleep_episode")
                    & (pl.col("sleep_episode_length") == pl.col("max_sleep_episode_length"))
                ).alias("is_longest_sleep_episode")
            ]
        )
    )

In [13]:
%%time
train_events = pl.read_csv(cfg.train_events_path).with_columns(dt_transforms)
train_series = pl.read_parquet(cfg.train_series_path)

CPU times: user 8.39 s, sys: 16.1 s, total: 24.5 s
Wall time: 5.76 s


In [14]:
%%time 
train_series = transform_series(train_series)
train_series = add_feature(train_series)

  pl.col("timestamp").cumcount().over("group_id").alias("norm_step"),
  & pl.col("is_sleep_episode")
  & pl.col("is_sleep_episode")
  (


CPU times: user 5min 38s, sys: 1min 39s, total: 7min 18s
Wall time: 2min 3s


In [15]:
# Keep only the identifiers, the raw signals and the heuristic flags from
# add_feature; every other intermediate column is discarded here.
train_series = train_series[['series_id', 'step', 'timestamp', 'anglez', 'enmo',
                             'is_longest_sleep_episode', 'is_sleep_episode', 'is_large_gap',
                             'is_gap', 'is_sleep_block', 'is_static']]

In [16]:
# Replace nulls with False in every column except series_id.
# NOTE(review): the comprehension also covers non-boolean columns (step, timestamp,
# anglez, enmo) — confirm that filling those with False is intended.
train_series = train_series.with_columns([pl.col(c).fill_null(False) for c in train_series.columns if c != 'series_id'])

In [17]:
# Sorted array of the distinct series ids present in the events table.
# NOTE(review): not referenced by any later cell visible here — confirm it is still needed.
all_series_ids = np.unique(train_events['series_id'].unique())

In [18]:
# Series that keep at least one event row after rows containing nulls are dropped.
use_series = train_events.drop_nulls()['series_id'].unique()
print(f'Using a total of {len(use_series)} series')

# Restrict both tables to those series.
train_series = train_series.filter(pl.col('series_id').is_in(use_series))
train_events = train_events.filter(pl.col('series_id').is_in(use_series))

# Sanity check: both tables should report the same number of series.
print(train_series['series_id'].n_unique(), train_events['series_id'].n_unique())

Using a total of 269 series
269 269


In [19]:
# Add a calendar-date column to the series table and store `step` as Int64.
train_series = train_series.with_columns([pl.col('timestamp').dt.date().alias('date')])
train_series = train_series.with_columns(pl.col('step').cast(pl.Int64))

# Same date column for the events table, then drop every event row that contains a null.
train_events = train_events.with_columns([pl.col('timestamp').dt.date().alias('date')])
train_events = train_events.drop_nulls()

In [20]:
# pandas copy of the events table, used by the groupby-based consistency check in the next cell.
train_events_pd = train_events.to_pandas()

In [21]:
# Per-series flag: True when the series has as many onset events as wakeup events.
# NOTE(review): comparing the two groupby results assumes both contain exactly the same
# series ids (every series has at least one onset AND one wakeup) — confirm for new data.
fltr_series = train_events_pd[train_events_pd['event'] == 'onset'].groupby('series_id').size() == train_events_pd[train_events_pd['event'] == 'wakeup'].groupby('series_id').size()
# Number of series that pass the check.
fltr_series.sum()

264

In [22]:
# Ids of the series whose onset and wakeup counts match (boolean mask indexing).
balanced_mask = fltr_series == True
use_series = fltr_series.index[balanced_mask].tolist()
len(use_series)

264

In [23]:
# Second filtering pass: keep only the series whose onset/wakeup counts match.
# This repeats the filter-and-report steps used after the first `use_series` selection.
print(f'Using a total of {len(use_series)} series')

train_series = train_series.filter(pl.col('series_id').is_in(use_series))
train_events = train_events.filter(pl.col('series_id').is_in(use_series))

# Sanity check: both tables should report the same number of series.
print(train_series['series_id'].n_unique(), train_events['series_id'].n_unique())

Using a total of 264 series


264 264


In [24]:
def rolling_nunique(arr, window=5):
    """Count the distinct values in each trailing window of `arr`.

    Position ``i`` of the result holds the number of distinct values in
    ``arr[i - window + 1 : i + 1]``. The first ``window - 1`` positions stay
    zero, and an all-zero array is returned when `arr` is shorter than
    `window`. The result has the same shape and dtype as `arr`.
    """
    total = len(arr)
    out = np.zeros_like(arr)
    if total < window:
        return out

    # Multiset of the values currently inside the window: value -> occurrences.
    counts = {}
    for value in arr[:window]:
        counts[value] = counts.get(value, 0) + 1
    out[window - 1] = len(counts)

    # Slide one position at a time: retire the oldest value, admit the newest.
    for pos in range(window, total):
        dropped = arr[pos - window]
        remaining = counts[dropped] - 1
        if remaining == 0:
            counts.pop(dropped)
        else:
            counts[dropped] = remaining

        added = arr[pos]
        counts[added] = counts.get(added, 0) + 1

        out[pos] = len(counts)

    return out

In [25]:
def feat_eng(df, add_target=False):
    """Add time-of-day, difference and rolling-window features to a series frame.

    No expression here is grouped by ``series_id`` (diffs and rolling windows run
    over the whole frame), so this is presumably meant to be called on a single
    series at a time, as ``process_partition`` does.

    Columns added:
      * ``hour``, ``hour_minute``, ``hms`` (second of day), ``dayofweek``, ``step_mod``
      * ``enmo_diff``, ``anglez_diff`` (absolute), ``large_anglez_diff`` (``anglez_diff > 5``)
      * per-``hour`` mean/std of ``enmo`` and several statistics grouped by ``hms``
      * ``rolling_nunique_anglez_win_{12,60}`` via ``rolling_nunique``
      * centred rolling mean/std/max features and a rolling ``anglez`` range
        (max - min) for window sizes 12 and 60

    Args:
        df: polars DataFrame with ``timestamp`` (Datetime), ``step``, ``enmo``,
            ``anglez`` and, when ``add_target`` is set, ``series_id``.
        add_target: when True, left-join the module-level ``train_events`` on
            (``series_id``, ``step``) and add 0/1 ``is_onset`` / ``is_wakeup`` columns.

    Returns:
        The frame sorted by ``step`` with the feature (and optional target) columns.

    NOTE(review): ``anglez_std_stepmod`` is computed from the std of ``enmo`` grouped
    by ``hms``, although its name suggests ``anglez`` grouped by ``step_mod``. The
    ``*_stepmod`` columns are likewise grouped by ``hms``. Confirm this is intended
    before renaming anything, since saved features depend on it.
    """
    
    # Time-of-day keys, first differences and the step-of-day index, followed by
    # per-hour / per-second-of-day statistics of enmo.
    df = df.with_columns([pl.col('timestamp').dt.hour().alias('hour'),
                                         (pl.col('timestamp').dt.hour() * 60 + pl.col('timestamp').dt.minute()).alias('hour_minute'),
                                          (pl.col('timestamp').dt.hour()*60*60 + pl.col('timestamp').dt.minute()*60 + pl.col('timestamp').dt.second()).alias('hms'),
                                          pl.col('timestamp').dt.weekday().alias('dayofweek'),
                                          pl.col('enmo').diff().fill_null(0).alias('enmo_diff'),
                                          pl.col('anglez').diff().abs().alias('anglez_diff').fill_null(0),
                                          (pl.col('step') % (24*60*12)).alias('step_mod')]).with_columns([\
    ((pl.col('anglez_diff') > 5)*1.0).alias('large_anglez_diff'),
     pl.col('enmo').mean().over(['hour']).alias('enmo_mean_hour'),
    pl.col('enmo').std().over(['hour']).fill_null(0).alias('enmo_std_hour'),
    pl.col('enmo').std().over(['hms']).fill_null(0).alias('anglez_std_stepmod'),
    ((pl.col('enmo') == pl.col('enmo').median())*1).over(['hms']).alias('enmo_eq_median_stepmod'),
    ((pl.col('enmo') == pl.col('enmo').mean())*1).over(['hms']).alias('enmo_eq_stepmod')
    ]).with_columns([
        pl.col('enmo_eq_stepmod').sum().over(['hms']).alias('enmo_eq_cnt_stepmod'),
        pl.col('enmo_eq_stepmod').mean().over(['hms']).alias('enmo_eq_pct_stepmod')
    ]).with_columns([
        pl.col('enmo_eq_pct_stepmod').diff().fill_null(0).alias('enmo_eq_pct_stepmod_diff'),
    ])
    
    # Window sizes, in rows, shared by all rolling features below.
    windows = [12, 60]
    
    # Number of distinct integer anglez values in each trailing window.
    df = df.with_columns([pl.Series(rolling_nunique(df['anglez'].to_numpy().astype('int'), window=window)).alias(f'rolling_nunique_anglez_win_{window}')\
                            for window in windows])
    
    # Column -> rolling aggregations to compute for it (resolved as pl.Expr.rolling_<agg>).
    aggs_col_wise = {
        'enmo': ['mean', 'std'],
        'anglez_diff': ['max'],
        'large_anglez_diff': ['max', 'mean']
    }

    # One centred rolling feature per (column, aggregation, window) combination.
    df = df.with_columns([getattr(pl.col(col), f'rolling_{agg}')(window_size=win_size, min_periods=1, center=True)\
        .alias(f'{col}_rolling_{agg}_win_{win_size}')\
        for col in aggs_col_wise.keys() for agg in aggs_col_wise[col] for win_size in windows])
    
    ### Rolling range by calculating max - min
    
    df = df.with_columns([(pl.col('anglez').rolling_max(window_size=win_size, min_periods=1, center=True) -\
        pl.col('anglez').rolling_min(window_size=win_size, min_periods=1, center=True)).fill_null(0).alias(f'anglez_rolling_range_win_{win_size}')\
            for win_size in windows])

    df = df.sort(by='step', descending=False)

    # Attach event labels from the module-level `train_events` table: rows without an
    # event get 0 in both target columns.
    if add_target == True:
        df = df.join(train_events[['series_id', 'event', 'step']], on=['series_id', 'step'], how='left')
        df = df.with_columns(pl.col('event').replace({'onset': 1, 'wakeup': 0}, default=None).fill_null(0).alias('is_onset'))
        df = df.with_columns(pl.col('event').replace({'onset': 0, 'wakeup': 1}, default=None).fill_null(0).alias('is_wakeup'))

    return df

In [26]:
# Split the series table into one DataFrame per series_id (input order preserved)
# so that feature engineering runs independently for each series.
partitions = train_series.partition_by(by='series_id', maintain_order=True)

def process_partition(df):
    """Run `feat_eng` with target columns attached on one per-series DataFrame."""
    df = feat_eng(df, add_target=True)
    return df

# Parallel execution: one joblib task per series, with n_jobs=64 workers requested.
results = Parallel(n_jobs=64)(delayed(process_partition)(df) for df in tqdm(partitions))

100%|██████████| 264/264 [01:06<00:00,  3.94it/s]


In [27]:
# Stitch the per-series results back into a single table, then release the
# per-series copies to reduce peak memory.
train_series = pl.concat(results)
del partitions, results
_ = gc.collect()

In [28]:
# Ordered list of feature columns exported to the model input arrays: a hand-picked
# set followed by every column whose name contains 'rolling'.
norm_cols = ['enmo', 'hour', 'anglez', 'dayofweek', 'enmo_diff', 'anglez_diff',
             'anglez_std_stepmod', 'enmo_eq_stepmod',  'enmo_eq_pct_stepmod','enmo_eq_pct_stepmod_diff', 'large_anglez_diff',
             'enmo_eq_median_stepmod',
             'is_longest_sleep_episode', 'is_sleep_episode', 'is_large_gap', 'is_gap', 'is_sleep_block', 'is_static'
            ] + [c for c in train_series.columns if 'rolling' in c]


In [29]:
# Heuristic flag columns produced by add_feature.
is_added_cols = ['is_longest_sleep_episode', 'is_sleep_episode', 'is_large_gap', 'is_gap', 'is_sleep_block', 'is_static']

# Multiply each flag by 1, presumably to turn the booleans into integers so they
# can be exported as numeric features.
train_series = train_series.with_columns([(pl.col(c)*1).alias(c) for c in is_added_cols])
# Drop the columns that are not exported.
# NOTE(review): 'event_dup' is not created by any cell visible here — confirm the column
# exists, or that this polars version tolerates dropping a missing column.
train_series = train_series.drop(['event_dup', 'timestamp', 'date', 'event'])
_ = gc.collect()

In [30]:
def resample_arr_1d(arr, samp_freq=12, samp_method='mean'):
    """Downsample a 1-D array by pooling consecutive groups of `samp_freq` values.

    The array is zero-padded at the end up to a multiple of `samp_freq`,
    reshaped to ``(-1, samp_freq)`` and reduced row by row:

      * ``'first'`` / ``'last'``: first / last value of each group
      * ``'argmax'`` / ``'argmin'``: position of the extreme value divided by `samp_freq`
      * ``'aux'``: 0 where the argmax position is greater than ``samp_freq // 2``, else 1
      * anything else: the ndarray method of that name (e.g. ``'mean'``, ``'max'``)
        applied with ``axis=1``
    """
    leftover = len(arr) % samp_freq
    if leftover != 0:
        # Pad with zeros so the length becomes a multiple of samp_freq.
        arr = np.concatenate([arr, np.zeros(samp_freq - leftover)])

    groups = arr.reshape(-1, samp_freq)

    if samp_method == 'last':
        return groups[:, -1]

    if samp_method in ['argmax', 'argmin']:
        positions = getattr(groups, samp_method)(axis=1)
        return positions/samp_freq

    if samp_method == 'first':
        return groups[:, 0]

    if samp_method == 'aux':
        peak_positions = np.argmax(groups, axis=1)
        return np.where(peak_positions > samp_freq//2, 0, 1)

    # Generic reduction: resolve the ndarray method by name.
    reducer = getattr(groups, samp_method)
    return reducer(axis=1)

In [31]:
def gaussian(position, length, sigma):
    """Return a normal density sampled at 0..length-1, centred on `position`.

    The curve has standard deviation `sigma` and the usual
    ``1 / (sigma * sqrt(2*pi))`` scaling.
    """
    offsets = np.arange(0, length) - position
    scale = 1 / (sigma * np.sqrt(2*np.pi))
    return scale * np.exp(-offsets**2 / (2*sigma**2))

def get_y_gaussian(series_df):
    """Build soft Gaussian targets around the onset and wakeup events of one series.

    Args:
        series_df: polars DataFrame for a single series with ``step``,
            ``is_onset`` and ``is_wakeup`` columns. Event ``step`` values are
            used directly as row indices, so rows are assumed to be ordered by
            step starting at 0 — TODO confirm for every caller.

    Returns:
        Float array of shape ``(len(series_df), 2)``: column 0 holds the onset
        curve and column 1 the wakeup curve. Each event writes a window of
        ``cfg.gaussian_overlay_len // 2`` rows on either side of its step,
        filled with a Gaussian of sigma ``cfg.std_dev_num / cfg.samp_freq``.

    Raises:
        AssertionError: if the numbers of onset and wakeup events differ.

    Windows are clipped at both ends of the series for both event types. The
    earlier code clipped onsets only at the start and wakeups only at the end,
    so an onset near the end raised a shape mismatch and a wakeup near the
    start produced a negative slice start.
    """
    onset_steps = series_df.filter(pl.col('is_onset') == 1)['step'].to_numpy()
    wakeup_steps = series_df.filter(pl.col('is_wakeup') == 1)['step'].to_numpy()

    assert len(onset_steps) == len(wakeup_steps)

    n_rows = len(series_df)
    y_sub = np.zeros((n_rows, 2))

    # The kernel is identical for every event, so build it once.
    half = cfg.gaussian_overlay_len // 2
    kernel = gaussian(position=half,
                      length=cfg.gaussian_overlay_len + 1,
                      sigma=cfg.std_dev_num / cfg.samp_freq)

    # Column 0 <- onsets, column 1 <- wakeups. Events are applied in order, so a
    # later event overwrites an earlier one where their windows overlap.
    for col, steps in enumerate((onset_steps, wakeup_steps)):
        for step in steps:
            window_start = step - half
            lo = max(0, window_start)
            hi = min(n_rows, step + half + 1)
            if lo >= hi:
                # The whole window falls outside the series.
                continue
            # Take the matching part of the kernel when the window is clipped.
            y_sub[lo:hi, col] = kernel[lo - window_start: hi - window_start]

    return y_sub



In [32]:
def partition_series_data(partitions, ft_cols, cfg, target_cols=None, norm_agg_dict=None):
    """Convert per-series DataFrames into resampled feature (and target) arrays.

    Args:
        partitions: mapping of series id -> polars DataFrame for that series.
        ft_cols: feature column names, in the order they appear in the output.
        cfg: configuration object; ``cfg.samp_freq`` is the resampling factor.
        target_cols: target column names. When None or empty, no targets are
            built and the function returns only features and ids.
        norm_agg_dict: mapping of feature column -> list of aggregation names
            passed to ``resample_arr_1d``. Defaults to ``['mean']`` for every
            column in `ft_cols` when omitted; the previous default of None
            raised ``TypeError`` on first use.

    Returns:
        ``(X_s, y_s, series_ids)`` when targets are requested, otherwise
        ``(X_s, series_ids)``. ``X_s`` and ``y_s`` are lists of float32 arrays,
        one per series, in the iteration order of `partitions`.
    """
    # If target_cols is not provided or is empty, skip y_s related computation
    compute_y_s = target_cols is not None and len(target_cols) > 0

    if norm_agg_dict is None:
        norm_agg_dict = {col: ['mean'] for col in ft_cols}

    X_s, y_s, series_ids = [], [], []

    for ser_id in tqdm(partitions.keys()):

        df_ser_sub = partitions[ser_id]

        X_sub = df_ser_sub[ft_cols].to_numpy()

        # One (n_samples, n_aggs) block per feature column, concatenated column-wise.
        feature_blocks = [
            np.array([resample_arr_1d(X_sub[:, i], cfg.samp_freq, agg)
                      for agg in norm_agg_dict[ft_cols[i]]]).T
            for i in range(X_sub.shape[1])
        ]
        X_sub_samp = np.concatenate(feature_blocks, axis=1).astype('float32')

        X_s.append(X_sub_samp)
        series_ids.append(ser_id)

        if compute_y_s:
            y_sub = get_y_gaussian(df_ser_sub)
            # 'max' pooling keeps the peak of each label curve when downsampling.
            y_sub_samp = np.array([resample_arr_1d(y_sub[:, i], cfg.samp_freq, 'max')
                                   for i in range(len(target_cols))]).T.astype('float32')
            y_s.append(y_sub_samp)

    if compute_y_s:
        return X_s, y_s, series_ids
    else:
        return X_s, series_ids
    
# Every feature column is resampled with a single 'mean' aggregation.
norm_agg_dict = {c: ['mean'] for c in norm_cols}

# Dict of series_id -> DataFrame; the combined table is deleted afterwards to free memory.
partitions = train_series.partition_by(by='series_id', maintain_order=True, as_dict=True)
del train_series
_ = gc.collect()

# Build the per-series feature arrays and Gaussian targets, then release the partitions.
X_s, y_s, series_ids = partition_series_data(partitions, norm_cols, cfg, target_cols=['is_onset', 'is_wakeup'], norm_agg_dict=norm_agg_dict)
del partitions
_ = gc.collect()


  0%|          | 0/264 [00:00<?, ?it/s]

100%|██████████| 264/264 [00:47<00:00,  5.52it/s]


In [33]:
def save_data(X_s, y_s, series_ids, directory):
    """Write each series' feature and target arrays to `directory` as .npy files.

    For every series two files are written: ``<series_id>_X.npy`` and
    ``<series_id>_y.npy``. The directory is created when missing;
    ``exist_ok=True`` avoids the check-then-create race of a separate
    ``os.path.exists`` test.

    Args:
        X_s: list of feature arrays, one per series.
        y_s: list of target arrays, aligned with `X_s`.
        series_ids: list of series identifiers, aligned with `X_s`.
        directory: output directory path.
    """
    os.makedirs(directory, exist_ok=True)

    for X, y, series_id in tqdm(zip(X_s, y_s, series_ids), total=len(X_s)):
        np.save(os.path.join(directory, f'{series_id}_X.npy'), X)
        np.save(os.path.join(directory, f'{series_id}_y.npy'), y)

def load_data(directory):
    """Load the arrays written by `save_data` from `directory`.

    Every file ending in ``_X.npy`` is paired with the ``<series_id>_y.npy``
    file of the same series. Files are visited in sorted name order so the
    returned lists have a reproducible order; ``os.listdir`` alone returns
    entries in arbitrary order.

    Args:
        directory: directory containing ``<series_id>_X.npy`` / ``<series_id>_y.npy`` pairs.

    Returns:
        ``(X_s, y_s, series_ids)``: three aligned lists.
    """
    feature_suffix = '_X.npy'

    X_s = []
    y_s = []
    series_ids = []

    for filename in tqdm(sorted(os.listdir(directory))):
        if filename.endswith(feature_suffix):
            # Strip only the trailing suffix to recover the series id.
            series_id = filename[:-len(feature_suffix)]
            X = np.load(os.path.join(directory, filename))
            y = np.load(os.path.join(directory, f'{series_id}_y.npy'))

            X_s.append(X)
            y_s.append(y)
            series_ids.append(series_id)

    return X_s, y_s, series_ids

In [34]:
def normalize_y(y):
    """Standardise every column of a 2-D array in place and return it.

    Each column has its mean subtracted and is divided by its standard
    deviation plus 1e-16, which guards against division by zero for constant
    columns. The input array itself is modified.
    """
    n_columns = y.shape[1]
    for col_idx in range(n_columns):
        column = y[:, col_idx]
        centre = column.mean()
        spread = column.std()
        y[:, col_idx] = (column - centre) / (spread + 1e-16)

    return y

In [35]:
# Create the output directory if it does not exist yet. save_data performs the
# same check itself, so this guard is redundant but harmless.

if not os.path.exists(cfg.processed_data_path):
    os.makedirs(cfg.processed_data_path)

# Persist one <series_id>_X.npy / <series_id>_y.npy pair per series.
save_data(X_s, y_s, series_ids, cfg.processed_data_path)

100%|██████████| 264/264 [00:11<00:00, 23.46it/s]


In [36]:
# Store the filtered events table next to the arrays as a zstd-compressed Arrow IPC file.
train_events.write_ipc(os.path.join(cfg.processed_data_path, 'train_events.ipc'), compression='zstd')

In [37]:
X_s[0].shape, y_s[0].shape, len(series_ids)

((389880, 32), (389880, 2), 264)

In [38]:
norm_cols

['enmo',
 'hour',
 'anglez',
 'dayofweek',
 'enmo_diff',
 'anglez_diff',
 'anglez_std_stepmod',
 'enmo_eq_stepmod',
 'enmo_eq_pct_stepmod',
 'enmo_eq_pct_stepmod_diff',
 'large_anglez_diff',
 'enmo_eq_median_stepmod',
 'is_longest_sleep_episode',
 'is_sleep_episode',
 'is_large_gap',
 'is_gap',
 'is_sleep_block',
 'is_static',
 'rolling_nunique_anglez_win_12',
 'rolling_nunique_anglez_win_60',
 'enmo_rolling_mean_win_12',
 'enmo_rolling_mean_win_60',
 'enmo_rolling_std_win_12',
 'enmo_rolling_std_win_60',
 'anglez_diff_rolling_max_win_12',
 'anglez_diff_rolling_max_win_60',
 'large_anglez_diff_rolling_max_win_12',
 'large_anglez_diff_rolling_max_win_60',
 'large_anglez_diff_rolling_mean_win_12',
 'large_anglez_diff_rolling_mean_win_60',
 'anglez_rolling_range_win_12',
 'anglez_rolling_range_win_60']