# Setup/Imports


In [1]:
#### Paperspace-only setup: run this cell on Paperspace to deal with issues not present on Kaggle
# Execute the metric notebook in this namespace; it is expected to define `score`,
# which is aliased below to the name the rest of this notebook uses.
%run /notebooks/event-detection-ap.ipynb
competition_score = score
# Suppress the TF placeholder value warning on Paperspace.
# NOTE(review): this variable is normally read when TensorFlow is first imported —
# confirm the %run above does not import TensorFlow before this line takes effect.
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

In [2]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import polars as pl
import datetime

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras import backend as K

# from metric import score as competition_score
from sklearn.model_selection import GroupKFold
from tqdm import tqdm

import wandb



In [3]:
### Get the WandB API key from a bash script on Paperspace.
# LOG_WANDB gates every wandb call in this notebook (login, init, log, finish).
LOG_WANDB = True
if LOG_WANDB:
    # `!cmd` captures the script's stdout; the first output line is used as the API key.
    envs=  !bash /notebooks/sleepstates.sh
    wandb.login(key=envs.l[0])
    
### prevent tf warning spam in Paperspace environment
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)

[34m[1mwandb[0m: W&B API key is configured. Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


In [4]:
# Filesystem layout (absolute Paperspace paths).
# DATA_DIR: holds filepaths.csv and the pre-windowed .npy files read by the map_path_* helpers.
DATA_DIR = "/notebooks/data"
# TRAINING_DIR: holds one full-length <series_id>.npy per series, read by the map_series_* helpers.
TRAINING_DIR = "/notebooks/5feat-series_data"
# OUTPUT_DIR: destination for the saved model of each fold.
OUTPUT_DIR = "/notebooks/output"
# EVENTS_PATH: ground-truth events CSV used as the "solution" when scoring.
EVENTS_PATH = "/notebooks/input/train_events.csv"

# Config

In [5]:
# Batch size used by every dataset builder in this notebook (multiplied by 8 below when CFG["TPU"] is set).
GPU_BATCH_SIZE = 32


# Number of consecutive steps aggregated into one label / one first-stage patch.
LABEL_DOWNSAMPLE = 6

# Single experiment configuration; the whole dict is passed to wandb.init as the run config.
CFG = { "dataset": "full-series--clip3-5feat-12tol",
        "samples_per_day": 12, # windows per block: window shift = block_size // samples_per_day
        "n_feats": 5,
       ### model_hparams (one list entry per encoder stage)
        "patches":[LABEL_DOWNSAMPLE+4,8,8,8],
        "strides":[LABEL_DOWNSAMPLE,4,4,3], 
        "channels":[12,24,48,96],
        "enc_layers":[2,2,2,2], 
        "heads":[4,4,4,4], 
        "r_ratios":[1,1,1,1], # if 1, efficient-MHSA will be replaced with "vanilla" MHSA
        "exp_ratios":[6,6,6,6],
        "d_decoder":64,

        # block_size: steps per model input window; label_agg_size: steps averaged into one label
        'block_size': 17280,
        'label_agg_size':LABEL_DOWNSAMPLE,
       
        'mha_dropout': 0,
        "fuse_dropout":0.1,
        "decoder_dropout":0.2,
        "encoder_dropout":0.2,

        "lr":2e-3,
        "loss_fn": "CCE",

        "lr_scheduler": "cos",
        "epochs":10,
        "kfolds": 10,
        "ONE_FOLD":True, # if true, stop training after first fold complete
        "class_weights":[1.0,1.0,1.0], ## unweighted

        ### Dataset hparams
        # roll: range handed to the AugRoll layer
        "roll":1,
        # drop_series: series ids removed from the file list
        "drop_series":[],
        # restricted_series: series_id -> (start, stop) row slice applied when the series is loaded
        # NOTE(review): a stop of -1 slices off the final row rather than meaning "to the end" — confirm intent
        "restricted_series": {
            "04f547b8017d":(50_000,-1),
            "31011ade7c0a":(0,190_000),
            "05e1944c3818": (0,142_500),
            "fe90110788d2": (12_000,-1),
            "fcca183903b7": (13_000,-1),
            "f6d2cc003183": (15_000,-1),
            "a596ad0b82aa": (0,232_500),
            "854206f602d0": (15_000,-1),
        },
        # label_threshold: |mean label| a patch must reach to be assigned an event class
        "label_threshold": 0.6,
        "label_smoothing": 0,
        # post-processing hparams
        "allow_unpaired_preds": True, # e.g., Onset with no corresponding Wakeup 
        "logit_threshold": 0.0, # ignore logits that don't exceed this threshold
        "min_pred_distance": 30,
        "postproc_strategy": "alt",  # unused param, purely for experiment tracking

        'TPU': False,
        'GPU_BATCH_SIZE':GPU_BATCH_SIZE,
        'model_name':"TF SegFormer - mhsa"
        }
# NOTE(review): CFG['GPU_BATCH_SIZE'] was captured above, so the logged config keeps the
# un-multiplied value when TPU is enabled — confirm whether that is intended.
if CFG["TPU"]:
    GPU_BATCH_SIZE *= 8
    
if LOG_WANDB: wandb.init(project="SleepStates-SegFormer-v1",config=CFG)

[34m[1mwandb[0m: Currently logged in as: [33mquillen[0m. Use [1m`wandb login --relogin`[0m to force relogin


# Generate GroupKFold train/val splits

In [6]:
# Build the table of saved .npy windows: keep rows with split > 0 and derive each
# file's relative path as "<window>/<series_id>_<split>.npy".
# NOTE(review): the index is not reset after filtering, so index labels may no longer
# equal row positions; the fold indices produced later by GroupKFold are positional.
df_file_list = (pd.read_csv(f"{DATA_DIR}/filepaths.csv")
                    .query("split > 0")
                    .assign(file_name = lambda x: x["window"].astype(str)+"/"+x["series_id"]+"_"+x["split"].astype(str)+".npy")
)
# Remove every series listed in CFG["drop_series"] (prints one line per configured id).
for s in CFG["drop_series"]:
    df_file_list = df_file_list[df_file_list.series_id != s]
    print(f"dropped {s}")
# Column views used when building the per-fold datasets.
file_paths = df_file_list.file_name
pd_series_ids = df_file_list.series_id

In [7]:
# Split rows into CFG["kfolds"] folds grouped by series_id, so one series never
# appears in both the training and validation side of the same fold.
group_kfold = GroupKFold(n_splits=CFG["kfolds"])
groups = df_file_list["series_id"]
# train_folds[k] / val_folds[k] hold the row positions (not index labels) of fold k.
train_folds=[]
val_folds=[]
for i, (train_index, test_index) in enumerate(group_kfold.split(df_file_list, groups=groups)):
    train_folds.append(train_index)
    val_folds.append(test_index)

# Define Datasets

In [8]:
###windowed dataset for training
def map_series_to_x(series_id):
    """Load the feature columns of one full series from TRAINING_DIR.

    series_id: bytes series identifier (as delivered by tf.numpy_function).
    Returns the array with its first and last columns removed. Series listed in
    CFG["restricted_series"] are sliced to their configured (start, stop) rows and
    then trimmed so the row count is a multiple of 12.
    """
    features = np.load(f"{TRAINING_DIR}/{series_id.decode('UTF-8')}.npy")[:,1:-1]
    s_name = series_id.decode('UTF-8')
    restricted = CFG["restricted_series"]
    if s_name not in restricted.keys():
        return features

    row_start = restricted[s_name][0]
    row_stop = restricted[s_name][1]
    features = features[row_start:row_stop,:]
    # drop trailing rows so the length stays a multiple of 12 (kept in step with map_series_to_y)
    leftover = features.shape[0]%12
    if leftover != 0:
        features = features[:-leftover,:]
    return features

def get_windowed_x(arr, samples_per_day=CFG["samples_per_day"]):
    """Slice a feature array into overlapping blocks of CFG["block_size"] rows.

    Consecutive blocks start block_size // samples_per_day rows apart; a trailing
    partial block is dropped. Returns a tf.data.Dataset with one block per element.
    """
    rows = tf.data.Dataset.from_tensor_slices(arr)
    windows = rows.window(size=CFG["block_size"],
                          shift=(CFG["block_size"]//samples_per_day),
                          drop_remainder=True)
    # each window is itself a dataset of rows; batch it back into a single tensor
    return windows.flat_map(lambda w: w.batch(CFG["block_size"]))

def map_series_to_y(series_id):
    """Load the label column of one full series and reduce it to per-patch classes.

    series_id: bytes series identifier (as delivered by tf.numpy_function).
    The last column of the series file is optionally restricted/trimmed exactly like
    map_series_to_x, grouped into runs of CFG["label_agg_size"] steps, and each run is
    mapped to class 0, 1 or 2 from the mean of its step labels (label 2 counted as -1).
    """
    labels = np.load(f"{TRAINING_DIR}/{series_id.decode('UTF-8')}.npy")[:,-1]
    s_name = series_id.decode('UTF-8')
    restricted = CFG["restricted_series"]
    if s_name in restricted.keys():
        labels = labels[restricted[s_name][0]:restricted[s_name][1]]
        leftover = labels.shape[0]%12
        if leftover != 0:
            labels = labels[:-leftover]

    threshold = CFG["label_threshold"]
    patches = labels.reshape(-1, CFG["label_agg_size"])
    # treat label 2 as -1 so the two event types pull the mean in opposite directions
    patch_mean = np.where(patches==2,-1,patches).mean(axis=-1)
    # assign the patch class from the mean value of its steps (applied in this order)
    patch_mean = np.where(patch_mean>=threshold,1,patch_mean)
    patch_mean = np.where( (-threshold<=patch_mean) & (patch_mean<=threshold),0,patch_mean)
    patch_mean = np.where(patch_mean<=-threshold,2,patch_mean)
    return patch_mean


def get_windowed_y(arr, samples_per_day=CFG['samples_per_day']):
    """Slice a per-patch label array into overlapping blocks matching get_windowed_x.

    Each block holds CFG["block_size"] // CFG["label_agg_size"] labels and consecutive
    blocks start (that length) // samples_per_day labels apart; a trailing partial
    block is dropped. Returns a tf.data.Dataset with one label block per element.
    """
    labels = tf.data.Dataset.from_tensor_slices(arr)
    windows = labels.window(size=CFG["block_size"]//CFG["label_agg_size"],
                            shift=(CFG["block_size"]//CFG["label_agg_size"])//samples_per_day,
                            drop_remainder=True)
    # each window is itself a dataset of labels; batch it back into a single tensor
    return windows.flat_map(lambda w: w.batch(CFG["block_size"]//CFG["label_agg_size"]))
    

def get_windowed_dataset(series_ids, shuffle=False, samples_per_day=CFG['samples_per_day']):
    """Build a batched (X, y) dataset of overlapping windows from whole series.

    series_ids: iterable of series ids whose .npy files live in TRAINING_DIR.
    shuffle: falsy for no shuffling, otherwise the shuffle buffer size.
    samples_per_day: controls the window shift (see get_windowed_x / get_windowed_y).
    Returns a tf.data.Dataset of batches of GPU_BATCH_SIZE windows (remainder dropped).
    """
    def _windows_for_series(item):
        # features first, then labels; zipped so element i of both line up
        x_windows = get_windowed_x(tf.numpy_function(map_series_to_x, [item], tf.float32),samples_per_day)
        y_windows = get_windowed_y(tf.numpy_function(map_series_to_y, [item], tf.float32),samples_per_day)
        return tf.data.Dataset.zip((x_windows, y_windows))

    ids = tf.data.Dataset.from_tensor_slices(series_ids)
    windows = ids.interleave(_windows_for_series, num_parallel_calls=tf.data.AUTOTUNE).cache()
    if shuffle:
        windows = windows.shuffle(shuffle, reshuffle_each_iteration=True)
    return windows.batch(GPU_BATCH_SIZE, drop_remainder=True)

In [9]:
# dataset for calculating mAP during CV.
def map_path_to_x(fname):
    """Load the feature columns of one saved window under DATA_DIR.

    fname: bytes relative path (as delivered by tf.numpy_function).
    Returns the array without its first and last columns, with NaNs replaced by
    np.nan_to_num.
    """
    window = np.load(f"{DATA_DIR}/{fname.decode('UTF-8')}")
    return np.nan_to_num(window[:,1:-1])

def map_path_to_y(fname):
    """Load the label column of one saved window and reduce it to per-patch classes.

    fname: bytes relative path (as delivered by tf.numpy_function).
    The last column is grouped into CFG["block_size"] // CFG["label_agg_size"] runs of
    CFG["label_agg_size"] steps and each run is mapped to class 0, 1 or 2 from the mean
    of its step labels (label 2 counted as -1).
    """
    threshold = CFG["label_threshold"]
    labels = np.load(f"{DATA_DIR}/{fname.decode('UTF-8')}")[:,-1]
    patches = labels.reshape(CFG["block_size"]//CFG["label_agg_size"], CFG["label_agg_size"])
    # treat label 2 as -1 so the two event types pull the mean in opposite directions
    patch_mean = np.where(patches==2,-1,patches).mean(axis=-1)
    # assign the patch class from the mean value of its steps (applied in this order)
    patch_mean = np.where(patch_mean>=threshold,1,patch_mean)
    patch_mean = np.where( (-threshold<=patch_mean) & (patch_mean<=threshold),0,patch_mean)
    patch_mean = np.where(patch_mean<=-threshold,2,patch_mean)
    return patch_mean

# NOTE: validation data must be built with deterministic=True and without shuffling,
# otherwise `post_proc` and `calc_metric` silently fail.
def get_dataset(file_paths, shuffle=False, deterministic=False):
    """Build a batched (X, y) dataset from pre-windowed .npy files under DATA_DIR.

    file_paths: relative file names of the saved windows.
    shuffle: falsy for no shuffling, otherwise the shuffle buffer size.
    deterministic: forwarded to Dataset.map to control element ordering.
    Returns a tf.data.Dataset of batches of GPU_BATCH_SIZE windows (remainder dropped).
    """
    def _load_pair(item):
        # pin the static feature shape, which tf.numpy_function cannot infer
        x = tf.ensure_shape(tf.numpy_function(map_path_to_x, [item], tf.float32),
                            (CFG["block_size"], CFG["n_feats"]))
        y = tf.numpy_function(map_path_to_y, [item], tf.float32)
        return (x, y)

    paths = tf.data.Dataset.from_tensor_slices(file_paths)
    pairs = paths.map(_load_pair,
                      num_parallel_calls=tf.data.AUTOTUNE,
                      deterministic=deterministic).cache()
    if shuffle:
        pairs = pairs.shuffle(shuffle, reshuffle_each_iteration=True)
    return pairs.batch(GPU_BATCH_SIZE, drop_remainder=True)

In [10]:
def map_path_to_step_idx(fname):
    """Return one representative step index per patch for a saved window.

    fname: bytes relative path (as delivered by tf.numpy_function).
    The first column of the file is grouped into runs of CFG["label_agg_size"] steps;
    each run is represented by the floor of its mean, as int32.
    """
    steps = np.load(f"{DATA_DIR}/{fname.decode('UTF-8')}")[:,0]
    per_patch = steps.reshape(CFG["block_size"]//CFG["label_agg_size"], CFG["label_agg_size"])
    patch_centres = per_patch.mean(axis=-1)
    return np.floor(patch_centres).astype(np.int32)

def get_step_idxs(file_paths):
    """Build a batched dataset of per-patch step indices for the given window files.

    file_paths: series containing filenames of saved npy files.
    Elements are produced in input order (deterministic map) and grouped into batches
    of GPU_BATCH_SIZE, dropping any remainder.
    """
    def _load_steps(item):
        return tf.numpy_function(map_path_to_step_idx, [item], tf.int32)

    paths = tf.data.Dataset.from_tensor_slices(file_paths)
    step_idxs = paths.map(_load_steps,
                          num_parallel_calls=tf.data.AUTOTUNE,
                          deterministic=True)
    return step_idxs.batch(GPU_BATCH_SIZE, drop_remainder=True)

# Define model


# Build model

In [11]:
@keras.saving.register_keras_serializable()
class AugRoll(layers.Layer):
    """Training-only augmentation that circularly shifts the input by a random offset.

    roll_range: maximum absolute shift; the offset is drawn uniformly from
        [-roll_range, roll_range] on every training call.
    **kwargs: standard Layer arguments (name, dtype, trainable, ...). They must be
        accepted here because get_config() includes them and from_config() passes
        the whole config back to __init__.
    """
    def __init__(self, roll_range, **kwargs):
        super().__init__(**kwargs)
        self.r=roll_range
        
    def call(self,x, training=False):
        # Identity at inference time; random roll only while training.
        if training:
            # NOTE(review): axis=-1 rolls along the last axis. If x is (batch, steps, features)
            # this permutes the feature channels rather than shifting in time — confirm intent.
            return tf.roll(x, shift=tf.random.uniform([],-self.r,self.r+1,tf.int32), axis=-1)
        return x
    
    def get_config(self):
        """Return the base layer config extended with this layer's constructor argument."""
        config = super().get_config()
        # Update the config with the custom layer's parameters
        config.update({ "roll_range": self.r})
        return config

In [13]:
@keras.saving.register_keras_serializable()
class PatchEmbedding(layers.Layer):
    """Overlapping 1-D patch embedding: extract patches, project them, optionally normalize.

    patch_size: number of consecutive steps gathered into each patch.
    stride: step between the starts of consecutive patches (sequence length shrinks by this factor).
    d_out: output channel dimension of the linear projection.
    layer_norm: if True apply LayerNormalization after the projection, else an identity.
    **kwargs: standard Layer arguments (name, dtype, trainable, ...). They must be
        accepted here because get_config() includes them and from_config() passes
        the whole config back to __init__.
    """
    def __init__(self, patch_size, stride, d_out, layer_norm=True, **kwargs):
        super().__init__(**kwargs)
        # constructor arguments are kept verbatim for get_config()
        self.k = patch_size; self.s = stride; self.d = d_out; self.l = layer_norm
        
        self.proj = layers.Dense(d_out)
        self.layer_norm = layers.LayerNormalization() if layer_norm else layers.Identity()
    
    def call(self,x, training=None): # x shape: (B,L,C)
        # extract_patches works on 4-D image tensors, so add and later remove a dummy height axis
        x = tf.expand_dims(x,axis=1) # (B,1,L,C)
        x = tf.image.extract_patches(x, [1,1,self.k,1], [1,1,self.s,1], [1,1,1,1], padding="SAME") # (B,1,L/s,C*k)
        x = tf.squeeze(x, axis=1) # (B,L/s,C*k)
        x = self.proj(x) # (B,L/s,d_out)
        x = self.layer_norm(x)
        return x
    
    def get_config(self):
        """Return the base layer config extended with this layer's constructor arguments."""
        config = super().get_config()
        # Update the config with the custom layer's parameters
        config.update({ "patch_size": self.k,
                        "stride": self.s,
                        "d_out": self.d,
                        "layer_norm": self.l}
        )
        return config

In [14]:
@keras.saving.register_keras_serializable()
class MixFFN(layers.Layer):
    """Mix-FFN block: Dense -> depthwise conv (GELU) -> Dense, with dropout and a residual add.

    d_in: input (and output) channel dimension.
    d_hidden: channel dimension of the expanded hidden representation.
    dropout: dropout rate applied after the depthwise conv and after the second Dense.
    **kwargs: standard Layer arguments (name, dtype, trainable, ...). They must be
        accepted here because get_config() includes them and from_config() passes
        the whole config back to __init__.
    """
    def __init__(self,d_in, d_hidden, dropout, **kwargs):
        super().__init__(**kwargs)
        self.d_in = d_in; self.d_hidden = d_hidden
        # Keep the numeric rate separately: `self.dropout` below is the Dropout *layer*,
        # and get_config() must return the plain rate, not a layer object.
        self.dropout_rate = dropout
        
        self.dense1 = layers.Dense(d_hidden)
        self.dwconv = layers.DepthwiseConv1D(3, depth_multiplier=1, padding='same', activation="gelu")
        self.dense2 = layers.Dense(d_in)
        self.dropout = layers.Dropout(dropout)
        self.add = layers.Add()
        
    def call(self, x_in, training=None):
        x = self.dense1(x_in)
        x = self.dwconv(x)
        x = self.dropout(x)
        x = self.dense2(x)
        x = self.dropout(x)
        # residual connection around the whole feed-forward path
        x = self.add([x_in,x])
        return x
    
    def get_config(self):
        """Return the base layer config extended with this layer's constructor arguments."""
        config = super().get_config()
        # Update the config with the custom layer's parameters
        config.update({ "d_in": self.d_in,
                        "d_hidden": self.d_hidden,
                        "dropout": self.dropout_rate}
        )
        return config

In [15]:
@keras.saving.register_keras_serializable()
class EfficientMHSA(layers.Layer):
    """Multi-head self-attention with optional sequence reduction of keys and values.

    input_dim: channel dimension of the input; also used as key_dim and value_dim of the attention.
    n_heads: number of attention heads.
    r_ratio: reduction ratio. When it is 1 the reshape and key/value projections are
        identities; otherwise r_ratio consecutive steps are folded into the channel axis
        and projected back to input_dim.
    mha_dropout: dropout rate passed to MultiHeadAttention.
    **kwargs: standard Layer arguments (name, dtype, trainable, ...). They must be
        accepted here because get_config() includes them and from_config() passes
        the whole config back to __init__.
    """
    def __init__(self, input_dim, n_heads, r_ratio, mha_dropout=CFG["mha_dropout"], **kwargs):
        super().__init__(**kwargs)
        # constructor arguments are kept verbatim for get_config()
        self.input_dim=input_dim; self.n_heads = n_heads; self.r_ratio = r_ratio; self.mha_d = mha_dropout
        
        self.reshape = layers.Reshape((-1,input_dim*r_ratio)) if r_ratio!=1 else layers.Identity()
        self.key_proj = layers.Dense(input_dim) if r_ratio!=1 else layers.Identity()
        self.value_proj = layers.Dense(input_dim) if r_ratio!=1 else layers.Identity()
        self.norm = layers.LayerNormalization()
        self.att = layers.MultiHeadAttention(n_heads, input_dim, input_dim, dropout=mha_dropout)
        
    def call(self,x, training=None):
        # keys and values share one LayerNormalization instance
        k = self.norm( self.key_proj( self.reshape(x)))
        v = self.norm( self.value_proj( self.reshape(x)))
        # MultiHeadAttention's positional order is (query, value, key)
        output = self.att(x,v,k)
        return output
    
    def get_config(self):
        """Return the base layer config extended with this layer's constructor arguments."""
        config = super().get_config()
        # Update the config with the custom layer's parameters
        config.update({ "input_dim": self.input_dim,
                        "n_heads": self.n_heads,
                        "r_ratio": self.r_ratio,
                        "mha_dropout": self.mha_d}
        )
        return config


In [16]:
@keras.saving.register_keras_serializable()
class SegformerEncoderBlock(layers.Layer):
    """One SegFormer encoder stage: repeated (norm -> attention -> add, norm -> Mix-FFN -> add)."""

    def __init__(self, channels, n_layers, n_heads, r_ratio, exp_ratio, **kwargs):
        '''
        channels: number of input (and output) channels
        n_layers: number of encoder layers for this block
        n_heads: number of self-attention heads for each encoder layer
        r_ratio: reduction ratio for efficient self-attention
        exp_ratio: expansion ratio of mix-FFN layer
        **kwargs: standard Layer arguments (name, dtype, trainable, ...); accepted so that
            from_config() can pass back everything get_config() returns
        '''
        super().__init__(**kwargs)
        self.channels=channels
        self.n_heads = n_heads
        # stored as a plain int (a stray trailing comma previously turned this into a 1-tuple,
        # which then leaked into get_config())
        self.r_ratio=r_ratio
        self.exp_ratio = exp_ratio
        self.mix_ffn = MixFFN(d_in=channels, d_hidden=channels*exp_ratio, dropout=CFG["encoder_dropout"])
        self.esa = EfficientMHSA(channels, n_heads, r_ratio)
        self.layer_norm = layers.LayerNormalization()
        self.add = layers.Add()
        self.n_layers=n_layers
        
    def call(self,x, training=None):
        # The same sub-layer instances (and therefore the same weights) are applied on
        # every one of the n_layers iterations.
        for _ in range(self.n_layers):
            x_skip = x
            x = self.layer_norm(x)
            x = self.esa(x)
            x = self.add([x_skip,x])
            x_skip = x
            x = self.layer_norm(x)
            x = self.mix_ffn(x)
            x = self.add([x_skip,x])
        
        return x
        
    def get_config(self):
        """Return the base layer config extended with this layer's constructor arguments."""
        config = super().get_config()
        # Update the config with the custom layer's parameters
        config.update({ "channels": self.channels,
                        "n_heads": self.n_heads,
                        "r_ratio": self.r_ratio,
                        "n_layers": self.n_layers,
                        "exp_ratio": self.exp_ratio}
        )
        return config

In [17]:
@keras.saving.register_keras_serializable()
class UnifyFeatures(layers.Layer):
    """Decoder head for one encoder stage: project to a common width, apply dropout, upsample.

    decoder_channels: output channel dimension of the linear projection.
    stride: upsampling factor along the sequence axis (no upsampling when it is 1 or less).
    **kwargs: standard Layer arguments (name, dtype, trainable, ...). They must be
        accepted here because get_config() includes them and from_config() passes
        the whole config back to __init__.
    """
    def __init__(self, decoder_channels, stride, **kwargs):
        super().__init__(**kwargs)
        self.d=decoder_channels
        self.s=stride
        self.linear = layers.Dense(decoder_channels) 
        self.dropout = layers.Dropout(CFG["decoder_dropout"])
        self.upsample = layers.UpSampling1D(stride) if stride > 1 else layers.Identity()
        
    def call(self,x, training=None):
        x = self.linear(x)
        x = self.dropout(x)
        x = self.upsample(x)
        return x
    
    def get_config(self):
        """Return the base layer config extended with this layer's constructor arguments."""
        config = super().get_config()
        # Update the config with the custom layer's parameters
        config.update({ "decoder_channels": self.d,
                        "stride": self.s}
        )
        return config

In [18]:
@keras.saving.register_keras_serializable()
class SegFormer(tf.keras.Model):
    """Four-stage 1-D SegFormer: patch-embedding + encoder stages, fused by an MLP decoder into 3-class logits."""
    def __init__(self, patches, strides, channels, enc_layers, heads, r_ratios, exp_ratios, d_decoder, aug_roll):
        '''
        patches (list): list of patch sizes for each of the embedding layers
        strides (list): list of strides for each embedding layer (and their corresponding upsampling layer)
        channels (list): list of channel dimensions for each encoder block
        enc_layers (list): list of layers in each encoder block
        heads (list): list of attention head numbers for each encoder block
        r_ratios (list): list of EfficientMHSA reduction ratios in each encoder block 
        exp_ratios (list): list of MixFFN expansion ratios in each encoder block
        d_decoder (int): final channel size for the decoder
        aug_roll (int): roll range handed to the AugRoll augmentation layer
        '''
        super().__init__()
        # constructor arguments are kept verbatim for get_config()
        self.p=patches
        self.s=strides
        self.c=channels
        self.enc=enc_layers
        self.h=heads
        self.r=r_ratios
        self.exp=exp_ratios
        self.dec=d_decoder
        self.roll=aug_roll
        ### hyperparameter lists must have the same length
        assert len( set(
            [len(patches), len(strides), len(channels), len(enc_layers), len(heads), len(r_ratios), len(exp_ratios)]
        )) == 1,"SegFormer hyperparameter lists must be the same size!"
        
        self.aug_roll = AugRoll(aug_roll)
        self.patch_embed = [PatchEmbedding(p,s,c) for p,s,c in zip(patches, strides, channels)]  
        self.enc_block = [SegformerEncoderBlock(c,l,h,r,e) for c,l,h,r,e in zip(channels,enc_layers,heads,r_ratios,exp_ratios)]
        ####### TODO: make this generalizable to any list size
        # Each stage is upsampled back to the resolution of stage 0; the factor is the product
        # of the strides of the later stages. Hard-coded for exactly four stages.
        self.unify = [UnifyFeatures(d_decoder,1), UnifyFeatures(d_decoder,strides[1]), 
                      UnifyFeatures(d_decoder,strides[1]*strides[2]),UnifyFeatures(d_decoder,strides[1]*strides[2]*strides[3])] 
        self.mlp_fuse = layers.Dense(d_decoder)
        self.fuse_do = layers.Dropout(CFG["fuse_dropout"])
        self.classifier = layers.Dense(3) #shape: (B, L/strides[0], 3)
        self.concat = layers.Concatenate()
        self.bn= layers.BatchNormalization()
        self.act = layers.Activation("relu")
          
    
        ###### WORKAROUND TO "FIX" MODEL.SUMMARY() ######
        # Trace call() once on a symbolic Input and re-initialise the base class as a
        # functional model so that summary() can report per-layer output shapes.
        self.input_layer = tf.keras.layers.Input(shape=(CFG['block_size'],CFG['n_feats']), name="input_layer")
        input_shape = self.input_layer
        outputs_shape = self.call(input_shape)
        super().__init__(inputs=input_shape,outputs=outputs_shape)
        #################################################
        
    def call(self, x, training=None):
        """Run the forward pass and return per-patch logits for the 3 classes."""
        x = self.aug_roll(x,training)
        # encoder: each stage embeds the previous stage's output and keeps its own feature map
        x = self.patch_embed[0](x)
        out_0 = self.enc_block[0](x)
        x = self.patch_embed[1](out_0)
        out_1 = self.enc_block[1](x)
        x = self.patch_embed[2](out_1)
        out_2 = self.enc_block[2](x)
        x = self.patch_embed[3](out_2)
        out_3 = self.enc_block[3](x)
        # decoder: bring all stages to a common width/length, concatenate and fuse
        x = self.concat([self.unify[0](out_0), self.unify[1](out_1), self.unify[2](out_2), self.unify[3](out_3)]) 
        x = self.mlp_fuse(x)
        x = self.bn(x)
        x = self.fuse_do(x)
        x = self.act(x)
        x = self.classifier(x)
        return x
    
    def get_config(self):
        """Return the base model config extended with this model's constructor arguments."""
        # NOTE(review): the base config comes from the functional re-initialisation above and
        # __init__ accepts no extra keyword arguments — confirm from_config() round-trips.
        config = super().get_config()
        # Update the config with the custom layer's parameters
        config.update({ "patches": self.p,
                        "strides": self.s,
                        "channels": self.c,
                        "enc_layers": self.enc,
                        "heads": self.h,
                        "r_ratios": self.r,
                        "exp_ratios": self.exp,
                        "d_decoder": self.dec,
                        "aug_roll": self.roll}
        )
        return config

In [19]:
# Instantiate the model from the hyperparameters in CFG.
segmodel = SegFormer(patches=CFG["patches"], 
                     strides=CFG["strides"], 
                     channels=CFG["channels"], 
                     enc_layers=CFG["enc_layers"], 
                     heads=CFG["heads"], 
                     r_ratios=CFG["r_ratios"], 
                     exp_ratios=CFG["exp_ratios"],
                     d_decoder=CFG["d_decoder"],
                     aug_roll=CFG["roll"])
# Build with a (batch, block_size, n_feats) input signature.
segmodel.build(input_shape=(None,
                            CFG['block_size'],
                            CFG['n_feats']))

### fixed keras summary with: https://github.com/keras-team/keras/issues/13782#issuecomment-1446475317
segmodel.summary(expand_nested=True)

Model: "seg_former_1"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_layer (InputLayer)       [(None, 17280, 5)]   0           []                               
                                                                                                  
 aug_roll (AugRoll)             (None, 17280, 5)     0           ['input_layer[0][0]']            
                                                                                                  
 patch_embedding (PatchEmbeddin  (None, 2880, 12)    636         ['aug_roll[0][0]']               
 g)                                                                                               
                                                                                                  
 segformer_encoder_block (Segfo  (None, 2880, 12)    4608        ['patch_embedding[0][0

# Define custom losses, postprocessing, and metrics

# Functions to generate valid submission

In [20]:
def preproc(df):
    """Turn one day of series data into a model input and per-patch step indices.

    input: pd.DataFrame with a "step" column plus the feature columns selected by
        CFG['n_feats'] (3: anglez, enmo, lids; 5: additionally t_sin, t_cos), holding
        at most CFG["block_size"] rows.
    output: tuple (x, step_idx)
        x: float32 array of shape (1, block_size, n_feats); shorter inputs are
           left-padded with zero rows and NaNs are replaced by 0.
        step_idx: int32 array with one step per patch of LABEL_DOWNSAMPLE steps
           (floor of the mean step of the patch); padded positions count as step 0.
    raises: ValueError if CFG['n_feats'] is not a supported feature count.
    """
    block_size = CFG["block_size"]
    feature_cols_by_count = {
        3: ["anglez", "enmo", "lids"],
        5: ["anglez", "enmo", "lids", "t_sin", "t_cos"],
    }
    if CFG['n_feats'] not in feature_cols_by_count:
        # fail with a clear message instead of a NameError on an undefined `x` further down
        raise ValueError(f"preproc supports n_feats in {sorted(feature_cols_by_count)}, got {CFG['n_feats']}")

    step_idx = df["step"].to_numpy(dtype=np.int32)
    if step_idx.shape[0]<block_size:
        # integer pad keeps the step indices integral until the per-patch mean below
        pad = np.zeros((block_size-step_idx.shape[0],),dtype=np.int32)
        step_idx = np.concatenate((pad,step_idx),axis=None)

    x = df[feature_cols_by_count[CFG['n_feats']]].to_numpy(dtype=np.float32)
    if x.shape[0]<block_size:
        pad = np.zeros((block_size-x.shape[0],x.shape[1]),dtype=np.float32)
        x = np.vstack((pad,x))
    x = np.nan_to_num(x) # replace NaNs (from rolling functions) with zeroes 
    x = np.expand_dims(x, axis=0) # add batch dimension expected by the model

    # one representative step per patch: floor of the mean step inside the patch
    step_idx = step_idx.reshape(-1, LABEL_DOWNSAMPLE)
    step_idx = np.floor(step_idx.mean(axis=-1)).astype(np.int32)
    
    return x, step_idx

In [21]:
# handle 1 column at a time.
##### i.e., pass `onset` and `wakeup` separately (from:<<  onset, wakeup = np.hsplit(a[:,:],2)   >> )
def parse_logits(col, min_distance=30, min_logit=0):
    """Pick well-separated peak positions from one column of logits.

    col: array of logits for a single event type (1-D, or a single column).
    min_distance: a candidate is dropped when a higher-ranked candidate lies closer than this.
    min_logit: only positions whose logit exceeds this value are candidates.
    Returns (pred_idxs, pred_logits) ordered by descending logit, or (None, None)
    when no logit exceeds min_logit.
    """
    above_threshold = np.flatnonzero(col > min_logit)
    if above_threshold.shape[0]==0:
        return None, None

    # candidate positions, strongest logit first
    ranked = np.argsort(col, axis = None)[::-1]
    candidates = ranked[np.isin(ranked, above_threshold)]

    # too_close[i, j] is True when candidates i and j are closer than min_distance
    as_column = candidates.reshape(-1,1)
    too_close = np.where(np.abs(as_column - as_column.T)>=min_distance, False, True)

    # keep candidate i only if the best-ranked candidate within range of it is i itself
    strongest_neighbour = np.argmax(too_close,axis=1)
    own_rank = np.arange(too_close.shape[0])
    pred_idxs = candidates[(strongest_neighbour - own_rank)==0]
    # prediction confidences
    pred_logits = col[pred_idxs].flatten()

    return pred_idxs, pred_logits

def postproc(x, step_idx, series_id):
    """
    Convert per-patch event logits into a prediction table.

    inputs: 
        x: np.array with shape (n_patches,2).  column 0 = onset logits, column 1 = wakeup logits
        step_idx: np.array for mapping a patch position to its step (flattened if 2-D)
        series_id: string
    output: None when neither column yields a prediction, otherwise a
        pd.DataFrame -- columns = (series_id, step, event, score), onset rows first
    """
    onset, wakeup = np.hsplit(x,2)

    # (event name, selected patch positions, their logits) for each event type, onset first
    detections = [
        ("onset", *parse_logits(onset, min_distance=CFG["min_pred_distance"], min_logit=CFG["logit_threshold"])),
        ("wakeup", *parse_logits(wakeup, min_distance=CFG["min_pred_distance"], min_logit=CFG["logit_threshold"])),
    ]

    # nothing to report if both event types came back empty
    if all(idxs is None for _, idxs, _ in detections):
        return None

    # step_idx must be 1-D so it can be indexed by patch position
    if len(step_idx.shape) == 2:
        step_idx = step_idx.flatten()

    columns = {"series_id": [], "step": [], "event": [], "score": []}
    for event, idxs, logits in detections:
        if idxs is None:
            continue
        n_hits = idxs.shape[0]
        columns["step"].extend(step_idx[idxs])
        columns["score"].extend(logits)
        columns["series_id"].extend([series_id] * n_hits)
        columns["event"].extend([event] * n_hits)

    return pd.DataFrame(columns)

# Compile and train the model

### TODO: configure callbacks, model saving, and training history for the training loop

- consider learning rate scheduler callback?

In [22]:
# Per-event tolerance lists passed to competition_score as `tolerances`.
# NOTE(review): presumably expressed in steps — confirm against the metric notebook.
TOLERANCES = {"onset":  [12, 36, 60, 90, 120, 150, 180, 240, 300, 360],
              "wakeup": [12, 36, 60, 90, 120, 150, 180, 240, 300, 360]}

def calc_metric(val_idxs):
    """Score the current `segmodel` on the validation series with the competition metric.

    val_idxs: row positions into df_file_list (as produced by GroupKFold.split).
    For every validation series the raw parquet data is re-derived into the model's
    features, cut into windows that end at 19:00:00 timestamps, run through the model,
    and post-processed into event predictions.
    Returns the value of competition_score, or 0.0 when no prediction was produced.
    """
    # Only keep positions whose row belongs to window 19. val_idxs are *positions*, so the
    # filter is positional too; comparing them against index labels is wrong once
    # df_file_list has been filtered without resetting its index.
    window_by_pos = df_file_list["window"].to_numpy()
    val_idxs = [int(i) for i in val_idxs if window_by_pos[i] == 19]

    ### get "solution" dataframe for computing the metric. 
    val_series_ids = (df_file_list.iloc[val_idxs].series_id.unique().tolist())
    df_pred_list = []
    for series_id in val_series_ids:
        # one row per 19:00:00 timestamp: end_idx is that step, start_idx is 17280 steps earlier (floored at 0)
        series_splits = (pl.scan_parquet("/notebooks/input/train_series.parquet")
                        # polars LazyFrame
                        .select(["series_id","step","timestamp"])
                        .filter(pl.col("series_id")==series_id) 
                        .with_columns((pl.col("timestamp")
                                      .str.strptime(pl.Datetime, "%Y-%m-%dT%H:%M:%S%z")), #convert timezone to UTC
                        )
                        .filter(pl.col("timestamp").dt.time() == datetime.time(19,0,0))
                        .drop("timestamp")
                        .collect()
                        .to_pandas()
                         # pandas DataFrame
                        .assign(start_idx = lambda x: np.where(x["step"].astype(np.int32)-17280 > 0,
                                                           x["step"].astype(np.int32)-17280,
                                                           0)
                        )
                        .rename(columns={"step":"end_idx"})
                        .loc[:,["series_id","start_idx","end_idx"]]
        )

        # rebuild the model features for the whole series
        df_series=(pl.scan_parquet("/notebooks/input/train_series.parquet")
                        .filter(pl.col("series_id")==series_id)
                        .with_columns([
                            (pl.col("timestamp").str.strptime(pl.Datetime, "%Y-%m-%dT%H:%M:%S%z")), #parse timezone
                            (pl.col("anglez")
                                 .diff(n=1,null_behavior="ignore")
                                 .fill_null(value=0)
                                 .abs()
                                 .alias("anglez")),
                            (pl.col("enmo").clip(0.0,3.0).alias("lids")),
                            (pl.col("enmo").clip(0.0,3.0).alias("enmo")),
                        ])
                        # center anglez and enmo
                        .with_columns([
                            ((pl.col("anglez")-pl.col("anglez").mean())
                                .alias("anglez")),
                            ((pl.col("enmo")-pl.col("enmo").mean())
                                .alias("enmo")),
                            (((pl.col("timestamp").dt.hour()*pl.lit(60)+pl.col("timestamp").dt.minute())*pl.lit(2*np.pi/1440))
                                 .sin().alias("t_sin")
                            ),
                            (((pl.col("timestamp").dt.hour()*pl.lit(60)+pl.col("timestamp").dt.minute())*pl.lit(2*np.pi/1440))
                                 .cos().alias("t_cos")
                        )
                        ])
                        .with_columns((pl.col("anglez")/pl.col("anglez").std()).alias("anglez"))
                        .drop("timestamp")
                        .collect()
                        .to_pandas()
                        .assign(lids= lambda x: (100 / (np.maximum(0., x['lids'] - 0.02).rolling(120, center=True, min_periods=1).agg('sum') + 1).rolling(360, center=True, min_periods=1).agg('mean').astype(np.float32)),)
                        .assign(lids =lambda x: (x.lids/50)-1)
                        .reset_index(drop=True)
        )

        for i in series_splits.index:
            # the first split (split_0) is always discarded here; padding it is not implemented
            if i == 0: continue

            # rows with start_idx <= step < end_idx (last two columns of series_splits)
            day=(df_series[df_series["step"].between(series_splits.iloc[i,-2],
                                                     series_splits.iloc[i,-1], inclusive="left")]
                         .loc[:,["series_id", "step", "anglez", "enmo", "lids", "t_sin", "t_cos"]]
            )


            ### Inference
            x, step_idx  = preproc(day)
            # drop logit column 0 so only the onset / wakeup logits are post-processed
            logits = tf.squeeze(segmodel.predict(x,verbose=0)).numpy()[:,1:]
            df_pred = postproc(logits, step_idx, series_id)
            # postproc may return <None> if logits don't meet criteria 
            if df_pred is not None:
                df_pred_list.append(df_pred)
    
    
    df_true = (pd.read_csv(EVENTS_PATH)
                   .query("series_id.isin(@val_series_ids)"))
    
    if len(df_pred_list) > 0:
        # row_id is simply the running row number of the concatenated predictions
        df_val_preds = (pd.concat(df_pred_list, axis=0, ignore_index=True)
                           .reset_index()
                           .rename(columns={"index":"row_id"}))   
    else: return 0.0
        
    return competition_score(  solution = df_true,
                                submission = df_val_preds,
                                tolerances= TOLERANCES,
                                series_id_column_name = "series_id",
                                time_column_name = "step",
                                event_column_name = "event",
                                score_column_name = "score")



In [23]:
# TODO: log the metric since I can tell it to write to the logger?
class CustomCallback(tf.keras.callbacks.Callback):
    """Compute the competition metric on the validation fold after every epoch.

    fold: index into val_folds to evaluate. When left as None the notebook-global
        loop variable `k` is read at the end of each epoch, so a single instance can
        be reused across the cross-validation loop.
    """
    def __init__(self, fold=None):
        super().__init__()
        self.fold = fold

    def on_epoch_end(self, epoch, logs=None):
        # an explicit fold takes precedence; otherwise fall back to the training loop's `k`
        fold = k if self.fold is None else self.fold
        comp_metric = calc_metric(val_folds[fold])
        print(f"********** Fold {fold} epoch {epoch} AP = {comp_metric} **********")
        if LOG_WANDB:
            wandb.log({f"val_loss_k{fold}": logs["val_loss"], 
                       f"loss_k{fold}": logs["loss"],
                       f"AP_k{fold}": comp_metric,
                      })
callbacks = [CustomCallback()]

In [24]:
# LEARNING RATE SCHEDULER
def get_lr(batches=1500):
    """Return the learning rate to hand to the optimizer.

    batches: estimated number of batches per epoch, used to size the decay horizon.
    Returns a CosineDecay schedule starting at CFG["lr"] when CFG["lr_scheduler"] is
    "cos", otherwise the constant CFG["lr"].
    """
    # NOTE(review): the horizon is scaled by GPU_BATCH_SIZE / 16 rather than being
    # epochs * batches — confirm this is intended.
    decay_steps = (CFG["epochs"]*batches*GPU_BATCH_SIZE)//16
    # NOTE(review): CosineDecay documents `alpha` as a fraction of the initial rate, so
    # lr * 1e-2 gives a floor of lr * (lr * 1e-2) — confirm against the API reference.
    cosine_schedule = tf.keras.optimizers.schedules.CosineDecay(
        CFG["lr"],
        decay_steps,
        alpha=CFG["lr"]*1e-2,
    )

    if CFG["lr_scheduler"] == "cos":
        return cosine_schedule
    return CFG["lr"]

In [25]:
# The model outputs raw logits and the labels are integer class ids, hence sparse CCE with from_logits=True.
loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
segmodel.compile(loss = loss, optimizer = tf.keras.optimizers.AdamW(learning_rate=get_lr()))
# save initial weights so we can reset the model between CV folds
init_weights = segmodel.get_weights()

In [None]:
# Cross-validation loop. `k` is also read as a global by CustomCallback.on_epoch_end.
for k in range(CFG["kfolds"]):
    #get kth fold data
    # training windows are shuffled with a 4000-element buffer; validation uses a larger
    # window shift (samples_per_day=2) and no shuffling
    train_data = get_windowed_dataset(pd_series_ids.iloc[train_folds[k]].unique(), shuffle=4000)
    val_data = get_windowed_dataset(pd_series_ids.iloc[val_folds[k]].unique(),samples_per_day=2)
    # reset model for new fold
    # re-compiling creates a fresh optimizer and learning-rate schedule for this fold
    segmodel.compile(loss = loss,
            optimizer = tf.keras.optimizers.AdamW(learning_rate=get_lr(
                    (CFG["kfolds"]-1)*110000//GPU_BATCH_SIZE//(CFG["kfolds"]))) #an estimate
    )
    segmodel.set_weights(init_weights)
    segmodel.fit(train_data, validation_data=val_data, epochs=CFG["epochs"], callbacks=callbacks)
    # one saved model per fold, overwritten on re-run
    segmodel.save(f"{OUTPUT_DIR}/segmodel_fold_{k}.tf", overwrite=True,save_format="tf")
    
    if CFG["ONE_FOLD"]: break

Epoch 1/10
    134/Unknown - 193s 685ms/step - loss: 0.1771

In [None]:
# Close the wandb run opened in the config cell.
if LOG_WANDB: wandb.finish()

# Visualize model output (logits)

In [None]:
# Plot labels against predicted logits for the first 40 validation windows.
# Relies on `val_data` left over from the last executed fold of the training loop.
sample = val_data.unbatch().take(40)
for x, y in sample:
    # add a batch axis for the model, then strip it from the output
    demo_preds = segmodel(tf.expand_dims(x,0), training=False)[0]
    demo_labels = y

    fig = plt.figure(figsize=(12,5))
    ax = fig.add_subplot()
    # labels are multiplied by 4 purely to make them visible on the logit scale
    ax.plot(np.arange(y.shape[0]), demo_labels*4, color='green', label="label")
    ax.plot(np.arange(y.shape[0]), demo_preds[:,1], color='b', linewidth=0.5, label="onset_preds")
    ax.plot(np.arange(y.shape[0]), demo_preds[:,2], color='xkcd:orange red', linewidth=0.5, label="wakeup_preds")

    ax.legend()
    ax.set_xlabel('step')
    ax.set_ylabel('logits')
    ax.set_facecolor('xkcd:off white')
    labels = ax.get_xticklabels()
    plt.setp(labels, rotation=40, fontsize=8)
    plt.show()