In [None]:
import sys, os
import numpy as np
import pandas as pd
import pickle
import gc
import matplotlib.pyplot as plt
import pathlib
import lightgbm as lgb
import logging
import warnings
import tensorflow as tf
import scipy.ndimage as ndi
warnings.simplefilter('ignore')
from tqdm.notebook import tqdm
from glob import glob
from sklearn.model_selection import GroupKFold, KFold, StratifiedKFold, StratifiedGroupKFold
from sklearn import cluster
from sklearn.metrics import precision_score, average_precision_score
from sklearn.metrics import precision_score, average_precision_score
from tensorflow import keras
from tensorflow.keras.callbacks import EarlyStopping, LearningRateScheduler, ModelCheckpoint, ReduceLROnPlateau
from tensorflow.keras.optimizers.schedules import ExponentialDecay, CosineDecay

# Root folder of the competition data. Most paths in this notebook are built by
# plain string concatenation (e.g. f"{DATA_DIR}subjects.csv"), so the trailing slash matters.
DATA_DIR = "/kaggle/input/tlvmc-parkinsons-freezing-gait-prediction/"
# Notebook-wide switch copied into every CFG class below.
# True  -> the `if CFG.train:` cells run and data are read from the train/ folders.
# False -> those cells are skipped and data are read from the test/ folders.
train = False

In [None]:
# Metadata tables shipped with the competition data.
subjects = pd.read_csv(f"{DATA_DIR}subjects.csv")
meta_tdcs = pd.read_csv(f"{DATA_DIR}tdcsfog_metadata.csv")
meta_defog = pd.read_csv(f"{DATA_DIR}defog_metadata.csv")

# For every row of subjects.csv collect the recording Ids of that subject
# (tdcsfog metadata first, then defog metadata), keeping `ids` and `subs` aligned.
ids, folds, subs = [], [], []
for subject in subjects['Subject'].values:
    subject_ids = []
    for meta in (meta_tdcs, meta_defog):
        subject_ids.extend(meta.loc[meta['Subject'] == subject, 'Id'].tolist())
    ids.extend(subject_ids)
    subs.extend([subject] * len(subject_ids))

# Ids (file name up to the first '.') of the recordings that exist on disk
# for the active split, one set per dataset folder.
split_dir = 'train' if train else 'test'
ids_on_disk = {}
for dataset in ('tdcsfog', 'defog', 'notype'):
    csv_paths = glob(f"{DATA_DIR}{split_dir}/{dataset}/*.csv")
    ids_on_disk[dataset] = {os.path.basename(path).split('.')[0] for path in csv_paths}
id_tdcs = ids_on_disk['tdcsfog']
id_defog = ids_on_disk['defog']
id_notype = ids_on_disk['notype']

# Tag every Id with the first folder that contains it (defog, then tdcsfog,
# then notype); '' marks an Id without a file in the active split.
lookup_order = (('defog', id_defog), ('tdcsfog', id_tdcs), ('notype', id_notype))
datanames = [
    next((dataset for dataset, members in lookup_order if rec_id in members), '')
    for rec_id in ids
]

# One row per (Id, Subject, Data) combination.
id_info = pd.DataFrame({'Id':ids, 'Subject':subs, 'Data':datanames}).drop_duplicates()

## Common func

In [None]:
def precision(preds, gts):
    """Print and return the mean average precision over the first three classes.

    Both arrays are indexed as ``[:, class]``; for each of the classes 0, 1
    and 2 the ground truth is binarised with ``> 0`` and scored against the
    flattened predictions with ``average_precision_score``.

    Args:
        preds: predicted scores.
        gts: ground-truth values, same layout as ``preds``.

    Returns:
        The mean of the three per-class average precision values.
    """
    per_class = [
        average_precision_score(gts[:, cls].flatten() > 0, preds[:, cls].flatten())
        for cls in range(3)
    ]
    mean_ap = np.mean(per_class)

    print(per_class)
    print(mean_ap)
    return mean_ap

In [None]:
def resize_func(x, target_size, use_percentile_feat=False):
    """Summarise each channel of ``x`` into ``target_size`` windows.

    Every channel is stretched with ``scipy.ndimage.zoom`` so that its length
    becomes the next multiple of ``target_size`` above the input length, then
    cut into ``target_size`` equally sized consecutive windows. Per-window
    statistics are returned.

    Args:
        x: array indexed as ``(channel, sample)``.
        target_size: number of windows per channel.
        use_percentile_feat: also compute the 15/30/45/60/75/90th percentiles.

    Returns:
        dict mapping 'mean', 'max', 'min', 'med', 'std' (and 'p15' ... 'p90'
        when requested) to arrays of shape ``(channels, target_size)``.
    """
    n_channels = x.shape[0]
    n_samples = x.shape[1]

    # Length after stretching: always strictly larger than n_samples and
    # divisible by target_size.
    stretched_len = n_samples + (target_size - n_samples % target_size)
    factor = stretched_len / n_samples

    stretched = np.array([ndi.zoom(channel, zoom=factor, mode='reflect') for channel in x])
    windows = stretched.reshape((n_channels, target_size, -1))

    def _per_window(reduce):
        # Collapse the within-window axis and flatten to (channels, target_size).
        return reduce(windows, axis=2).reshape(n_channels, -1)

    res = {
        'mean': _per_window(np.mean),
        'max': _per_window(np.max),
        'min': _per_window(np.min),
        'med': _per_window(np.median),
        'std': np.sqrt(_per_window(np.var)),
    }

    if use_percentile_feat:
        for pct in (15, 30, 45, 60, 75, 90):
            res[f"p{pct}"] = np.percentile(windows, [pct], axis=2).reshape(n_channels, -1)

    return res

def resize_sequence(filename, target_size):
    """Read one recording CSV and summarise it into ``target_size`` rows.

    The accelerometer columns (and, when ``CFG.train`` is set, the label
    columns) are passed through ``resize_func``; each window statistic becomes
    one output column named ``<column><statistic>`` (e.g. ``AccVmean``).

    Args:
        filename: path of the CSV; the name of its parent folder selects the
            dataset ('notype' files carry a single ``Event`` label column).
        target_size: number of windows, i.e. rows of the returned frame.

    Returns:
        pandas.DataFrame with ``target_size`` rows. When
        ``CFG.use_percentile_feat`` is set the percentile columns are placed
        after all other columns.
    """
    f = pd.read_csv(filename)
    # Dataset name == parent folder of the file (independent of the path separator).
    data = os.path.basename(os.path.dirname(filename))

    x_cols = ['AccV', 'AccML', 'AccAP']
    y_cols = ['Event'] if data == 'notype' else ['StartHesitation', 'Turn', 'Walking']
    p_cols = ['p15', 'p30', 'p45', 'p60', 'p75', 'p90']
    res_cols = ['mean', 'max', 'min', 'med', 'std']

    res_x = resize_func(f[x_cols].values.transpose(), target_size, CFG.use_percentile_feat)
    res_y = resize_func(f[y_cols].values.transpose(), target_size, False) if CFG.train else None

    res = {}
    for k in res_cols:
        for ci, col in enumerate(x_cols):
            res[f"{col}{k}"] = res_x[k][ci]

        if not CFG.train:
            continue

        if data == 'notype':
            # 'notype' only has a single Event label. It is stored in the Turn
            # slot and the other two classes are zero-filled placeholders, so
            # the per-class values of these files are dummies.
            event = res_y[k][0]
            res[f"StartHesitation{k}"] = np.zeros_like(event)
            res[f"Turn{k}"] = event
            res[f"Walking{k}"] = np.zeros_like(event)
        else:
            for ci, col in enumerate(y_cols):
                res[f"{col}{k}"] = res_y[k][ci]

    # Percentile features are written once, outside the statistics loop
    # (previously this block was nested in the loop above, reused its loop
    # variable and rewrote the same columns on every iteration).
    if CFG.use_percentile_feat:
        for p in p_cols:
            for ci, col in enumerate(x_cols):
                res[f"{col}{p}"] = res_x[p][ci]

    return pd.DataFrame(res)

def save_resized_data(data, target_size):
    """Resize every CSV of one dataset folder and store the result locally.

    Files are taken from the train split when ``CFG.train`` is set, otherwise
    from the test split, and written under ``resized_sequences/<data>/`` with
    their original file names.

    Args:
        data: dataset folder name (e.g. 'tdcsfog', 'defog', 'notype').
        target_size: number of windows passed on to ``resize_sequence``.
    """
    out_dir = f"resized_sequences/{data}/"
    os.makedirs(out_dir, exist_ok=True)

    if CFG.train:
        pattern = f"{DATA_DIR}/train/{data}/*.csv"
    else:
        pattern = f"{DATA_DIR}/test/{data}/*.csv"

    for src_path in tqdm(glob(pattern)):
        file_name = src_path.split('/')[-1]
        resized = resize_sequence(src_path, target_size)
        resized.to_csv(f"{out_dir}{file_name}", index=False)

In [None]:
def get_data(valid=False):
    """Load the resized sequences listed in ``id_info`` into model-ready arrays.

    Args:
        valid: True selects the 'notype' recordings, False selects all other
            recordings. Ids without a file on disk (empty ``Data``) are skipped.

    Returns:
        ``(x_data, names)`` when ``CFG.train`` is False, otherwise
        ``(x_data, y_data, names)``.
        x_data: float32, shape (n_files, CFG.target_size, n_features).
        y_data: float32, shape (n_files, CFG.target_size, 4) with the channels
            StartHesitation, Turn, Walking and an added "no event" channel.
        names: list of "<dataset>/<id>.csv" strings in the same order as the arrays.

    Raises:
        ValueError: if no recording matches the selection.
    """
    cols = ['mean', 'max', 'min', 'med', 'std']
    if CFG.use_percentile_feat:
        cols += ['p15', 'p30', 'p45', 'p60', 'p75', 'p90']
    use_cols = [f"{axis}{c}" for c in cols for axis in ('AccV', 'AccML', 'AccAP')]

    bools = id_info['Data'] == 'notype' if valid else id_info['Data'] != 'notype'
    x_data, y_data, names = [], [], []
    for di, idi in tqdm(id_info[bools][['Data', 'Id']].values):
        if di == '':
            continue
        f = pd.read_csv(f"resized_sequences/{di}/{idi}.csv")
        # DataFrame.ffill() replaces the deprecated fillna(method="ffill").
        f = f.ffill()
        x = f[use_cols].values.astype(np.float32).reshape(-1, CFG.target_size, len(use_cols))
        names.append(f"{di}/{idi}.csv")
        x_data.append(x)

        if CFG.train:
            y = f[['StartHesitationmax', 'Turnmax', 'Walkingmax']].values.astype(np.float32).reshape(-1, CFG.target_size, 3)
            y_data.append(y)

    if not x_data:
        # np.concatenate would raise a ValueError here as well; make the cause explicit.
        raise ValueError("get_data: no resized sequence matches the requested selection")

    x_data = np.concatenate(x_data)
    if CFG.train:
        y_data = np.concatenate(y_data)
        # Append a "no event" channel: 1 where the three label values sum to less than 1.
        noevent = np.expand_dims(np.sum(y_data, axis=2) < 1, axis=2).astype('float32')
        y_data = np.concatenate([y_data, noevent], axis=2)
        # Time steps whose channels sum to exactly 2 (or 3) are rescaled so they sum to 1.
        y_data[np.sum(y_data, axis=2) == 2] /= 2
        y_data[np.sum(y_data, axis=2) == 3] /= 3

        return x_data, y_data, names

    return x_data, names

In [None]:
# Evaluate only presence of Event.
def loss_func(y_true, preds):
    """Binary cross-entropy on "any event" vs. "no event".

    The last channel of both tensors is the no-event channel; its complement
    (``1 - channel``) is used as the event target and the event prediction.

    Args:
        y_true: targets, indexed as ``[:, :, channel]``.
        preds: predictions with the same layout.

    Returns:
        The mean of the binary cross-entropy values.
    """
    event_true = 1.0 - y_true[:, :, -1]
    event_pred = 1.0 - preds[:, :, -1]
    bce = keras.losses.binary_crossentropy(event_true, event_pred)
    return tf.reduce_mean(bce)

## Models

In [None]:
def load_bidLSTM(seq_len, n_feat, hidden0, hidden1, hidden2, hidden3, dense, n_class=4):
    """Build a stack of four bidirectional LSTMs with a per-time-step classifier.

    Args:
        seq_len: number of time steps of the input.
        n_feat: number of input features per time step.
        hidden0, hidden1, hidden2, hidden3: units of the four LSTM layers, in order.
        dense: units of the intermediate SELU dense layer.
        n_class: number of softmax outputs per time step.

    Returns:
        An uncompiled ``keras.models.Sequential`` model.
    """
    stack = [keras.layers.Input(shape = (seq_len, n_feat))]

    # Every recurrent layer returns the full sequence so the head predicts per time step.
    for units in (hidden0, hidden1, hidden2, hidden3):
        recurrent = keras.layers.LSTM(units, return_sequences = True)
        stack.append(keras.layers.Bidirectional(recurrent))

    stack.append(keras.layers.Dense(dense, activation = 'selu'))
    stack.append(keras.layers.Dense(n_class, activation='softmax'))

    return keras.models.Sequential(stack)

In [None]:
# Implementation of https://www.kaggle.com/competitions/ventilator-pressure-prediction/discussion/285330
def load_1dCNN(seq_len, n_feat, n_class=4):
    """Build the Conv1D + bidirectional LSTM model.

    Three 'same'-padded Conv1D branches (kernel sizes 2, 3 and 4, ``n_feat``
    filters each) are concatenated with the raw input along the feature axis
    and fed through four bidirectional LSTMs (1024, 512, 256, 128 units,
    dropout 0.1) followed by a per-time-step softmax layer.

    Args:
        seq_len: number of time steps of the input.
        n_feat: number of input features per time step.
        n_class: number of softmax outputs per time step.

    Returns:
        An uncompiled Keras functional model.
    """
    inputs_x = keras.layers.Input(shape=(seq_len, n_feat))

    conv_branches = [
        keras.layers.Conv1D(n_feat, kernel_size, padding='same')(inputs_x)
        for kernel_size in (2, 3, 4)
    ]
    hidden = keras.layers.Concatenate(axis=2)([inputs_x] + conv_branches)

    for units in (1024, 512, 256, 128):
        recurrent = keras.layers.LSTM(units, return_sequences=True, dropout=0.1)
        hidden = keras.layers.Bidirectional(recurrent)(hidden)

    outputs = keras.layers.Dense(n_class, activation="softmax")(hidden)
    return keras.Model(inputs=inputs_x, outputs=outputs)

## Training
All data with per-class labels (tdcsfog and defog) are used as training data. The validation score is calculated only on the 'notype' data, where only the presence of an event is evaluated.

In [None]:
# 1dCNN
class CFG:
    # Settings for model 0: 1dCNN on the basic window statistics.
    # Experiment tag; not read by the training code in this cell.
    exp = '001'
    # Number of windows every recording is resized to (sequence length of the model).
    target_size = 2048
    # Initial learning rate of the cosine decay schedule.
    lr = 3e-4
    # Epochs without improvement of the monitored metric before training stops.
    early_stop_patience = 30
    # Maximum number of epochs.
    epoch = 100
    batch_size = 32
    # Suffix of the saved model directory ("model-<model_no>").
    model_no = 0
    # True adds the p15..p90 window percentiles to the input features.
    use_percentile_feat = False
    # Copy of the notebook-level `train` switch.
    train = train
    
if CFG.train:
    # Re-create the resized CSVs for this cell's target_size / feature settings.
    save_resized_data('tdcsfog', CFG.target_size)
    save_resized_data('defog', CFG.target_size)
    save_resized_data('notype', CFG.target_size)
    
    gpu_strategy = tf.distribute.get_strategy()
    with gpu_strategy.scope():
        # Validation = 'notype' recordings, training = every other recording.
        valid_x, valid_y, _ = get_data(True)
        train_x, train_y, _ = get_data(False)

        model = load_1dCNN(train_x.shape[1], train_x.shape[2])
        # Decay steps = epochs * (number of training sequences // batch size).
        scheduler = CosineDecay(CFG.lr, CFG.epoch * (train_x.shape[0] // CFG.batch_size))
        optimizer = keras.optimizers.Adam(learning_rate = scheduler)
        # Trained on the 4-class cross-entropy; loss_func (event presence only)
        # is attached as a metric so that it can be monitored on the validation set.
        model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=loss_func)

        callbacks = []
        callbacks.append(
            EarlyStopping(
                # Validation value of the loss_func metric attached in compile().
                monitor = "val_loss_func",
                patience = CFG.early_stop_patience,
                verbose =1,
                mode = "min",
                restore_best_weights = True))

        model.fit(train_x, train_y, validation_data=(valid_x, valid_y), epochs=CFG.epoch, batch_size=CFG.batch_size, callbacks=callbacks) 
        save_locally = tf.saved_model.SaveOptions(experimental_io_device='/job:localhost')
        model.save(f'model-{CFG.model_no}', options=save_locally)

        # Event probability = 1 - predicted value of the last (no-event) channel.
        preds = 1.0 - model.predict(valid_x, batch_size=CFG.batch_size, verbose=2)[:, :, -1]
        # This score is computed on the resized sequences, not at full length. To get the actual score the predictions must be resized back.
        print(average_precision_score((1.0 - valid_y[:, :, -1]).flatten() > 0, preds.flatten()))

In [None]:
# Bidirectional LSTM
class CFG:
    # Settings for model 1: bidirectional LSTM on the basic window statistics.
    # Experiment tag; not read by the training code in this cell.
    exp = '001'
    # Number of windows every recording is resized to (sequence length of the model).
    target_size = 2048
    # Initial learning rate of the cosine decay schedule.
    lr = 3e-4
    # Epochs without improvement of the monitored metric before training stops.
    early_stop_patience = 30
    # Maximum number of epochs.
    epoch = 100
    batch_size = 32
    # Suffix of the saved model directory ("model-<model_no>").
    model_no = 1
    # True adds the p15..p90 window percentiles to the input features.
    use_percentile_feat = False
    # Copy of the notebook-level `train` switch.
    train = train
    
if CFG.train:
    # Re-create the resized CSVs for this cell's target_size / feature settings.
    save_resized_data('tdcsfog', CFG.target_size)
    save_resized_data('defog', CFG.target_size)
    save_resized_data('notype', CFG.target_size)
    
    gpu_strategy = tf.distribute.get_strategy()
    with gpu_strategy.scope():
        # Validation = 'notype' recordings, training = every other recording.
        valid_x, valid_y, _ = get_data(True)
        train_x, train_y, _ = get_data(False)

        # LSTM units 1024/512/256/128, followed by a 128-unit dense layer.
        model = load_bidLSTM(train_x.shape[1], train_x.shape[2], 1024, 512, 256, 128, 128)
        # Decay steps = epochs * (number of training sequences // batch size).
        scheduler = CosineDecay(CFG.lr, CFG.epoch * (train_x.shape[0] // CFG.batch_size))
        optimizer = keras.optimizers.Adam(learning_rate = scheduler)
        # Trained on the 4-class cross-entropy; loss_func (event presence only)
        # is attached as a metric so that it can be monitored on the validation set.
        model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=loss_func)

        callbacks = []
        callbacks.append(
            EarlyStopping(
                # Validation value of the loss_func metric attached in compile().
                monitor = "val_loss_func",
                patience = CFG.early_stop_patience,
                verbose =1,
                mode = "min",
                restore_best_weights = True))

        model.fit(train_x, train_y, validation_data=(valid_x, valid_y), epochs=CFG.epoch, batch_size=CFG.batch_size, callbacks=callbacks) 
        save_locally = tf.saved_model.SaveOptions(experimental_io_device='/job:localhost')
        model.save(f'model-{CFG.model_no}', options=save_locally)

        # Event probability = 1 - predicted value of the last (no-event) channel.
        preds = 1.0 - model.predict(valid_x, batch_size=CFG.batch_size, verbose=2)[:, :, -1]
        # This score is computed on the resized sequences, not at full length. To get the actual score the predictions must be resized back.
        print(average_precision_score((1.0 - valid_y[:, :, -1]).flatten() > 0, preds.flatten()))

In [None]:
# 1dCNN (percentile features are added)
# NOTE(review): the header above says 1dCNN, but the model built in this cell is
# load_bidLSTM(...). Confirm which architecture is intended before retraining.
class CFG:
    # Settings for model 2: window statistics plus percentile features.
    # Experiment tag; not read by the training code in this cell.
    exp = '001'
    # Number of windows every recording is resized to (sequence length of the model).
    target_size = 2048
    # Initial learning rate of the cosine decay schedule.
    lr = 3e-4
    # Epochs without improvement of the monitored metric before training stops.
    early_stop_patience = 30
    # Maximum number of epochs.
    epoch = 100
    batch_size = 16
    # Suffix of the saved model directory ("model-<model_no>").
    model_no = 2
    # True adds the p15..p90 window percentiles to the input features.
    use_percentile_feat = True
    # Copy of the notebook-level `train` switch.
    train = train
    
if CFG.train:
    # Re-create the resized CSVs for this cell's target_size / feature settings.
    save_resized_data('tdcsfog', CFG.target_size)
    save_resized_data('defog', CFG.target_size)
    save_resized_data('notype', CFG.target_size)
    
    gpu_strategy = tf.distribute.get_strategy()
    with gpu_strategy.scope():
        # Validation = 'notype' recordings, training = every other recording.
        valid_x, valid_y, _ = get_data(True)
        train_x, train_y, _ = get_data(False)

        # LSTM units 1024/512/256/128, followed by a 128-unit dense layer.
        model = load_bidLSTM(train_x.shape[1], train_x.shape[2], 1024, 512, 256, 128, 128)
        # Decay steps = epochs * (number of training sequences // batch size).
        scheduler = CosineDecay(CFG.lr, CFG.epoch * (train_x.shape[0] // CFG.batch_size))
        optimizer = keras.optimizers.Adam(learning_rate = scheduler)
        # Trained on the 4-class cross-entropy; loss_func (event presence only)
        # is attached as a metric so that it can be monitored on the validation set.
        model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=loss_func)

        callbacks = []
        callbacks.append(
            EarlyStopping(
                # Validation value of the loss_func metric attached in compile().
                monitor = "val_loss_func",
                patience = CFG.early_stop_patience,
                verbose =1,
                mode = "min",
                restore_best_weights = True))

        model.fit(train_x, train_y, validation_data=(valid_x, valid_y), epochs=CFG.epoch, batch_size=CFG.batch_size, callbacks=callbacks) 
        save_locally = tf.saved_model.SaveOptions(experimental_io_device='/job:localhost')
        model.save(f'model-{CFG.model_no}', options=save_locally)

        # Event probability = 1 - predicted value of the last (no-event) channel.
        preds = 1.0 - model.predict(valid_x, batch_size=CFG.batch_size, verbose=2)[:, :, -1]
        # This score is computed on the resized sequences, not at full length. To get the actual score the predictions must be resized back.
        print(average_precision_score((1.0 - valid_y[:, :, -1]).flatten() > 0, preds.flatten()))

In [None]:
# Long1dCNN
class CFG:
    # Settings for model 3: 1dCNN on longer sequences (4096 windows).
    # Experiment tag; not read by the training code in this cell.
    exp = '001'
    # Number of windows every recording is resized to (sequence length of the model).
    target_size = 4096
    # Initial learning rate of the cosine decay schedule.
    lr = 3e-4
    # Epochs without improvement of the monitored metric before training stops.
    early_stop_patience = 30
    # Maximum number of epochs.
    epoch = 100
    batch_size = 16
    # Suffix of the saved model directory ("model-<model_no>").
    model_no = 3
    # True adds the p15..p90 window percentiles to the input features.
    use_percentile_feat = False
    # Copy of the notebook-level `train` switch.
    train = train
    
if CFG.train:
    # Re-create the resized CSVs for this cell's target_size / feature settings.
    save_resized_data('tdcsfog', CFG.target_size)
    save_resized_data('defog', CFG.target_size)
    save_resized_data('notype', CFG.target_size)
    
    gpu_strategy = tf.distribute.get_strategy()
    with gpu_strategy.scope():
        # Validation = 'notype' recordings, training = every other recording.
        valid_x, valid_y, _ = get_data(True)
        train_x, train_y, _ = get_data(False)

        model = load_1dCNN(train_x.shape[1], train_x.shape[2])
        # Decay steps = epochs * (number of training sequences // batch size).
        scheduler = CosineDecay(CFG.lr, CFG.epoch * (train_x.shape[0] // CFG.batch_size))
        optimizer = keras.optimizers.Adam(learning_rate = scheduler)
        # Trained on the 4-class cross-entropy; loss_func (event presence only)
        # is attached as a metric so that it can be monitored on the validation set.
        model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=loss_func)

        callbacks = []
        callbacks.append(
            EarlyStopping(
                # Validation value of the loss_func metric attached in compile().
                monitor = "val_loss_func",
                patience = CFG.early_stop_patience,
                verbose =1,
                mode = "min",
                restore_best_weights = True))

        model.fit(train_x, train_y, validation_data=(valid_x, valid_y), epochs=CFG.epoch, batch_size=CFG.batch_size, callbacks=callbacks) 
        save_locally = tf.saved_model.SaveOptions(experimental_io_device='/job:localhost')
        model.save(f'model-{CFG.model_no}', options=save_locally)

        # Event probability = 1 - predicted value of the last (no-event) channel.
        preds = 1.0 - model.predict(valid_x, batch_size=CFG.batch_size, verbose=2)[:, :, -1]
        # This score is computed on the resized sequences, not at full length. To get the actual score the predictions must be resized back.
        print(average_precision_score((1.0 - valid_y[:, :, -1]).flatten() > 0, preds.flatten()))

## Inference
I chose the models whose NoEvent score is higher than 0.28.

In [None]:
# One prediction array per loaded model: each inference cell appends to this
# list and the ensemble cell averages over it.
preds= []
# Experiment id of the uploaded weights; used to build the
# "/kaggle/input/pfgp-exp<expid>/model-<n>" paths in the inference cells.
expid = '035'

In [None]:
# model 0: 1dCNN
class CFG:
    # Inference settings; target_size and use_percentile_feat control how the
    # input CSVs are resized and which feature columns are loaded.
    exp = expid
    target_size = 2048
    use_percentile_feat = False
    # Index of the saved model inside the experiment folder.
    model_no = 0
    batch_size = 32
    train = train
    
# Re-create the resized CSVs with this cell's settings (the output folder is shared by all cells).
save_resized_data('tdcsfog', CFG.target_size)
save_resized_data('defog', CFG.target_size)

# NOTE(review): this unpacking only works with train = False; when CFG.train is
# True get_data returns three values (x, y, names).
test_x, names = get_data(False)

gpu_strategy = tf.distribute.get_strategy()
with gpu_strategy.scope():
    # loss_func is passed as a custom object because the training cells compile the models with it as a metric.
    model = keras.models.load_model(f"/kaggle/input/pfgp-exp{CFG.exp}/model-{CFG.model_no}", custom_objects={'loss_func':loss_func})
    preds.append(model.predict(test_x, batch_size=CFG.batch_size, verbose=2))
        
# Release the input array before the next model is loaded.
del test_x
gc.collect()

In [None]:
# model 3: BiLSTM
class CFG:
    # Inference settings; target_size and use_percentile_feat control how the
    # input CSVs are resized and which feature columns are loaded.
    exp = expid
    target_size = 2048
    use_percentile_feat = False
    # Index of the saved model inside the experiment folder.
    model_no = 3
    batch_size = 32
    train = train
    
# Re-create the resized CSVs with this cell's settings (the output folder is shared by all cells).
save_resized_data('tdcsfog', CFG.target_size)
save_resized_data('defog', CFG.target_size)
# NOTE(review): this unpacking only works with train = False; when CFG.train is
# True get_data returns three values (x, y, names).
test_x, names = get_data(False)

gpu_strategy = tf.distribute.get_strategy()
with gpu_strategy.scope():
    # loss_func is passed as a custom object because the training cells compile the models with it as a metric.
    model = keras.models.load_model(f"/kaggle/input/pfgp-exp{CFG.exp}/model-{CFG.model_no}", custom_objects={'loss_func':loss_func})
    preds.append(model.predict(test_x, batch_size=CFG.batch_size, verbose=2))
        
# Release the input array before the next model is loaded.
del test_x
gc.collect()

In [None]:
# 1dCNN (percentile features are added)
class CFG:
    # Inference settings; target_size and use_percentile_feat control how the
    # input CSVs are resized and which feature columns are loaded.
    exp = expid
    target_size = 2048
    # Percentile columns are written to the resized CSVs and loaded as extra features.
    use_percentile_feat = True
    # Index of the saved model inside the experiment folder.
    model_no = 5
    batch_size = 32
    train = train
    
# Re-create the resized CSVs with this cell's settings (the output folder is shared by all cells).
save_resized_data('tdcsfog', CFG.target_size)
save_resized_data('defog', CFG.target_size)
# NOTE(review): this unpacking only works with train = False; when CFG.train is
# True get_data returns three values (x, y, names).
test_x, names = get_data(False)

gpu_strategy = tf.distribute.get_strategy()
with gpu_strategy.scope():
    # loss_func is passed as a custom object because the training cells compile the models with it as a metric.
    model = keras.models.load_model(f"/kaggle/input/pfgp-exp{CFG.exp}/model-{CFG.model_no}", custom_objects={'loss_func':loss_func})
    preds.append(model.predict(test_x, batch_size=CFG.batch_size, verbose=2))
        
# Release the input array before the next model is loaded.
del test_x
gc.collect()

In [None]:
# model 7: long 1dCNN
class CFG:
    # Inference settings; target_size and use_percentile_feat control how the
    # input CSVs are resized and which feature columns are loaded.
    exp = expid
    # Longer sequences than the other inference cells (4096 instead of 2048 windows).
    target_size = 4096
    use_percentile_feat = False
    # Index of the saved model inside the experiment folder.
    model_no = 7
    batch_size = 16
    train = train
    
# Re-create the resized CSVs with this cell's settings (the output folder is shared by all cells).
save_resized_data('tdcsfog', CFG.target_size)
save_resized_data('defog', CFG.target_size)
# NOTE(review): this unpacking only works with train = False; when CFG.train is
# True get_data returns three values (x, y, names).
test_x, names = get_data(False)

gpu_strategy = tf.distribute.get_strategy()
with gpu_strategy.scope():
    # loss_func is passed as a custom object because the training cells compile the models with it as a metric.
    model = keras.models.load_model(f"/kaggle/input/pfgp-exp{CFG.exp}/model-{CFG.model_no}", custom_objects={'loss_func':loss_func})
    preds.append(model.predict(test_x, batch_size=CFG.batch_size, verbose=2))
        
# Release the input array before the next model is loaded.
del test_x
gc.collect()

## Ensemble

In [None]:
# Target columns of the submission; they name the first three model output
# channels, in this order, when predictions are turned into data frames.
tcols = ['StartHesitation', 'Turn', 'Walking']
# Column order used when writing the submission file.
scols = ['Id', 'StartHesitation', 'Turn' , 'Walking']

def reconst_sequence(x, target_size):
    """Stretch the first three prediction channels back to ``target_size`` samples.

    Args:
        x: predictions of one recording, indexed as ``[time, channel]``; only
            channels 0, 1 and 2 are used.
        target_size: length of the original recording.

    Returns:
        numpy array with one row per channel (3 rows), each resampled with
        ``scipy.ndimage.zoom`` (mode 'nearest').
    """
    zoom_factor = target_size/len(x)
    channels = [
        ndi.zoom(x[:, channel], zoom=zoom_factor, mode='nearest')
        for channel in range(3)
    ]
    return np.array(channels)

# Length of every original test recording. Read once up front: the same
# `names` list is paired with every entry of `preds` below, so re-reading each
# CSV once per model only repeated the same work.
seq_lens = {ni: len(pd.read_csv(f"{DATA_DIR}test/{ni}")) for ni in names}

# One long data frame per model, with one row per original time step and
# Ids of the form "<recording id>_<time step>".
dfs = []
for p in tqdm(preds):
    dfi = []
    for pi, ni in zip(p, names):
        # Stretch the fixed-size prediction back to the original length.
        res = reconst_sequence(pi, seq_lens[ni])
        df = pd.DataFrame(res.transpose(), columns=tcols)
        df['Id'] = ni.split('/')[-1].replace('.csv', '') + '_' + df.index.astype(str)
        dfi.append(df[scols])

    dfs.append(pd.concat(dfi))

# Ensemble: the unweighted mean over all models is written into dfs[0],
# which the submission cell reads.
for c in tcols:
    dfs[0][c] = np.mean([model_df[c].values for model_df in dfs], axis=0)

In [None]:
# Use the Ids of the sample submission as the row template. The helper column
# 't' only keeps the left side of the merge a two-column frame.
sub = pd.read_csv(f'{DATA_DIR}sample_submission.csv').assign(t=0)

# Attach the ensembled predictions; Ids without a prediction are filled with 0.
res = sub[['Id','t']].merge(dfs[0], how='left', on='Id')
res = res.fillna(0.0)

res[scols].to_csv('submission.csv', index=False)

In [None]:
# Read the written file back and display it as a sanity check.
# Note: this rebinds `subs`, which earlier in the notebook held the list of subject ids.
subs = pd.read_csv('submission.csv')
subs