In [5]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Set matplotlib's default font family for figures created after this point.
plt.rcParams['font.family'] = 'DejaVu Sans Mono'

In [6]:
from scipy.interpolate import interp1d
from sklearn.preprocessing import LabelEncoder
import itertools

class Preprocessor():
    """Cut raw accelerometer recordings into fixed-length, evenly resampled windows.

    Rows are grouped per (user, activity) pair. Each group is split into
    continuous components wherever the timestamp jumps forward by more than
    ``gap_max`` or goes backwards. Every component lasting at least
    ``seg_dur`` is interpolated (cubic) and sampled on a regular grid of
    spacing ``resamp_gap``, then cut into windows of duration ``seg_dur``
    whose start times are ``seg_dur * (1 - ol_rate)`` apart.

    Parameters
    ----------
    gap_max : float
        Largest forward timestamp step tolerated inside one component.
    seg_dur : float
        Duration of each output window, in the unit of the ``timestamp`` column.
    ol_rate : float
        Fraction of a window shared with the following one; must be below 1.
    resamp_gap : float
        Sampling period used inside each output window.

    After ``transform`` has run, ``seg_usrs`` holds the user of every returned
    window and ``label_encoder`` holds the fitted ``LabelEncoder``.
    """

    def __init__(self, gap_max=1.0, seg_dur=5.0, ol_rate=0.5, resamp_gap=0.01):
        self.gap_max = gap_max
        self.seg_dur = seg_dur
        self.ol_rate = ol_rate
        self.resamp_gap = resamp_gap

    def transform(self, df):
        """Segment ``df`` into resampled windows.

        Parameters
        ----------
        df : pandas.DataFrame
            Must contain the columns ``user``, ``activity``, ``timestamp``,
            ``ax``, ``ay`` and ``az``. The frame is not modified.

        Returns
        -------
        X : numpy.ndarray
            Stacked windows; when at least one window is produced the last
            axis holds the (ax, ay, az) channels.
        y : numpy.ndarray
            Integer-encoded activity of each window (see ``label_encoder``).

        Raises
        ------
        ValueError
            If ``seg_dur * (1 - ol_rate)`` is not positive, i.e. consecutive
            windows would not advance in time.
        """
        gap_max = self.gap_max
        seg_dur = self.seg_dur
        ol_rate = self.ol_rate
        resamp_gap = self.resamp_gap

        stride = seg_dur * (1 - ol_rate)
        if stride <= 0:
            raise ValueError(
                'seg_dur * (1 - ol_rate) must be positive, got %r' % (stride,)
            )

        # Sample times relative to the start of a window; identical for all windows.
        t_window = np.arange(0, seg_dur, resamp_gap)

        users = sorted(df['user'].unique())
        activities = sorted(df['activity'].unique())

        segments = []
        seg_usrs = []
        seg_acts = []

        for user, activity in itertools.product(users, activities):
            mask_ua = (df['user'] == user) & (df['activity'] == activity)
            if not mask_ua.any():
                continue

            # Select the rows once per pair instead of once per component.
            df_ua = df[mask_ua]
            dt = df_ua['timestamp'].diff()
            # A new component starts at every large forward gap or backward jump.
            cpnt_nums = ((dt > gap_max) | (dt < 0)).cumsum().to_numpy()

            for num in range(cpnt_nums.min(), cpnt_nums.max() + 1):
                cpnt = df_ua[cpnt_nums == num]
                # Work on fresh arrays so neither `df` nor a slice of it is written to.
                ts = cpnt['timestamp'].to_numpy()
                ts = ts - ts.min()
                duration = ts.max()
                if duration < seg_dur:
                    continue
                if len(ts) < 4:
                    # A cubic interpolant needs at least four samples.
                    continue

                f = interp1d(ts, cpnt[['ax', 'ay', 'az']].to_numpy(), axis=0, kind='cubic')

                # Window i starts at i * stride and must end by `duration`, so the
                # valid indices are 0 .. floor((duration - seg_dur) / stride)
                # inclusive. The last one used to be dropped whenever the
                # quotient was an exact integer.
                n_windows = int(np.floor((duration - seg_dur) / stride)) + 1
                for i in range(n_windows):
                    t = t_window + i * seg_dur * (1 - ol_rate)
                    # Guard against a one-ulp overshoot past the last sample.
                    t = np.minimum(t, duration)
                    segments.append(f(t))
                    seg_usrs.append(user)
                    seg_acts.append(activity)

        segments = np.array(segments)
        seg_usrs = np.array(seg_usrs)

        X = segments
        label_encoder = LabelEncoder()
        y = label_encoder.fit_transform(seg_acts)

        self.seg_usrs = seg_usrs
        self.label_encoder = label_encoder
        return X, y

# Load the header-less raw recording file and clean it in a single chain.
# Filter order matters and mirrors the step-by-step version: scale the
# timestamps, drop zero timestamps, drop repeated timestamps (keeping the
# first occurrence), drop rows with missing values, then renumber the rows.
# NOTE(review): the 1e-9 factor presumably converts nanoseconds to seconds -
# confirm against the dataset description.
df = (
    pd.read_csv(
        '../input/wisdm-ar-v11/WISDM_ar_v1.1_raw_modified.txt',
        header=None,
        names=['user', 'activity', 'timestamp', 'ax', 'ay', 'az'],
    )
    .assign(timestamp=lambda raw: raw['timestamp'] * 1e-9)
    .loc[lambda frame: frame['timestamp'] != 0]
    .loc[lambda frame: ~frame['timestamp'].duplicated()]
    .dropna()
    .reset_index(drop=True)
)

# Sorted distinct user ids and activity names that survive the cleaning.
users = sorted(df['user'].unique())
activities = sorted(df['activity'].unique())

In [7]:
from tools import Embedder, SineFilter

# Experiment configuration, unpacked in the order used below:
# (segment duration, embedding lag, reduce, raw dimension, method).
key = (5, 4, 0, 5, 'identity')
seg_dur, lag, reduce, dim_raw, method = key

# Segment the cleaned recordings, then embed every segment with the project helper.
preprocessor = Preprocessor(seg_dur=seg_dur)
X, y = preprocessor.transform(df)
embedder = Embedder(lag=lag, reduce=reduce, dim_raw=dim_raw, channel_last=True)
pts = embedder.transform(X)

# Disabled SineFilter feature path, kept for reference:
# weights = np.load(f'./output/w_WISDM/{key}_20221110.npy')
# # weights = np.ones(pts.shape[:-1] + (1, ))
# sine_filter = SineFilter(dim=pts.shape[-1], n_filters=16, random_state=42)
# scale = np.average(np.linalg.norm(pts, axis=-1, keepdims=True), axis=-2, weights=weights/np.sum(weights, axis=-2, keepdims=True))[...,np.newaxis]
# fs = sine_filter.apply(pts/scale, weights)

# Placeholder for the filter features while the path above is disabled.
# It is zero-filled rather than taken from np.empty_like: uninitialised memory
# would give the model's auxiliary input arbitrary (possibly NaN/inf) values
# that change from run to run.
fs = np.zeros_like(pts)[..., :1]

In [8]:
import tensorflow as tf
from tensorflow import keras
import random

class MyModel(keras.Model):
    """Two-input sequence classifier.

    The main branch is a GRU followed by four ELU Conv1D layers (the last three
    each followed by average pooling), then flatten, batch normalisation, a
    16-unit ELU dense layer and a second batch normalisation. The auxiliary
    branch flattens its input and applies a 16-unit ELU dense layer. A softmax
    dense layer with ``n_classes`` units produces the output.

    Parameters
    ----------
    n_classes : int
        Number of units of the final softmax layer.
    use_weights : bool
        When true, the auxiliary branch output is concatenated to the main
        branch output before the softmax layer. The auxiliary branch is
        evaluated in either case.
    """

    def __init__(self, n_classes, use_weights=False):
        super().__init__()
        self.n_classes = n_classes

        # Small factories for the repeated layer configurations. Layers are
        # still instantiated in list order, one call per entry.
        def conv(filters):
            return keras.layers.Conv1D(filters=filters, kernel_size=8, activation='elu')

        def pool():
            return keras.layers.AveragePooling1D(pool_size=2)

        def dense_elu():
            return keras.layers.Dense(units=16, activation='elu')

        self.main_layers = [
            keras.layers.GRU(units=256, return_sequences=True),
            conv(128),
            conv(64),
            pool(),
            conv(32),
            pool(),
            conv(16),
            pool(),
            keras.layers.Flatten(),
            keras.layers.BatchNormalization(),
            dense_elu(),
            keras.layers.BatchNormalization(),
        ]
        self.aux_layers = [
            keras.layers.Flatten(),
            dense_elu(),
        ]
        self.final_layers = [
            keras.layers.Dense(units=n_classes, activation='softmax'),
        ]
        self.use_weights = use_weights

    def call(self, inputs, training=None):
        """Run both branches on ``inputs = (main_input, aux_input)``.

        Returns the softmax output of the final layer.
        """
        features, aux = inputs

        for layer in self.main_layers:
            features = layer(features, training=training)

        # The auxiliary branch always runs, even when its output is unused.
        for layer in self.aux_layers:
            aux = layer(aux, training=training)

        head = tf.concat([features, aux], axis=-1) if self.use_weights else features

        for layer in self.final_layers:
            head = layer(head, training=training)
        return head

# Subject-wise hold-out: draw 12 users without replacement; the first 6 form
# the validation set, the remaining 6 the test set, all other users train.
rng = np.random.default_rng(42)
usrs_test = rng.choice(users, 12, replace=False)
usrs_val = usrs_test[:6]
usrs_test = usrs_test[6:]
mask_test = np.isin(preprocessor.seg_usrs, usrs_test)
mask_val = np.isin(preprocessor.seg_usrs, usrs_val)
mask_train = ~(mask_val | mask_test)

# Stop once the monitored metric has not improved by 1e-4 for 20 epochs and
# roll back to the best weights seen.
callbacks = [
    keras.callbacks.EarlyStopping(
        patience=20,
        min_delta=1e-4,
        restore_best_weights=True
    )
]

# Size the softmax layer from the labels actually produced by the
# preprocessor instead of hard-coding the class count.
n_classes = len(preprocessor.label_encoder.classes_)

keras.backend.clear_session()
for use_weights in (False, ):
    # Re-seed every source of randomness so each configuration starts from the same state.
    tf.random.set_seed(42); random.seed(42); np.random.seed(42)

    model = MyModel(n_classes=n_classes, use_weights=use_weights)
    model.compile(
        loss='sparse_categorical_crossentropy',
        optimizer=keras.optimizers.Adam(1e-3),
        metrics=['accuracy']
    )
    model.fit(
        (X[mask_train], fs[mask_train]),
        y[mask_train],
        epochs=50,
        batch_size=1<<10,
        callbacks=callbacks,
        validation_data=((X[mask_val], fs[mask_val]), y[mask_val]),
    )
    # Final score on the held-out test users.
    model.evaluate((X[mask_test], fs[mask_test]), y[mask_test])

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
