In [None]:
# This is the template for the submission. If you want, you can develop your algorithm in a regular Python script and copy the code here for submission.

# Team members (e-mail, legi):
# chozhang@student.ethz.ch, 22-945-562
# minghli@student.ethz.ch, 22-953-293
# changli@student.ethz.ch, 22-944-474

In [1]:
from typing import Sequence

In [3]:
import os
import sys
# A set KAGGLE_KERNEL_RUN_TYPE variable is treated as "running on Kaggle";
# when it is absent the notebook falls back to the 'Localhost' sentinel.
curr_environ = os.environ.get('KAGGLE_KERNEL_RUN_TYPE', 'Localhost')
if curr_environ == 'Localhost':
    # Local run: use the current working directory as the input directory.
    input_dir = os.path.abspath('')
else:
    # Kaggle run: the dataset directory is both the input directory and an
    # extra import location.
    input_dir = '/kaggle/input/mobile-health-2023-path-detection'
    sys.path.append(input_dir)

In [5]:
import numpy as np
import pandas as pd

from Lilygo.Recording import Recording
from Lilygo.Dataset import Dataset
from os import listdir
from os.path import isfile, join

In [6]:
import matplotlib.pyplot as plt

In [47]:
# for signal processing and calculations
from scipy import signal

# for tuning parameters
from sklearn.model_selection import GridSearchCV, cross_val_score
from sklearn.base import BaseEstimator
from sklearn.ensemble import RandomForestClassifier

In [13]:
### signal processing functions ###
def parse(signal, ds_freq:float=20.0, zero_mean:bool=False):
    """Resample a recorded signal onto a uniform time grid.

    Args:
        signal: object exposing ``timestamps`` and ``values`` sequences.
            (Inside this function the name shadows the ``scipy.signal``
            module imported at notebook level.)
        ds_freq: sampling frequency of the output grid; the grid step is
            ``1 / ds_freq`` in the unit of the timestamps
            (presumably seconds - TODO confirm).
        zero_mean: if True, the mean of the raw values is subtracted before
            resampling.

    Returns:
        ``(time_seq, value_seq)``: the uniform grid running from the smallest
        timestamp up to (but excluding) the largest one, and the values
        linearly interpolated onto that grid.
    """
    raw_times = np.asarray(signal.timestamps)
    raw_values = np.asarray(signal.values)
    if zero_mean:
        raw_values = raw_values - raw_values.mean()
    grid = np.arange(start=raw_times.min(), stop=raw_times.max(), step=1./ds_freq)
    return grid, np.interp(grid, raw_times, raw_values)
    
def bp_filter(value_seq, fp:float=3, fs:float=20.0):
    """Band-pass filter a sequence with a Butterworth design (N=4).

    The pass band runs from a fixed lower edge of 0.5 up to ``fp``; both edges
    are expressed in the same unit as the sampling frequency ``fs``. The filter
    is applied in a single forward pass using second-order sections.

    Returns the filtered sequence, same length as ``value_seq``.
    """
    band_edges = [0.5, fp]
    sections = signal.butter(N=4, Wn=band_edges, btype='bandpass', fs=fs, output='sos')
    return signal.sosfilt(sections, value_seq)

def lp_filter(value_seq, alpha=0.95):
    """Smooth a sequence with a first-order exponential low-pass filter.

    The running state is seeded with the first sample and then updated for
    every sample (including the first) as
    ``state = state * alpha + sample * (1 - alpha)``.
    A larger ``alpha`` keeps more of the previous state.

    Returns a numpy array with one smoothed value per input sample.
    Indexing the first sample fails on an empty sequence.
    """
    state = value_seq[0]
    smoothed = []
    for sample in value_seq:
        state = state * alpha + sample * (1-alpha)
        smoothed.append(state)
    return np.array(smoothed)
    
def get_envelop(value_seq,
                fs:float=20,
                half_window_size:float=0.5,
                _min:float=20.,
                _max:float=500.):
    """
    Sliding-window maximum of a sequence, clipped to ``[_min, _max]``.

    For every sample the maximum over a centred window of
    ``2 * int(fs * half_window_size) + 1`` samples is taken. The sequence is
    zero-padded on both sides, so windows near the edges also see zeros
    (the input is expected to be non-negative, e.g. a vector norm).

    Returns an array with one envelope value per input sample.
    """
    radius = int(fs*half_window_size)
    padding = np.zeros((radius,))
    padded = np.concatenate([padding, value_seq, padding])
    window = 2*radius + 1
    peaks = []
    # In the padded sequence the window centred on sample i spans
    # padded[i : i + window].
    for first in range(len(value_seq)):
        peaks.append(np.max(padded[first:first+window]))
    return np.clip(np.array(peaks), _min, _max)

def split_fea(t, v, n_split, fn = lambda x: np.percentile(x, 50)):
    """Summarise ``v`` over ``n_split`` equally wide, consecutive time segments.

    Args:
        t: sample times (array-like), same length as ``v``.
        v: sample values (array-like).
        n_split: number of segments spanning ``[min(t), max(t)]``.
        fn: reducer applied to the values of each segment (default: median).

    Returns:
        A list with ``n_split`` entries, one ``fn`` result per segment.

    Segments are half-open ``[start, end)`` except the last one, which is
    closed, so every sample belongs to exactly one segment. With strict
    comparisons on both sides a sample sitting exactly on an edge would be
    dropped; this includes the first and last sample whenever the 1e-8 margin
    is below the float resolution of the timestamps.
    """
    t = np.asarray(t)
    v = np.asarray(v)
    tmin, tmax = np.min(t)-1e-8, np.max(t)+1e-8
    edges = np.linspace(tmin, tmax, n_split+1, endpoint=True)
    last = n_split - 1
    outputs = []
    for i in range(n_split):
        start, end = edges[i], edges[i+1]
        if i < last:
            in_segment = (t >= start) & (t < end)
        else:
            # Close the final segment so the largest timestamp is never lost.
            in_segment = (t >= start) & (t <= end)
        outputs.append(fn(v[in_segment]))
    return outputs

# Tasks

## Step Count

---

## Path Index

All traces contain GPS data, so it can be used as a sanity check.

In [156]:
from scipy.spatial.transform import Rotation

def madgwick_update(q, gyro, accel, mag, beta, dt):
    """Apply one accelerometer/magnetometer correction step to a quaternion.

    Two residual vectors are built from the current quaternion - one against
    ``accel`` and one against ``mag`` - and each is multiplied by the
    transpose of its companion 3x4 matrix. The sum of both terms, scaled by
    ``beta * dt``, is added to the quaternion, which is then re-normalised.

    Args:
        q: current quaternion, indexable with four components.
        gyro: accepted for interface compatibility; not read by this function.
        accel: 3-component accelerometer sample (the caller passes unit vectors).
        mag: 3-component magnetometer sample (the caller passes unit vectors).
        beta: filter gain.
        dt: time step.

    Returns:
        A new unit-norm numpy array of four components; ``q`` is not modified.

    NOTE(review): the accelerometer residual has the form used by scalar-first
    (w, x, y, z) Madgwick formulations, whereas the caller seeds ``q`` from
    ``Rotation.as_quat()`` - confirm the intended component order.
    """
    q = np.array([q[0], q[1], q[2], q[3]])
    q0, q1, q2, q3 = q

    accel_residual = np.array([2 * (q1 * q3 - q0 * q2) - accel[0],
                               2 * (q0 * q1 + q2 * q3) - accel[1],
                               2 * (0.5 - q1**2 - q2**2) - accel[2]])

    mag_residual = np.array([2 * (q1 * q2 + q0 * q3) - mag[0],
                             2 * (q0 * q1 - q2 * q3) - mag[1],
                             2 * (q0**2 + q3**2 - 0.5) - mag[2]])

    accel_matrix = np.array([[-2 * q2, 2 * q3,  -2 * q0, 2 * q1],
                             [2 * q1,  2 * q0,  2 * q3,  2 * q2],
                             [0,       -4 * q1, -4 * q2, 0]])

    mag_matrix = np.array([[2 * q3,  2 * q2,  2 * q1,  2 * q0],
                           [2 * q0,  -2 * q1, -2 * q2, 2 * q3],
                           [-4 * q0, -4 * q3, 0,       0]])

    correction = accel_matrix.T @ accel_residual + mag_matrix.T @ mag_residual
    q += (beta * dt) * correction
    q /= np.linalg.norm(q)
    return q

class PathDetector(BaseEstimator):
    """Random-forest path classifier using altitude and yaw features.

    ``extract`` turns one recording into a flat feature list (5 altitude
    medians followed by 10 sin-yaw and 10 cos-yaw medians); ``fit`` trains the
    forest on a feature matrix; ``predict`` classifies a single feature vector.

    Args:
        random_state: forwarded to ``RandomForestClassifier``; leave ``None``
            for the previous unseeded behaviour, or set an int for
            reproducible fits.
    """

    # Leading resampled samples discarded before computing altitude features.
    _ALTI_SKIP = 600
    # Leading resampled samples discarded before computing yaw features.
    _ORI_SKIP = 400
    # Altitude gain (relative to the minimum) above which values saturate.
    _ALTI_CAP = 60.0

    def __init__(self, random_state=None):
        self.random_state = random_state

    def extract(self,trace):
        """Return the feature list (altitude + orientation) for one recording."""
        data = trace.data
        fea_alti = self.get_fea_alti(data['altitude'])
        fea_ori = self.get_fea_ori(data['ax'],data['ay'],data['az'],data['gx'],data['gy'],data['gz'],data['mx'],data['my'],data['mz'])
        fea = fea_alti + fea_ori
        return fea

    # required
    def fit(self, data, labels):
        """Train the random forest and print its accuracy on the training data.

        Returns ``self`` so the call can be chained, following the
        scikit-learn estimator convention.
        """
        self.clf = RandomForestClassifier(min_samples_leaf=10, max_depth=15, max_features=12,
                                          random_state=self.random_state)
        self.clf.fit(data, labels)
        # Accuracy on the training data itself, not a held-out estimate.
        print('fit score', self.clf.score(data,labels))
        return self

    # required
    def predict(self, feature):
        """Predict the label of a single feature vector."""
        fea = [feature]
        res = self.clf.predict(fea)[0]
        return res

    def get_fea_alti(self, alti):
        """Five median features of the smoothed, normalised altitude gain."""
        t, alti = parse(alti)
        t = t[self._ALTI_SKIP:]
        alti = alti[self._ALTI_SKIP:]
        alti = lp_filter(alti, .998)
        # Express altitude relative to its minimum, saturate at the cap, scale to [0, 1].
        alti = alti - np.min(alti)
        alti = np.minimum(alti, self._ALTI_CAP) / self._ALTI_CAP
        # Normalise time to [0, 1] so segments are relative to the trace length.
        t = (t-np.min(t)) / (np.max(t)-np.min(t))
        fea_alti = split_fea(t, alti, 5, lambda x: np.nanpercentile(x,50))
        return fea_alti

    def get_fea_ori(self, ax, ay, az, gx, gy, gz, mx, my, mz):
        """Twenty median features (10 sin, 10 cos) of the smoothed relative yaw."""
        deg2rad = np.pi/180
        parsed = [parse(s) for s in (ax, ay, az, gx, gy, gz, mx, my, mz)]
        # Each stream is resampled on its own grid; trim all of them to the
        # shortest one so they can be stacked and indexed together. When the
        # lengths already agree this is a no-op.
        n = min(len(values) for _, values in parsed)
        t = parsed[-1][0][:n]
        ax, ay, az, gx, gy, gz, mx, my, mz = (values[:n] for _, values in parsed)

        gx, gy, gz = gx * deg2rad, gy * deg2rad, gz * deg2rad
        acc = np.array([ax,ay,az])
        gyro = np.array([gx,gy,gz])
        mag = np.array([mx,my,mz])

        # Normalise every accelerometer / magnetometer sample to unit length.
        acc /= (1e-8 + np.linalg.norm(acc,ord=2,axis=0,keepdims=True))
        mag /= (1e-8 + np.linalg.norm(mag,ord=2,axis=0,keepdims=True))

        dt = t[1] - t[0]
        beta = 0.1  # Madgwick filter gain

        # Seed the quaternion by aligning the first three accelerometer samples
        # with the reference direction.
        ref_acc_vector = np.array([[0, 0, 1],[0, 0, 1],[0, 0, 1]])  # a - g for a = 0
        rotation_matrix = Rotation.align_vectors(ref_acc_vector, [acc[:,0],acc[:,1],acc[:,2]])[0].as_matrix()
        q = Rotation.from_matrix(rotation_matrix).as_quat()

        yaws = []
        for i in range(len(t)):
            q = madgwick_update(q, gyro[:,i], acc[:,i], mag[:,i], beta, dt)
            try:
                yaw = Rotation.from_quat(q).as_euler('xyz')[-1]
            except Exception:
                # Best effort: fall back to 0 if the quaternion cannot be
                # converted, without swallowing KeyboardInterrupt/SystemExit
                # the way a bare ``except`` would.
                yaw = 0
            yaws.append(yaw)
        t, yaws = np.array(t[self._ORI_SKIP:]), np.array(yaws[self._ORI_SKIP:])
        yaws -= yaws[0]  # sometime can jump 2pi
        # sin/cos are used so the 2*pi wrap-around does not create discontinuities.
        sinyaws, cosyaws = np.sin(yaws), np.cos(yaws)
        sinyaws, cosyaws = lp_filter(sinyaws, .97), lp_filter(cosyaws, .97)
        fea_sinyaw = split_fea(t, sinyaws, 10)
        fea_cosyaw = split_fea(t, cosyaws, 10)
        fea_ori = fea_sinyaw + fea_cosyaw
        return fea_ori

path_detector = PathDetector()

In [126]:
# Training traces are read from <input_dir>/data/train. On Kaggle, input_dir is
# the dataset directory, so this resolves to the same path that used to be
# hard-coded; locally it follows the working directory instead of failing.
dir_traces_train = join(input_dir, 'data', 'train')
filenames_train = sorted(
    join(dir_traces_train, f)
    for f in listdir(dir_traces_train)
    if isfile(join(dir_traces_train, f))
)
labels = []
features = []
for f in filenames_train:
    trace = Recording(f, no_labels=False, mute=True)
    label = trace.labels['path_idx']
    labels.append(label)
    fea = path_detector.extract(trace)
    features.append(fea)
    # Single-line progress indicator: the carriage return overwrites the previous line.
    print(f, label, len(fea), end = '     \r')

labels = np.array(labels)
features = np.array(features)

  rotation_matrix = Rotation.align_vectors(ref_acc_vector, [acc[:,0],acc[:,1],acc[:,2]])[0].as_matrix()


/kaggle/input/mobile-health-2023-path-detection/data/train/train_trace_270.json 1 25     

NameError: name 'shape' is not defined

In [127]:
# Cache the extracted training features and labels on disk so later cells can
# reload them without repeating the extraction loop.
for cache_name, cached_array in (('pathfea.npy', features), ('pathlabels.npy', labels)):
    np.save(cache_name, cached_array)
print(features.shape, labels.shape)

(263, 25) (263,)


In [157]:
# Reload the cached training features and labels.
features, labels = np.load('pathfea.npy'), np.load('pathlabels.npy')

# Disabled: 5-fold cross-validation of the same classifier configuration.
# clf = RandomForestClassifier(min_samples_leaf=10, max_depth=15, max_features=12)
# scores = cross_val_score(clf, features, labels, cv=5, scoring='accuracy')
# print(scores, np.mean(scores), np.std(scores))

# Train on the full training set, then classify the first training sample as a
# quick smoke test (its value is the cell output).
path_detector.fit(features, labels)
path_detector.predict(features[0])

fit score 0.8745247148288974


4

In [158]:
# Get the path of all test traces. The directory is derived from input_dir so
# the cell works both on Kaggle (same path as before) and on a local checkout.
dir_traces = join(input_dir, 'data', 'test')
filenames = sorted(
    join(dir_traces, f)
    for f in listdir(dir_traces)
    if isfile(join(dir_traces, f))
)

# Extract one feature vector per test trace, in sorted filename order.
pathdec_fea_test = []
for filename in filenames:
    trace = Recording(filename, no_labels=True, mute=True)
    pathdec_fea_test.append(path_detector.extract(trace))
    print(filename, end = '   \r')
pathdec_fea_test = np.array(pathdec_fea_test)
print(pathdec_fea_test.shape)

# Cache the test features so prediction can be re-run without re-extracting.
np.save('pathfea_test.npy', pathdec_fea_test)

  rotation_matrix = Rotation.align_vectors(ref_acc_vector, [acc[:,0],acc[:,1],acc[:,2]])[0].as_matrix()


(376, 25)nput/mobile-health-2023-path-detection/data/test/test_trace_385.json   


In [160]:
# Reload the cached test-set feature matrix saved by the extraction cell above.
pathdec_fea_test = np.load('pathfea_test.npy')

---

# Prediction

In [161]:
# Get the path of all test traces, in the same sorted order that was used when
# the test features were extracted. The directory is derived from input_dir so
# the cell also works outside Kaggle (on Kaggle the path is unchanged).
dir_traces = join(input_dir, 'data', 'test')
filenames = sorted(
    join(dir_traces, f)
    for f in listdir(dir_traces)
    if isfile(join(dir_traces, f))
)

In [None]:
# initialize predictors
# step_counter = StepCounter()

In [162]:
# Loop through all test traces and build the submission rows: one row per
# (trace, category) pair.
solution_file = []
for i, filename in enumerate(filenames):
    # Only 'path_idx' is predicted in this notebook; every other category
    # keeps its placeholder value.
    categorization_results = {'watch_loc': 114514, 'path_idx': 0, 'step_count': 1919810, 'stand': 114514, 'walk': 114514, 'run': 114514, 'cycle': 114514}
    categorization_results['path_idx'] = path_detector.predict(pathdec_fea_test[i])

    # The trace id is taken as the three characters in front of the last five
    # characters of the path (assumes names like '..._385.json').
    trace_id = filename[-8:-5]
    # Row ids are '<trace id>_<1-based category position>'.
    solution_file.extend(
        [f'{trace_id}_{position}', value]
        for position, value in enumerate(categorization_results.values(), start=1)
    )


In [163]:
# Write the detected step counts into a .csv file to then upload the .csv file to Kaggle
# When cross-checking the .csv file on your computer, we recommend using the text editor and NOT excel so that the results are displayed correctly
# IMPORTANT: Do NOT change the name of the columns ('Id' and 'Category') of the .csv file
# Column names are fixed by the submission format; define them once and reuse
# them for both the frame and the CSV header.
submission_columns = ['Id', 'Category']
submission_file_df = pd.DataFrame(np.asarray(solution_file), columns=submission_columns)
submission_file_df.to_csv('/kaggle/working/submission_path.csv', header=submission_columns, index=False)