In [61]:
# This is the template for the submission. If you want, you can develop your algorithm in a regular Python script and copy the code here for submission.

# Team members (e-mail, legi):
# chozhang@ethz.ch, 22-945-562
# minghli@student.ethz.ch, 22-953-293
# changli@student.ethz.ch, 22-944-474

In [62]:
from os import listdir
from os.path import basename, isfile, join

import numpy as np
import pandas as pd
from scipy import signal

from Lilygo.Dataset import Dataset
from Lilygo.Recording import Recording

# import matplotlib.pyplot as plt
# %matplotlib inline

In [63]:
# Collect the full path of every regular file in the traces directory, in sorted order
dir_traces = '/kaggle/input/mobile-health-2023-step-count/data/traces'
filenames = sorted(
    path
    for path in (join(dir_traces, entry) for entry in listdir(dir_traces))
    if isfile(path)  # skip sub-directories and other non-file entries
)

In [64]:
### signal processing functions ###
def parse(signal, ds_freq:float=20.0, zero_mean:bool=False):
    """Resample one sensor channel onto a uniform time grid by linear interpolation.

    Args:
        signal: object exposing `.timestamps` and `.values` sequences of equal length.
            Note: inside this function the name shadows the module-level `scipy.signal`.
        ds_freq: target sampling rate, in samples per timestamp unit
            (Hz if the timestamps are in seconds -- TODO confirm the unit).
        zero_mean: when True, the mean of the original samples is subtracted
            before interpolating.

    Returns:
        (time_seq, value_seq): the uniform time grid, running from the smallest timestamp
        up to (but excluding) the largest one in steps of 1/ds_freq, and the values
        interpolated at those instants.
    """
    src_times = np.array(signal.timestamps)
    src_values = np.array(signal.values)
    if zero_mean:
        src_values = src_values - src_values.mean()

    step = 1. / ds_freq
    grid = np.arange(src_times.min(), src_times.max(), step)
    return grid, np.interp(grid, src_times, src_values)
    
def bp_filter(value_seq, fp:float=3, fs:float=20.0, f_low:float=0.5, order:int=4):
    """Band-pass filter a uniformly sampled sequence with a Butterworth filter.

    Args:
        value_seq: samples taken at the constant rate `fs`.
        fp: upper cut-off frequency of the pass band (same unit as `fs`).
        fs: sampling frequency of `value_seq`.
        f_low: lower cut-off frequency of the pass band (same unit as `fs`), default 0.5.
        order: the `N` argument handed to `scipy.signal.butter`, default 4.

    Returns:
        The filtered sequence, with the same length as `value_seq`.

    Raises:
        ValueError: if the band edges do not satisfy 0 < f_low < fp < fs / 2.
    """
    # Fail early with a readable message instead of relying on scipy's internal checks.
    if not 0 < f_low < fp < fs / 2:
        raise ValueError(
            f"band edges must satisfy 0 < f_low < fp < fs/2, got f_low={f_low}, fp={fp}, fs={fs}"
        )
    # Second-order sections are numerically more robust than (b, a) coefficients.
    sos = signal.butter(N=order, Wn=[f_low, fp], btype='bandpass', fs=fs, output='sos')
    # sosfilt is a single forward (causal) pass, so the output lags the input slightly;
    # that is harmless when only the number of peaks matters.
    return signal.sosfilt(sos, value_seq)
    
def get_envelop(value_seq, fs:float=20, half_window_size:float=0.5, _min:float=20., _max:float=500.):
    """Running-maximum envelope of a 1-D sequence, clipped to [_min, _max].

    For every sample the envelope is the maximum of the sequence inside a centred window
    of `2 * int(fs * half_window_size) + 1` samples. The sequence is zero-padded at both
    ends, so the result is only a meaningful envelope for non-negative input (e.g. a
    vector magnitude).

    Args:
        value_seq: 1-D sequence of samples taken at rate `fs`.
        fs: sampling frequency of `value_seq`.
        half_window_size: half width of the window, in the time unit matching `fs`
            (seconds if `fs` is in Hz).
        _min: lower clip bound applied to the envelope.
        _max: upper clip bound applied to the envelope.

    Returns:
        Array with the same length as `value_seq`.

    Note:
        The default clip bounds were chosen for gyro readings. With these defaults any
        input whose local maximum stays below 20 yields a constant envelope of 20.
    """
    half_win = int(fs*half_window_size)
    pad = np.zeros((half_win,))
    padded = np.concatenate([pad, value_seq, pad])
    if len(value_seq) == 0:
        # No full window exists for an empty input; return an empty envelope.
        return np.clip(padded[:0], _min, _max)
    # One row per input sample: row k is padded[k : k + 2*half_win + 1], i.e. the window
    # centred on value_seq[k]. This is a view, so no per-window copies are made.
    windows = np.lib.stride_tricks.sliding_window_view(padded, 2*half_win + 1)
    return np.clip(windows.max(axis=1), _min, _max)

In [65]:
# Loop through all traces and calculate the step count for each trace.
#
# Only data from the LilyGo wristband may be used, i.e. these keys of trace.data:
#   3-axis accelerometer: ax, ay, az
#   3-axis gyro:          gx, gy, gz
#   3-axis magnetometer:  mx, my, mz
#   IMU temperature:      temperature
# Per the original notes: magnetometer/temperature samples arrive every 80 ms, the other
# channels every 50 ms, and acceleration is in units of g.
#
# Pipeline per trace: resample the three acceleration axes -> magnitude -> divide by the
# clipped running-max envelope -> band-pass -> keep the positive half-wave -> count peaks.

FS_HZ = 20.0               # common rate for resampling, envelope window, filter and peak spacing
MIN_STEP_INTERVAL_S = 0.2  # two detected steps must be at least this far apart
PEAK_MIN_HEIGHT = 0.01     # minimum height of a peak to count as a step (not tuned for acceleration)
PEAK_GAIN = 4              # the author observed ~1/4 amplitude after filtering, hence 4x

stepCounts = []
ids = []
for filename in filenames:
    trace = Recording(filename, no_labels=True, mute=True)
    data = trace.data

    # Resample each acceleration axis onto the same uniform grid and take the magnitude.
    acc_axes = [parse(data[key], ds_freq=FS_HZ)[1] for key in ('ax', 'ay', 'az')]
    acc_mag = np.sqrt(np.sum(np.square(acc_axes), axis=0))

    # NOTE(review): get_envelop's default clip range [20, 500] was chosen for gyro data.
    # If the acceleration magnitude is around 1 g the envelope is the constant 20, so this
    # division is a fixed 1/20 scaling rather than an adaptive norm -- confirm this is intended.
    normalised = acc_mag / get_envelop(acc_mag, fs=FS_HZ)

    band = bp_filter(normalised, fs=FS_HZ)
    rectified = np.maximum(band, 0.0) * PEAK_GAIN  # drop the negative half-wave, then amplify

    # Every remaining peak is counted as one step.
    peaks = signal.find_peaks(
        rectified, height=PEAK_MIN_HEIGHT, distance=FS_HZ * MIN_STEP_INTERVAL_S
    )[0]
    stepCount = len(peaks)
    print(stepCount)

    # Append the calculated step count and the id of the trace to the corresponding arrays.
    # The id is taken from the file name only, so an underscore anywhere in the directory
    # path cannot shift the split.
    stepCounts.append(stepCount)
    ids.append(basename(filename).split('_')[1][:2])

102
202
104
104
107
115
131
44
158
75


In [66]:
# Store the detected step counts in a .csv file that is then uploaded to Kaggle.
# To cross-check the .csv locally, open it in a text editor rather than Excel so the
# values are displayed exactly as written.
# IMPORTANT: the column names 'Id' and 'Predicted' must stay exactly as they are.
submission_columns = {'Id': ids, 'Predicted': stepCounts}
df = pd.DataFrame(submission_columns)
df.to_csv('/kaggle/working/submission.csv', index=False)