#### This notebook train a simple Logistic Regression model on accelerometer data aggregated from X, Y, Z in different windows size.


In [1]:
import os
import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression

In [2]:
from collections import Counter
def extract_features_from_window(x, y, z, action):
    # Compute magnitude
    mag = np.sqrt(x**2 + y**2 + z**2)

    # Features
    mean_mag = np.mean(mag)
    std_mag = np.std(mag)
    max_mag = np.max(mag)
    energy = np.sum(mag**2)
    # peaks, _ = find_peaks(mag)
    # num_peaks = len(peaks)

    ACTION = Counter(action).most_common()[0][0]
    return {
        "mean_magnitude": mean_mag,
        "std_magnitude": std_mag,
        "max_magnitude": max_mag,
        "energy": energy,
        "action": ACTION
        # "num_peaks": num_peaks
    }

In [3]:
def process_csv(df, window_size=20, step_size=5):
    features_list = []

    for start in range(0, len(df) - window_size + 1, step_size):
        window = df.iloc[start:start + window_size]
        features = extract_features_from_window(window['X'].values, window['Y'].values, window['Z'].values, window['ACTION'].values)
        features_list.append(features)
    return pd.DataFrame(features_list)

In [4]:
ACTIONS = ['BENDING_DOWN', 'FALLING_BACKWARDS', 'FALLING_FORWARD', 'HAND_WAVING', 'SITTING', 'WALKING']
df = []
DATA_DIR = '../data'
for action in ACTIONS:
    no_csv = len(os.listdir(f'{DATA_DIR}/{action}'))
    action_df = pd.concat([pd.read_csv(f'{DATA_DIR}/{action}/{action}_{i}.csv') for i in range(1, no_csv + 1, 1)]) 
    df.append(action_df)
df = pd.concat(df)

In [47]:
df_bending_down = df[df['ACTION'] == 'BENDING_DOWN']
df_falling_bacwards = df[df['ACTION'] == 'FALLING_BACKWARDS']
df_falling_forward = df[df['ACTION'] == 'FALLING_FORWARD']
df_hand_waving = df[df['ACTION'] == 'HAND_WAVING']
df_sitting = df[df['ACTION'] == 'SITTING']
df_walking = df[df['ACTION'] == 'WALKING']

#### In training we overlap window size by 10% to augment the sample count for training.

In [52]:
window_size = 40
step_size = 40
cutoff_value = 300
df_bending_down_train = process_csv(df_bending_down.iloc[:-cutoff_value], window_size, step_size)
df_falling_bacwards_train = process_csv(df_falling_bacwards.iloc[:-cutoff_value], window_size, step_size)
df_falling_forward_train = process_csv(df_falling_forward.iloc[:-cutoff_value], window_size, step_size)
df_hand_waving_train = process_csv(df_hand_waving.iloc[:-cutoff_value], window_size, step_size)
df_sitting_train = process_csv(df_sitting.iloc[:-cutoff_value], window_size, step_size)
df_walking_train = process_csv(df_walking.iloc[:-cutoff_value], window_size, step_size)

#### In testing we don't set any window overlap.

In [7]:
step_size = 40
df_bending_down_val = process_csv(df_bending_down.iloc[-cutoff_value:], window_size, step_size)
df_falling_bacwards_val = process_csv(df_falling_bacwards.iloc[-cutoff_value:], window_size, step_size)
df_falling_forward_val= process_csv(df_falling_forward.iloc[-cutoff_value:], window_size, step_size)
df_hand_waving_val = process_csv(df_hand_waving.iloc[-cutoff_value:], window_size, step_size)
df_sitting_val = process_csv(df_sitting.iloc[-cutoff_value:], window_size, step_size)
df_walking_val = process_csv(df_walking.iloc[-cutoff_value:], window_size, step_size)

In [8]:
df.groupby('ACTION').size() // 20

ACTION
BENDING_DOWN          46
FALLING_BACKWARDS     40
FALLING_FORWARD       38
HAND_WAVING          119
SITTING               48
WALKING               69
dtype: int64

#### Combine all action data and balance the action distribution

In [9]:
df_train = pd.concat([
                df_bending_down_train[:20],
                df_falling_bacwards_train,
                df_falling_forward_train,
                df_hand_waving_train[:20],
                df_sitting_train[:20],
                df_walking_train[:20]
                ], axis=0)

#### Convert Action into 0 and 1

bending_down: 0 <br>
hand_waving: 0 <br>
sitting: 0 <br>
walking: 0 <br>

falling_backwards: 1 <br>
falling_forward: 1 <br>

In [10]:
df_train['action'] = df_train['action'].apply(lambda x: 1 if "FALL" in x else 0)

In [11]:
df_train['action'].value_counts()

1    90
0    80
Name: action, dtype: int64

In [23]:
from sklearn.metrics import f1_score, accuracy_score, classification_report

lr = LogisticRegression()
print('Training performance')
model = lr.fit(df_train.iloc[:, :-1].values, df_train.iloc[:, -1].values)
y_true = df_train.iloc[:, -1].values
y_pred = model.predict(df_train.iloc[:, :-1].values)
print(f'Accuracy: {accuracy_score(y_true, y_pred)}, F1: {f1_score(y_true, y_pred)}')
print(classification_report(y_true, y_pred))

Training performance
Accuracy: 0.7588235294117647, F1: 0.7807486631016044
              precision    recall  f1-score   support

           0       0.77      0.70      0.73        80
           1       0.75      0.81      0.78        90

    accuracy                           0.76       170
   macro avg       0.76      0.76      0.76       170
weighted avg       0.76      0.76      0.76       170



In [25]:
df_val.shape

(21, 5)

In [14]:
df_val = pd.concat([
                # df_bending_down_val,
                df_falling_bacwards_val,
                df_falling_forward_val,
                # df_hand_waving_val,
                # df_sitting_val,
                df_walking_val
                ], axis=0)
df_val['action'] = df_val['action'].apply(lambda x: 1 if "FALL" in x else 0)
df_val['action'].value_counts()

1    14
0     7
Name: action, dtype: int64

In [15]:
print('Validation performance')
y_true = df_val.iloc[:, -1].values
y_pred = model.predict(df_val.iloc[:, :-1].values)
print(f'Accuracy: {accuracy_score(y_true, y_pred)}, F1: {f1_score(y_true, y_pred)}')
print(classification_report(y_true, y_pred))

Validation performance
Accuracy: 0.8571428571428571, F1: 0.88
              precision    recall  f1-score   support

           0       0.70      1.00      0.82         7
           1       1.00      0.79      0.88        14

    accuracy                           0.86        21
   macro avg       0.85      0.89      0.85        21
weighted avg       0.90      0.86      0.86        21



#### Extract the features coefficient and bias

In [16]:
coefficients = lr.coef_[0]
bias = lr.intercept_.item()

print(coefficients, bias)

[ 1.97815808e-01 -2.51094097e-02  3.08715255e+00  2.55277462e-03] -5.63985741613418


### Check final value for correctness

The following image are computed from RPI Pico simulation here: https://wokwi.com/projects/427470808780055553
![rpi_simulator](wokwi.png)

In [17]:
# simoid function to transform continuous value into bounded value [0,1]
def sigmoid(s):
    return 1 / 1 + np.exp(-s)

In [18]:
feat = np.array([[9.81, 0, 9.81, 3846.82]]).flatten()

In [19]:
s = -5.63985741613418
for i, w in enumerate(lr.coef_[0]):
    s += w * feat[i]

In [20]:
print(s)

36.405746598076654


In [21]:
sigmoid(s)

1.0000000000000002