In [1]:
import os
import pandas as pd
import seaborn as sns

In [2]:
files = os.listdir()

In [3]:
res = [file for file in files if file.endswith('.csv')]

In [4]:
not_fall_files = [file for file in res if not('backwardFall' in file or 'lateralFall' in file or 'forwardFall' in file)]

In [5]:
fall_files = [file for file in res if 'backwardFall' in file or 'lateralFall' in file or 'forwardFall' in file]

In [6]:
master_not_fall_dfs = []

In [7]:
for file in not_fall_files:
    temp_data = pd.read_csv(file, skiprows=40, on_bad_lines='skip', delimiter = ';')
    temp_data.columns = temp_data.columns.str.lower()
    temp_data.columns = temp_data.columns.str.replace('%', '')
    temp_data.columns = temp_data.columns.str.strip()
    temp_data.columns = temp_data.columns.str.replace(' ', '_')
    
    master_not_fall_dfs.append(temp_data)

In [8]:
master_not_fall_df = pd.concat(master_not_fall_dfs, ignore_index=True)

In [9]:
master_not_fall_df.drop(columns=[''], inplace=True)

In [10]:
master_not_fall_df['fall'] = 0

In [11]:
master_not_fall_df = master_not_fall_df[master_not_fall_df['sensor_id'] == 3]

In [12]:
master_not_fall_df

Unnamed: 0,timestamp,sample_no,x-axis,y-axis,z-axis,sensor_type,sensor_id,fall
4768,111,0,-0.736572,0.518311,0.372559,0,3,0
4769,145,1,-0.734863,0.520508,0.444824,0,3,0
4770,150,2,-0.734863,0.520508,0.444824,0,3,0
4771,228,3,-0.727295,0.523193,0.370361,0,3,0
4772,232,4,-0.727295,0.523193,0.370361,0,3,0
...,...,...,...,...,...,...,...,...
3384463,14770,295,26.0,-6.5,-172.333328,2,3,0
3384464,14832,296,28.5,-7.166667,-127.0,2,3,0
3384465,14896,297,26.666666,-7.5,-122.833336,2,3,0
3384466,14949,298,25.833334,-11.0,-123.666664,2,3,0


In [13]:
master_fall_dfs = []

In [14]:
for file in fall_files:
    temp_data = pd.read_csv(file, skiprows=40, on_bad_lines='skip', delimiter = ';')
    temp_data.columns = temp_data.columns.str.lower()
    temp_data.columns = temp_data.columns.str.replace('%', '')
    temp_data.columns = temp_data.columns.str.strip()
    temp_data.columns = temp_data.columns.str.replace(' ', '_')
    
    master_fall_dfs.append(temp_data)

In [15]:
master_fall_df = pd.concat(master_fall_dfs, ignore_index=True)

In [16]:
master_fall_df.drop(columns=[''], inplace=True)

In [17]:
master_fall_df['fall'] = 1

In [18]:
master_fall_df = master_fall_df[master_fall_df['sensor_id'] == 3]

In [19]:
master_fall_df

Unnamed: 0,timestamp,sample_no,x-axis,y-axis,z-axis,sensor_type,sensor_id,fall
4777,160,0,-0.931641,-0.162598,0.198730,0,3,1
4778,185,1,-0.931641,-0.162598,0.198730,0,3,1
4779,216,2,-0.929199,-0.167725,0.190430,0,3,1
4780,232,3,-0.929199,-0.167725,0.190430,0,3,1
4781,290,4,-0.935303,-0.165039,0.192139,0,3,1
...,...,...,...,...,...,...,...,...
1302041,14625,277,-10.000000,22.833334,-116.333336,2,3,1
1302042,14719,278,-9.166667,22.833334,-116.333336,2,3,1
1302043,14801,279,-7.166667,21.666666,-115.166664,2,3,1
1302044,14934,280,-10.166667,22.666666,-113.333336,2,3,1


In [20]:
df = pd.concat([master_fall_df, master_not_fall_df])

In [21]:
df['x-axis'] = pd.to_numeric(df['x-axis'], errors='coerce')
df['y-axis'] = pd.to_numeric(df['y-axis'], errors='coerce')
df['z-axis'] = pd.to_numeric(df['z-axis'], errors='coerce')

In [22]:
df.ffill(inplace=True)

In [23]:
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

In [24]:
X = df[['x-axis', 'y-axis', 'z-axis', 'sensor_type']]

In [25]:
y = df['fall']

In [26]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [32]:
rf_classifier = RandomForestClassifier(n_estimators=100, random_state=42)
gb_classifier = GradientBoostingClassifier(n_estimators=100, learning_rate=0.1, random_state=42)
xgb_classifier = XGBClassifier(n_estimators=100, learning_rate=0.1, random_state=42)

In [33]:
models = {
    'Random Forest': rf_classifier,
    'Gradient Boosting': gb_classifier,
    'XGBoost': xgb_classifier
}

In [34]:
results = []

In [35]:
for model_name, model in models.items():

    model.fit(X_train, y_train)
    
    y_pred = model.predict(X_test)
    
    accuracy = accuracy_score(y_test, y_pred) * 100
    precision = precision_score(y_test, y_pred, average='weighted') * 100
    recall = recall_score(y_test, y_pred, average='weighted') * 100
    f1 = f1_score(y_test, y_pred, average='weighted') * 100
    
    
    results.append([model_name, accuracy, precision, recall, f1])

In [36]:
pd.DataFrame(results, columns=['Model', 'Accuracy', 'Precision', 'Recall', 'F1-Score'])


Unnamed: 0,Model,Accuracy,Precision,Recall,F1-Score
0,Random Forest,87.594368,87.369288,87.594368,87.126518
1,Gradient Boosting,79.401628,78.496343,79.401628,77.158802
2,XGBoost,82.636338,82.075368,82.636338,81.429001
