In [1]:
import pandas as pd
import numpy as np
from pathlib import Path
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report

In [2]:
# Reading and Merging Data from Multiple Sources

In [3]:
dataset_path = Path('data')
csv_files = dataset_path.glob('**/*.csv')

dataframes = []

for file_path in csv_files:
    activity = file_path.parent.name
    df = pd.read_csv(file_path)
    df['activity'] = activity
    dataframes.append(df)
combined_df = pd.concat(dataframes, ignore_index=True)

combined_df

Unnamed: 0,accelerometer_X,accelerometer_Y,accelerometer_Z,activity
0,1.000776,4.616021,8.576031,idle
1,0.718261,4.209007,8.446744,idle
2,-0.909797,-0.282516,9.203311,idle
3,5.099650,0.148441,8.418014,idle
4,1.762132,-0.162806,9.251195,idle
...,...,...,...,...
193855,5.109226,-15.452178,-1.470040,walking
193856,6.373365,-11.741165,-8.226476,walking
193857,3.289633,-9.993398,-0.383072,walking
193858,-2.978387,-3.050213,1.273715,walking


In [4]:
# Normilize data

In [5]:
columns_to_normalize = ['accelerometer_X', 'accelerometer_Y', 'accelerometer_Z']

scaler = MinMaxScaler()
combined_df[columns_to_normalize] = scaler.fit_transform(combined_df[columns_to_normalize])

combined_df

Unnamed: 0,accelerometer_X,accelerometer_Y,accelerometer_Z,activity
0,0.512769,0.558895,0.609421,idle
1,0.509164,0.553702,0.607771,idle
2,0.488392,0.496395,0.617424,idle
3,0.565066,0.501894,0.607405,idle
4,0.522483,0.497923,0.618035,idle
...,...,...,...,...
193855,0.565188,0.302847,0.481244,walking
193856,0.581317,0.350196,0.395039,walking
193857,0.541972,0.372495,0.495112,walking
193858,0.461999,0.461083,0.516251,walking


In [6]:
# Data separation

In [7]:
## Defining the features and target variable
X = combined_df[['accelerometer_X', 'accelerometer_Y', 'accelerometer_Z']]  # Ознаки
y = combined_df['activity']

## Separation into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [8]:
# time domain features

In [9]:
X_train['time_mean'] = X_train.mean(axis=1)
X_test['time_mean'] = X_test.mean(axis=1)

In [10]:
# Creating SVM model
svm_model = SVC()
svm_model.fit(X_train, y_train)

In [11]:
# Creating RF(random forest) model
rf_model = RandomForestClassifier(n_jobs=-1)
rf_model.fit(X_train, y_train)

In [12]:
# Comparing the accuracy of models

In [13]:
score_svm = svm_model.score(X_test, y_test)
score_rf = rf_model.score(X_test, y_test)

print(f"Accuracy of the SVM model:", score_svm)
print(f"Accuracy of the RF model:", score_rf)

Accuracy of the SVM model: 0.8920354895285257
Accuracy of the RF model: 0.9995873310636542


In [14]:
# Model Evaluation

## SVM predictions
svm_predictions = svm_model.predict(X_test)

## RF predicitons
rf_predictions = rf_model.predict(X_test)

print("SVM Classification Report:")
print(classification_report(y_test, svm_predictions))

print("Random Forest Classification Report:")
print(classification_report(y_test, rf_predictions))

SVM Classification Report:
              precision    recall  f1-score   support

        idle       0.95      0.99      0.97      9306
     running       0.93      0.90      0.92     30609
      stairs       1.00      0.00      0.01      1537
     walking       0.80      0.91      0.85     16706

    accuracy                           0.89     58158
   macro avg       0.92      0.70      0.68     58158
weighted avg       0.90      0.89      0.88     58158

Random Forest Classification Report:
              precision    recall  f1-score   support

        idle       1.00      1.00      1.00      9306
     running       1.00      1.00      1.00     30609
      stairs       1.00      0.99      1.00      1537
     walking       1.00      1.00      1.00     16706

    accuracy                           1.00     58158
   macro avg       1.00      1.00      1.00     58158
weighted avg       1.00      1.00      1.00     58158

