In [1]:
import pandas as pd
import numpy as np
from pathlib import Path
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report

In [23]:

# Шлях до папки з даними
base_path = Path("data")
from os import listdir
activity_folders = listdir(base_path)
activity_folders

['idle', 'running', 'stairs', 'walking']

In [3]:
# Функція для завантаження CSV-файлів з підпапки
def load_data_from_folder(folder_path, activity_label):
    all_files = folder_path.glob("*.csv")
    data_list = []
    
    for file in all_files:
        df = pd.read_csv(file)
        df['activity'] = activity_label  # Додамо стовпчик з назвою активності
        data_list.append(df)
    
    return pd.concat(data_list, ignore_index=True)

# Завантажимо дані з усіх підпапок
all_data = []
for activity in activity_folders:
    folder_path = base_path / activity
    activity_data = load_data_from_folder(folder_path, activity)
    all_data.append(activity_data)

# Об'єднаємо всі дані в один DataFrame
df = pd.concat(all_data, ignore_index=True)
df

Unnamed: 0,accelerometer_X,accelerometer_Y,accelerometer_Z,activity
0,1.000776,4.616021,8.576031,idle
1,0.718261,4.209007,8.446744,idle
2,-0.909797,-0.282516,9.203311,idle
3,5.099650,0.148441,8.418014,idle
4,1.762132,-0.162806,9.251195,idle
...,...,...,...,...
193855,5.109226,-15.452178,-1.470040,walking
193856,6.373365,-11.741165,-8.226476,walking
193857,3.289633,-9.993398,-0.383072,walking
193858,-2.978387,-3.050213,1.273715,walking


In [4]:

# Функція для додавання статистичних фічей
def add_stat_features(df):
    features = {}
    
    features['mean_X'] = df['accelerometer_X'].mean()
    features['std_X'] = df['accelerometer_X'].std()
    features['min_X'] = df['accelerometer_X'].min()
    features['max_X'] = df['accelerometer_X'].max()
    
    features['mean_Y'] = df['accelerometer_Y'].mean()
    features['std_Y'] = df['accelerometer_Y'].std()
    features['min_Y'] = df['accelerometer_Y'].min()
    features['max_Y'] = df['accelerometer_Y'].max()
    
    features['mean_Z'] = df['accelerometer_Z'].mean()
    features['std_Z'] = df['accelerometer_Z'].std()
    features['min_Z'] = df['accelerometer_Z'].min()
    features['max_Z'] = df['accelerometer_Z'].max()
    
    return pd.Series(features)

# Згрупуємо дані по файлах і додамо статистичні фічі
grouped = df.groupby(['activity', df.index // 30]).apply(add_stat_features).reset_index()
grouped

  grouped = df.groupby(['activity', df.index // 30]).apply(add_stat_features).reset_index()


Unnamed: 0,activity,level_1,mean_X,std_X,min_X,max_X,mean_Y,std_Y,min_Y,max_Y,mean_Z,std_Z,min_Z,max_Z
0,idle,0,0.178448,1.036361,-0.909797,5.099650,0.167435,1.157603,-0.282516,4.616021,9.605697,0.406903,8.418014,9.806650
1,idle,1,-0.098641,0.125848,-0.320823,0.407014,-0.131202,0.048059,-0.244209,0.023942,9.771216,0.025419,9.667787,9.806650
2,idle,2,-0.099918,0.015642,-0.124498,-0.062249,0.220905,0.016383,0.177171,0.244209,9.768503,0.014528,9.739613,9.792285
3,idle,3,0.400949,0.052889,0.268151,0.560243,-0.029528,0.054525,-0.205901,0.086191,9.752541,0.017967,9.725247,9.797073
4,idle,4,0.400949,0.053187,0.268151,0.560243,-0.027134,0.054004,-0.205901,0.086191,9.753659,0.018902,9.725247,9.797073
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6457,walking,6457,0.396639,3.474121,-5.990292,7.192182,-10.702081,5.285310,-25.201366,0.181959,-0.202230,8.666662,-20.058622,32.599450
6458,walking,6458,0.418028,3.533930,-7.359776,7.192182,-10.736398,5.329992,-25.201366,0.181959,-0.090980,8.675810,-20.058622,32.599450
6459,walking,6459,1.151771,3.948034,-7.359776,11.113884,-10.878295,5.412533,-25.201366,0.181959,0.364557,7.863650,-16.903065,32.599450
6460,walking,6460,1.551123,4.016935,-7.359776,11.113884,-11.146126,5.294201,-25.201366,0.181959,0.233036,7.943439,-16.903065,32.599450


In [9]:

# Розділення даних на тренувальну і тестову вибірки
X = grouped.drop(columns=['activity', 'level_1'])
y = grouped['activity']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42, stratify=y)

# Навчання моделей SVM та RandomForest
svm_model = SVC(kernel='linear')
svm_model.fit(X_train, y_train)
y_pred_svm = svm_model.predict(X_test)
print(f" SVM Accuracy: {svm_model.score(X_test, y_test)}")
print("SVM Classification Report:\n", classification_report(y_test, y_pred_svm))

 SVM Accuracy: 0.9902011346054668
SVM Classification Report:
               precision    recall  f1-score   support

        idle       1.00      1.00      1.00       312
     running       1.00      1.00      1.00      1023
      stairs       0.89      0.69      0.78        49
     walking       0.97      0.99      0.98       555

    accuracy                           0.99      1939
   macro avg       0.97      0.92      0.94      1939
weighted avg       0.99      0.99      0.99      1939


In [10]:

rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)
y_pred_rf = rf_model.predict(X_test)
print("\nRandomForest Classification Report:\n", classification_report(y_test, y_pred_rf))



RandomForest Classification Report:
               precision    recall  f1-score   support

        idle       1.00      1.00      1.00       312
     running       1.00      1.00      1.00      1023
      stairs       1.00      0.96      0.98        49
     walking       1.00      1.00      1.00       555

    accuracy                           1.00      1939
   macro avg       1.00      0.99      0.99      1939
weighted avg       1.00      1.00      1.00      1939
