In [8]:
from pathlib import Path
import pandas as pd

csv_data_path  = Path("data")
class_path = list(csv_data_path.iterdir())
class_list = [d.name for d in class_path if d.name != '.DS_Store']
coordinate = ['x', 'y', 'z']

new_rows = []

for c in class_list:
    files_path = csv_data_path / c
    if files_path.is_dir():
        for file in files_path.iterdir():
            if file.is_file():
                try:
                    data = pd.read_csv(file)
                    
                    for index, row in data.iterrows():
                        new_rows.append({
                            'activity': c,
                            'x': row['accelerometer_X'],
                            'y': row['accelerometer_Y'],
                            'z': row['accelerometer_Z']
                        })
                        
                except Exception as e:
                    print(f"Помилка під час читання файлу {c}: {e}")

df = pd.DataFrame(new_rows)
df.to_csv("combined_data.csv", index=False)
df.head()

Unnamed: 0,activity,x,y,z
0,running,1.757344,-5.875371,-3.878607
1,running,2.461239,12.98615,1.762132
2,running,22.682667,22.922087,-1.73819
3,running,11.87524,4.611233,-8.389283
4,running,-5.295974,4.783615,-1.273715


In [2]:
df.isna().sum()

activity    0
x           0
y           0
z           0
dtype: int64

In [3]:
size = df.shape
size

(193860, 4)

## time domain features

In [4]:
import numpy as np

def time_domain_features(data, col):
    features = {}
    
    features[f'{col}_mean'] = data[col].mean()    
    features[f'{col}_std'] = data[col].std()
    features[f'{col}_min'] = data[col].min()
    features[f'{col}_max'] = data[col].max()
    features[f'{col}_range'] = data[col].median
    features[f'{col}_rmse'] = np.sqrt(np.mean(data[col] ** 2))
    features[f'{col}_ir'] = np.percentile(data[col], 75) - np.percentile(data[col], 25)
    features[f'{col}_range'] = np.max(data[col]) - np.min(data[col])

    return features

In [5]:
features_X = time_domain_features(df, 'x')
features_Y = time_domain_features(df, 'y')
features_Z = time_domain_features(df, 'z')

combined_features = {**features_X, **features_Y, **features_Z}
features_df = pd.DataFrame([combined_features])
repeated_df = pd.concat([features_df] * size[0], ignore_index=True)
combined_df = pd.concat([df, repeated_df], axis=1)
combined_df.head()

Unnamed: 0,activity,x,y,z,x_mean,x_std,x_min,x_max,x_range,x_rmse,...,y_range,y_rmse,y_ir,z_mean,z_std,z_min,z_max,z_range,z_rmse,z_ir
0,running,1.757344,-5.875371,-3.878607,1.92355,8.404867,-39.188293,39.188293,78.376586,8.62215,...,78.376586,12.575993,16.998832,1.804896,7.19159,-39.188293,39.188293,78.376586,7.414604,9.682152
1,running,2.461239,12.98615,1.762132,1.92355,8.404867,-39.188293,39.188293,78.376586,8.62215,...,78.376586,12.575993,16.998832,1.804896,7.19159,-39.188293,39.188293,78.376586,7.414604,9.682152
2,running,22.682667,22.922087,-1.73819,1.92355,8.404867,-39.188293,39.188293,78.376586,8.62215,...,78.376586,12.575993,16.998832,1.804896,7.19159,-39.188293,39.188293,78.376586,7.414604,9.682152
3,running,11.87524,4.611233,-8.389283,1.92355,8.404867,-39.188293,39.188293,78.376586,8.62215,...,78.376586,12.575993,16.998832,1.804896,7.19159,-39.188293,39.188293,78.376586,7.414604,9.682152
4,running,-5.295974,4.783615,-1.273715,1.92355,8.404867,-39.188293,39.188293,78.376586,8.62215,...,78.376586,12.575993,16.998832,1.804896,7.19159,-39.188293,39.188293,78.376586,7.414604,9.682152


## алгоритм SVM

In [6]:
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix,  precision_score, recall_score, f1_score, classification_report

In [7]:
X = combined_df.drop(columns=['activity'])
y = combined_df['activity']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [8]:
svm_model = SVC()
svm_model.fit(X_train, y_train)

y_pred = svm_model.predict(X_test)

In [9]:
accuracy_svm = accuracy_score(y_test, y_pred)
report_svm = classification_report(y_test, y_pred)

print(f'Accuracy RF: {accuracy_svm}')
print(f"SVM Classification Report:\n{report_svm}")

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Accuracy RF: 0.8536056948313215
SVM Classification Report:
              precision    recall  f1-score   support

        idle       0.86      0.88      0.87      6178
     running       0.90      0.87      0.88     20510
      stairs       0.00      0.00      0.00       915
     walking       0.78      0.87      0.83     11169

    accuracy                           0.85     38772
   macro avg       0.63      0.66      0.65     38772
weighted avg       0.84      0.85      0.84     38772



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


## алгоритм випадковий ліс

In [10]:
rm_fr_model = RandomForestClassifier(random_state=42)
rm_fr_model.fit(X_train, y_train)

y_pred_rf = rm_fr_model.predict(X_test)

In [12]:
accuracy_rf = accuracy_score(y_test, y_pred_rf)
report_rf = classification_report(y_test, y_pred_rf)

print(f'Accuracy RF: {accuracy_rf}')
print(f"SVM Classification Report:\n{report_rf}")

Accuracy RF: 0.999871040957392
SVM Classification Report:
              precision    recall  f1-score   support

        idle       1.00      1.00      1.00      6178
     running       1.00      1.00      1.00     20510
      stairs       1.00      1.00      1.00       915
     walking       1.00      1.00      1.00     11169

    accuracy                           1.00     38772
   macro avg       1.00      1.00      1.00     38772
weighted avg       1.00      1.00      1.00     38772

