In [22]:
import os
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report

# Funkcja do obliczania cech domeny czasowej
def calculate_time_domain_features(data):
    features = []
    features.append(np.mean(data))
    features.append(np.std(data))
    features.append(np.min(data))
    features.append(np.max(data))
    features.append(np.median(data))
    features.append(pd.Series(data).skew())
    features.append(pd.Series(data).kurtosis())
    return features

# Ścieżki do folderów z danymi
folders = ['running', 'idle', 'walking', 'stairs']

# Wczytaj dane i oblicz cechy domeny czasowej dla każdej aktywności
activity_data = {}
for folder in folders:
    folder_path = f'/drive/notebooks/hmk5/{folder}'  # dostosuj ścieżkę do foldera
    activity_files = []  # lista danych dla danej aktywności
    for file_name in os.listdir(folder_path):
        if file_name.endswith('.csv'):
            file_path = os.path.join(folder_path, file_name)
            df = pd.read_csv(file_path)
            accelerometer_data = df[['accelerometer_X', 'accelerometer_Y', 'accelerometer_Z']]
            time_domain_features = accelerometer_data.apply(calculate_time_domain_features)
            features = np.concatenate(time_domain_features.values, axis=0)
            activity_files.append(features)
    activity_data[folder] = np.array(activity_files)

# Przygotuj dane do klasyfikacji
X = np.concatenate([activity_data[folder] for folder in folders], axis=0)
y = np.concatenate([np.full(len(activity_data[folder]), folder) for folder in folders], axis=0)

# Podziel dane na zbiór treningowy i testowy
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Normalizacja cech
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Utwórz modele
svm_classifier = SVC(kernel='linear')
rf_classifier = RandomForestClassifier(n_estimators=100)

# Dostosuj modele do danych treningowych
svm_classifier.fit(X_train_scaled, y_train)
rf_classifier.fit(X_train_scaled, y_train)

# Przetestuj modele na danych testowych
svm_pred = svm_classifier.predict(X_test_scaled)
rf_pred = rf_classifier.predict(X_test_scaled)

# Ocena wydajności
print("SVM Classification Report:")
print(classification_report(y_test, svm_pred))

print("Random Forest Classification Report:")
print(classification_report(y_test, rf_pred))

SVM Classification Report:
              precision    recall  f1-score   support

        idle       1.00      1.00      1.00       190
     running       1.00      1.00      1.00       693
      stairs       0.87      0.79      0.83        33
     walking       0.98      0.99      0.99       377

    accuracy                           0.99      1293
   macro avg       0.96      0.94      0.95      1293
weighted avg       0.99      0.99      0.99      1293

Random Forest Classification Report:
              precision    recall  f1-score   support

        idle       1.00      1.00      1.00       190
     running       1.00      1.00      1.00       693
      stairs       1.00      0.97      0.98        33
     walking       1.00      1.00      1.00       377

    accuracy                           1.00      1293
   macro avg       1.00      0.99      1.00      1293
weighted avg       1.00      1.00      1.00      1293



In [23]:
print("Rozmiar X_test:", X_test.shape)
print("Rozmiar y_test:", y_test.shape)


Rozmiar X_test: (1293, 21)
Rozmiar y_test: (1293,)
