In [2]:
import os
import glob
import pandas as pd
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from google.colab import drive

from prettytable import PrettyTable

In [3]:
all_data_folder = 'data'
# drive.mount('/content/drive/')


In [4]:
all_data = pd.DataFrame()

In [5]:
# Method 1 to get all data
for activity_name in os.listdir(all_data_folder):
  activity_path = os.path.join(all_data_folder, activity_name)
  if not os.path.isdir(activity_path):
    continue

  csv_files = [f for f in os.listdir(activity_path) if f.endswith('.csv')]
  for file_name in os.listdir(activity_path):
        csv_file_path = os.path.join(activity_path, file_name)
        if not file_name.endswith('.csv'):
            continue

        df = pd.read_csv(csv_file_path)
        df['activity'] = activity_name
        all_data = pd.concat([all_data, df], ignore_index=True)

print(all_data.head(), all_data.shape)

   accelerometer_X  accelerometer_Y  accelerometer_Z activity
0         0.301669        -0.014365         9.758766     idle
1         0.253785         0.038307         9.749189     idle
2         0.277727        -0.014365         9.777920     idle
3         0.282516        -0.028730         9.773131     idle
4         0.210690         0.004788         9.768343     idle (96900, 4)


In [41]:
# Method 2 to get all data
for activity_name in os.listdir(all_data_folder):

  activity_path = os.path.join(all_data_folder, activity_name)

  if not os.path.isdir(activity_path):
    continue

  csv_paths = glob.glob(os.path.join(activity_path, '*.csv'))

  for csv_path in csv_paths:

    try:
      df = pd.read_csv(csv_path)
    except Exception as e:
      logger.error(f"Error reading {csv_path}: {e}")
      continue

    df['activity'] = activity_name
    all_data = pd.concat([all_data, df], ignore_index=True)

print(all_data.head(), all_data.shape)

   accelerometer_X  accelerometer_Y  accelerometer_Z activity
0         0.301669        -0.014365         9.758766     idle
1         0.253785         0.038307         9.749189     idle
2         0.277727        -0.014365         9.777920     idle
3         0.282516        -0.028730         9.773131     idle
4         0.210690         0.004788         9.768343     idle (835920, 4)


In [6]:
X = all_data.drop('activity', axis=1)
y = all_data['activity']

In [7]:
from numpy.random import test
X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.2, random_state=42)

In [8]:
X_train['time_mean'] = X_train.mean(axis=1)
X_test['time_mean'] = X_test.mean(axis=1)

In [9]:
svm_model = SVC()
rfc_model = RandomForestClassifier()

In [11]:
%%time
svm_model.fit(X_train, y_train)

CPU times: user 1min, sys: 669 ms, total: 1min 1s
Wall time: 1min 13s


In [10]:
%%time
rfc_model.fit(X_train, y_train)

CPU times: user 7.85 s, sys: 46.5 ms, total: 7.89 s
Wall time: 7.92 s


In [12]:
%%time
svm_prediction = svm_model.predict(X_test)

CPU times: user 14.1 s, sys: 11.5 ms, total: 14.1 s
Wall time: 14.1 s


In [13]:
%%time
rfc_prediction = rfc_model.predict(X_test)

CPU times: user 223 ms, sys: 0 ns, total: 223 ms
Wall time: 225 ms


In [14]:
svm_accuracy = accuracy_score(y_test, svm_prediction)


print("-"*45)
print("Accuracy (SVM):", svm_accuracy)

---------------------------------------------
Accuracy (SVM): 0.8785345717234262


In [15]:
rfc_accuracy = accuracy_score(y_test, rfc_prediction)
print("-"*45)
print("Accuracy (Random Forest):", rfc_accuracy)

---------------------------------------------
Accuracy (Random Forest): 0.9985036119711043


In [20]:
svm_report = classification_report(y_test, svm_prediction)
print("SVM Results:")
print(svm_report)

rfc_report = classification_report(y_test, rfc_prediction)
print("Random Forest Results:")
print(rfc_report)

SVM Results:
              precision    recall  f1-score   support

        idle       0.97      0.99      0.98      4921
     running       0.90      0.89      0.90      7615
      stairs       1.00      0.01      0.01       957
     walking       0.78      0.91      0.84      5887

    accuracy                           0.88     19380
   macro avg       0.91      0.70      0.68     19380
weighted avg       0.89      0.88      0.86     19380

Random Forest Results:
              precision    recall  f1-score   support

        idle       1.00      1.00      1.00      4921
     running       1.00      1.00      1.00      7615
      stairs       1.00      0.99      1.00       957
     walking       1.00      1.00      1.00      5887

    accuracy                           1.00     19380
   macro avg       1.00      1.00      1.00     19380
weighted avg       1.00      1.00      1.00     19380



In [26]:
print("-"*5)
print("| {0:^15} | {1:^15} | {2:^15} |".format('Activity', 'Rows', 'Columns'))
print("-"*55)

for activity_name in all_data['activity'].unique():
  activity_df = all_data[all_data['activity'] == activity_name]
  print("| {0:^15} | {1:^15} | {2:^15} |".format(
    activity_name, len(activity_df), len(activity_df.columns)))

print("-"*55)

-------------------------------------------------------
|    Activity     |      Rows       |     Columns     |
-------------------------------------------------------
|      idle       |      24630      |        4        |
|     stairs      |      4950       |        4        |
|     walking     |      29160      |        4        |
|     running     |      38160      |        4        |
-------------------------------------------------------


In [62]:
svm_report = classification_report(y_test, svm_prediction, target_names=['Precision', 'Recall', 'F1-score', 'Support'])  # Replace with your class names

print("SVM Results:")
svm_table = PrettyTable()
svm_table.field_names = ['Precision', 'Recall', 'F1-score', 'Support']  # Replace with your class names
for line in svm_report.split('\n'):
    if line:
      print(line)
      if len(line) == 5:
        metric, rows, columns, _, _, = line.split()
        svm_table.add_row([metric, rows, columns])
      elif len(line) == 4:
        metric, rows, columns, _ = line.split()
        svm_table.add_row([metric, rows, columns])
      elif len(line) == 3:
        metric, macro_avg_value, weighted_avg = line.split()
        svm_table.add_row([metric, rows, columns])

print(svm_table)


SVM Results:
              precision    recall  f1-score   support
   Precision       0.97      0.99      0.98      4921
      Recall       0.90      0.89      0.90      7615
    F1-score       1.00      0.01      0.01       957
     Support       0.78      0.91      0.84      5887
    accuracy                           0.88     19380
   macro avg       0.91      0.70      0.68     19380
weighted avg       0.89      0.88      0.86     19380
+-----------+--------+----------+---------+
| Precision | Recall | F1-score | Support |
+-----------+--------+----------+---------+
+-----------+--------+----------+---------+
