# Environment prep

In [None]:
from google.colab import drive

# Mount Google Drive into the Colab filesystem so the dataset folder is reachable.
DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/drive


In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras import layers, models
import matplotlib.pyplot as plt
import os

# Load data

In [None]:
import os
import pandas as pd

# Root folder holding one sub-folder of CSV segments per activity class
main_folder_path = '/content/drive/MyDrive/Wifi_Exercise_Dataset/segments'


def load_flattened_segments(root_path, activity_folders):
    """Read every CSV segment found under the given activity sub-folders.

    Each CSV is loaded, its 'timestamp' column is dropped, and the remaining
    values are flattened row by row into a single 1D array.

    Args:
        root_path: Directory that contains one sub-folder per activity.
        activity_folders: Sub-folder names to read; each name is also used as
            the label for the segments found inside it.

    Returns:
        A tuple ``(arrays, labels)`` of two parallel lists: one flattened
        array and one label per CSV file.

    Raises:
        KeyError: If a CSV file has no 'timestamp' column.
    """
    arrays = []
    labels = []
    for activity_folder in activity_folders:
        activity_folder_path = os.path.join(root_path, activity_folder)

        # Report missing folders instead of skipping them silently, so an
        # unmounted drive or a typo does not produce a quietly empty dataset.
        if not os.path.isdir(activity_folder_path):
            print(f"Skipping missing activity folder: {activity_folder_path}")
            continue

        # os.listdir returns entries in arbitrary order; sorting makes the row
        # order (and therefore any seeded train/test split) reproducible.
        for file_name in sorted(os.listdir(activity_folder_path)):
            if not file_name.endswith('.csv'):
                continue
            file_path = os.path.join(activity_folder_path, file_name)
            df = pd.read_csv(file_path)  # Adjust the read_csv parameters based on your file format
            arrays.append(df.drop(columns=['timestamp']).values.flatten())
            labels.append(activity_folder)
    return arrays, labels


# One flattened array and one label per CSV file
all_flattened_arrays, all_labels = load_flattened_segments(main_folder_path, ["SO", "LL", "RA"])

# One row per segment; pandas pads shorter rows with NaN if segment lengths differ
combined_df = pd.DataFrame(all_flattened_arrays)

# Add a label column with the folder names
combined_df['label'] = all_labels


In [None]:
# Sanity check: (rows, columns) of the combined frame, i.e. one row per loaded
# CSV file and one column per flattened value plus the 'label' column.
combined_df.shape

(525, 11001)

In [None]:
# Disabled step kept for reference: it would drop the first column of the frame.
# NOTE(review): presumably aimed at column 0, which in the preview looks like a
# saved row index rather than a signal value; confirm before re-enabling.
#combined_df = combined_df.iloc[:, 1:]
# Preview the first ten rows of the combined frame.
combined_df.head(10)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,10991,10992,10993,10994,10995,10996,10997,10998,10999,label
0,95.0,105.60303,4.0,11.151044,10.707436,9.316612,12.144976,9.425203,10.150993,9.600242,...,14.014939,18.581728,14.814212,18.369108,18.438068,19.351728,17.369322,19.920948,21.335201,SO
1,295.0,105.60303,4.0,21.506546,21.487823,22.237087,22.065025,20.665798,19.98072,20.726309,...,6.811122,6.554436,5.996309,6.98931,7.431928,8.145256,8.993933,8.906126,8.952628,SO
2,495.0,105.60303,4.0,17.746916,19.175842,20.094381,21.879972,21.342686,19.496647,21.977829,...,12.046574,12.480197,16.537952,16.45737,15.557218,18.807453,19.396845,17.038677,17.995457,SO
3,695.0,105.60303,4.0,17.766068,16.753246,19.04383,17.250135,18.304783,18.869814,18.925077,...,12.481363,11.611756,12.46504,13.227288,13.615556,13.061897,14.12526,14.368896,14.130817,SO
4,895.0,105.60303,4.0,13.907796,17.014135,14.053236,17.398851,15.159801,16.416302,16.852371,...,15.162306,14.735621,16.487059,15.695503,16.295233,16.722143,17.177563,17.701513,17.42176,SO
5,1095.0,105.60303,4.0,18.332218,19.316801,18.656826,19.351206,18.549267,19.59511,20.402458,...,13.360408,14.871668,15.721128,14.085009,14.890542,11.189156,12.656915,11.509663,12.924667,SO
6,1295.0,105.60303,4.0,26.206268,26.14736,17.714241,18.659363,17.782763,19.69788,17.509909,...,18.12972,19.989136,17.627092,17.5678,17.752438,18.823847,18.042463,18.242094,17.533295,SO
7,1495.0,105.60303,4.0,11.107969,9.253821,8.815681,6.830923,5.513859,6.323463,5.064917,...,17.529305,18.763878,15.482011,13.943518,11.983328,13.858007,11.676786,9.471568,9.792299,SO
8,1695.0,105.60303,4.0,5.227323,15.211393,9.476759,19.716702,12.707226,15.794682,17.629936,...,26.146471,25.994068,23.582901,24.0748,23.335043,24.082153,23.42843,24.009565,21.0771,SO
9,1895.0,105.60303,4.0,12.837535,12.206369,10.99589,11.399932,11.291079,9.269518,13.626994,...,11.171714,10.11223,13.896391,16.110277,18.566053,13.522039,18.489637,17.689028,14.998214,SO


In [None]:
# Second check of the combined frame's (rows, columns).
# NOTE(review): the saved output of this cell differs from the earlier shape
# cell although no visible code reassigns combined_df in between; it looks like
# stale output from another session. Re-run on a fresh kernel to confirm.
combined_df.shape

(105000, 55)

In [None]:
# Number of CSV segments loaded. The loading cell stores them in
# `all_flattened_arrays`; `all_dataframes` is not defined anywhere in the
# visible notebook and would raise NameError on a fresh kernel.
len(all_flattened_arrays)

525

# Data prep

In [None]:
from sklearn.model_selection import train_test_split

# Separate the target column from the flattened feature columns.
y = combined_df['label']
X = combined_df.drop(columns='label')

# Hold out 30% of the segments for testing; the seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

In [None]:
# Preview the first ten training rows. A bare expression uses the notebook's
# rich table display instead of a wrapped plain-text dump.
X_train.head(10)

      0          1      2          3          4          5          6      \
314  1365.0  105.60303    4.0  26.968600  30.951720  27.649836  30.052702   
467  2744.0  105.60303    4.0  24.225454  23.725327  24.561783  25.502316   
94   5837.0  105.60303    4.0  19.300188  18.593597  19.051060  18.203154   
456   544.0  105.60303    4.0  23.800772  27.856167  28.224888  25.693254   
354  2213.0  105.60303    4.0  20.126020  19.159122  23.023151  14.101392   
23   4695.0  105.60303    4.0  19.222513  17.926594  18.370720  14.439596   
249  4620.0  105.60303    4.0  21.785194  29.186796  21.808517  24.994963   
272  1020.0  105.60303    4.0  24.919470  25.269474  25.781660  25.999743   
501  1785.0  105.60303    4.0  30.630825  31.919424  30.474092  30.103798   
5    1095.0  105.60303    4.0  18.332218  19.316801  18.656826  19.351206   

         7          8          9      ...      10990      10991      10992  \
314  26.842928  28.532768  28.073906  ...  15.311355  15.234358  13.885149

In [None]:
from sklearn.preprocessing import LabelEncoder

# Fit on the full label column so every class is known to the encoder, even if
# the random split happened to leave one out of the training set.
label_encoder = LabelEncoder().fit(y)

# Encode from the untouched string labels in `y`, selected through each split's
# row index. Unlike re-encoding y_train/y_test in place, re-running this cell
# gives the same result and keeps the original class names in
# label_encoder.classes_.
y_train = label_encoder.transform(y.loc[X_train.index])
y_test = label_encoder.transform(y.loc[X_test.index])

In [None]:
# Show the encoded training labels (the integer codes produced by label_encoder).
print(y_train)

[1 0 0 ... 1 2 2]


# Model training

In [None]:
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

In [None]:
# Logistic Regression model (L2 penalty, liblinear solver).
# `multi_class` is deliberately left at its default: 'auto' already was the
# default, so behaviour is unchanged, while passing it explicitly triggers a
# FutureWarning on scikit-learn >= 1.5 where the parameter is deprecated.
# NOTE(review): recent scikit-learn releases also appear to deprecate liblinear
# for more than two classes; confirm against the installed version.
logreg_model = LogisticRegression(penalty='l2', C=1.0, solver='liblinear')
logreg_model.fit(X_train, y_train)

# Predictions and evaluation on the held-out split
logreg_predictions = logreg_model.predict(X_test)
logreg_accuracy = accuracy_score(y_test, logreg_predictions)
print("Logistic Regression Accuracy:", logreg_accuracy)
print("Logistic Regression Classification Report:\n", classification_report(y_test, logreg_predictions))

Logistic Regression Accuracy: 0.8227848101265823
Logistic Regression Classification Report:
               precision    recall  f1-score   support

           0       0.82      0.74      0.78        50
           1       0.82      0.96      0.89        53
           2       0.82      0.76      0.79        55

    accuracy                           0.82       158
   macro avg       0.82      0.82      0.82       158
weighted avg       0.82      0.82      0.82       158



In [None]:
# Support Vector Classifier with a linear kernel.
# NOTE(review): the variable is called `svm_model_rbf` but the kernel is
# 'linear'; confirm which one was intended. The name is kept as-is because
# other cells may refer to it.
svm_model_rbf = SVC(kernel='linear', C=1).fit(X_train, y_train)

# Score the fitted classifier on the held-out split.
svm_predictions = svm_model_rbf.predict(X_test)
svm_accuracy = accuracy_score(y_test, svm_predictions)
svm_report = classification_report(y_test, svm_predictions)

print("SVM Accuracy:", svm_accuracy)
print("SVM Classification Report:\n", svm_report)

SVM Accuracy: 0.8291139240506329
SVM Classification Report:
               precision    recall  f1-score   support

           0       0.70      0.90      0.79        50
           1       0.92      0.91      0.91        53
           2       0.90      0.69      0.78        55

    accuracy                           0.83       158
   macro avg       0.84      0.83      0.83       158
weighted avg       0.85      0.83      0.83       158



In [None]:
# Random Forest (100 trees, depth capped at 20, fixed seed).
# max_features="sqrt" replaces the former "auto": for classifiers the two were
# equivalent, but "auto" was deprecated in scikit-learn 1.1 and removed in 1.3,
# where it raises an error instead of a warning.
rf_model = RandomForestClassifier(n_estimators=100, random_state=42, max_features="sqrt", max_depth=20)
rf_model.fit(X_train, y_train)

# Predictions and evaluation for Random Forest
rf_predictions = rf_model.predict(X_test)
rf_accuracy = accuracy_score(y_test, rf_predictions)
print("Random Forest Accuracy:", rf_accuracy)
print("Random Forest Classification Report:\n", classification_report(y_test, rf_predictions))

  warn(


Random Forest Accuracy: 0.930379746835443
Random Forest Classification Report:
               precision    recall  f1-score   support

           0       0.92      0.90      0.91        50
           1       0.94      0.96      0.95        53
           2       0.93      0.93      0.93        55

    accuracy                           0.93       158
   macro avg       0.93      0.93      0.93       158
weighted avg       0.93      0.93      0.93       158



In [None]:
!pip install xgboost



In [None]:
import xgboost as xgb

# Gradient-boosted tree classifier configured for the three activity classes.
xgb_model = xgb.XGBClassifier(
    objective='multi:softmax',
    num_class=3,
    random_state=42,
).fit(X_train, y_train)

# Score the fitted model on the held-out split.
xgb_predictions = xgb_model.predict(X_test)
xgb_accuracy = accuracy_score(y_test, xgb_predictions)
xgb_report = classification_report(y_test, xgb_predictions)

print("XGBoost Accuracy:", xgb_accuracy)
print("XGBoost Classification Report:\n", xgb_report)

XGBoost Accuracy: 0.9050632911392406
XGBoost Classification Report:
               precision    recall  f1-score   support

           0       0.93      0.84      0.88        50
           1       0.91      0.96      0.94        53
           2       0.88      0.91      0.89        55

    accuracy                           0.91       158
   macro avg       0.91      0.90      0.90       158
weighted avg       0.91      0.91      0.90       158

