In [77]:
import numpy as np
import pandas as pd
from sklearn.mixture import GaussianMixture
from sklearn.impute import SimpleImputer, KNNImputer
from sklearn.experimental import enable_iterative_imputer  # noqa
from sklearn.impute import IterativeImputer
from sklearn.linear_model import BayesianRidge

import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, accuracy_score

# classical models
from sklearn.linear_model import Perceptron
from sklearn.neural_network import MLPClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, VotingClassifier

# for the CNN
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, Flatten, Dense

In [78]:
# Activity labels processed by this notebook. Each label is used both as a
# sub-folder name under the data root and as a key into `window_sizes`.
activities = [
    'walking',
    'biking',
    'sitting',
    'stairs',
]

In [85]:
import os
import pandas as pd
import numpy as np
from scipy.fft import fft

# Sampling rate passed to the FFT frequency axis (used as d = 1 / SAMPLE_RATE).
SAMPLE_RATE = 60

# Window length, in samples, used when slicing each activity's recordings.
# At SAMPLE_RATE = 60 this corresponds to:
#   walking / running / biking : 300 samples = 5 s
#   sitting                    : 600 samples = 10 s
#   stairs                     : 180 samples = 3 s
# NOTE(review): an earlier note here listed "stairs 120 (2 sec)" and a "bus"
# activity at 600; neither matches the values below — confirm which is intended.
window_sizes = dict(
    walking=300,
    running=300,
    biking=300,
    sitting=600,
    stairs=180,
)

# Sensor CSV file name -> the three per-axis column names read from that file.
all_sensors = {
    f"{sensor}.csv": [f"{quantity} {axis} ({unit})" for axis in ("x", "y", "z")]
    for sensor, quantity, unit in (
        ("Accelerometer", "Acceleration", "m/s^2"),
        ("Gyroscope", "Gyroscope", "rad/s"),
        ("Linear Acceleration", "Linear Acceleration", "m/s^2"),
    )
}

def extract_features_from_file(file_name, axes, activity, window_size=None, sample_rate=None):
    """Compute per-window time- and frequency-domain features for one sensor CSV.

    The file is read as a ';'-separated CSV whose first column is taken as the
    time column. Each column named in `axes` is cut into consecutive,
    non-overlapping windows of `window_size` samples, and one feature dict is
    produced per complete window.

    Parameters
    ----------
    file_name : str
        Path of the CSV file to read.
    axes : iterable of str
        Names of the signal columns to process.
    activity : str
        Activity label; used to look up the window length in `window_sizes`
        when `window_size` is not given.
    window_size : int, optional
        Window length in samples. Defaults to `window_sizes[activity]`.
    sample_rate : float, optional
        Sampling rate used to build the FFT frequency axis. Defaults to
        `SAMPLE_RATE`.

    Returns
    -------
    list of dict
        One dict per (axis, window) with keys: sensor, axis, start_time,
        end_time, start_idx, end_idx, mean, std, min, max, range, median,
        variance, dominant_freq, spectral_centroid.

    Raises
    ------
    KeyError
        If `window_size` is None and `activity` is not a key of
        `window_sizes`, or if a name in `axes` is not a column of the file.
    """
    if window_size is None:
        window_size = window_sizes[activity]
    if sample_rate is None:
        sample_rate = SAMPLE_RATE

    df = pd.read_csv(file_name, sep=";")
    features = []

    time_column = df.columns[0]
    # NOTE(review): NaNs are dropped from the time column and from each signal
    # column independently, so positions only line up when both have NaNs in
    # the same rows — confirm the input files never violate this.
    time_values = df[time_column].dropna().values

    for axis in axes:
        signal = df[axis].dropna().values

        # The "+ 1" keeps the final complete window: without it, a start
        # position with start + window_size == len(signal) is never generated
        # and the last full window is silently dropped whenever the signal
        # length is an exact multiple of the window size.
        for start in range(0, len(signal) - window_size + 1, window_size):
            stop = start + window_size
            segment = signal[start:stop]
            segment_time = time_values[start:stop]

            # Skip windows that are incomplete in either the signal or the
            # (independently filtered) time vector.
            if len(segment) < window_size or len(segment_time) < window_size:
                continue

            row = {
                "sensor": file_name,
                "axis": axis,
                "start_time": segment_time[0],
                "end_time": segment_time[-1],
                "start_idx": start,
                "end_idx": stop,
                "mean": np.mean(segment),
                "std": np.std(segment),
                "min": np.min(segment),
                "max": np.max(segment),
                "range": np.ptp(segment),
                "median": np.median(segment),
                "variance": np.var(segment)
            }

            # Frequency domain: magnitude spectrum restricted to strictly
            # positive frequencies (the DC bin and negative half are dropped).
            fft_result = fft(segment)
            freqs = np.fft.fftfreq(len(segment), d=1 / sample_rate)
            amplitudes = np.abs(fft_result)

            pos_mask = freqs > 0
            freqs = freqs[pos_mask]
            amplitudes = amplitudes[pos_mask]

            # Windows too short to have any positive-frequency bin are skipped.
            if len(freqs) == 0:
                continue

            row["dominant_freq"] = freqs[np.argmax(amplitudes)]
            # If every positive-frequency amplitude is zero, this is 0/0 and
            # evaluates to NaN.
            row["spectral_centroid"] = np.sum(freqs * amplitudes) / np.sum(amplitudes)
            features.append(row)

    return features

# Walk every (activity, sensor file) pair, extract the windowed features and
# tag each feature row with a clean sensor name and its activity label.
root = "./data/"
files = ["Accelerometer.csv", "Gyroscope.csv", "Linear Acceleration.csv"]
all_features = []

for activity in activities:
    for file in files:
        print(file, activity)
        axes = all_sensors[file]
        csv_path = root + activity + "/" + file
        sensor_name = file.replace(".csv", "")

        for record in extract_features_from_file(csv_path, axes, activity):
            # Replace the raw file path stored by the extractor with the bare
            # sensor name, and attach the activity label.
            record["sensor"] = sensor_name
            record["activity"] = activity
            all_features.append(record)

# Persist the long-format feature table (one row per sensor/axis/window).
features_df = pd.DataFrame(all_features)
features_df.to_csv("features_per_activity.csv", index=False)

Accelerometer.csv walking
Gyroscope.csv walking
Linear Acceleration.csv walking
Accelerometer.csv biking
Gyroscope.csv biking
Linear Acceleration.csv biking
Accelerometer.csv sitting
Gyroscope.csv sitting
Linear Acceleration.csv sitting
Accelerometer.csv stairs
Gyroscope.csv stairs
Linear Acceleration.csv stairs


In [86]:
# `df` is an alias, not a copy: the column added below also lands on features_df.
df = features_df

# 1) Encode the activity label as an integer class id. pd.Categorical sorts the
#    distinct labels, so with four activities the codes run 0–3.
df['activity_code'] = pd.Categorical(df['activity']).codes

# 2) Pivot to wide format: one row per (window start, activity) and one column
#    per (statistic, sensor, axis) combination.
stats = [
    'mean', 'std', 'min', 'max', 'range', 'median', 'variance',
    'dominant_freq', 'spectral_centroid',
]
wide = pd.pivot_table(
    df,
    values=stats,
    index=['start_idx', 'activity_code'],
    columns=['sensor', 'axis'],
)

# 3) Collapse the (stat, sensor, axis) column MultiIndex into flat names,
#    stripping spaces from the sensor and axis parts.
wide.columns = [
    "_".join((stat, sensor.replace(' ', ''), axis.replace(' ', '')))
    for stat, sensor, axis in wide.columns
]

# 4) Index by start_idx only; activity_code becomes an ordinary column.
#    The resulting index is not unique (each activity has a window starting at 0).
wide = wide.reset_index().set_index('start_idx')

# Preview of the wide feature table.
print(wide.head())

           activity_code  dominant_freq_Accelerometer_Accelerationx(m/s^2)  \
start_idx                                                                    
0                      0                                          0.200000   
0                      1                                          0.400000   
0                      2                                          0.333333   
0                      3                                          0.200000   
180                    2                                          1.666667   

           dominant_freq_Accelerometer_Accelerationy(m/s^2)  \
start_idx                                                     
0                                                  0.200000   
0                                                  0.100000   
0                                                  0.333333   
0                                                  0.200000   
180                                                1.666667   

           

In [87]:
# for the CNN
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, Flatten, Dense
print(len(wide))

# Drop every ROW (window) that has a NaN in any feature column.
wide = wide.dropna()

# Split the table into the label vector and the feature matrix.
y = wide['activity_code'].to_numpy()
X = wide.drop(columns='activity_code').to_numpy()

# 60/40 stratified split so each activity keeps its share in both parts.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.4,
    random_state=40,
    stratify=y,
)

# Standardise features using statistics learned from the training part only.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)


# helper to fit & report
def fit_and_report(clf, Xtr, Xte, ytr, yte, name):
    """Fit `clf` on the training data and print its test-set metrics.

    Parameters
    ----------
    clf : estimator exposing `fit(X, y)` and `predict(X)`
    Xtr, ytr : training features and labels
    Xte, yte : test features and labels
    name : str
        Label printed in front of the accuracy line.

    Prints the accuracy (3 decimals) followed by the full classification
    report; returns nothing.
    """
    clf.fit(Xtr, ytr)
    predicted = clf.predict(Xte)

    test_accuracy = accuracy_score(yte, predicted)
    print(f"\n{name} — accuracy: {test_accuracy:.3f}")

    # zero_division=0 reports 0 instead of warning for classes never predicted.
    report_text = classification_report(yte, predicted, zero_division=0)
    print(report_text)

# 2) Perceptron and 3) multi-layer perceptron, evaluated in that order on the
#    standardised features.
for clf, name in [
    (Perceptron(), "Perceptron"),
    (
        MLPClassifier(hidden_layer_sizes=(100, 100), max_iter=500, random_state=42),
        "MLPClassifier",
    ),
]:
    fit_and_report(clf, X_train_scaled, X_test_scaled, y_train, y_test, name)

# 4) 1D-CNN: each sample's feature vector is treated as a length-n_features
#    sequence with a single channel.

# Seed Python, NumPy and the Keras backend so weight initialisation and batch
# shuffling — and therefore the reported accuracy — are reproducible across
# runs. Every other model in this cell already has a fixed random_state.
tf.keras.utils.set_random_seed(42)

# Conv1D expects input shaped (samples, timesteps, channels).
Xtr_cnn = X_train_scaled.reshape(-1, X_train_scaled.shape[1], 1)
Xte_cnn = X_test_scaled.reshape(-1, X_test_scaled.shape[1], 1)

cnn = Sequential([
    # Declare the input shape with an explicit Input layer rather than the
    # `input_shape=` layer argument, which Keras warns against in Sequential models.
    tf.keras.Input(shape=(Xtr_cnn.shape[1], 1)),
    Conv1D(64, kernel_size=3, activation='relu'),
    Flatten(),
    Dense(100, activation='relu'),
    # One softmax output per distinct activity code.
    Dense(len(np.unique(y)), activation='softmax')
])
# Labels are integer class ids, hence the sparse categorical loss.
cnn.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
cnn.fit(Xtr_cnn, y_train, epochs=10, batch_size=32, validation_split=0.1, verbose=2)

loss, acc = cnn.evaluate(Xte_cnn, y_test, verbose=0)
print(f"\nCNN — accuracy: {acc:.3f}")

# 9) Ensemble members are built first so the voting classifier can reference them.
rf = RandomForestClassifier(n_estimators=100, max_depth=10, random_state=42)
gb = GradientBoostingClassifier(n_estimators=100, max_depth=3, random_state=42)
# Soft voting over RF + GB + a single-hidden-layer MLP.
vot = VotingClassifier(
    [
        ('rf', rf),
        ('gb', gb),
        ('mlp', MLPClassifier(hidden_layer_sizes=(100,), max_iter=300, random_state=42)),
    ],
    voting='soft',
)

# 5)–9) Remaining models, fitted and reported in this order.
remaining_models = [
    ("SVM (RBF)", SVC(kernel='rbf', C=1.0)),                                  # 5) support vector machine
    ("kNN (k=5)", KNeighborsClassifier(n_neighbors=5)),                       # 6) k-nearest neighbours
    ("Decision Tree", DecisionTreeClassifier(max_depth=10, random_state=42)), # 7) decision tree
    ("Gaussian NB", GaussianNB()),                                            # 8) naive Bayes
    ("Random Forest", rf),                                                    # 9a)
    ("Gradient Boosting", gb),                                                # 9b)
    ("Voting Ensemble", vot),                                                 # 9c)
]

for name, clf in remaining_models:
    fit_and_report(clf, X_train_scaled, X_test_scaled, y_train, y_test, name)

1652

Perceptron — accuracy: 0.965
              precision    recall  f1-score   support

           0       0.96      0.99      0.98        83
           1       0.93      0.93      0.93       122
           2       0.97      0.96      0.96       263
           3       0.99      0.98      0.99       182

    accuracy                           0.96       650
   macro avg       0.96      0.96      0.96       650
weighted avg       0.96      0.96      0.96       650


MLPClassifier — accuracy: 0.985
              precision    recall  f1-score   support

           0       0.96      0.99      0.98        83
           1       0.98      0.96      0.97       122
           2       0.98      1.00      0.99       263
           3       0.99      0.98      0.99       182

    accuracy                           0.98       650
   macro avg       0.98      0.98      0.98       650
weighted avg       0.98      0.98      0.98       650

Epoch 1/10


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


28/28 - 1s - 26ms/step - accuracy: 0.8870 - loss: 0.3567 - val_accuracy: 0.9694 - val_loss: 0.1473
Epoch 2/10
28/28 - 0s - 5ms/step - accuracy: 0.9555 - loss: 0.1251 - val_accuracy: 0.9694 - val_loss: 0.0932
Epoch 3/10
28/28 - 0s - 5ms/step - accuracy: 0.9726 - loss: 0.0751 - val_accuracy: 0.9694 - val_loss: 0.1082
Epoch 4/10
28/28 - 0s - 5ms/step - accuracy: 0.9886 - loss: 0.0503 - val_accuracy: 0.9898 - val_loss: 0.0443
Epoch 5/10
28/28 - 0s - 5ms/step - accuracy: 0.9954 - loss: 0.0291 - val_accuracy: 0.9898 - val_loss: 0.0402
Epoch 6/10
28/28 - 0s - 5ms/step - accuracy: 0.9977 - loss: 0.0183 - val_accuracy: 1.0000 - val_loss: 0.0247
Epoch 7/10
28/28 - 0s - 5ms/step - accuracy: 0.9977 - loss: 0.0118 - val_accuracy: 0.9898 - val_loss: 0.0282
Epoch 8/10
28/28 - 0s - 5ms/step - accuracy: 1.0000 - loss: 0.0081 - val_accuracy: 1.0000 - val_loss: 0.0176
Epoch 9/10
28/28 - 0s - 5ms/step - accuracy: 1.0000 - loss: 0.0054 - val_accuracy: 0.9898 - val_loss: 0.0145
Epoch 10/10
28/28 - 0s - 5ms/