In [121]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.integrate import cumulative_trapezoid

from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler


# Conversion constant: from milli-g to m/s²
MG_TO_MS2 = 0.00980665

def process_data(data):
    # Rename columns: input data has columns: time, x, y, z
    data = data.rename(columns={'time': 't', 'x': 'ax', 'y': 'ay', 'z': 'az'})
    
    # Convert time to seconds
    data['t'] = data['t'] / 1000.0
    
    # Remove any NaN entries
    data = data.dropna()
    
    # Convert acceleration from mg to m/s²
    for acc in ['ax', 'ay', 'az']:
        data[acc] = data[acc] * MG_TO_MS2

    # Replace outliers: if the absolute ratio to the median exceeds threshold, replace with median.
    for acc in ['ax', 'ay', 'az']:
        median_val = np.median(data[acc])
        # Use a threshold to decide what constitutes an outlier.
        if median_val != 0:
            outlier_idx = np.abs(data[acc] / median_val) > 1.5
            data.loc[outlier_idx, acc] = median_val

    # Compute velocity by integrating acceleration over time.
    data['vx'] = cumulative_trapezoid(data['ax'], data['t'], initial=0)
    data['vy'] = cumulative_trapezoid(data['ay'], data['t'], initial=0)
    data['vz'] = cumulative_trapezoid(data['az'], data['t'], initial=0)

    # Compute position by integrating velocity over time.
    data['x_pos'] = cumulative_trapezoid(data['vx'], data['t'], initial=0)
    data['y_pos'] = cumulative_trapezoid(data['vy'], data['t'], initial=0)
    data['z_pos'] = cumulative_trapezoid(data['vz'], data['t'], initial=0)
    
    return data



In [122]:
# Read and preprocess the dataset.
# Replace "data/trial1.csv" with the correct path if needed.
data = pd.read_csv("data/trial1.csv")
data = process_data(data)
data.head()


Unnamed: 0,t,ax,ay,az,vx,vy,vz,x_pos,y_pos,z_pos
0,0.003,2.314369,-1.019892,-9.532064,0.0,0.0,0.0,0.0,0.0,0.0
1,0.025,2.549729,-1.019892,-9.845877,0.053505,-0.022438,-0.213157,0.000589,-0.000247,-0.002345
2,0.046,2.510502,-1.019892,-9.57129,0.106638,-0.043855,-0.417038,0.00227,-0.000943,-0.008962
3,0.067,2.432049,-1.019892,-9.68897,0.158534,-0.065273,-0.61927,0.005054,-0.002089,-0.019843
4,0.089,2.432049,-1.019892,-9.767423,0.212039,-0.087711,-0.833291,0.009131,-0.003772,-0.035821


In [None]:
# Visualize the acceleration signals.
plt.figure(figsize=(10, 6))
for acc in ['ax', 'ay', 'az']:
    plt.plot(data['t'], data[acc], label=acc)
plt.xlabel("Time (s)")
plt.ylabel("Acceleration (m/s²)")
plt.title("Accelerometer Data")
plt.legend()
plt.show()


In [123]:


# Label the data based on the movement sequence.
# According to the experiment:
#   - 0 ≤ t < 5 s: Steady (label 0)
#   - 5 ≤ t < 10 s: Shaken (label 1)
#   - 10 ≤ t < 15 s: Steady (label 0)
#   - 15 ≤ t < 20 s: Shaken (label 1)
def label_motion(t):
    if 0 <= t < 5:
        return 0
    elif 5 <= t < 10:
        return 1
    elif 10 <= t < 15:
        return 0
    elif 15 <= t < 20:
        return 1
    else:
        return np.nan

data['label'] = data['t'].apply(label_motion)
data = data.dropna(subset=['label'])
data['label'] = data['label'].astype(int)

data['acc_mag'] = np.sqrt(data['ax']**2 + data['ay']**2 + data['az']**2)
data['vel_mag'] = np.sqrt(data['vx']**2 + data['vy']**2 + data['vz']**2)
data['pos_mag'] = np.sqrt(data['x_pos']**2 + data['y_pos']**2 + data['z_pos']**2)

# plt.figure(figsize=(10, 4))
# plt.scatter(data['t'], data['ax'], c=data['label'], cmap='bwr', label="ax")
# plt.xlabel("Time (s)")
# plt.ylabel("ax (m/s²)")
# plt.title("Acceleration on x-axis with movement labels")
# plt.colorbar(label="Label (0=Steady, 1=Shake)")
# plt.show()

data.sample(5)

Unnamed: 0,t,ax,ay,az,vx,vy,vz,x_pos,y_pos,z_pos,label,acc_mag,vel_mag,pos_mag
644,14.271,5.177911,-1.176798,-9.139798,57.371433,-15.093356,-116.492097,341.025088,-103.540204,-861.971327,0,10.570313,130.727577,932.74501
387,8.601,5.021005,-1.019892,-9.061345,27.473605,-8.768126,-71.978399,101.735466,-37.694833,-336.807861,1,10.409545,77.540756,353.851015
299,6.593,5.021005,-1.019892,-9.061345,18.941584,-6.720183,-58.514908,55.196465,-22.144571,-205.07749,1,10.409545,61.870339,213.527068
837,18.685,5.021005,-1.019892,-3.059675,75.656736,-19.631678,-148.136646,636.734815,-180.368322,-1450.897726,1,5.967602,167.492717,1594.700024
215,4.705,2.039783,-1.019892,-10.002783,11.37083,-4.79553,-45.407163,27.085445,-11.274292,-106.650003,0,10.259462,47.054259,110.611727


In [124]:
feature_cols = ['ax', 'ay', 'az', 'acc_mag', 'vel_mag', 'pos_mag']
X = data[feature_cols]
y = data['label']

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Split the data (70% training, 30% testing)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42, shuffle=True)

print("Number of training samples:", X_train.shape[0])
print("Number of testing samples:", X_test.shape[0])


Number of training samples: 625
Number of testing samples: 269


In [125]:

# Train a logistic regression model.
# The model fits the following relationship:
#   p = σ(b0 + b1*t + b2*ax + b3*ay + b4*az)
lr = LogisticRegression()
lr.fit(X_train, y_train)

# Make predictions on the test set and report accuracy.
preds = lr.predict(X_test)
accuracy = accuracy_score(y_test, preds)
print("Classification Accuracy: {:.2f}%".format(accuracy * 100))

# Detailed performance metrics.
print("\nClassification Report:")
print(classification_report(y_test, preds))
print("Confusion Matrix:")
print(confusion_matrix(y_test, preds))


Classification Accuracy: 71.38%

Classification Report:
              precision    recall  f1-score   support

           0       0.64      0.90      0.75       129
           1       0.85      0.54      0.66       140

    accuracy                           0.71       269
   macro avg       0.75      0.72      0.71       269
weighted avg       0.75      0.71      0.71       269

Confusion Matrix:
[[116  13]
 [ 64  76]]


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
