In [1]:
import pandas as pd
import os
from scipy import stats
import matplotlib.pyplot as plt
import seaborn as sns
from ipywidgets import interact
import plotly.graph_objects as go
import ruptures as rpt
from itertools import combinations as comb
from statsmodels.stats import power
import numpy as np

from IPython.display import display
pd.set_option('display.max_rows', None)
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from scipy.spatial.distance import cdist
import colorsys

In [2]:
folder_path = 'data'
def read_data(filename):
    accelerometer = pd.read_csv(os.path.join(folder_path, filename, 'Accelerometer.csv'),sep=';')
    gyroscope = pd.read_csv(os.path.join(folder_path, filename,'Gyroscope.csv'),sep=';')
    accelerometer.rename(columns={'Acceleration x (m/s^2)':'accelerometer_x','Acceleration y (m/s^2)':'accelerometer_y','Acceleration z (m/s^2)':'accelerometer_z'}, inplace=True)
    gyroscope.rename(columns={'Gyroscope x (rad/s)':'gyroscope_x','Gyroscope y (rad/s)':'gyroscope_y','Gyroscope z (rad/s)':'gyroscope_z'}, inplace=True)
    accelerometer =accelerometer[9000:-9000]
    gyroscope = gyroscope[9000:-9000]
    return accelerometer, gyroscope

In [3]:
accelerometer_lucas1, gyroscope_lucas1 = read_data('lucas1')
accelerometer_nick1, gyroscope_nick1 = read_data('nick1')
accelerometer_till1, gyroscope_till1 = read_data('till1')
accelerometer_luisa1, gyroscope_luisa1  = read_data('luisa1')

In [4]:
accelerometer_nick1.head(), gyroscope_nick1.head()

(       Time (s)  accelerometer_x  accelerometer_y  accelerometer_z
 9000  19.955428         0.036491       -12.204592        -0.468993
 9001  19.957651         0.241675       -12.189637        -0.460020
 9002  19.959872         0.470190       -12.174682        -0.536590
 9003  19.962089         0.717249       -12.107084        -0.675972
 9004  19.964303         0.945763       -12.078370        -0.738186,
        Time (s)  gyroscope_x  gyroscope_y  gyroscope_z
 9000  19.962089     0.687529     1.240056     0.452040
 9001  19.964303     0.656833     1.288926     0.449750
 9002  19.966516     0.627359     1.323134     0.450055
 9003  19.968732     0.598953     1.342682     0.452651
 9004  19.970944     0.567036     1.337795     0.460287)

In [5]:
lucas1 = pd.merge(accelerometer_lucas1,gyroscope_lucas1, on= 'Time (s)', how='inner')
lucas1.head()

Unnamed: 0,Time (s),accelerometer_x,accelerometer_y,accelerometer_z,gyroscope_x,gyroscope_y,gyroscope_z


In [6]:
till1 = pd.merge(accelerometer_till1,gyroscope_till1, on= 'Time (s)', how='inner')
till1.head(10)

Unnamed: 0,Time (s),accelerometer_x,accelerometer_y,accelerometer_z,gyroscope_x,gyroscope_y,gyroscope_z
0,20.615457,5.86421,-11.45504,-0.691526,0.450513,0.644616,-0.018173
1,20.617746,5.892924,-11.454442,-0.76331,0.428369,0.607047,-0.050091
2,20.620034,5.813363,-11.405988,-0.815953,0.405004,0.578031,-0.08094
3,20.622323,5.729016,-11.32822,-0.906282,0.381638,0.544128,-0.109192
4,20.624612,5.573482,-11.263615,-1.007378,0.360869,0.518625,-0.13271
5,20.6269,5.361717,-11.155937,-1.141376,0.338725,0.500451,-0.151189
6,20.629189,5.134997,-11.086546,-1.228116,0.319177,0.482431,-0.167224
7,20.631478,4.832903,-10.978271,-1.348356,0.295812,0.465479,-0.180816
8,20.633766,4.487739,-10.917253,-1.416551,0.273668,0.453415,-0.191964
9,20.636055,4.138387,-10.798809,-1.565504,0.250455,0.438907,-0.198226


In [7]:
nick1 = pd.merge(accelerometer_nick1,gyroscope_nick1, on= 'Time (s)', how='inner')
nick1.head(10)

Unnamed: 0,Time (s),accelerometer_x,accelerometer_y,accelerometer_z,gyroscope_x,gyroscope_y,gyroscope_z
0,19.962089,0.717249,-12.107084,-0.675972,0.687529,1.240056,0.45204
1,19.964303,0.945763,-12.07837,-0.738186,0.656833,1.288926,0.44975
2,19.966516,1.192822,-12.154343,-0.882353,0.627359,1.323134,0.450055
3,19.968732,1.482354,-12.149557,-1.102493,0.598953,1.342682,0.452651
4,19.970944,1.772483,-12.072389,-1.260419,0.567036,1.337795,0.460287
5,19.973164,2.072184,-12.004791,-1.470988,0.540158,1.320691,0.462883
6,19.975379,2.395216,-11.927623,-1.67677,0.509462,1.288773,0.466854
7,19.977597,2.679961,-11.850454,-1.89691,0.477544,1.239904,0.467006
8,19.97981,2.950948,-11.778071,-2.097907,0.447001,1.178664,0.463494
9,19.982023,3.192624,-11.720643,-2.313261,0.408974,1.100474,0.451429


In [8]:
nick1['label'] = 0
till1['label'] = 1

In [12]:
def create_sliding_windows(data, window_length, stride):
    # Extract the feature columns and class labels
    features = data.iloc[:, 1:-1].values  # Exclude the first column and the last column
    labels = data.iloc[:, -1].values      # The last column is the class label

    # Number of windows that can be created from the data with the given stride
    num_windows = (len(data) - window_length) // stride + 1

    # Create sliding windows using numpy stride tricks
    windows = np.lib.stride_tricks.sliding_window_view(features, (window_length, features.shape[1]))
    windows = windows[::stride, 0, :, :]  # Apply stride to the windows
    y_windows = labels[np.arange(0, num_windows * stride, stride)]  # Class labels at the start of each window

    return windows, y_windows


window_length = 4500
stride = 450

X_windows1, y_windows1 = create_sliding_windows(nick1, window_length, stride)
X_windows2, y_windows2 = create_sliding_windows(till1, window_length ,stride)



X_train1, X_test1 = X_windows1[:int(0.8 * len(X_windows1))], X_windows1[int(0.8 * len(X_windows1)):]
y_train1, y_test1 = y_windows1[:int(0.8 * len(y_windows1))], y_windows1[int(0.8 * len(y_windows1)):]
X_train2, X_test2 = X_windows2[:int(0.8 * len(X_windows2))], X_windows2[int(0.8 * len(X_windows2)):]
y_train2, y_test2 = y_windows2[:int(0.8 * len(y_windows2))], y_windows2[int(0.8 * len(y_windows2)):]
                                                                                      
X_train = np.concatenate((X_train1, X_train2), axis=0)
y_train = np.concatenate((y_train1, y_train2), axis=0)

X_test = np.concatenate((X_test1, X_test2), axis=0)
y_test = np.concatenate((y_test1, y_test2), axis=0)


In [13]:
n_samples, window_length, n_features = X_train.shape
X_train_reshaped = X_train.reshape(n_samples, window_length * n_features)
X_test_reshaped = X_test.reshape(X_test.shape[0], window_length * n_features)

model = xgb.XGBClassifier(use_label_encoder=False, eval_metric='mlogloss')
model.fit(X_train_reshaped, y_train)

y_pred = model.predict(X_test_reshaped)

accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy}")

print("X_train shape:", X_train_reshaped.shape)
print("X_test shape:", X_test_reshaped.shape)
print("y_train shape:", y_train.shape)
print("y_test shape:", y_test.shape)

Accuracy: 1.0
X_train shape: (1207, 27000)
X_test shape: (303, 27000)
y_train shape: (1207,)
y_test shape: (303,)


In [46]:
def check_leakage(X_train, X_test, tol=1e-100):
    # Check if any instance in the test set is present in the training set
    for test_instance in X_test:
        dists = np.linalg.norm(X_train - test_instance, axis=1)
        if dists.min() < tol:
            return True

    # For time series data, check if any subsequence of the test set is present in the training set
    for test_instance in X_test:
        for i in range(X_train.shape[0] - len(test_instance) + 1):
            if np.allclose(X_train[i:i+len(test_instance)], test_instance, atol=tol):
                return True

    return False

leakage_exists = check_leakage(X_train_reshaped, X_test_reshaped)
print(f"Leakage exists: {leakage_exists}")

Leakage exists: False


In [47]:
unique_classes = np.unique(y_train)

base_colors = ['rgb(228,26,28)', 'rgb(55,126,184)', 'rgb(77,175,74)']  

fig = go.Figure()

for j, cls in enumerate(unique_classes):

    indices = np.where(y_train == cls)[0]
    idx = np.random.choice(indices)
    window = X_train[idx]
    for i in range(6):
        rgb = base_colors[j].lstrip('rgb(').rstrip(')').split(',')
        hls = colorsys.rgb_to_hls(int(rgb[0])/255, int(rgb[1])/255, int(rgb[2])/255)
        hls = (hls[0], hls[1]*(1 - 0.1*i), hls[2])  # Adjust lightness
        rgb = colorsys.hls_to_rgb(*hls)
        color = f'rgb({int(rgb[0]*255)},{int(rgb[1]*255)},{int(rgb[2]*255)})'
        fig.add_trace(go.Scatter(y=window[:, i], mode='lines', name=f'Class {cls} Dimension {i+1}', line=dict(color=color)))

fig.show()