In [9]:
import numpy as np
import pandas as pd
from scipy import stats
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

def read_data(file_path):
    """Load one accelerometer CSV and shorten its column headers.

    Parameters
    ----------
    file_path : str or file-like
        Anything ``pandas.read_csv`` accepts.

    Returns
    -------
    pandas.DataFrame
        The parsed file with the headers ``"Time (s)"``, ``"X (m/s^2)"``,
        ``"Y (m/s^2)"`` and ``"Z (m/s^2)"`` renamed to ``t``, ``x``, ``y``
        and ``z``. Columns with any other name are left unchanged.
    """
    short_names = {
        "Time (s)": "t",
        "X (m/s^2)": "x",
        "Y (m/s^2)": "y",
        "Z (m/s^2)": "z",
    }
    return pd.read_csv(file_path).rename(columns=short_names)

def label_data(df, label):
    """Tag every row of ``df`` with an activity code.

    The ``activity`` column is written on the frame that was passed in
    (created if missing, overwritten otherwise), so the caller's object is
    modified; the same object is returned for convenience.

    Parameters
    ----------
    df : pandas.DataFrame
        Recording to label.
    label : scalar
        Value broadcast to every row of the ``activity`` column.

    Returns
    -------
    pandas.DataFrame
        ``df`` itself, now carrying the ``activity`` column.
    """
    df["activity"] = label  # in-place on the caller's frame, not on a copy
    return df

def preprocess_data(df, window_size, random_state=None):
    """Trim, smooth, shuffle and min-max scale one labelled recording.

    Steps, in order:

    1. Drop the first and last tenth of the rows. That corresponds to the
       intended "first and last 3 seconds" only when the recording spans
       30 s at a constant sampling rate.
       NOTE(review): confirm this holds for every input file.
    2. Replace every column with its rolling mean over ``window_size`` rows
       and drop rows containing NaN (at least the first ``window_size - 1``
       rows, whose window is incomplete).
    3. Shuffle the rows and renumber the index from 0.
    4. Min-max scale the ``x``, ``y`` and ``z`` columns with a scaler that is
       fitted on this recording alone.

    Parameters
    ----------
    df : pandas.DataFrame
        Recording with numeric columns, including ``x``, ``y`` and ``z``.
    window_size : int
        Number of rows in the rolling-mean window.
    random_state : int, numpy.random.Generator or None, optional
        Forwarded to ``DataFrame.sample`` so the shuffle can be reproduced.
        The default ``None`` keeps the unseeded shuffle.

    Returns
    -------
    pandas.DataFrame
        The smoothed, shuffled and scaled rows.

    Raises
    ------
    ValueError
        If no rows remain after trimming and windowing.
    """
    n_rows = df.shape[0]
    # Same number of rows at both ends. An explicit stop index is used because
    # a negative stop of -0 (fewer than 10 rows) would select nothing at all.
    trim = int(3 * n_rows / 30)
    trimmed = df.iloc[trim:n_rows - trim]

    # Apply rolling mean with window size
    df_windowed = trimmed.rolling(window_size).mean().dropna()
    if df_windowed.empty:
        # Fail here with a readable message instead of inside the scaler.
        raise ValueError(
            f"No rows left after trimming and a rolling window of {window_size} "
            f"(recording has {n_rows} rows)"
        )
    df_windowed = df_windowed.sample(frac=1, random_state=random_state).reset_index(drop=True)

    # Apply normalization to x,y,z columns only
    scaler = MinMaxScaler()
    df_windowed[['x', 'y', 'z']] = scaler.fit_transform(df_windowed[['x', 'y', 'z']])

    return df_windowed


# --- Load: one CSV per (person, activity); every frame gets a fresh 0..n-1 index ---
(kieran_walking, kieran_jumping,
 amir_walking, amir_jumping,
 jack_walking, jack_jumping) = (
    read_data(csv_name).reset_index(drop=True)
    for csv_name in (
        'Accelerometer_Walking_Kieran.csv',
        'Accelerometer_Jumping_Kieran.csv',
        'Accelerometer_Walking_Amir.csv',
        'Accelerometer_Jumping_Amir.csv',
        'Accelerometer_Walking_Jack.csv',
        'Accelerometer_Jumping_Jack.csv',
    )
)

# --- Label: activity code 0 for walking recordings, 1 for jumping recordings ---
(kieran_walking, kieran_jumping,
 amir_walking, amir_jumping,
 jack_walking, jack_jumping) = (
    label_data(recording, activity_code)
    for recording, activity_code in (
        (kieran_walking, 0),
        (kieran_jumping, 1),
        (amir_walking, 0),
        (amir_jumping, 1),
        (jack_walking, 0),
        (jack_jumping, 1),
    )
)

# --- Preprocess: every recording goes through the same pipeline and window ---
window_size = 500
(kieran_walking_windowed, kieran_jumping_windowed,
 amir_walking_windowed, amir_jumping_windowed,
 jack_walking_windowed, jack_jumping_windowed) = (
    preprocess_data(recording, window_size)
    for recording in (
        kieran_walking, kieran_jumping,
        amir_walking, amir_jumping,
        jack_walking, jack_jumping,
    )
)

# --- Combine: stack all recordings under one continuous index and export ---
df_concat = pd.concat(
    [
        kieran_walking_windowed, kieran_jumping_windowed,
        amir_walking_windowed, amir_jumping_windowed,
        jack_walking_windowed, jack_jumping_windowed,
    ],
    ignore_index=True,
)

# The row index is written as the first (unnamed) CSV column.
df_concat.to_csv('accelerometer_data.csv', index=True)

In [None]:
# Turn the row number into an explicit `index_col` column, then export without
# the pandas index. The guard makes the cell safe to re-run: without it, each
# run would prepend another column that ends up renamed to `index_col`,
# leaving the frame (and the CSV) with duplicate column names.
if "index_col" not in df_concat.columns:
    df_concat = df_concat.reset_index(drop=False).rename(columns={"index": "index_col"})
df_concat.to_csv('accelerometer_data1.csv', index=False)


Create a labels DataFrame (the `activity` column) to use as `y` for the train and test sets

In [10]:
# Keep the activity codes as a one-column DataFrame rather than a Series.
activity_labels = pd.DataFrame(df_concat['activity'])
# Bare last expression: the notebook renders the labels frame as the cell output.
activity_labels


# Feature Extraction 

In [None]:
def _axis_features(axis_data):
    """Return the ten summary statistics of one 1-D signal, in feature order."""
    max_value = np.max(axis_data)
    min_value = np.min(axis_data)
    return [
        max_value,
        min_value,
        max_value - min_value,             # range
        np.mean(axis_data),
        np.median(axis_data),
        np.var(axis_data),
        stats.skew(axis_data),
        np.sqrt(np.mean(axis_data ** 2)),  # root-mean-square
        stats.kurtosis(axis_data),
        np.std(axis_data),
    ]


def extract_features(data, axis_columns=(1, 2, 3)):
    """Summarise each window with ten statistics per selected column.

    For every window in ``data`` and every column position in
    ``axis_columns`` the following are computed, in this order: maximum,
    minimum, range, mean, median, variance, skewness, root-mean-square,
    kurtosis and standard deviation. The per-column groups are concatenated
    in ``axis_columns`` order to form one feature row per window.

    Parameters
    ----------
    data : iterable of array-like
        Windows, each convertible by ``numpy.asarray`` to a 2-D array indexed
        as ``[row, column]`` (NumPy arrays, nested lists, DataFrames).
    axis_columns : iterable of int, optional
        Column positions to summarise. The default ``(1, 2, 3)`` reproduces
        the previously hard-coded ``range(1, 4)``, i.e. the x, y and z axes;
        presumably column 0 holds the time stamps -- confirm with the caller.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(n_windows, 10 * len(axis_columns))``. An empty
        ``data`` yields a 2-D array with zero rows rather than a 1-D one.
    """
    axis_columns = tuple(axis_columns)
    features = []
    for window in data:
        samples = np.asarray(window)  # allows nested lists / DataFrames too
        feature_list = []
        for column in axis_columns:
            feature_list.extend(_axis_features(samples[:, column]))
        features.append(feature_list)

    # The explicit reshape only matters for empty input, keeping the result 2-D.
    return np.array(features).reshape(len(features), 10 * len(axis_columns))