# Import

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import os
import warnings
from scipy import stats

# Aggregate Data

In [2]:
def create_time_intervals(start_time, end_time, delta_t):
    """Return the start times of consecutive windows of width ``delta_t``.

    The values begin at ``start_time`` and advance in steps of ``delta_t``.
    The range is half-open (it is built with ``np.arange``), so ``end_time``
    itself is never one of the returned values.
    """
    interval_starts = np.arange(start_time, end_time, delta_t)
    return interval_starts

def aggregate_data(df, time_column, delta_t, numerical_columns, label):
    """Average the numerical columns of ``df`` over fixed-width time windows.

    Windows are ``[start + k * delta_t, start + (k + 1) * delta_t)`` where
    ``start`` is the smallest value in ``time_column``. Every row with a valid
    timestamp lands in exactly one window, including the very last sample and
    recordings that contain a single sample. Windows without any rows are
    omitted from the result.

    Parameters
    ----------
    df : pd.DataFrame
        Raw samples.
    time_column : str
        Name of the column holding the sample timestamps.
    delta_t : float
        Window width, in the same unit as ``time_column``. Must be positive.
    numerical_columns : list of str
        Columns to average inside each window.
    label : object
        Value written to the ``'label'`` column of every aggregated row.

    Returns
    -------
    pd.DataFrame
        Columns ``'time'`` (window start), the averaged ``numerical_columns``
        and ``'label'``; one row per non-empty window, ordered by time.

    Raises
    ------
    ValueError
        If ``delta_t`` is not positive.
    """
    if delta_t <= 0:
        raise ValueError("delta_t must be positive")

    value_columns = list(numerical_columns)
    output_columns = ['time', *value_columns, 'label']

    # Rows without a timestamp cannot be assigned to any window.
    timed = df.dropna(subset=[time_column])
    if timed.empty:
        return pd.DataFrame(columns=output_columns)

    start_time = timed[time_column].min()
    # Integer window index of every row; computing it directly (instead of
    # enumerating np.arange(start, end, delta_t)) keeps the sample at the
    # maximum timestamp and avoids rescanning the frame once per window.
    window_index = np.floor((timed[time_column] - start_time) / delta_t).astype(int)

    aggregated = timed[value_columns].groupby(window_index.to_numpy()).mean()
    aggregated.insert(0, 'time', start_time + aggregated.index.to_numpy() * delta_t)
    # Add the label to every aggregated row
    aggregated['label'] = label
    return aggregated.reset_index(drop=True)


# Configure Paths, Sensors and Time Step

In [3]:
# Define the base path and folders.
# The project location can be overridden with the ML4QS_BASE_PATH environment
# variable so the notebook runs on another machine without editing this cell;
# otherwise update the fallback below to your actual path.
base_path = os.environ.get('ML4QS_BASE_PATH', '/Users/priyank/Desktop/ML4QS-project')
gesture_folders = ['Handshake_right_5mins', 'Highfive_right_5mins', 'Clapping_right_5mins', 'Wave_right_5mins']
gesture_labels = ['handshake', 'highfive', 'clapping', 'waving']  # Corresponding labels for gestures
file_names = ['Accelerometer.csv', 'Gyroscope.csv', 'Linear Accelerometer.csv', 'Magnetometer.csv']
sensor_types = ['accelerometer', 'gyroscope', 'linear_accelerometer', 'magnetometer']

# Specify numerical columns for each sensor type
numerical_columns_dict = {
    'accelerometer': ['X (m/s^2)', 'Y (m/s^2)', 'Z (m/s^2)'],
    'gyroscope': ['X (rad/s)', 'Y (rad/s)', 'Z (rad/s)'],
    'linear_accelerometer': ['X (m/s^2)', 'Y (m/s^2)', 'Z (m/s^2)'],
    'magnetometer': ['X (µT)', 'Y (µT)', 'Z (µT)']
}

# The lists above are paired positionally with zip() further down, which
# silently truncates on a length mismatch, so check the alignment here.
assert len(gesture_folders) == len(gesture_labels), "each gesture folder needs exactly one label"
assert len(file_names) == len(sensor_types), "each sensor file needs exactly one sensor type"
assert set(sensor_types) == set(numerical_columns_dict), "every sensor type needs its numerical columns"

# Define time step (window width used when aggregating, in the unit of the time column)
delta_t = 0.5


# Save data

In [4]:
def save_aggregated_data(base_path, gesture_folder, file_names, aggregated_data_dict,
                         interval_label="0.5s", sensor_names=None):
    """Write each sensor's aggregated frame to ``<gesture_folder>/aggregated_data``.

    Every output file is named after the matching entry of ``file_names`` with
    ``_<interval_label>`` inserted before the extension, e.g.
    ``Accelerometer.csv`` becomes ``Accelerometer_0.5s.csv``.

    Parameters
    ----------
    base_path : str
        Project root directory.
    gesture_folder : str
        Folder (relative to ``base_path``) that receives the ``aggregated_data``
        sub-directory; it is created if missing.
    file_names : list of str
        Original sensor file names, paired positionally with the sensors.
    aggregated_data_dict : dict
        Maps sensor name to the DataFrame to write.
    interval_label : str, optional
        Suffix inserted into every output file name.
    sensor_names : list of str, optional
        Keys of ``aggregated_data_dict`` in the same order as ``file_names``.
        Defaults to the dictionary's own key order, so the function no longer
        depends on a module-level ``sensor_types`` variable.

    Raises
    ------
    ValueError
        If the number of sensors differs from the number of file names.
    """
    if sensor_names is None:
        sensor_names = list(aggregated_data_dict)
    if len(sensor_names) != len(file_names):
        # zip() would silently drop the unmatched entries.
        raise ValueError(
            f"got {len(sensor_names)} sensor names but {len(file_names)} file names"
        )

    target_path = os.path.join(base_path, gesture_folder, 'aggregated_data')
    os.makedirs(target_path, exist_ok=True)

    for sensor_name, file_name in zip(sensor_names, file_names):
        # Split on the real extension rather than replacing every '.csv' substring.
        stem, extension = os.path.splitext(file_name)
        new_file_name = f'{stem}_{interval_label}{extension}'
        output_path = os.path.join(target_path, new_file_name)
        aggregated_data_dict[sensor_name].to_csv(output_path, index=False)
        print(f"Saved aggregated data to {output_path}")

In [5]:
# Aggregate data for each sensor type in each folder
for gesture_folder, gesture_label in zip(gesture_folders, gesture_labels):
    # os.scandir yields entries in arbitrary order: sort them so the recordings
    # are concatenated in the same order on every run, and use the context
    # manager so the directory iterator is closed promptly.
    with os.scandir(os.path.join(base_path, gesture_folder)) as entries:
        sub_folders = sorted(f.path for f in entries if f.is_dir())
    aggregated_data_dict = {sensor_type: pd.DataFrame() for sensor_type in sensor_types}

    for sensor_type, file_name in zip(sensor_types, file_names):
        data_dict = []  # aggregated frames, one per recording sub-folder
        for sub_folder in sub_folders:
            file_path = os.path.join(sub_folder, file_name)
            if os.path.exists(file_path):
                df = pd.read_csv(file_path)
                aggregated_df = aggregate_data(df, 'Time (s)', delta_t, numerical_columns_dict[sensor_type], gesture_label)
                data_dict.append(aggregated_df)

        if data_dict:
            aggregated_data_dict[sensor_type] = pd.concat(data_dict, ignore_index=True)
        else:
            # pd.concat raises on an empty list; keep the empty placeholder frame
            # and report the missing sensor instead of aborting the whole run.
            warnings.warn(f"No '{file_name}' found in any sub-folder of '{gesture_folder}'")

    save_aggregated_data(base_path, gesture_folder, file_names, aggregated_data_dict)

def load_and_adjust_timestamps(base_path, gesture_folders, filename, time_step=None):
    """Concatenate one sensor's aggregated files and rebuild a continuous time axis.

    For every folder in ``gesture_folders`` the file
    ``<base_path>/<folder>/aggregated_data/<filename>`` is read if it exists;
    folders without that file are skipped. The frames are stacked in the order
    given and the ``'time'`` column is overwritten with ``0, step, 2 * step, ...``.

    Parameters
    ----------
    base_path : str
        Project root directory.
    gesture_folders : list of str
        Gesture folders to combine, in output order.
    filename : str
        Name of the aggregated CSV inside each ``aggregated_data`` directory.
    time_step : float, optional
        Spacing of the regenerated timestamps. Defaults to the module-level
        ``delta_t`` so existing calls behave as before.

    Returns
    -------
    pd.DataFrame
        The combined rows with a regenerated ``'time'`` column. If no file was
        found, an empty frame that only has the ``'time'`` column.
    """
    if time_step is None:
        time_step = delta_t  # notebook-level aggregation step

    # Collect the frames first and concatenate once instead of growing a
    # DataFrame inside the loop.
    frames = []
    for gesture_folder in gesture_folders:
        file_path = os.path.join(base_path, gesture_folder, 'aggregated_data', filename)
        if os.path.exists(file_path):
            frames.append(pd.read_csv(file_path))

    combined_data = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()

    # Adjust the timestamps to start at 0 and increment by the time step.
    # Scaling an integer range always yields exactly one value per row, whereas
    # np.arange with a float step and a computed stop can return one extra
    # element because of rounding.
    combined_data['time'] = np.arange(len(combined_data)) * time_step
    return combined_data


Saved aggregated data to /Users/priyank/Desktop/ML4QS-project/Handshake_right_5mins/aggregated_data/Accelerometer_0.5s.csv
Saved aggregated data to /Users/priyank/Desktop/ML4QS-project/Handshake_right_5mins/aggregated_data/Gyroscope_0.5s.csv
Saved aggregated data to /Users/priyank/Desktop/ML4QS-project/Handshake_right_5mins/aggregated_data/Linear Accelerometer_0.5s.csv
Saved aggregated data to /Users/priyank/Desktop/ML4QS-project/Handshake_right_5mins/aggregated_data/Magnetometer_0.5s.csv
Saved aggregated data to /Users/priyank/Desktop/ML4QS-project/Highfive_right_5mins/aggregated_data/Accelerometer_0.5s.csv
Saved aggregated data to /Users/priyank/Desktop/ML4QS-project/Highfive_right_5mins/aggregated_data/Gyroscope_0.5s.csv
Saved aggregated data to /Users/priyank/Desktop/ML4QS-project/Highfive_right_5mins/aggregated_data/Linear Accelerometer_0.5s.csv
Saved aggregated data to /Users/priyank/Desktop/ML4QS-project/Highfive_right_5mins/aggregated_data/Magnetometer_0.5s.csv
Saved aggregated

In [6]:
# Aggregated file produced for each sensor type; the same file name is looked up
# inside every gesture folder during the final combination step.
sensor_files = dict(
    accelerometer='Accelerometer_0.5s.csv',
    gyroscope='Gyroscope_0.5s.csv',
    linear_accelerometer='Linear Accelerometer_0.5s.csv',
    magnetometer='Magnetometer_0.5s.csv',
)

# For every sensor, stack the data of all gesture folders and write the result
# next to the gesture folders as 'Combined_<file name>'.
for sensor_type, filename in sensor_files.items():
    combined_data = load_and_adjust_timestamps(base_path, gesture_folders, filename)
    output_path = os.path.join(base_path, 'Combined_' + filename)
    combined_data.to_csv(output_path, index=False)

print("Data concatenation and timestamp adjustment completed.")


Data concatenation and timestamp adjustment completed.
