In [10]:
import os
import pandas as pd
import numpy as np
from scipy.fft import fft


# Root folder of the recorded data. The directory walk further down expects
# <base_path>/<subject>/<task>/linear_acc.csv.
base_path = '/Users/karthik/Documents/Task 4/Experimental_data'

# Accumulators filled by the directory walk: one feature vector and one
# activity label per processed sample. Two separate list objects on purpose.
data_list, labels_list = [], []

# Task folder name -> activity label written to y_test.
task_mapping = dict([
    ('laying', 'LAYING'),
    ('sitting', 'SITTING'),
    ('standing', 'STANDING'),
    ('walking', 'WALKING'),
    ('climbing_up', 'WALKING_UPSTAIRS'),
    ('climbing_down', 'WALKING_DOWNSTAIRS'),
])

# Summary statistics of a signal's FFT magnitude spectrum
def compute_fft_features(signal):
    """Return ``[mean, std, max, min]`` of the FFT magnitudes of ``signal``.

    ``signal`` is converted to a NumPy array, transformed with
    ``scipy.fft.fft`` and reduced to the absolute value of every bin before
    the four statistics are taken. Note that ``max`` comes before ``min`` in
    the returned list.
    """
    magnitudes = np.abs(fft(np.array(signal)))
    reducers = (np.mean, np.std, np.max, np.min)
    return [reduce_fn(magnitudes) for reduce_fn in reducers]

# Function to compute the time-domain and FFT features of an accelerometer frame
def compute_time_domain_features(df):
    """Build the flat 24-value feature vector for the samples in ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``'ax (m/s^2)'``, ``'ay (m/s^2)'`` and
        ``'az (m/s^2)'``; a missing column raises ``KeyError``.

    Returns
    -------
    list
        First the time-domain statistics ``mean, std, min, max`` for x, y, z
        (12 values), then the FFT-magnitude statistics
        ``mean, std, max, min`` for x, y, z (12 values).

    Notes
    -----
    The order deliberately matches the ``columns`` list used to build
    ``X_test`` (all time-domain columns first, all FFT columns after).
    Emitting the values interleaved per axis would place FFT values under
    time-domain column names.

    ``Series.std()`` is the sample standard deviation (``ddof=1``), so a
    single-row ``df`` yields NaN for every ``std`` entry.
    """
    time_features = []
    fft_features = []
    for axis in ['x', 'y', 'z']:
        col = f'a{axis} (m/s^2)'
        values = df[col]
        time_features.extend([
            values.mean(),  # Mean
            values.std(),   # Standard deviation (sample, ddof=1)
            values.min(),   # Min
            values.max()    # Max
        ])
        # FFT statistics are kept apart so they can be appended after the
        # time-domain block of all three axes.
        fft_features.extend(compute_fft_features(values))
    return time_features + fft_features

# Walk <base_path>/<subject>/<task>/linear_acc.csv and append one feature
# vector to data_list and one label to labels_list per CSV row.
#
# os.listdir() returns entries in arbitrary order, so both levels are sorted
# to make the row order of X_test / y_test reproducible between runs.
for subject in sorted(os.listdir(base_path)):
    subject_path = os.path.join(base_path, subject)

    # Only sub-directories are subject folders; skip anything else.
    if not os.path.isdir(subject_path):
        continue

    # Each sub-directory of a subject is one task (laying, sitting, etc.)
    for task in sorted(os.listdir(subject_path)):
        task_path = os.path.join(subject_path, task)
        if not os.path.isdir(task_path):
            continue

        # A task folder without linear_acc.csv contributes nothing.
        linear_acc_file = os.path.join(task_path, 'linear_acc.csv')
        if not os.path.exists(linear_acc_file):
            continue

        # The label depends only on the folder name, so look it up once per
        # file. Folders missing from task_mapping are labelled 'UNKNOWN'.
        label = task_mapping.get(task, 'UNKNOWN')

        df = pd.read_csv(linear_acc_file)

        # NOTE(review): features are computed on a single-row frame, so every
        # std value is NaN (sample std of one observation) and the FFT of one
        # sample is just its absolute value. If window-level features were
        # intended, slice df into multi-row windows here instead; the right
        # window length is not visible from this cell and needs confirming.
        for _, row in df.iterrows():
            features = compute_time_domain_features(pd.DataFrame([row]))
            data_list.append(features)
            labels_list.append(label)


# Column names of the feature matrix, in the order the values are expected in
# each row of data_list.
columns = (
    # Time-domain statistics for the x, y and z axes.
    [
        'a_x_mean', 'a_x_std', 'a_x_min', 'a_x_max',
        'a_y_mean', 'a_y_std', 'a_y_min', 'a_y_max',
        'a_z_mean', 'a_z_std', 'a_z_min', 'a_z_max',
    ]
    # FFT-magnitude statistics for the x, y and z axes (max precedes min).
    + [
        'a_x_fft_mean', 'a_x_fft_std', 'a_x_fft_max', 'a_x_fft_min',
        'a_y_fft_mean', 'a_y_fft_std', 'a_y_fft_max', 'a_y_fft_min',
        'a_z_fft_mean', 'a_z_fft_std', 'a_z_fft_max', 'a_z_fft_min',
    ]
)

# Feature matrix and label vector; row i of X_test belongs to entry i of y_test.
X_test = pd.DataFrame(data=data_list, columns=columns)
y_test = pd.Series(data=labels_list, name='Activity')

# Quick sanity check of the resulting sizes.
print(f"X_test shape: {X_test.shape}")
print(f"y_test shape: {y_test.shape}")


X_test shape: (20887, 24)
y_test shape: (20887,)


In [11]:



# Folder that receives the exported test set.
output_dir = '/Users/karthik/Documents/Task 4/'

# Full paths of the two CSV files inside output_dir.
X_test_file, y_test_file = (
    os.path.join(output_dir, filename)
    for filename in ('X_test.csv', 'y_test.csv')
)

# Write features and labels; index=False leaves the pandas index out of the files.
X_test.to_csv(X_test_file, index=False)
y_test.to_csv(y_test_file, index=False)

# Report the destinations once both writes have succeeded.
print("X_test saved to " + X_test_file)
print("y_test saved to " + y_test_file)


X_test saved to /Users/karthik/Documents/Task 4/X_test.csv
y_test saved to /Users/karthik/Documents/Task 4/y_test.csv
