In [6]:
import pandas as pd
import numpy as np

# Function to extract features from Actigraphy data
def extract_features(data):
    """Summarise one participant's actigraphy series into a one-row DataFrame.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the columns 'X', 'Y', 'Z', 'enmo' and 'light'. A non-wear
        indicator column is optional and is looked up as 'non-wear_flag' or
        'non_wear_flag'.

    Returns
    -------
    pandas.DataFrame
        A single row with per-axis mean/std, enmo mean/std/max/min, mean/std of
        the acceleration magnitude, the most frequent activity-level label,
        the percentage of samples with enmo == 0, the percentage of samples
        flagged as non-wear (None when no flag column exists), and the
        mean/max of 'light'.

    Raises
    ------
    ValueError
        If any required column is missing.
    """
    required_columns = {'X', 'Y', 'Z', 'enmo', 'light'}
    if not required_columns.issubset(data.columns):
        raise ValueError(f"Missing required columns: {required_columns - set(data.columns)}")

    # Statistical features
    mean_x = data['X'].mean()
    mean_y = data['Y'].mean()
    mean_z = data['Z'].mean()
    std_x = data['X'].std()
    std_y = data['Y'].std()
    std_z = data['Z'].std()

    mean_enmo = data['enmo'].mean()
    std_enmo = data['enmo'].std()
    max_enmo = data['enmo'].max()
    min_enmo = data['enmo'].min()

    magnitude = np.sqrt(data['X']**2 + data['Y']**2 + data['Z']**2)
    mean_magnitude = magnitude.mean()
    std_magnitude = magnitude.std()

    # Most frequent magnitude bin. The last bin is open-ended so that samples
    # above 2.0 count as 'High' instead of being dropped as out-of-range, and
    # the mode is checked for emptiness because it is empty when no sample
    # falls into any bin (e.g. all magnitudes are NaN).
    activity_level = 'Unknown'
    if len(magnitude) > 0:
        level_modes = pd.cut(
            magnitude,
            bins=[0, 0.5, 1.0, 1.5, np.inf],
            labels=['Very Low', 'Low', 'Medium', 'High'],
            include_lowest=True
        ).mode()
        if not level_modes.empty:
            activity_level = level_modes.iloc[0]

    inactivity_percentage = (data['enmo'] == 0).mean() * 100

    # Accept both spellings of the flag column: a lookup restricted to
    # 'non_wear_flag' misses data that names it 'non-wear_flag'.
    non_wear_percentage = None
    for flag_column in ('non-wear_flag', 'non_wear_flag'):
        if flag_column in data.columns:
            non_wear_percentage = (data[flag_column] == 1).mean() * 100
            break

    mean_light = data['light'].mean()
    max_light = data['light'].max()

    features = {
        'mean_x': mean_x,
        'mean_y': mean_y,
        'mean_z': mean_z,
        'std_x': std_x,
        'std_y': std_y,
        'std_z': std_z,
        'mean_enmo': mean_enmo,
        'std_enmo': std_enmo,
        'max_enmo': max_enmo,
        'min_enmo': min_enmo,
        'mean_magnitude': mean_magnitude,
        'std_magnitude': std_magnitude,
        'activity_level': activity_level,
        'inactivity_percentage': inactivity_percentage,
        'non_wear_percentage': non_wear_percentage,
        'mean_light': mean_light,
        'max_light': max_light
    }
    return pd.DataFrame([features])

# Load participant data.
# The downstream names are initialised to None first: without this, a failed
# step either raises a misleading NameError (fresh kernel) or silently reuses
# a previous participant's values left over in the kernel.
participant_file = "C:/Users/Lenovo/OneDrive/Desktop/SEM 2/ml/project/cleaned_series_train.parquet/id=0a418b57.parquet"
participant_data = None
participant_features = None

try:
    participant_data = pd.read_parquet(participant_file)
    print("Data loaded successfully")
    print(participant_data.head())  # Debugging: Check data structure
except Exception as e:
    print(f"Error loading file: {e}")

# Extract features (skipped when the load above failed)
if participant_data is not None:
    try:
        participant_features = extract_features(participant_data)
        print("Features extracted successfully")
        print(participant_features)
    except Exception as e:
        print(f"Error extracting features: {e}")

# Save features next to the project data, named after the participant id
try:
    if participant_features is not None and not participant_features.empty:
        # The id is the text between the last 'id=' and the following '.'
        participant_id = participant_file.split('id=')[-1].split('.')[0]
        output_path = f"C:/Users/Lenovo/OneDrive/Desktop/SEM 2/ml/project/extracted_features_id={participant_id}.parquet"
        participant_features.to_parquet(output_path)
        print(f"Features saved successfully to {output_path}")
    else:
        print("No valid features extracted.")
except Exception as e:
    print(f"Error saving features: {e}")


Data loaded successfully
   step         X         Y         Z      enmo     anglez  non-wear_flag  \
0     0 -0.075242 -0.256743 -0.973791  0.038081 -72.952141            0.0   
1     1 -0.265893 -0.270508 -0.765470  0.077430 -52.849220            0.0   
2     2  0.334517 -0.548602 -0.588596  0.039162 -44.118084            0.0   
3     3  0.000193 -0.021069 -0.999681  0.001450 -88.759613            0.0   
4     4 -0.000685 -0.020681 -0.997677  0.000491 -88.756958            0.0   

   light  battery_voltage     time_of_day  weekday  quarter  \
0    5.0      4202.000000  51250000000000        2        4   
1    0.5      4185.333496  51255000000000        2        4   
2   11.5      4185.500000  51260000000000        2        4   
3    0.0      4185.666504  51265000000000        2        4   
4    8.5      4185.833496  51270000000000        2        4   

   relative_date_PCIAT  day_time  
0                 -9.0 -8.406829  
1                 -9.0 -8.406771  
2                 -9.0 -8.40

In [8]:
import pandas as pd
import numpy as np

# Function to extract features from Actigraphy data (excluding 'light')
def extract_features(data):
    """Summarise one participant's actigraphy series into a one-row DataFrame.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the columns 'X', 'Y', 'Z' and 'enmo'. A non-wear
        indicator column is optional and is looked up as 'non-wear_flag' or
        'non_wear_flag'. No 'light' column is read.

    Returns
    -------
    pandas.DataFrame
        A single row with per-axis mean/std, enmo mean/std/max/min, mean/std of
        the acceleration magnitude, the most frequent activity-level label,
        the percentage of samples with enmo == 0, and the percentage of
        samples flagged as non-wear (None when no flag column exists).

    Raises
    ------
    ValueError
        If any required column is missing.
    """
    required_columns = {'X', 'Y', 'Z', 'enmo'}
    if not required_columns.issubset(data.columns):
        raise ValueError(f"Missing required columns: {required_columns - set(data.columns)}")

    # Statistical features
    mean_x = data['X'].mean()
    mean_y = data['Y'].mean()
    mean_z = data['Z'].mean()
    std_x = data['X'].std()
    std_y = data['Y'].std()
    std_z = data['Z'].std()

    mean_enmo = data['enmo'].mean()
    std_enmo = data['enmo'].std()
    max_enmo = data['enmo'].max()
    min_enmo = data['enmo'].min()

    magnitude = np.sqrt(data['X']**2 + data['Y']**2 + data['Z']**2)
    mean_magnitude = magnitude.mean()
    std_magnitude = magnitude.std()

    # Most frequent magnitude bin. The last bin is open-ended so that samples
    # above 2.0 count as 'High' instead of being dropped as out-of-range, and
    # the mode is checked for emptiness because it is empty when no sample
    # falls into any bin (e.g. all magnitudes are NaN).
    activity_level = 'Unknown'
    if len(magnitude) > 0:
        level_modes = pd.cut(
            magnitude,
            bins=[0, 0.5, 1.0, 1.5, np.inf],
            labels=['Very Low', 'Low', 'Medium', 'High'],
            include_lowest=True
        ).mode()
        if not level_modes.empty:
            activity_level = level_modes.iloc[0]

    inactivity_percentage = (data['enmo'] == 0).mean() * 100

    # Tolerate either spelling of the optional non-wear flag column.
    non_wear_percentage = None
    for flag_column in ('non-wear_flag', 'non_wear_flag'):
        if flag_column in data.columns:
            non_wear_percentage = (data[flag_column] == 1).mean() * 100
            break

    features = {
        'mean_x': mean_x,
        'mean_y': mean_y,
        'mean_z': mean_z,
        'std_x': std_x,
        'std_y': std_y,
        'std_z': std_z,
        'mean_enmo': mean_enmo,
        'std_enmo': std_enmo,
        'max_enmo': max_enmo,
        'min_enmo': min_enmo,
        'mean_magnitude': mean_magnitude,
        'std_magnitude': std_magnitude,
        'activity_level': activity_level,
        'inactivity_percentage': inactivity_percentage,
        'non_wear_percentage': non_wear_percentage
    }
    return pd.DataFrame([features])

# Source parquet for one participant (replace with actual file path)
participant_file = "C:/Users/Lenovo/OneDrive/Desktop/SEM 2/ml/project/cleaned_series_train.parquet/id=00f332d1.parquet"

# Read the raw series, then reduce it to a one-row feature frame
participant_data = pd.read_parquet(participant_file)
participant_features = extract_features(participant_data)

# Bail out early when there is nothing to persist
if participant_features is None or participant_features.empty:
    print("No valid features extracted.")
else:
    # The participant id is the text between the last 'id=' and the next '.'
    participant_id = participant_file.rsplit('id=', 1)[-1].partition('.')[0]
    output_path = f"C:/Users/Lenovo/OneDrive/Desktop/SEM 2/ml/project/extracted_features_id={participant_id}.parquet"

    # Persist the feature row as parquet and echo it for review
    participant_features.to_parquet(output_path)
    print(f"Features extracted and saved successfully for participant {participant_id}.")
    print(participant_features)  # Print features for review


Features extracted and saved successfully for participant 00f332d1.
     mean_x    mean_y    mean_z     std_x     std_y     std_z  mean_enmo  \
0  0.217549  0.064164  0.159889  0.507957  0.464649  0.644808   0.029802   

   std_enmo  max_enmo  min_enmo  mean_magnitude  std_magnitude activity_level  \
0  0.104216  3.681586       0.0        0.980751       0.070811            Low   

   inactivity_percentage  non_wear_percentage  
0               15.59686            11.175805  
