In [1]:
# FILE READING & PREPROCESSING (for the example file)

In [2]:
import datetime
from pathlib import Path

import actipy  # Actipy explicitly supports .cwa files
import h5py
import numpy as np
import pandas as pd

# Location of the example recording (.cwa) to preprocess
cwa_file = "data/raw/2290025_90001_0_0.cwa"

# Parse the recording and have actipy run its preprocessing steps in one call.
# NOTE(review): the 20 Hz low-pass cutoff sits above the 15 Hz Nyquist limit of
# the 30 Hz resampled output - confirm this combination is intended.
data, info = actipy.read_device(
    cwa_file,
    lowpass_hz=20,           # low-pass filter cutoff, to remove noise
    calibrate_gravity=True,  # gravity calibration of the accelerometer readings
    detect_nonwear=True,     # detect periods where the device was not worn
    resample_hz=30,          # resample to a common 30 Hz rate
)

# Make sure the index holds datetimes, then put the rows in chronological order
data.index = pd.to_datetime(data.index)
data = data.sort_index()

# Preview the head of the preprocessed recording
print("First few rows of preprocessed data:")
data.head()

Reading file... Done! (9.60s)
Converting to dataframe... Done! (0.72s)
Quality control... Done! (4.03s)
Lowpass filter... Done! (7.90s)
Gravity calibration... Done! (11.24s)
Nonwear detection... Done! (6.94s)
Resampling... Done! (4.60s)
First few rows of preprocessed data:


Unnamed: 0_level_0,x,y,z,temperature,light
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2014-05-23 10:00:03.490000000,0.377573,0.77293,-0.790382,20.0,10.204639
2014-05-23 10:00:03.523333333,0.32211,-0.112775,1.171489,20.0,10.204639
2014-05-23 10:00:03.556666666,0.328128,-0.027956,1.01346,20.0,10.204639
2014-05-23 10:00:03.590000000,0.332283,0.011338,0.911394,20.0,10.204639
2014-05-23 10:00:03.623333333,0.329636,-0.02199,0.982622,20.0,10.204639


In [3]:
def segment_by_night(df, start_hour=22, end_hour=6):
    """
    Segments data into separate nights based on a sleep window (default: 10 PM - 6 AM).

    A night is the half-open interval [start_hour, end_hour) anchored on a calendar
    date. When end_hour <= start_hour the window crosses midnight and closes on the
    following day (e.g. 22:00 -> 06:00); otherwise it opens and closes on the same
    day (e.g. 00:00 -> 06:00). For windows that cross midnight, the night that began
    on the day before the first recorded date is also considered, so samples recorded
    between midnight and end_hour on the first day are kept instead of being dropped.

    Parameters:
    df (pd.DataFrame): The accelerometer data with a DatetimeIndex and 'x', 'y', 'z'
        columns. The input frame is not modified.
    start_hour (int): The hour at which nighttime starts (default: 22, i.e., 10 PM).
    end_hour (int): The hour at which nighttime ends (default: 6, i.e., 6 AM).

    Returns:
    dict: Maps str(date) of the day on which a night starts to a DataFrame with the
        columns 'datetime', 'x', 'y', 'z' (original index kept). Nights without any
        samples are omitted. An empty input yields an empty dict.

    Raises:
    ValueError: If start_hour or end_hour is not a valid hour of the day (0-23).
    """
    if len(df.index) == 0:
        return {}

    # datetime.time() rejects hours outside 0-23 up front
    window_open = datetime.time(start_hour, 0, 0)
    window_close = datetime.time(end_hour, 0, 0)
    crosses_midnight = end_hour <= start_hour

    stamps = df.index
    # Copy only the columns that are returned; keep the timestamps as a column too
    frame = df[['x', 'y', 'z']].assign(datetime=stamps)[['datetime', 'x', 'y', 'z']]

    # Distinct calendar days, computed on the index without per-row date objects
    days = list(stamps.normalize().unique().dropna().date)
    if crosses_midnight and days:
        # Early-morning samples of the first day belong to the previous day's night
        days.insert(0, min(days) - datetime.timedelta(days=1))

    nights = {}  # Dictionary to store segmented nights
    for day in days:
        night_start = datetime.datetime.combine(day, window_open)
        closing_day = day + datetime.timedelta(days=1) if crosses_midnight else day
        night_end = datetime.datetime.combine(closing_day, window_close)

        in_window = (stamps >= night_start) & (stamps < night_end)
        if in_window.any():
            nights[str(day)] = frame[in_window]

    return nights

# Split the preprocessed recording into per-night frames (22:00 -> 06:00 window)
night_segments = segment_by_night(data, start_hour=22, end_hour=6)

In [4]:
# Save preprocessed data into HDF5 format
hdf5_filename = "data/preprocessed/preprocessed_data.h5"

# Create the output folder if it is missing so the write below does not fail
# on a fresh checkout
Path(hdf5_filename).parent.mkdir(parents=True, exist_ok=True)

with h5py.File(hdf5_filename, "w") as hf:
    for i, (night, df) in enumerate(night_segments.items()):
        # Create a group for each night and record which date it belongs to,
        # since the group name only carries a running number
        grp = hf.create_group(f"night_{i+1}")
        grp.attrs["date"] = night

        # Store timestamps as Unix time in float seconds. True division keeps the
        # sub-second part; integer division would collapse every sample recorded
        # within the same second onto one timestamp.
        timestamps = (
            (df['datetime'] - pd.Timestamp("1970-01-01")) / pd.Timedelta(seconds=1)
        ).values

        # Store timestamps and accelerometer data
        grp.create_dataset("timestamps", data=timestamps)
        grp.create_dataset("x", data=df['x'].values)
        grp.create_dataset("y", data=df['y'].values)
        grp.create_dataset("z", data=df['z'].values)

print(f"Processed data saved to {hdf5_filename}")

Processed data saved to data/preprocessed/preprocessed_data.h5
