In [1]:
import pandas as pd
import numpy as np
import os

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


In [2]:
def load_processed_results(base_directory):
    """
    Read every processed wSMI CSV found under ``base_directory`` into memory.

    The directory is walked two levels deep (``<subject>/<week>/``). Inside each
    week folder, every file whose name ends with ``'tau4.csv'`` is read with
    ``pd.read_csv``. Entries that are not directories at the subject or week
    level are skipped.

    Parameters:
    - base_directory (str): Root folder holding one sub-folder per subject.

    Returns:
    - dict: Nested mapping ``{subject: {week: {session: DataFrame}}}``. The
      session key is made of the leading underscore-separated parts of the file
      name, e.g. ``'session_session_1_wsmi_tau4.csv'`` -> ``'session_session_1'``.
    """
    def _session_key(filename):
        # Keep three leading parts when the name starts with 'session', else two.
        parts = filename.split('_')
        kept = 3 if parts[0] == 'session' else 2
        return '_'.join(parts[:kept])

    results = {}
    for subject in os.listdir(base_directory):
        subject_dir = os.path.join(base_directory, subject)
        if not os.path.isdir(subject_dir):
            continue

        by_week = {}
        results[subject] = by_week
        for week in os.listdir(subject_dir):
            week_dir = os.path.join(subject_dir, week)
            if not os.path.isdir(week_dir):
                continue

            by_session = {}
            by_week[week] = by_session
            for filename in os.listdir(week_dir):
                if not filename.endswith('tau4.csv'):
                    continue
                session = _session_key(filename)
                csv_path = os.path.join(week_dir, filename)
                by_session[session] = pd.read_csv(csv_path)
                print(f"Loaded data from {csv_path}")
    return results

# Example usage: load every tau4 wSMI CSV found under the processed-results folder.
# NOTE(review): this is an absolute, machine-specific path; consider deriving it
# from a configurable data directory so the notebook runs on other machines.
base_dir = r'C:/Users/hugma/diss/Breathwork EEG Data/Dreem_Pilot/Processed_Results_bplz_tau4'
loaded_results = load_processed_results(base_dir)

Loaded data from C:/Users/hugma/diss/Breathwork EEG Data/Dreem_Pilot/Processed_Results_bplz_tau4\s01\week_1\session_session_1_wsmi_tau4.csv
Loaded data from C:/Users/hugma/diss/Breathwork EEG Data/Dreem_Pilot/Processed_Results_bplz_tau4\s01\week_1\session_session_2_wsmi_tau4.csv
Loaded data from C:/Users/hugma/diss/Breathwork EEG Data/Dreem_Pilot/Processed_Results_bplz_tau4\s01\week_1\session_session_3_wsmi_tau4.csv
Loaded data from C:/Users/hugma/diss/Breathwork EEG Data/Dreem_Pilot/Processed_Results_bplz_tau4\s01\week_1\session_session_4_wsmi_tau4.csv
Loaded data from C:/Users/hugma/diss/Breathwork EEG Data/Dreem_Pilot/Processed_Results_bplz_tau4\s01\week_1\session_session_5_wsmi_tau4.csv
Loaded data from C:/Users/hugma/diss/Breathwork EEG Data/Dreem_Pilot/Processed_Results_bplz_tau4\s01\week_1\session_session_6_wsmi_tau4.csv
Loaded data from C:/Users/hugma/diss/Breathwork EEG Data/Dreem_Pilot/Processed_Results_bplz_tau4\s01\week_1\session_session_7_wsmi_tau4.csv
Loaded data from C:/

In [3]:
# Sanity check: display the frame loaded for subject s14, week_2, session_session_4.
loaded_results['s14']['week_2']['session_session_4']

Unnamed: 0,Channel1,Channel2,Channel3,Channel4,Channel5,Channel6,Channel7,Channel8,timestamps
0,196,182,167,197,194,181,159,182,156.0
1,189,181,151,199,177,177,156,178,157.0
2,177,183,157,193,187,179,155,175,160.0
3,182,169,158,203,181,174,154,184,165.0
4,174,184,155,199,180,169,151,180,166.0
...,...,...,...,...,...,...,...,...,...
446,107,150,78,158,99,156,69,157,677.0
447,112,156,87,165,100,152,86,157,678.0
448,203,151,195,166,208,153,197,157,680.0
449,210,138,194,168,210,149,190,152,681.0


In [4]:
def process_wsmi_data(data, subject, week, session):
    """
    Average per-row wSMI values into intervals for one subject/week/session.

    The input must have exactly nine columns, taken positionally as eight channel
    columns followed by a timestamp column. All work is done on a copy, so the
    caller's DataFrame is left untouched and the function can be re-run on the
    same input.

    Parameters:
    - data (DataFrame): Nine-column wSMI data (8 channels + timestamp).
    - subject (str): The subject identifier.
    - week (str): The week identifier.
    - session (str): The session identifier.

    Returns:
    - DataFrame: One row per (Subject, Week, Session, interval) holding the mean
      of each ``channel_N_bplz_4`` column.

    Raises:
    - ValueError: If ``data`` does not have exactly nine columns.
    """
    channel_columns = [f'channel_{i}_bplz_4' for i in range(1, 9)]
    expected_columns = channel_columns + ['timestamp']

    # Debug: Print the current shape and column names of the data
    print("Current DataFrame shape:", data.shape)
    print("Current DataFrame columns:", data.columns.tolist())

    # Columns are renamed positionally, so the count must match exactly.
    if len(data.columns) != len(expected_columns):
        raise ValueError(
            f"Data does not have the expected number of columns "
            f"({len(expected_columns)}); got {len(data.columns)}"
        )

    # Copy first: renaming/adding columns on the caller's frame would make a
    # second call on the same frame fail the column-count check.
    frame = data.copy()
    frame.columns = expected_columns

    frame['timestamp'] = frame['timestamp'].astype(float) * 4  # Convert epochs to seconds
    frame['interval'] = (frame['timestamp'] // 28).astype(int)  # Index of the 28-unit bin
    frame['Subject'] = subject
    frame['Week'] = week
    frame['Session'] = session

    # Aggregate data by 'Subject', 'Week', 'Session', and 'interval'
    aggregated = (
        frame.groupby(['Subject', 'Week', 'Session', 'interval'])
        .agg({column: 'mean' for column in channel_columns})
        .reset_index()
    )
    return aggregated
def add_wsmi_data_to_main(main_df, wsmi_data):
    """
    Write aggregated wSMI channel values into the matching rows of ``main_df``.

    For every row of ``wsmi_data``, the rows of ``main_df`` sharing the same
    Subject, Week, Session and interval receive that row's eight
    ``channel_N_bplz_4`` values. If no row of ``main_df`` matches, a
    "No match found" message is printed instead.

    ``main_df`` is modified in place and the same object is returned.
    """
    key_columns = ('Subject', 'Week', 'Session', 'interval')
    channel_columns = tuple(f'channel_{n}_bplz_4' for n in range(1, 9))

    for _, wsmi_row in wsmi_data.iterrows():
        # Boolean mask of main_df rows whose four key columns all equal this row's.
        matches = main_df[key_columns[0]] == wsmi_row[key_columns[0]]
        for key in key_columns[1:]:
            matches = matches & (main_df[key] == wsmi_row[key])

        if not matches.any():
            print(f"No match found for {wsmi_row['Subject']}, {wsmi_row['Week']}, {wsmi_row['Session']}, Interval {wsmi_row['interval']}")
            continue

        for column in channel_columns:
            main_df.loc[matches, column] = wsmi_row[column]
    return main_df
# Maps the session keys produced by the loader ('session_session_N', N = 1..7)
# to zero-padded 'run_0N' labels.
session_mapping = {
    f'session_session_{number}': f'run_{number:02d}'
    for number in range(1, 8)
}

# Load the main table whose rows will receive the wSMI channel columns.
# NOTE(review): absolute, machine-specific path; consider a configurable data directory.
main_df_path = "C:/Users/hugma/diss/final/main_df.csv"
main_df = pd.read_csv(main_df_path)
def process_wsmi_data(data, subject, week, session):
    """
    Average per-row wSMI values into intervals for one subject/week/session.

    NOTE: this redefinition replaces the earlier ``process_wsmi_data`` defined in
    the same cell; it is the version the processing loop actually calls.

    The input must have exactly nine columns, taken positionally as eight channel
    columns followed by a timestamp column. All work is done on a copy, so the
    caller's DataFrame is left untouched and the function can be re-run on the
    same input.

    Parameters:
    - data (DataFrame): Nine-column wSMI data (8 channels + timestamp).
    - subject (str): The subject identifier.
    - week (str): The week identifier.
    - session (str): The session identifier.

    Returns:
    - DataFrame: One row per (Subject, Week, Session, interval) holding the mean
      of each ``channel_N_bplz_4`` column.

    Raises:
    - ValueError: If ``data`` does not have exactly nine columns.
    """
    channel_columns = [f'channel_{i}_bplz_4' for i in range(1, 9)]
    expected_columns = channel_columns + ['timestamp']

    # Columns are renamed positionally, so the count must match exactly.
    if len(data.columns) != len(expected_columns):
        raise ValueError(
            f"Data does not have the expected number of columns "
            f"({len(expected_columns)}); got {len(data.columns)}"
        )

    # Copy first: mutating the caller's frame would leave it with extra columns,
    # so a second call on the same frame would fail when the names are applied.
    frame = data.copy()

    # Apply correct column names
    frame.columns = expected_columns

    # Scale the timestamp by 4 and bin it into 28-unit intervals
    frame['timestamp'] = frame['timestamp'].astype(float) * 4  # 'timestamp' is the last column
    frame['interval'] = (frame['timestamp'] // 28).astype(int)

    # Assign additional identifiers
    frame['Subject'] = subject
    frame['Week'] = week
    frame['Session'] = session

    # Aggregate data by 'Subject', 'Week', 'Session', and 'interval'
    aggregated = (
        frame.groupby(['Subject', 'Week', 'Session', 'interval'])
        .agg({column: 'mean' for column in channel_columns})
        .reset_index()
    )
    return aggregated

# Walk every loaded subject -> week -> session frame, aggregate it by interval,
# and write the resulting channel means into the matching rows of main_df.
for subject, weeks in loaded_results.items():
    for week, sessions in weeks.items():
        for session, data in sessions.items():
            # Translate the loader's session key to its mapped label;
            # keys missing from session_mapping are passed through unchanged.
            standardized_session = session_mapping.get(session, session)  # Default to original if not found in map
            
            # Print mapping information for debugging
            print(f"Mapping {session} to {standardized_session}")

            # Aggregate this session's frame into per-interval channel means
            processed_data = process_wsmi_data(data, subject, week, standardized_session)

            # Merge the per-interval means into main_df (rows with no match are reported by the callee)
            main_df = add_wsmi_data_to_main(main_df, processed_data)
   

Mapping session_session_1 to run_01
No match found for s01, week_1, run_01, Interval 43
Mapping session_session_2 to run_02
Mapping session_session_3 to run_03
Mapping session_session_4 to run_04
No match found for s01, week_1, run_04, Interval 43
Mapping session_session_5 to run_05
Mapping session_session_6 to run_06
Mapping session_session_7 to run_07
Mapping session_session_1 to run_01
No match found for s01, week_2, run_01, Interval 0
No match found for s01, week_2, run_01, Interval 1
No match found for s01, week_2, run_01, Interval 2
No match found for s01, week_2, run_01, Interval 3
No match found for s01, week_2, run_01, Interval 4
No match found for s01, week_2, run_01, Interval 5
No match found for s01, week_2, run_01, Interval 6
No match found for s01, week_2, run_01, Interval 7
No match found for s01, week_2, run_01, Interval 8
No match found for s01, week_2, run_01, Interval 9
No match found for s01, week_2, run_01, Interval 10
No match found for s01, week_2, run_01, Interv

In [7]:
# Persist the updated table without the index column.
# NOTE(review): this overwrites the same file main_df was read from, so a later
# re-run of the notebook starts from already-modified data; consider writing to a new file.
main_df.to_csv("C:/Users/hugma/diss/final/main_df.csv",index=False)