In [1]:
# import PyQt6.QtCore
import os
os.environ["QT_API"] = "pyqt5"
import matplotlib.pyplot as plt
import mne
import numpy as np
import pandas as pd
from sklearn.model_selection import cross_val_score, KFold
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from mne.time_frequency import psd_array_multitaper
import os
from config import Config
configObj = Config()
configss = configObj.getConfigSnapshot()


mne.set_log_level(verbose='WARNING', return_old_level=False, add_frames=None)

# Assuming EEG data is in MNE format (Epochs or Raw). Placeholder for file path to participants' data
data_dir = "path_to_data"

# Frequency bands definitions
freq_bands = {'theta': (4, 8), 'alpha': (8, 12), 'beta': (12, 30)}

# Placeholder for results
results = []


In [2]:
import os
import mne

def load_participant_epochs(pnum, condition):
    """Load and concatenate epochs for a participant across multiple blocks for a given condition.
    
    Parameters:
    - pnum: Participant number (integer)
    - condition: Condition (e.g., 'D' or 'ND')
    
    Returns:
    - concatenated_epochs: Concatenated MNE epochs object for the participant
    """
    
    # Initialize an empty list to store the epochs from each block
    epochs_list = []
    
    # Loop through all 4 blocks
    for b_cnt in range(0, 4):
        # Create the block numbers by appending the current block count to the condition
        block_num = f"{condition}{b_cnt}"
        
        # Construct the participant's folder name
        participant_number = 'P' + str(pnum)
        # Create the full path to the epochs file
        participant_data_path = participant_number + '/' + block_num + '-epo.fif'
        path = os.path.join(configss['root'], configss['data_dir'], participant_data_path)
        
        # Load the epochs file for this block
        epochs = mne.read_epochs(path, preload=True)
        
        # Append the loaded epochs to the list
        epochs_list.append(epochs)
    
    # Concatenate all the loaded epochs from the list
    concatenated_epochs = mne.concatenate_epochs(epochs_list)
    
    return concatenated_epochs


In [3]:
# Function to calculate PSD for each epoch and frequency band
def calculate_psd_per_epoch(epochs, bands, sfreq):
    psds = []  # Store all PSDs per epoch
    freqs = None
    
    # Get PSD for each epoch
    for epoch in epochs.get_data():
        psd, freqs = mne.time_frequency.psd_array_multitaper(epoch, sfreq=sfreq, fmin=4., fmax=30., verbose=False)
        psds.append(psd)  # Append the PSD for the current epoch
    
    psds = np.array(psds)  # Convert to numpy array (n_epochs, n_channels, n_frequencies)
    
    band_powers = {band: [] for band in bands}  # Dictionary to store band powers
    
    # Calculate power in each frequency band for each epoch
    for band, (fmin, fmax) in bands.items():
        freq_mask = (freqs >= fmin) & (freqs <= fmax)  # Mask to select frequencies in the current band
        band_power = np.mean(psds[:, :, freq_mask], axis=-1)  # Average PSD over the selected frequencies
        band_powers[band] = band_power  # Store the band power for the current band (n_epochs, n_channels)
    
    return band_powers

In [4]:
# Function to calculate engagement index per epoch and channel
def calculate_engagement_index(band_powers):
    # Engagement index = Beta / (Alpha + Theta)
    beta_power = band_powers['beta']  # (n_epochs, n_channels)
    alpha_power = band_powers['alpha']  # (n_epochs, n_channels)
    theta_power = band_powers['theta']  # (n_epochs, n_channels)
    
    # Calculate engagement index
    engagement_index = beta_power / (alpha_power + theta_power)
    
    return engagement_index  # (n_epochs, n_channels)

In [5]:
# Define frequency bands (example: theta, alpha, beta)
freq_bands = {
    'theta': (4, 8),
    'alpha': (8, 12),
    'beta': (12, 30)
}


event_count_condition1 = {}
event_count_condition2 = {}
engegement_index_D = None
engegement_index_ND = None

valid_pids = [el for el in list(range(1, 32)) if el not in [14, 5, 13, 16, 17, 20, 31]]

# Iterate through all participants
for participant_id in valid_pids:

    epochs_condition1 = load_participant_epochs(participant_id, 'D')
    epochs_condition2 = load_participant_epochs(participant_id, 'ND')

    power_bands_D =  calculate_psd_per_epoch(epochs_condition1, freq_bands, epochs_condition1.info['sfreq'])
    power_bands_ND =  calculate_psd_per_epoch(epochs_condition2, freq_bands, epochs_condition1.info['sfreq'])

    # get the average engagement index for D and ND condition
    engegement_index_D_epochs = calculate_engagement_index(power_bands_D)
    engegement_index_ND_epochs = calculate_engagement_index(power_bands_ND)

    avg_engegement_index_D = np.reshape(np.mean(engegement_index_D_epochs, axis = 0 ), (1,62))
    avg_engegement_index_ND = np.reshape(np.mean(engegement_index_ND_epochs, axis = 0 ), (1,62))

    if engegement_index_D is None:
        engegement_index_D = avg_engegement_index_D
    else:
        engegement_index_D = np.vstack((engegement_index_D, avg_engegement_index_D))


    if engegement_index_ND is None:
        engegement_index_ND = avg_engegement_index_ND
    else:
        engegement_index_ND = np.vstack((engegement_index_ND, avg_engegement_index_ND))   



    # Add average engagement index values to seperate lists 


# check for statsistical difference between D and ND for all parctiapant for each eeg channel
# with stat corrctions     



  concatenated_epochs = mne.concatenate_epochs(epochs_list)
  concatenated_epochs = mne.concatenate_epochs(epochs_list)
  concatenated_epochs = mne.concatenate_epochs(epochs_list)
  concatenated_epochs = mne.concatenate_epochs(epochs_list)
  concatenated_epochs = mne.concatenate_epochs(epochs_list)
  concatenated_epochs = mne.concatenate_epochs(epochs_list)
  concatenated_epochs = mne.concatenate_epochs(epochs_list)
  concatenated_epochs = mne.concatenate_epochs(epochs_list)
  concatenated_epochs = mne.concatenate_epochs(epochs_list)
  concatenated_epochs = mne.concatenate_epochs(epochs_list)
  concatenated_epochs = mne.concatenate_epochs(epochs_list)
  concatenated_epochs = mne.concatenate_epochs(epochs_list)
  concatenated_epochs = mne.concatenate_epochs(epochs_list)
  concatenated_epochs = mne.concatenate_epochs(epochs_list)
  concatenated_epochs = mne.concatenate_epochs(epochs_list)
  concatenated_epochs = mne.concatenate_epochs(epochs_list)
  concatenated_epochs = mne.concatenate_

In [10]:
engegement_index_ND.shape

(24, 62)

In [9]:
import numpy as np
from scipy.stats import wilcoxon, ttest_rel
from statsmodels.stats.multitest import multipletests

# Assuming engagement_index_D and engagement_index_ND are (24, 62) arrays
# engagement_index_D: engagement indices for condition D
# engagement_index_ND: engagement indices for condition ND

# Initialize arrays to store p-values
wilcoxon_p_values = []
t_test_p_values = []


# Iterate over each EEG channel (62 channels)
for channel in range(62):
    # Wilcoxon signed-rank test for condition D vs ND for this channel
    stat_w, p_value_w = wilcoxon(engegement_index_D[:, channel], engegement_index_ND[:, channel])
    wilcoxon_p_values.append(p_value_w)
    
    # Paired t-test (equivalent to ANOVA for two conditions)
    stat_t, p_value_t = ttest_rel(engegement_index_D[:, channel], engegement_index_ND[:, channel])
    t_test_p_values.append(p_value_t)

# Convert p-values to NumPy arrays for easier handling
wilcoxon_p_values = np.array(wilcoxon_p_values)
t_test_p_values = np.array(t_test_p_values)

# Apply correction for multiple comparisons (Bonferroni correction)
# You can also use other corrections like FDR if needed
corrected_wilcoxon_p_values = multipletests(wilcoxon_p_values, method='bonferroni')[1]
corrected_t_test_p_values = multipletests(t_test_p_values, method='bonferroni')[1]

# Alternatively, apply FDR correction for multiple comparisons
fdr_wilcoxon_p_values = multipletests(wilcoxon_p_values, method='fdr_bh')[1]
fdr_t_test_p_values = multipletests(t_test_p_values, method='fdr_bh')[1]

# # Now you have corrected p-values from Wilcoxon and paired t-tests for each EEG channel
# print("Corrected Wilcoxon p-values (Bonferroni):", corrected_wilcoxon_p_values)
# print("Corrected T-test p-values (Bonferroni):", corrected_t_test_p_values)
# print("Corrected Wilcoxon p-values (FDR):", fdr_wilcoxon_p_values)
# print("Corrected T-test p-values (FDR):", fdr_t_test_p_values)


# Significance threshold
alpha = 0.1

# Print significant channels for Wilcoxon test (Bonferroni corrected)
print("Significant channels (Wilcoxon, Bonferroni corrected):")
for channel in range(62):
    if corrected_wilcoxon_p_values[channel] < alpha:
        print(f"Channel {channel + 1}: p-value = {corrected_wilcoxon_p_values[channel]}")

# Print significant channels for Paired t-test (Bonferroni corrected)
print("\nSignificant channels (Paired t-test, Bonferroni corrected):")
for channel in range(62):
    if corrected_t_test_p_values[channel] < alpha:
        print(f"Channel {channel + 1}: p-value = {corrected_t_test_p_values[channel]}")

# Optionally, you can do the same for FDR corrected p-values

# Print significant channels for Wilcoxon test (FDR corrected)
print("\nSignificant channels (Wilcoxon, FDR corrected):")
for channel in range(62):
    if fdr_wilcoxon_p_values[channel] < alpha:
        print(f"Channel {channel + 1}: p-value = {fdr_wilcoxon_p_values[channel]}")

# Print significant channels for Paired t-test (FDR corrected)
print("\nSignificant channels (Paired t-test, FDR corrected):")
for channel in range(62):
    if fdr_t_test_p_values[channel] < alpha:
        print(f"Channel {channel + 1}: p-value = {fdr_t_test_p_values[channel]}")


Significant channels (Wilcoxon, Bonferroni corrected):

Significant channels (Paired t-test, Bonferroni corrected):

Significant channels (Wilcoxon, FDR corrected):

Significant channels (Paired t-test, FDR corrected):


In [12]:
import numpy as np
import pandas as pd
import statsmodels.api as sm
from statsmodels.formula.api import mixedlm

# Define frequency bands (example: theta, alpha, beta)
freq_bands = {
    'theta': (4, 8),
    'alpha': (8, 12),
    'beta': (12, 30)
}

# Initialize dictionaries and engagement index arrays
event_count_condition1 = {}
event_count_condition2 = {}

# List to accumulate data for Linear Mixed-Effects Model
data = []

valid_pids = [el for el in list(range(1, 32)) if el not in [14, 5, 13, 16, 17, 20, 31]]

# Iterate through all participants
for participant_id in valid_pids:

    # Load epochs for both conditions
    epochs_condition1 = load_participant_epochs(participant_id, 'D')
    epochs_condition2 = load_participant_epochs(participant_id, 'ND')

    # Calculate power bands for each condition
    power_bands_D = calculate_psd_per_epoch(epochs_condition1, freq_bands, epochs_condition1.info['sfreq'])
    power_bands_ND = calculate_psd_per_epoch(epochs_condition2, freq_bands, epochs_condition1.info['sfreq'])

    # Get the average engagement index for D and ND condition
    engegement_index_D_epochs = calculate_engagement_index(power_bands_D)
    engegement_index_ND_epochs = calculate_engagement_index(power_bands_ND)

    avg_engegement_index_D = np.reshape(np.mean(engegement_index_D_epochs, axis=0), (1, 62))
    avg_engegement_index_ND = np.reshape(np.mean(engegement_index_ND_epochs, axis=0), (1, 62))

    # Collect the data for both conditions for each participant and each channel
    for channel in range(62):
        # Append data for condition D
        data.append({
            'Participant': participant_id,
            'Channel': channel,
            'Condition': 'D',
            'Engagement': avg_engegement_index_D[0, channel]
        })
        # Append data for condition ND
        data.append({
            'Participant': participant_id,
            'Channel': channel,
            'Condition': 'ND',
            'Engagement': avg_engegement_index_ND[0, channel]
        })

# Convert the accumulated data to a pandas DataFrame for LMM analysis
df = pd.DataFrame(data)

# Fit the Linear Mixed-Effects Model (LMM)
# 'Condition' is the fixed effect (D vs ND)
# 'Participant' is the random effect to account for variability between participants
model = mixedlm("Engagement ~ Condition", df, groups=df["Participant"], re_formula="~1")
result = model.fit()

# Print the summary of the model
print(result.summary())


  concatenated_epochs = mne.concatenate_epochs(epochs_list)
  concatenated_epochs = mne.concatenate_epochs(epochs_list)
  concatenated_epochs = mne.concatenate_epochs(epochs_list)
  concatenated_epochs = mne.concatenate_epochs(epochs_list)
  concatenated_epochs = mne.concatenate_epochs(epochs_list)
  concatenated_epochs = mne.concatenate_epochs(epochs_list)
  concatenated_epochs = mne.concatenate_epochs(epochs_list)
  concatenated_epochs = mne.concatenate_epochs(epochs_list)
  concatenated_epochs = mne.concatenate_epochs(epochs_list)
  concatenated_epochs = mne.concatenate_epochs(epochs_list)
  concatenated_epochs = mne.concatenate_epochs(epochs_list)
  concatenated_epochs = mne.concatenate_epochs(epochs_list)
  concatenated_epochs = mne.concatenate_epochs(epochs_list)
  concatenated_epochs = mne.concatenate_epochs(epochs_list)
  concatenated_epochs = mne.concatenate_epochs(epochs_list)
  concatenated_epochs = mne.concatenate_epochs(epochs_list)
  concatenated_epochs = mne.concatenate_

          Mixed Linear Model Regression Results
Model:             MixedLM Dependent Variable: Engagement
No. Observations:  2976    Method:             REML      
No. Groups:        24      Scale:              0.0035    
Min. group size:   124     Log-Likelihood:     4136.4420 
Max. group size:   124     Converged:          Yes       
Mean group size:   124.0                                 
---------------------------------------------------------
                Coef. Std.Err.   z    P>|z| [0.025 0.975]
---------------------------------------------------------
Intercept       0.246    0.013 18.362 0.000  0.220  0.272
Condition[T.ND] 0.007    0.002  3.119 0.002  0.003  0.011
Group Var       0.004    0.021                           





In [22]:
import numpy as np
import pandas as pd
import statsmodels.api as sm
from statsmodels.formula.api import mixedlm

# Define frequency bands (example: theta, alpha, beta)
freq_bands = {
    'theta': (4, 8),
    'alpha': (8, 12),
    'beta': (12, 30)
}

# Initialize dictionaries and engagement index arrays
event_count_condition1 = {}
event_count_condition2 = {}

# List to accumulate data for Linear Mixed-Effects Model
data = []

valid_pids = [el for el in list(range(1, 32)) if el not in [14, 5, 13, 16, 17, 20, 31]]

# Iterate through all participants
for participant_id in valid_pids:

    # Load epochs for both conditions
    epochs_condition1 = load_participant_epochs(participant_id, 'D')
    epochs_condition2 = load_participant_epochs(participant_id, 'ND')

    # Calculate power bands for each condition
    power_bands_D = calculate_psd_per_epoch(epochs_condition1, freq_bands, epochs_condition1.info['sfreq'])
    power_bands_ND = calculate_psd_per_epoch(epochs_condition2, freq_bands, epochs_condition1.info['sfreq'])

    # Get the average engagement index for D and ND condition
    engegement_index_D_epochs = calculate_engagement_index(power_bands_D)
    engegement_index_ND_epochs = calculate_engagement_index(power_bands_ND)

    avg_engegement_index_D = np.reshape(np.mean(engegement_index_D_epochs, axis=0), (1, 62))
    avg_engegement_index_ND = np.reshape(np.mean(engegement_index_ND_epochs, axis=0), (1, 62))

    # Collect the data for both conditions for each participant and each channel
    for channel in range(62):
        # Append data for condition D
        data.append({
            'Participant': participant_id,
            'Channel': channel,
            'Condition': 'D',
            'Engagement': avg_engegement_index_D[0, channel]
        })
        # Append data for condition ND
        data.append({
            'Participant': participant_id,
            'Channel': channel,
            'Condition': 'ND',
            'Engagement': avg_engegement_index_ND[0, channel]
        })

# Convert the accumulated data to a pandas DataFrame for LMM analysis
df = pd.DataFrame(data)

# Fit the Linear Mixed-Effects Model (LMM)
# 'Condition' is the fixed effect (D vs ND)
# 'Participant' is the random effect to account for variability between participants
model_interaction  = mixedlm("Engagement ~ Condition + Channel", df, groups=df["Participant"], re_formula="~1")
result = model_interaction.fit()

# Print the summary of the model
print(result.summary())


  concatenated_epochs = mne.concatenate_epochs(epochs_list)
  concatenated_epochs = mne.concatenate_epochs(epochs_list)
  concatenated_epochs = mne.concatenate_epochs(epochs_list)
  concatenated_epochs = mne.concatenate_epochs(epochs_list)
  concatenated_epochs = mne.concatenate_epochs(epochs_list)
  concatenated_epochs = mne.concatenate_epochs(epochs_list)
  concatenated_epochs = mne.concatenate_epochs(epochs_list)
  concatenated_epochs = mne.concatenate_epochs(epochs_list)
  concatenated_epochs = mne.concatenate_epochs(epochs_list)
  concatenated_epochs = mne.concatenate_epochs(epochs_list)
  concatenated_epochs = mne.concatenate_epochs(epochs_list)
  concatenated_epochs = mne.concatenate_epochs(epochs_list)
  concatenated_epochs = mne.concatenate_epochs(epochs_list)
  concatenated_epochs = mne.concatenate_epochs(epochs_list)
  concatenated_epochs = mne.concatenate_epochs(epochs_list)
  concatenated_epochs = mne.concatenate_epochs(epochs_list)
  concatenated_epochs = mne.concatenate_

          Mixed Linear Model Regression Results
Model:             MixedLM Dependent Variable: Engagement
No. Observations:  2976    Method:             REML      
No. Groups:        24      Scale:              0.0034    
Min. group size:   124     Log-Likelihood:     4169.8987 
Max. group size:   124     Converged:          Yes       
Mean group size:   124.0                                 
---------------------------------------------------------
                Coef. Std.Err.   z    P>|z| [0.025 0.975]
---------------------------------------------------------
Intercept       0.229    0.013 16.963 0.000  0.203  0.255
Condition[T.ND] 0.007    0.002  3.163 0.002  0.003  0.011
Channel         0.001    0.000  9.259 0.000  0.000  0.001
Group Var       0.004    0.022                           





In [24]:
import numpy as np
import pandas as pd
import statsmodels.api as sm
from statsmodels.formula.api import mixedlm

# Define frequency bands (example: theta, alpha, beta)
freq_bands = {
    'theta': (4, 8),
    'alpha': (8, 12),
    'beta': (12, 30)
}

# Initialize list to accumulate data for Linear Mixed-Effects Model
data = []

valid_pids = [el for el in list(range(1, 32)) if el not in [14, 5, 13, 16, 17, 20, 31]]

# Iterate through all participants
for participant_id in valid_pids:

    # Load epochs for both conditions
    epochs_condition1 = load_participant_epochs(participant_id, 'D')
    epochs_condition2 = load_participant_epochs(participant_id, 'ND')

    # Calculate power bands for each condition
    power_bands_D = calculate_psd_per_epoch(epochs_condition1, freq_bands, epochs_condition1.info['sfreq'])
    power_bands_ND = calculate_psd_per_epoch(epochs_condition2, freq_bands, epochs_condition1.info['sfreq'])

    # Get the average engagement index for D and ND condition
    engagement_index_D_epochs = calculate_engagement_index(power_bands_D)
    engagement_index_ND_epochs = calculate_engagement_index(power_bands_ND)

    avg_engagement_index_D = np.reshape(np.mean(engagement_index_D_epochs, axis=0), (1, -1))
    avg_engagement_index_ND = np.reshape(np.mean(engagement_index_ND_epochs, axis=0), (1, -1))

    # Get channel names
    channel_names = epochs_condition1.ch_names

    # Collect the data for both conditions for each participant and each channel
    for idx, channel_name in enumerate(channel_names):
        # Append data for condition D
        data.append({
            'Participant': participant_id,
            'Channel': channel_name,
            'Condition': 'D',
            'Engagement': avg_engagement_index_D[0, idx]
        })
        # Append data for condition ND
        data.append({
            'Participant': participant_id,
            'Channel': channel_name,
            'Condition': 'ND',
            'Engagement': avg_engagement_index_ND[0, idx]
        })

# Convert the accumulated data to a pandas DataFrame for LMM analysis
df = pd.DataFrame(data)

# Ensure 'Channel' is a categorical variable
df['Channel'] = df['Channel'].astype('category')

# Fit the Linear Mixed-Effects Model (LMM)
# Use 'C(Channel)' to treat 'Channel' as a categorical variable
model_interaction = mixedlm("Engagement ~ Condition * C(Channel)", df, groups=df["Participant"])
result = model_interaction.fit()

# Print the summary of the model
print(result.summary())


  concatenated_epochs = mne.concatenate_epochs(epochs_list)
  concatenated_epochs = mne.concatenate_epochs(epochs_list)
  concatenated_epochs = mne.concatenate_epochs(epochs_list)
  concatenated_epochs = mne.concatenate_epochs(epochs_list)
  concatenated_epochs = mne.concatenate_epochs(epochs_list)
  concatenated_epochs = mne.concatenate_epochs(epochs_list)
  concatenated_epochs = mne.concatenate_epochs(epochs_list)
  concatenated_epochs = mne.concatenate_epochs(epochs_list)
  concatenated_epochs = mne.concatenate_epochs(epochs_list)
  concatenated_epochs = mne.concatenate_epochs(epochs_list)
  concatenated_epochs = mne.concatenate_epochs(epochs_list)
  concatenated_epochs = mne.concatenate_epochs(epochs_list)
  concatenated_epochs = mne.concatenate_epochs(epochs_list)
  concatenated_epochs = mne.concatenate_epochs(epochs_list)
  concatenated_epochs = mne.concatenate_epochs(epochs_list)
  concatenated_epochs = mne.concatenate_epochs(epochs_list)
  concatenated_epochs = mne.concatenate_

                   Mixed Linear Model Regression Results
Model:                    MixedLM       Dependent Variable:       Engagement
No. Observations:         2976          Method:                   REML      
No. Groups:               24            Scale:                    0.0028    
Min. group size:          124           Log-Likelihood:           4077.3099 
Max. group size:          124           Converged:                Yes       
Mean group size:          124.0                                             
----------------------------------------------------------------------------
                                  Coef.  Std.Err.   z    P>|z| [0.025 0.975]
----------------------------------------------------------------------------
Intercept                          0.248    0.017 14.495 0.000  0.215  0.282
Condition[T.ND]                    0.011    0.015  0.712 0.476 -0.019  0.041
C(Channel)[T.10R]                  0.015    0.015  0.949 0.343 -0.015  0.044
C(Channel)[T.1L]   



In [25]:
# Extract parameters and p-values
params = result.params
pvalues = result.pvalues

# Filter for interaction terms
interaction_terms = [term for term in params.index if 'Condition[T.ND]:C(Channel)' in term]
interaction_params = params[interaction_terms]
interaction_pvalues = pvalues[interaction_terms]

In [26]:
from statsmodels.stats.multitest import multipletests

# Adjust p-values using FDR correction
adjusted_pvalues = multipletests(interaction_pvalues, method='fdr_bh')[1]

# Create a DataFrame for easier viewing
interaction_results = pd.DataFrame({
    'Coefficient': interaction_params,
    'P-value': interaction_pvalues,
    'Adjusted P-value': adjusted_pvalues
})

print(interaction_results)


                                   Coefficient   P-value  Adjusted P-value
Condition[T.ND]:C(Channel)[T.10R]    -0.004610  0.831054          0.992257
Condition[T.ND]:C(Channel)[T.1L]     -0.007679  0.722302          0.992257
Condition[T.ND]:C(Channel)[T.1LA]    -0.007228  0.737999          0.992257
Condition[T.ND]:C(Channel)[T.1LB]    -0.002026  0.925307          0.992257
Condition[T.ND]:C(Channel)[T.1LC]    -0.013868  0.521016          0.992257
...                                        ...       ...               ...
Condition[T.ND]:C(Channel)[T.8R]     -0.009557  0.658291          0.992257
Condition[T.ND]:C(Channel)[T.8Z]     -0.004029  0.852098          0.992257
Condition[T.ND]:C(Channel)[T.9L]     -0.000631  0.976701          0.992257
Condition[T.ND]:C(Channel)[T.9R]      0.000788  0.970911          0.992257
Condition[T.ND]:C(Channel)[T.9Z]     -0.002845  0.895248          0.992257

[61 rows x 3 columns]


In [28]:
from statsmodels.formula.api import mixedlm

# Initialize lists to store results
channel_coefficients = []
channel_pvalues = []
channel_names = df['Channel'].unique()

for channel in channel_names:
    # Subset data for the current channel
    df_channel = df[df['Channel'] == channel]
    
    # Fit the model for this channel
    model = mixedlm("Engagement ~ Condition", df_channel, groups=df_channel["Participant"])
    result = model.fit()
    
    # Store the coefficient and p-value for Condition
    coef = result.params['Condition[T.ND]']
    pvalue = result.pvalues['Condition[T.ND]']
    channel_coefficients.append(coef)
    channel_pvalues.append(pvalue)

# Create a DataFrame of the results
channel_results = pd.DataFrame({
    'Channel': channel_names,
    'Coefficient': channel_coefficients,
    'P-value': channel_pvalues
})

# Adjust p-values for multiple comparisons
channel_results['Adjusted P-value'] = multipletests(channel_results['P-value'], method='fdr_bh')[1]

print(channel_results)




   Channel  Coefficient   P-value  Adjusted P-value
0       1Z     0.006517  0.033429          0.268434
1       2Z     0.005892  0.043757          0.268434
2       3Z     0.006032  0.057736          0.268434
3       4Z     0.004988  0.085954          0.296063
4       6Z     0.002000  0.502058          0.622551
..     ...          ...       ...               ...
57     5RC     0.006700  0.228055          0.441857
58     1RD     0.009523  0.017620          0.268434
59     2RD     0.002668  0.796656          0.882013
60     3RD    -0.001109  0.892843          0.939313
61     4RD     0.011090  0.169174          0.368459

[62 rows x 4 columns]


In [29]:
import matplotlib.pyplot as plt

# For significant channels
significant_channels = channel_results[channel_results['Adjusted P-value'] < 0.05]['Channel']

for channel in significant_channels:
    df_channel = df[df['Channel'] == channel]
    # Calculate mean engagement per condition
    means = df_channel.groupby('Condition')['Engagement'].mean()
    stds = df_channel.groupby('Condition')['Engagement'].std()
    
    # Plot
    plt.figure()
    means.plot(kind='bar', yerr=stds, capsize=4)
    plt.title(f'Engagement by Condition for Channel {channel}')
    plt.ylabel('Engagement')
    plt.show()



In [30]:
significant_channels

Series([], Name: Channel, dtype: category
Categories (62, object): ['10L', '10R', '1L', '1LA', ..., '8Z', '9L', '9R', '9Z'])

In [20]:
print(result.summary())

          Mixed Linear Model Regression Results
Model:             MixedLM Dependent Variable: Engagement
No. Observations:  2976    Method:             REML      
No. Groups:        24      Scale:              0.0035    
Min. group size:   124     Log-Likelihood:     4136.4420 
Max. group size:   124     Converged:          Yes       
Mean group size:   124.0                                 
---------------------------------------------------------
                Coef. Std.Err.   z    P>|z| [0.025 0.975]
---------------------------------------------------------
Intercept       0.246    0.013 18.362 0.000  0.220  0.272
Condition[T.ND] 0.007    0.002  3.119 0.002  0.003  0.011
Group Var       0.004    0.021                           



In [21]:
print(result.model.exog_names)

['Intercept', 'Condition[T.ND]']


In [15]:
import numpy as np
import pandas as pd
import statsmodels.formula.api as smf

# Assuming df is already created with long-format data for Participant, Channel, Condition, and Engagement

# List to store the significant channels
significant_channels = []

# Iterate over each EEG channel (62 channels)
for channel in range(62):
    # Subset data for the current channel
    df_channel = df[df["Channel"] == channel]
    
    # Fit the LMM for this channel
    model_channel = smf.mixedlm("Engagement ~ Condition", df_channel, groups=df_channel["Participant"], re_formula="~1")
    result_channel = model_channel.fit()
    
    # Extract the p-value for the Condition effect (D vs ND)
    p_value = result_channel.pvalues["Condition[T.ND]"]
    
    # If the p-value is less than 0.05, store the channel number and p-value
    if p_value < 0.05:
        significant_channels.append((channel + 1, p_value))

# Print the significant channels and their p-values
if significant_channels:
    print("Significant Channels (Condition effect):")
    for channel, p_value in significant_channels:
        print(f"Channel {channel}: p-value = {p_value:.5f}")
else:
    print("No significant channels found.")




Significant Channels (Condition effect):
Channel 1: p-value = 0.03343
Channel 2: p-value = 0.04376
Channel 16: p-value = 0.04712
Channel 17: p-value = 0.02652
Channel 36: p-value = 0.04747
Channel 41: p-value = 0.00386
Channel 54: p-value = 0.03356
Channel 57: p-value = 0.02396
Channel 59: p-value = 0.01762




In [16]:
import numpy as np
import pandas as pd
import statsmodels.formula.api as smf
from statsmodels.stats.multitest import multipletests

# Assuming df is already created with long-format data for Participant, Channel, Condition, and Engagement

# List to store the p-values for all channels
channel_p_values = []

# Iterate over each EEG channel (62 channels)
for channel in range(62):
    # Subset data for the current channel
    df_channel = df[df["Channel"] == channel]
    
    # Fit the LMM for this channel
    model_channel = smf.mixedlm("Engagement ~ Condition", df_channel, groups=df_channel["Participant"], re_formula="~1")
    result_channel = model_channel.fit()
    
    # Extract the p-value for the Condition effect (D vs ND)
    p_value = result_channel.pvalues["Condition[T.ND]"]
    
    # Store the p-value along with the channel number
    channel_p_values.append((channel + 1, p_value))

# Extract just the p-values for correction
p_values = [p for _, p in channel_p_values]

# Apply FDR correction (Benjamini-Hochberg)
_, corrected_p_values, _, _ = multipletests(p_values, method='fdr_bh')

# Print the significant channels after FDR correction (corrected p-value < 0.05)
significant_channels = [(channel_p_values[i][0], corrected_p_values[i]) for i in range(len(corrected_p_values)) if corrected_p_values[i] < 0.05]

if significant_channels:
    print("Significant Channels (Condition effect after FDR correction):")
    for channel, p_value in significant_channels:
        print(f"Channel {channel}: corrected p-value = {p_value:.5f}")
else:
    print("No significant channels found after FDR correction.")




No significant channels found after FDR correction.




In [13]:
#glmm

import numpy as np
import pandas as pd
import statsmodels.api as sm
from statsmodels.formula.api import mixedlm
import statsmodels.formula.api as smf

# Define frequency bands (example: theta, alpha, beta)
freq_bands = {
    'theta': (4, 8),
    'alpha': (8, 12),
    'beta': (12, 30)
}

# Initialize dictionaries and engagement index arrays
event_count_condition1 = {}
event_count_condition2 = {}

# List to accumulate data for Linear Mixed-Effects Model
data = []

valid_pids = [el for el in list(range(1, 32)) if el not in [14, 5, 13, 16, 17, 20, 31]]

# Iterate through all participants
for participant_id in valid_pids:

    # Load epochs for both conditions
    epochs_condition1 = load_participant_epochs(participant_id, 'D')
    epochs_condition2 = load_participant_epochs(participant_id, 'ND')

    # Calculate power bands for each condition
    power_bands_D = calculate_psd_per_epoch(epochs_condition1, freq_bands, epochs_condition1.info['sfreq'])
    power_bands_ND = calculate_psd_per_epoch(epochs_condition2, freq_bands, epochs_condition1.info['sfreq'])

    # Get the average engagement index for D and ND condition
    engegement_index_D_epochs = calculate_engagement_index(power_bands_D)
    engegement_index_ND_epochs = calculate_engagement_index(power_bands_ND)

    avg_engegement_index_D = np.reshape(np.mean(engegement_index_D_epochs, axis=0), (1, 62))
    avg_engegement_index_ND = np.reshape(np.mean(engegement_index_ND_epochs, axis=0), (1, 62))

    # Collect the data for both conditions for each participant and each channel
    for channel in range(62):
        # Append data for condition D
        data.append({
            'Participant': participant_id,
            'Channel': channel,
            'Condition': 'D',
            'Engagement': avg_engegement_index_D[0, channel]
        })
        # Append data for condition ND
        data.append({
            'Participant': participant_id,
            'Channel': channel,
            'Condition': 'ND',
            'Engagement': avg_engegement_index_ND[0, channel]
        })

# Convert the accumulated data to a pandas DataFrame for LMM analysis
df = pd.DataFrame(data)

# Fit the Linear Mixed-Effects Model (LMM)
# 'Condition' is the fixed effect (D vs ND)
# 'Participant' is the random effect to account for variability between participants
model_glmm = smf.mixedlm("Engagement ~ Condition", df, groups=df["Participant"], re_formula="~1", family=sm.families.Gamma())
result_glmm = model_glmm.fit()

# Print the summary of the model
print(result_glmm.summary())


  concatenated_epochs = mne.concatenate_epochs(epochs_list)
  concatenated_epochs = mne.concatenate_epochs(epochs_list)
  concatenated_epochs = mne.concatenate_epochs(epochs_list)
  concatenated_epochs = mne.concatenate_epochs(epochs_list)
  concatenated_epochs = mne.concatenate_epochs(epochs_list)
  concatenated_epochs = mne.concatenate_epochs(epochs_list)
  concatenated_epochs = mne.concatenate_epochs(epochs_list)
  concatenated_epochs = mne.concatenate_epochs(epochs_list)
  concatenated_epochs = mne.concatenate_epochs(epochs_list)
  concatenated_epochs = mne.concatenate_epochs(epochs_list)
  concatenated_epochs = mne.concatenate_epochs(epochs_list)
  concatenated_epochs = mne.concatenate_epochs(epochs_list)
  concatenated_epochs = mne.concatenate_epochs(epochs_list)
  concatenated_epochs = mne.concatenate_epochs(epochs_list)
  concatenated_epochs = mne.concatenate_epochs(epochs_list)
  concatenated_epochs = mne.concatenate_epochs(epochs_list)
  concatenated_epochs = mne.concatenate_

ValueError: argument family not permitted for MixedLM initialization

In [None]:
import numpy as np
import pandas as pd
import statsmodels.api as sm
import statsmodels.formula.api as smf

# Assuming the same data preparation as before, with 'df' already in long format

# Convert to long format with Participant, Channel, Condition, and Engagement
df = pd.DataFrame(data)

# Fit a Generalized Linear Mixed Model (GLMM) with a Gamma family (for continuous skewed data)
# Engagement is modeled as a function of Condition with Participant as a random effect
model_glmm = smf.mixedlm("Engagement ~ Condition", df, groups=df["Participant"], re_formula="~1", family=sm.families.Gamma())
result_glmm = model_glmm.fit()

# Print the summary of the GLMM
print(result_glmm.summary())


In [10]:
len(epochs_condition1)

409