In [2]:
import statsmodels.formula.api as smf
import pandas as pd

from eeg_analyzer import EEGAnalyzer
from utils.config import DATASETS

# Name passed both to EEGAnalyzer.load_analyzer and to the EEGAnalyzer
# constructor below.
ANALYZER_NAME = "eeg_analyzer"

# Try to reuse a previously saved analyzer. This code treats a ``None``
# result from load_analyzer as "no analyzer stored under this name".
analyzer = EEGAnalyzer.load_analyzer(ANALYZER_NAME)
if analyzer is None:
    print(f"Analyzer {ANALYZER_NAME} not found. Creating a new one.")
    # Build a fresh analyzer from the configured DATASETS and save it right
    # away (presumably so the next run can load it -- confirm against
    # EEGAnalyzer.save_analyzer).
    analyzer = EEGAnalyzer(DATASETS, ANALYZER_NAME)
    analyzer.save_analyzer()

# Build the analysis DataFrame from the analyzer.
df = analyzer.create_dataframe()

# The dataset labels are needed both for the header and for the loop below,
# so compute them once.
dataset_names = df['dataset'].unique()

# Overall summary.
print("DataFrame Information:")
print("=" * 30)
print(f"Total number of rows: {len(df)}")
print(f"Datasets: {dataset_names.tolist()}")

# Per-dataset summary: metadata first, then row counts per channel/subject.
for dataset_name in dataset_names:
    df_dataset = df.loc[df['dataset'] == dataset_name]
    subject_ids = df_dataset['subject_id']

    print(f"\n--- Dataset: {dataset_name} ---")
    print(f"  Task orientation: {df_dataset['task_orientation'].iloc[0]}")
    print(f"  Subjects: {subject_ids.nunique()} ({subject_ids.unique().tolist()})")
    print(f"  Sessions: {df_dataset['session_id'].nunique()}")
    print(f"  Channels: {df_dataset['channel'].nunique()}")
    print(f"  Groups: {df_dataset['group'].unique().tolist()}")
    print(f"  States: {df_dataset['state'].unique().tolist()}")
    print(f"  Total data points: {len(df_dataset)}")

    # Non-null log_band_power counts, grouped first by channel and then by
    # subject.
    for label, group_column in (("channel", "channel"), ("subject", "subject_id")):
        print(f"\n  Data points per {label}:")
        print(df_dataset.groupby(group_column)['log_band_power'].count())

# Fitting a linear mixed effects model for each dataset
# Unique sensor instance: one label per (subject, channel) pair.
# The label must be built element-wise. Calling str() on a whole Series
# returns the printed representation of that Series, so every row would get
# the same multi-line text as its prefix. astype(str) converts each value
# individually instead.
# The column does not depend on the dataset, so it is computed once here on
# the full frame; every per-dataset copy taken below inherits it.
df["sub_ch"] = df["subject_id"].astype(str) + "_" + df["channel"].astype(str)

for dataset_name in df['dataset'].unique():
    print(f"\n\nFitting model for dataset: {dataset_name}")
    # Independent copy, so later changes to the subset never touch df.
    df_dataset = df[df['dataset'] == dataset_name].copy()

    # A bare ``df_dataset.head()`` inside a loop is evaluated and discarded
    # (only the last expression of a cell is auto-displayed), so print the
    # preview explicitly.
    print(df_dataset.head())

[EEGAnalyzer - eeg_analyzer] EEGAnalyzer state loaded from C:\Users\si_ve\Documents\Master_AttentionalDirectionResearch\data\eeg_analyzer_derivatives\eeg_analyzer\analyzer_state.pkl
[EEGAnalyzer - eeg_analyzer] Creating DataFrame from datasets...
[EEGAnalyzer - eeg_analyzer] Processing dataset: braboszcz2017
[EEGAnalyzer - eeg_analyzer] Processing dataset: jin2019
[EEGAnalyzer - eeg_analyzer] Processing dataset: touryan2022
[EEGAnalyzer - eeg_analyzer] DataFrame created with 2054400 rows and 14 columns.
DataFrame Information:
Total number of rows: 2054400
Datasets: ['braboszcz2017', 'jin2019']

--- Dataset: braboszcz2017 ---
  Task orientation: internal
  Subjects: 50 (['025', '026', '027', '028', '029', '030', '031', '032', '033', '034', '035', '036', '037', '038', '039', '040', '041', '042', '043', '044', '045', '046', '047', '048', '049', '050', '051', '052', '053', '054', '055', '060', '061', '062', '063', '064', '065', '066', '067', '068', '069', '070', '071', '072', '073', '074',