In [1]:
# %load_ext cuml.accel
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os  # 导入os库来处理文件和目录
import glob # 导入glob库来查找文件

from sklearn.impute import SimpleImputer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import Ridge
from sklearn.pipeline import Pipeline
from sklearn.metrics import r2_score, mean_squared_error
from sklearn.ensemble import RandomForestRegressor
from sklearn.decomposition import PCA

In [None]:
def load_data_from_subjects(root_dir: str, eeg_suffix: str = 'EEG_aligned.npy', sound_suffix: str = 'Sound_aligned.npy', root_suffix: str = 'feature_normalized'):
    """Load paired EEG / sound ``.npy`` recordings from every subject folder.

    Each immediate sub-directory of ``root_dir`` is treated as one subject.
    Inside ``<root_dir>/<subject>/<root_suffix>`` every file whose name ends
    with ``eeg_suffix`` is paired with the file that has the same name prefix
    followed by ``sound_suffix``. A pair is kept only when both files load
    and their first dimensions are equal; all kept pairs are stacked
    vertically.

    Subjects and files are visited in sorted name order so that the row order
    of the result is reproducible across runs and file systems.

    Args:
        root_dir: Directory containing one sub-directory per subject.
        eeg_suffix: File-name ending that identifies an EEG recording.
        sound_suffix: File-name ending of the matching sound recording.
        root_suffix: Sub-directory of each subject folder that holds the files.

    Returns:
        A tuple ``(X, Y)`` of the vertically stacked EEG and sound arrays, or
        ``(None, None)`` when no subject folder or no valid pair was found.

    Raises:
        OSError: If ``root_dir`` cannot be listed.
        ValueError: From ``np.vstack`` when the kept recordings cannot be
            stacked (e.g. differing trailing dimensions).
    """
    subject_folders = sorted(
        entry for entry in os.listdir(root_dir)
        if os.path.isdir(os.path.join(root_dir, entry))
    )

    if not subject_folders:
        print('No subjects found')
        return None, None

    print(f"found{len(subject_folders)}folders")

    all_eeg_data = []
    all_sound_data = []

    for subject_folder in subject_folders:
        subject_folder_path = os.path.join(root_dir, subject_folder, root_suffix)

        # Escape the directory part: folder names may legitimately contain
        # glob metacharacters such as '[' or '*'.
        pattern = os.path.join(glob.escape(subject_folder_path), f'*{eeg_suffix}')
        eeg_files = sorted(glob.glob(pattern))

        if not eeg_files:
            print(f'No EEG found in folder {subject_folder_path}')
            continue

        for eeg_file_path in eeg_files:
            eeg_name = os.path.basename(eeg_file_path)
            # Drop exactly the trailing suffix (by length) rather than
            # replacing a hard-coded literal, so custom suffixes work and
            # earlier occurrences of the text in the name are left intact.
            base_name = eeg_name[:len(eeg_name) - len(eeg_suffix)]
            sound_file_path = os.path.join(subject_folder_path, base_name + sound_suffix)
            sound_name = os.path.basename(sound_file_path)

            if not os.path.exists(sound_file_path):
                print(f"warning: for {eeg_name} sound file not found")
                continue

            try:
                eeg_data = np.load(eeg_file_path)
                sound_data = np.load(sound_file_path)
                same_length = eeg_data.shape[0] == sound_data.shape[0]
            except Exception as e:
                # Best-effort loading: one unreadable or malformed file must
                # not abort the whole scan, but say which pair was skipped.
                print(f"warning: failed to load {eeg_name} / {sound_name}: {e}")
                continue

            if not same_length:
                print(f"warning: Size not matching for {eeg_name}and {sound_name}")
                continue

            all_eeg_data.append(eeg_data)
            all_sound_data.append(sound_data)

    if not all_eeg_data:
        print('No EEG found')
        return None, None

    X = np.vstack(all_eeg_data)
    Y = np.vstack(all_sound_data)

    return X, Y