In [1]:
from EEG_cleaning import muse_clean

The `muse_clean()` function takes 4 required inputs and 1 optional input:
- `filepath`: The path to the folder containing the dataset in question (relative *or* absolute)
- `filename`: The name of the file to clean (include file type extension)
- `subject_id`: The unique custom identification of the subject for the output file name
- `record_id`: The unique custom identification of the recorded label for the output file name
- `new_folder` (*optional*): The name of the output folder, inside the cleaned datasets directory, that will contain all of the cleaned datasets (defaults to "*cleaned datasets*" as well)

Single File Cleaning

In [2]:
# Clean one locally recorded file.
# Folder that holds the raw recording (relative to the notebook's working directory).
filepath = '../raw datasets/Local Recordings'
# Name of the raw file to clean, including its extension.
filename = 'Natalie_2025-07-09--15-49-07_8601963359185541618.csv'
# Identifiers passed to muse_clean() for naming the output file.
subject_id = 'Natalie'
record_id = '1'

# Cleaned output is directed to the 'local datasets' folder via new_folder.
muse_clean(filepath, filename, subject_id, record_id, new_folder='local datasets')

Removed 0 rows with no data or no change compared to previous rows.
[]


Mendeley Cleaning Loop

In [3]:
import os

# Root of the raw Mendeley data. Each entry in it is treated as one patient's
# folder, and each file inside that folder as one experiment.
MENDELEY_DIR = '../raw datasets/Mendeley/'
MENDELEY_PATIENTS = 5      # number of patient folders to clean
MENDELEY_EXPERIMENTS = 3   # number of files to clean per patient

# os.listdir() returns entries in arbitrary order, so sort the listing: a given
# patient number must map to the same folder on every run and every machine.
# NOTE(review): the sort is lexicographic -- confirm it matches the intended numbering.
folder_list = [os.path.join(MENDELEY_DIR, folder) for folder in sorted(os.listdir(MENDELEY_DIR))]

for patient in range(1, MENDELEY_PATIENTS + 1):

    current_folder = folder_list[patient - 1]
    # Sorted for the same reason: experiment N must always be the same file.
    file_list = sorted(os.listdir(current_folder))

    for experiment in range(1, MENDELEY_EXPERIMENTS + 1):

        # Indexing (rather than slicing) keeps a loud IndexError if data is missing.
        current_file = file_list[experiment - 1]
        muse_clean(current_folder, current_file, patient, experiment, 'Mendeley cleaned')
        print(f"Patient {patient} Experiment {experiment} complete...")

print("All done!")

Patient 1 Experiment 1 complete...
Patient 1 Experiment 2 complete...
Patient 1 Experiment 3 complete...
Patient 2 Experiment 1 complete...
Patient 2 Experiment 2 complete...
Patient 2 Experiment 3 complete...


  df = pd.read_csv(f'{filepath}/{filename}')


Patient 3 Experiment 1 complete...
Patient 3 Experiment 2 complete...
Patient 3 Experiment 3 complete...
Patient 4 Experiment 1 complete...
Patient 4 Experiment 2 complete...
Patient 4 Experiment 3 complete...


  df = pd.read_csv(f'{filepath}/{filename}')


Patient 5 Experiment 1 complete...
Patient 5 Experiment 2 complete...
Patient 5 Experiment 3 complete...
All done!


Emotion Cleaning Loop

In [2]:
import os

# Root of the raw EmoKey Short Muse data. Each entry in it is treated as one
# patient's folder, and each file inside that folder as one experiment.
EMOKEY_DIR = '../raw datasets/EmoKey Short/muse_wearable_data/'
EMOKEY_PATIENTS = 44      # number of patient folders to clean
EMOKEY_EXPERIMENTS = 4    # number of files to clean per patient

# os.listdir() returns entries in arbitrary order, so sort the listing: a given
# patient number must map to the same folder on every run and every machine.
# NOTE(review): the sort is lexicographic ("10" sorts before "2") -- confirm it
# matches the intended patient numbering for this dataset's folder names.
folder_list = [os.path.join(EMOKEY_DIR, folder) for folder in sorted(os.listdir(EMOKEY_DIR))]

for patient in range(1, EMOKEY_PATIENTS + 1):

    current_folder = folder_list[patient - 1]
    # Sorted for the same reason: experiment N must always be the same file.
    file_list = sorted(os.listdir(current_folder))

    for experiment in range(1, EMOKEY_EXPERIMENTS + 1):
        # Indexing (rather than slicing) keeps a loud IndexError if data is missing.
        current_file = file_list[experiment - 1]
        muse_clean(current_folder, current_file, patient, experiment, 'Emotion cleaned')
        print(f"Patient {patient} Experiment {experiment} complete...")


Processing as an Emotions dataset...
Key moments found at timestamps: [1624462254.0234375, 1624462275.1640625, 1624462285.578125]
Removed 0 rows with no data or no change compared to previous rows.
[]
Patient 1 Experiment 1 complete...
Processing as an Emotions dataset...
Key moments found at timestamps: [1624462423.1328125, 1624462442.359375, 1624462479.625, 1624462626.5859375, 1624462679.2734375, 1624462699.8203125, 1624462734.4765625]
Removed 0 rows with no data or no change compared to previous rows.
[]
Patient 1 Experiment 2 complete...
Processing as an Emotions dataset...
Key moments found at timestamps: [1624461696.0625, 1624461738.3359375, 1624461757.8671875]
Removed 0 rows with no data or no change compared to previous rows.
[]
Patient 1 Experiment 3 complete...
Processing as an Emotions dataset...
Key moments found at timestamps: [1624462026.46875, 1624462046.1015627, 1624462067.9453125]
Removed 0 rows with no data or no change compared to previous rows.
[]
Patient 1 Experime

General Cleaning Loop

In [None]:
def cleaning_loop(folder_list: list, new_folder: str = 'cleaned datasets') -> None:

    """
    Run `muse_clean()` on every file found in every folder of `folder_list`.

    Inputs:
        folder_list: A list of folder paths (relative or absolute) containing the data files
        new_folder: The output folder name handed to `muse_clean()` (customizable)
    Outputs: None. Progress is printed; processing stops at the first folder or
    file that raises FileNotFoundError.

    This code functions assuming it follows the following file naming scheme:
    ```subject{name}-{label}-{label_number}.csv```
    The subject id is `{name}` and the label id is `{label}{label_number}`
    (without the file extension). Files that do not split into at least three
    '-'-separated parts are skipped with a message.
    """

    import os

    subject_prefix = 'subject'

    for folder in folder_list:

        try:
            # List the folder directly instead of os.chdir()-ing into it: changing
            # the working directory would break relative paths for muse_clean()
            # and for every later folder. Sorted for a reproducible order.
            file_list = sorted(os.listdir(folder))
        except FileNotFoundError:
            print(f"Folder {folder} not found, try again!")
            return

        for file in file_list:
            # Drop the extension first so it does not leak into the label id.
            parts = os.path.splitext(file)[0].split('-')
            if len(parts) < 3:
                print(f"File {file} does not match the naming scheme, skipping...")
                continue

            first = parts[0]
            if first.startswith(subject_prefix) and len(first) > len(subject_prefix):
                # Keep the whole {name}, not just its final character.
                subject_id = first[len(subject_prefix):]
            else:
                # Off-scheme name: fall back to the last character, as before.
                subject_id = first[-1:]
            label_id = parts[1] + parts[2]

            try:
                muse_clean(folder, file, subject_id, label_id, new_folder)
                print(f"Patient {subject_id} Experiment {label_id} complete...")
            except FileNotFoundError:
                print(f"File {file} does not exist, try again!")
                return

    # Reached only when every folder and file was processed without an error.
    print("All done!")
                
