In [None]:
# Autoreload possibly interferes with IntelliJ debugging
%reload_ext autoreload
%autoreload 2
import logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
log = lambda msg: logging.info(msg)


# Full night pipeline (multiple files)

In [None]:
import pandas as pd
import os

input_dir = "C:\\dev\\play\\brainwave-data"
stats_df = pd.read_csv(input_dir + os.path.sep + "stats.csv")

# Find Brainflow files that haven't been copied over

In [None]:
import pytz
from tqdm import tqdm
import paramiko
import re
from datetime import datetime, timedelta
import os
import dotenv
dotenv.load_dotenv()

# Define the time window
time_window = timedelta(minutes=10)

# Define the remote server details
hostname = os.getenv('SSH_HOST')
username = os.getenv('SSH_USERNAME')
password = os.getenv('SSH_PASSWORD')
remote_dir = '/home/graham/dev/Brainwave-Data'
local_dir = input_dir

# Create an SSH client
ssh = paramiko.SSHClient()
ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
ssh.connect(hostname, username=username, password=password, compress=True)

# List files in the remote directory
stdin, stdout, stderr = ssh.exec_command(f'ls {remote_dir}')
remote_brainflow_files = [f for f in stdout.read().decode().splitlines() if f.endswith(".brainflow.csv")]


In [None]:
remote_brainflow_files

In [None]:
files_to_copy = remote_brainflow_files.copy()

for root, dirs, files in os.walk(input_dir):
    #for idx, file_name in enumerate(tqdm(dirs, desc="Converting Brainflow to FIF")):
    for idx, file_name in enumerate(files):
        full_input_filename = os.path.join(root, file_name)
        if ".brainflow.csv.bz2" in full_input_filename:
            file_name_without_bz2 = file_name.removesuffix(".bz2")
            already_have = file_name_without_bz2 in remote_brainflow_files
            print(f"Already have {full_input_filename}: {already_have}")
            if already_have:
                files_to_copy.remove(file_name_without_bz2)



In [None]:
files_to_copy

In [None]:
import os
import bz2
from tqdm import tqdm

sftp = ssh.open_sftp()
for file in files_to_copy:
    remote_file_path = remote_dir + "/" + file
    local_file_path = os.path.join(input_dir, file)
    log(f"Copying Brainflow backup {remote_file_path} to {local_file_path}")

    # Get the file size
    remote_file_size = sftp.stat(remote_file_path).st_size

    with tqdm(total=remote_file_size, unit='B', unit_scale=True, desc=file, ascii=True) as pbar:
        local_file_path = os.path.join(input_dir, file)
    
        def callback(transferred_so_far, total_to_transfer):
            pbar.update(transferred_so_far - pbar.n)
    
        # Create a file-like object that writes to the local file
        with open(local_file_path, 'wb') as local_file:
            sftp.getfo(remote_file_path, local_file, callback=callback)
sftp.close()

In [None]:
ssh.close()


## Convert Brainflow files to FIF

In [None]:
def get_brainflow_compressed_filename(full_input_filename: str) -> str:
    full_output_dirname = webserver.output_dirname(full_input_filename)
    compressed_full_output_filename = str(os.path.join(full_output_dirname, os.path.basename(full_input_filename))) + '.bz2'
    return compressed_full_output_filename

In [None]:
from datetime import datetime

import webserver
import convert
# import zstandard as zstd
import os
import bz2
import time
import shutil

errors = []
processed = []

# Could get these working later
skip_list = ['2024-09-10-21-22-21']

def compress_bz2(input_file, output_file):
    start_time = time.time()
    with open(input_file, 'rb') as f_in:
        with bz2.open(output_file, 'wb', compresslevel=9) as f_out:
            shutil.copyfileobj(f_in, f_out)
    end_time = time.time()
    return end_time - start_time, os.path.getsize(output_file)

for root, dirs, files in os.walk(input_dir):
    #for idx, file_name in enumerate(tqdm(dirs, desc="Converting Brainflow to FIF")):
    for idx, file_name in enumerate(files):
        full_input_filename = os.path.join(root, file_name)
        try:
            if full_input_filename.endswith(".brainflow.csv"):
                full_output_dirname = webserver.output_dirname(full_input_filename)
                full_output_filename = str(os.path.join(full_output_dirname, 'raw.fif'))
                
                compressed_full_output_filename = get_brainflow_compressed_filename(full_input_filename)
                
                if not os.path.exists(compressed_full_output_filename):
                    log(f"Compressing file {full_input_filename} to " + compressed_full_output_filename)
                    processed.append("Compressing " + full_input_filename)
                    try:
                        os.mkdir(os.path.dirname(compressed_full_output_filename))
                    except:
                        pass
                    compress_bz2(full_input_filename, compressed_full_output_filename) 
                    
                if os.path.exists(full_output_filename):
                    log(f"Skipping file {full_input_filename} as {full_output_filename} and {compressed_full_output_filename} already exist")
                    continue
                should_skip = False
                for s in skip_list:
                    if s in full_input_filename:
                        log(f"Skipping file {full_input_filename}")
                        should_skip = True
                if not should_skip:
                    log(f"Processing file {full_input_filename}")
                    processed.append("Processing " + full_input_filename)
                    channels = None
                    date_time_str = os.path.basename(full_input_filename).removesuffix(".brainflow.csv")
                    date_time_obj = datetime.strptime(date_time_str, '%Y-%m-%d-%H-%M-%S')
                    if (date_time_obj > datetime(2024, 9, 1, 0, 0, 0)):
                        channels = ['Fpz-M1']
        
                    if channels is not None:
                        log(f"Processing file {full_input_filename} with channels {channels}")
                        convert.convert_and_save_brainflow_file(log, full_input_filename, full_output_filename, channels)

        except Exception as e:
            msg = "Error processing file: " + full_input_filename
            log(msg)
            log(e)
            errors.append(msg)


In [None]:
errors

In [None]:
processed

## Run pipeline on FIF files

In [None]:
import pandas as pd
import run_feature_pipeline
import os
from concurrent.futures import ThreadPoolExecutor, as_completed

errors = []
dataframes = []

# Could get these working later
skip_list = ['2024-07-23-22-40-25', '2024-07-28-22-29-49', '2024-09-18-21-25-08', '2024-09-18-21-28-11', '2024-09-19-21-29-42']

def process_file(root, dir_name):
    input_file = os.path.join(root, dir_name, "raw.fif")
    if dir_name in skip_list:
        log(f"Skipping {dir_name}: " + input_file)
        return None
    try:
        log(f"Processing file: " + input_file)
        if os.path.exists(input_file):
            yasa_df = run_feature_pipeline.cached_pipeline(log, input_file, stats_df)
            #log(f"Returning {yasa_df.head()}")
            return yasa_df
    except Exception as e:
        msg = f"Error processing file: " + input_file + " - " + str(e)
        log(msg)
        errors.append(msg)
        log(e)
    return None

with ThreadPoolExecutor() as executor:
    futures = []
    for root, dirs, files in os.walk(input_dir):
        for dir_name in dirs:
            futures.append(executor.submit(process_file, root, dir_name))

    for future in as_completed(futures):
        result = future.result()
        log(f"Got result {result}")
        if result is not None:
            dataframes.append(result)

all = pd.concat(dataframes)
log(f"Finished processing, have {len(all)} files in total")

In [None]:
dataframes

In [None]:
errors

# Recalculate scalings
N.b. can be run frequently but will only be picked up by new runs.  Maybe worth occasionally regenerating all old files.
And yes, for new features have to rerun the pipeline on everything, then generate the stats here, then rerun the pipeline again on everything to have them use those.

In [None]:
from scaling import only_eeg

only_eeg_cols = list(only_eeg(all).columns)
assert any(col.startswith("Main") for col in only_eeg_cols), "No column starting with 'Main' found in only_eeg_cols"

In [None]:
import scaling

stats = scaling.stats(all)
stats.to_csv(input_dir + "/stats.csv")
assert any(stats['Column'].str.startswith("Main")), "No row starting with 'Main' found in column_name"
stats

# Post-human processing
This requires user interaction first to provide sleep times etc.

In [None]:
from sleep_events import load_days_data
from sleep_events import pimp_my_days_data

days_data = load_days_data(True)

In [None]:
from models.eeg_states.eeg_states import load_and_prepare_tired_wired_eeg_state_events

tired_wired_eeg_state_events = load_and_prepare_tired_wired_eeg_state_events()

In [34]:
from run_post_human_pipeline import cached_post_human_pipeline
import run_feature_pipeline

dir_name = "2024-11-08-21-16-53"
input_file = f"C:\\dev\\play\\brainwave-data\\{dir_name}\\raw.fif"
row = days_data[days_data['dayAndNightOf'] == '2024-11-08']
yasa_df = run_feature_pipeline.cached_pipeline(log, input_file, stats_df)
post_human_df = cached_post_human_pipeline(log, dir_name, input_file, stats_df, days_data, yasa_df, tired_wired_eeg_state_events)
post_human_df

2024-11-21 17:20:16,600 - INFO - Loading cached file C:\dev\play\brainwave-data\2024-11-08-21-16-53\raw.with_features.csv
2024-11-21 17:20:16,648 - INFO - No cached file C:\dev\play\brainwave-data\2024-11-08-21-16-53\raw.post_human.csv, rebuilding
100%|██████████| 1138/1138 [00:00<00:00, 3081.26it/s]
2024-11-21 17:20:17,888 - INFO - Saving to: C:\dev\play\brainwave-data\2024-11-08-21-16-53\raw.post_human.csv


['Main_eeg_abspow_s', 'Main_eeg_abspow_c7min_norm_s', 'Main_eeg_abspow_p2min_norm_s', 'Main_eeg_alpha_s', 'Main_eeg_alpha_c7min_norm_s', 'Main_eeg_alpha_p2min_norm_s', 'Main_eeg_alphaaa_s', 'Main_eeg_alphaaa_c7min_norm_s', 'Main_eeg_alphaaa_p2min_norm_s', 'Main_eeg_alphaab_s', 'Main_eeg_alphaab_c7min_norm_s', 'Main_eeg_alphaab_p2min_norm_s', 'Main_eeg_alphaabs_s', 'Main_eeg_alphaabs_c7min_norm_s', 'Main_eeg_alphaabs_p2min_norm_s', 'Main_eeg_alphaabsaa_s', 'Main_eeg_alphaabsaa_c7min_norm_s', 'Main_eeg_alphaabsaa_p2min_norm_s', 'Main_eeg_alphaabsab_s', 'Main_eeg_alphaabsab_c7min_norm_s', 'Main_eeg_alphaabsab_p2min_norm_s', 'Main_eeg_at_s', 'Main_eeg_at_c7min_norm_s', 'Main_eeg_at_p2min_norm_s', 'Main_eeg_auc_s', 'Main_eeg_auc_c7min_norm_s', 'Main_eeg_auc_p2min_norm_s', 'Main_eeg_beta_s', 'Main_eeg_beta_c7min_norm_s', 'Main_eeg_beta_p2min_norm_s', 'Main_eeg_betaabs_s', 'Main_eeg_betaabs_c7min_norm_s', 'Main_eeg_betaabs_p2min_norm_s', 'Main_eeg_db_s', 'Main_eeg_db_c7min_norm_s', 'Main_eeg_

Unnamed: 0_level_0,Stage,Confidence,Epoch,Timestamp,Source,Fpz-M1_Stage,Fpz-M1_Confidence,StageInt,Fpz-M1_eeg_abspow,Fpz-M1_eeg_abspow_c7min_norm,...,15MinsBeforeReadyToSleep,30MinsBeforeReadyToSleep,60MinsBeforeReadyToSleep,DuringReadyToSleep,minsSinceReadyToSleep,minsSinceGotIntoBed,minsUntilWake,epoch_type,matched_event,TiredVsWired60MinsBeforeReadyToSleep
epoch,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,W,0.997831,0,2024-11-08 21:16:53.214781046+00:00,Fpz-M1,W,0.997831,0,7.139501e-09,1.684490,...,0,0,0,0,-175.413087,-2.929754,566.613087,,,
1,W,0.998544,1,2024-11-08 21:17:23.214781046+00:00,Fpz-M1,W,0.998544,0,7.398731e-09,1.556884,...,0,0,0,0,-174.913087,-2.429754,566.113087,,,
2,W,0.999779,2,2024-11-08 21:17:53.214781046+00:00,Fpz-M1,W,0.999779,0,4.664853e-09,1.403051,...,0,0,0,0,-174.413087,-1.929754,565.613087,,,
3,W,0.999908,3,2024-11-08 21:18:23.214781046+00:00,Fpz-M1,W,0.999908,0,4.687601e-09,1.263913,...,0,0,0,0,-173.913087,-1.429754,565.113087,,,
4,W,0.999828,4,2024-11-08 21:18:53.214781046+00:00,Fpz-M1,W,0.999828,0,7.837614e-10,1.119677,...,0,0,0,0,-173.413087,-0.929754,564.613087,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1133,W,0.869996,1133,2024-11-09 06:43:23.214781046+00:00,Fpz-M1,W,0.869996,0,3.497408e-11,0.228223,...,0,0,0,0,391.086913,563.570246,0.113087,,,
1134,W,0.981264,1134,2024-11-09 06:43:53.214781046+00:00,Fpz-M1,W,0.981264,0,3.435944e-11,0.260864,...,0,0,0,0,391.586913,564.070246,-0.386913,,,
1135,W,0.961904,1135,2024-11-09 06:44:23.214781046+00:00,Fpz-M1,W,0.961904,0,5.441087e-11,0.307055,...,0,0,0,0,392.086913,564.570246,-0.886913,,,
1136,W,0.814729,1136,2024-11-09 06:44:53.214781046+00:00,Fpz-M1,W,0.814729,0,4.866201e-10,0.374374,...,0,0,0,0,392.586913,565.070246,-1.386913,,,


In [35]:
from run_post_human_pipeline import cached_post_human_pipeline

errors = []

for root, dirs, files in os.walk(input_dir):
    for idx, dir_name in enumerate(dirs):
        input_file = os.path.join(root, dir_name, "raw.fif")
        try:
            log("Processing file: " + input_file)
            input_file_without_ext = os.path.splitext(input_file)[0]

            if dir_name in skip_list:
                log(f"Skipping {idx} of {len(dirs)}: " + input_file)
                continue

            if os.path.exists(input_file):
                yasa_df = run_feature_pipeline.cached_pipeline(log, input_file, stats_df)
                cached_post_human_pipeline(log, dir_name, input_file, stats_df, days_data, yasa_df, tired_wired_eeg_state_events)

        except Exception as e:
            log("Error processing file: " + input_file)
            errors.append("Error processing file: " + input_file + " - " + str(e))
            log(e)

for err in errors:
    log(err)


2024-11-21 17:20:24,721 - INFO - Processing file: C:\dev\play\brainwave-data\08-07-2024--22-51-16\raw.fif
2024-11-21 17:20:24,722 - INFO - Processing file: C:\dev\play\brainwave-data\09-07-2024--22-52-25\raw.fif
2024-11-21 17:20:24,723 - INFO - Processing file: C:\dev\play\brainwave-data\2024-07-08-22-51-16\raw.fif
2024-11-21 17:20:24,724 - INFO - Loading cached file C:\dev\play\brainwave-data\2024-07-08-22-51-16\raw.with_features.csv
2024-11-21 17:20:24,756 - INFO - Loading cached file C:\dev\play\brainwave-data\2024-07-08-22-51-16\raw.post_human.csv
2024-11-21 17:20:24,814 - INFO - Processing file: C:\dev\play\brainwave-data\2024-07-11-22-46-18\raw.fif
2024-11-21 17:20:24,814 - INFO - Loading cached file C:\dev\play\brainwave-data\2024-07-11-22-46-18\raw.with_features.csv
2024-11-21 17:20:24,949 - INFO - Loading cached file C:\dev\play\brainwave-data\2024-07-11-22-46-18\raw.post_human.csv
2024-11-21 17:20:25,055 - INFO - Processing file: C:\dev\play\brainwave-data\2024-07-12-22-38-58

['Main_eeg_abspow_s', 'Main_eeg_abspow_c7min_norm_s', 'Main_eeg_abspow_p2min_norm_s', 'Main_eeg_alpha_s', 'Main_eeg_alpha_c7min_norm_s', 'Main_eeg_alpha_p2min_norm_s', 'Main_eeg_alphaaa_s', 'Main_eeg_alphaaa_c7min_norm_s', 'Main_eeg_alphaaa_p2min_norm_s', 'Main_eeg_alphaab_s', 'Main_eeg_alphaab_c7min_norm_s', 'Main_eeg_alphaab_p2min_norm_s', 'Main_eeg_alphaabs_s', 'Main_eeg_alphaabs_c7min_norm_s', 'Main_eeg_alphaabs_p2min_norm_s', 'Main_eeg_alphaabsaa_s', 'Main_eeg_alphaabsaa_c7min_norm_s', 'Main_eeg_alphaabsaa_p2min_norm_s', 'Main_eeg_alphaabsab_s', 'Main_eeg_alphaabsab_c7min_norm_s', 'Main_eeg_alphaabsab_p2min_norm_s', 'Main_eeg_at_s', 'Main_eeg_at_c7min_norm_s', 'Main_eeg_at_p2min_norm_s', 'Main_eeg_auc_s', 'Main_eeg_auc_c7min_norm_s', 'Main_eeg_auc_p2min_norm_s', 'Main_eeg_beta_s', 'Main_eeg_beta_c7min_norm_s', 'Main_eeg_beta_p2min_norm_s', 'Main_eeg_betaabs_s', 'Main_eeg_betaabs_c7min_norm_s', 'Main_eeg_betaabs_p2min_norm_s', 'Main_eeg_db_s', 'Main_eeg_db_c7min_norm_s', 'Main_eeg_

2024-11-21 17:20:38,121 - INFO - Processing file: C:\dev\play\brainwave-data\2024-11-12-21-11-43\raw.fif
2024-11-21 17:20:38,122 - INFO - Loading cached file C:\dev\play\brainwave-data\2024-11-12-21-11-43\raw.with_features.csv
2024-11-21 17:20:38,182 - INFO - No cached file C:\dev\play\brainwave-data\2024-11-12-21-11-43\raw.post_human.csv, rebuilding
100%|██████████| 1199/1199 [00:01<00:00, 1127.11it/s]
2024-11-21 17:20:40,263 - INFO - Saving to: C:\dev\play\brainwave-data\2024-11-12-21-11-43\raw.post_human.csv


['Main_eeg_abspow_s', 'Main_eeg_abspow_c7min_norm_s', 'Main_eeg_abspow_p2min_norm_s', 'Main_eeg_alpha_s', 'Main_eeg_alpha_c7min_norm_s', 'Main_eeg_alpha_p2min_norm_s', 'Main_eeg_alphaaa_s', 'Main_eeg_alphaaa_c7min_norm_s', 'Main_eeg_alphaaa_p2min_norm_s', 'Main_eeg_alphaab_s', 'Main_eeg_alphaab_c7min_norm_s', 'Main_eeg_alphaab_p2min_norm_s', 'Main_eeg_alphaabs_s', 'Main_eeg_alphaabs_c7min_norm_s', 'Main_eeg_alphaabs_p2min_norm_s', 'Main_eeg_alphaabsaa_s', 'Main_eeg_alphaabsaa_c7min_norm_s', 'Main_eeg_alphaabsaa_p2min_norm_s', 'Main_eeg_alphaabsab_s', 'Main_eeg_alphaabsab_c7min_norm_s', 'Main_eeg_alphaabsab_p2min_norm_s', 'Main_eeg_at_s', 'Main_eeg_at_c7min_norm_s', 'Main_eeg_at_p2min_norm_s', 'Main_eeg_auc_s', 'Main_eeg_auc_c7min_norm_s', 'Main_eeg_auc_p2min_norm_s', 'Main_eeg_beta_s', 'Main_eeg_beta_c7min_norm_s', 'Main_eeg_beta_p2min_norm_s', 'Main_eeg_betaabs_s', 'Main_eeg_betaabs_c7min_norm_s', 'Main_eeg_betaabs_p2min_norm_s', 'Main_eeg_db_s', 'Main_eeg_db_c7min_norm_s', 'Main_eeg_

2024-11-21 17:20:40,566 - INFO - Processing file: C:\dev\play\brainwave-data\2024-11-16-21-54-46\raw.fif
2024-11-21 17:20:40,566 - INFO - Loading cached file C:\dev\play\brainwave-data\2024-11-16-21-54-46\raw.with_features.csv
2024-11-21 17:20:40,625 - INFO - No cached file C:\dev\play\brainwave-data\2024-11-16-21-54-46\raw.post_human.csv, rebuilding
100%|██████████| 1136/1136 [00:01<00:00, 914.94it/s]
2024-11-21 17:20:42,763 - INFO - Saving to: C:\dev\play\brainwave-data\2024-11-16-21-54-46\raw.post_human.csv


['Main_eeg_abspow_s', 'Main_eeg_abspow_c7min_norm_s', 'Main_eeg_abspow_p2min_norm_s', 'Main_eeg_alpha_s', 'Main_eeg_alpha_c7min_norm_s', 'Main_eeg_alpha_p2min_norm_s', 'Main_eeg_alphaaa_s', 'Main_eeg_alphaaa_c7min_norm_s', 'Main_eeg_alphaaa_p2min_norm_s', 'Main_eeg_alphaab_s', 'Main_eeg_alphaab_c7min_norm_s', 'Main_eeg_alphaab_p2min_norm_s', 'Main_eeg_alphaabs_s', 'Main_eeg_alphaabs_c7min_norm_s', 'Main_eeg_alphaabs_p2min_norm_s', 'Main_eeg_alphaabsaa_s', 'Main_eeg_alphaabsaa_c7min_norm_s', 'Main_eeg_alphaabsaa_p2min_norm_s', 'Main_eeg_alphaabsab_s', 'Main_eeg_alphaabsab_c7min_norm_s', 'Main_eeg_alphaabsab_p2min_norm_s', 'Main_eeg_at_s', 'Main_eeg_at_c7min_norm_s', 'Main_eeg_at_p2min_norm_s', 'Main_eeg_auc_s', 'Main_eeg_auc_c7min_norm_s', 'Main_eeg_auc_p2min_norm_s', 'Main_eeg_beta_s', 'Main_eeg_beta_c7min_norm_s', 'Main_eeg_beta_p2min_norm_s', 'Main_eeg_betaabs_s', 'Main_eeg_betaabs_c7min_norm_s', 'Main_eeg_betaabs_p2min_norm_s', 'Main_eeg_db_s', 'Main_eeg_db_c7min_norm_s', 'Main_eeg_

2024-11-21 17:20:43,058 - INFO - Processing file: C:\dev\play\brainwave-data\2024-11-19-21-29-04\raw.fif
2024-11-21 17:20:43,059 - INFO - Loading cached file C:\dev\play\brainwave-data\2024-11-19-21-29-04\raw.with_features.csv
2024-11-21 17:20:43,126 - INFO - No cached file C:\dev\play\brainwave-data\2024-11-19-21-29-04\raw.post_human.csv, rebuilding
100%|██████████| 1242/1242 [00:00<00:00, 1716.49it/s]
2024-11-21 17:20:44,717 - INFO - Saving to: C:\dev\play\brainwave-data\2024-11-19-21-29-04\raw.post_human.csv


['Main_eeg_abspow_s', 'Main_eeg_abspow_c7min_norm_s', 'Main_eeg_abspow_p2min_norm_s', 'Main_eeg_alpha_s', 'Main_eeg_alpha_c7min_norm_s', 'Main_eeg_alpha_p2min_norm_s', 'Main_eeg_alphaaa_s', 'Main_eeg_alphaaa_c7min_norm_s', 'Main_eeg_alphaaa_p2min_norm_s', 'Main_eeg_alphaab_s', 'Main_eeg_alphaab_c7min_norm_s', 'Main_eeg_alphaab_p2min_norm_s', 'Main_eeg_alphaabs_s', 'Main_eeg_alphaabs_c7min_norm_s', 'Main_eeg_alphaabs_p2min_norm_s', 'Main_eeg_alphaabsaa_s', 'Main_eeg_alphaabsaa_c7min_norm_s', 'Main_eeg_alphaabsaa_p2min_norm_s', 'Main_eeg_alphaabsab_s', 'Main_eeg_alphaabsab_c7min_norm_s', 'Main_eeg_alphaabsab_p2min_norm_s', 'Main_eeg_at_s', 'Main_eeg_at_c7min_norm_s', 'Main_eeg_at_p2min_norm_s', 'Main_eeg_auc_s', 'Main_eeg_auc_c7min_norm_s', 'Main_eeg_auc_p2min_norm_s', 'Main_eeg_beta_s', 'Main_eeg_beta_c7min_norm_s', 'Main_eeg_beta_p2min_norm_s', 'Main_eeg_betaabs_s', 'Main_eeg_betaabs_c7min_norm_s', 'Main_eeg_betaabs_p2min_norm_s', 'Main_eeg_db_s', 'Main_eeg_db_c7min_norm_s', 'Main_eeg_

2024-11-21 17:20:45,152 - INFO - Processing file: C:\dev\play\brainwave-data\2024-11-20-21-30-29\raw.fif
2024-11-21 17:20:45,153 - INFO - Loading cached file C:\dev\play\brainwave-data\2024-11-20-21-30-29\raw.with_features.csv
2024-11-21 17:20:45,187 - INFO - No cached file C:\dev\play\brainwave-data\2024-11-20-21-30-29\raw.post_human.csv, rebuilding
100%|██████████| 669/669 [00:00<00:00, 1683.61it/s]
2024-11-21 17:20:46,100 - INFO - Saving to: C:\dev\play\brainwave-data\2024-11-20-21-30-29\raw.post_human.csv
2024-11-21 17:20:46,276 - INFO - Processing file: C:\dev\play\brainwave-data\compressed_files\raw.fif
2024-11-21 17:20:46,276 - INFO - Processing file: C:\dev\play\brainwave-data\nonnight\raw.fif
2024-11-21 17:20:46,292 - INFO - Processing file: C:\dev\play\brainwave-data\nonnight\2024-07-15-09-10-07\raw.fif


['Main_eeg_abspow_s', 'Main_eeg_abspow_c7min_norm_s', 'Main_eeg_abspow_p2min_norm_s', 'Main_eeg_alpha_s', 'Main_eeg_alpha_c7min_norm_s', 'Main_eeg_alpha_p2min_norm_s', 'Main_eeg_alphaaa_s', 'Main_eeg_alphaaa_c7min_norm_s', 'Main_eeg_alphaaa_p2min_norm_s', 'Main_eeg_alphaab_s', 'Main_eeg_alphaab_c7min_norm_s', 'Main_eeg_alphaab_p2min_norm_s', 'Main_eeg_alphaabs_s', 'Main_eeg_alphaabs_c7min_norm_s', 'Main_eeg_alphaabs_p2min_norm_s', 'Main_eeg_alphaabsaa_s', 'Main_eeg_alphaabsaa_c7min_norm_s', 'Main_eeg_alphaabsaa_p2min_norm_s', 'Main_eeg_alphaabsab_s', 'Main_eeg_alphaabsab_c7min_norm_s', 'Main_eeg_alphaabsab_p2min_norm_s', 'Main_eeg_at_s', 'Main_eeg_at_c7min_norm_s', 'Main_eeg_at_p2min_norm_s', 'Main_eeg_auc_s', 'Main_eeg_auc_c7min_norm_s', 'Main_eeg_auc_p2min_norm_s', 'Main_eeg_beta_s', 'Main_eeg_beta_c7min_norm_s', 'Main_eeg_beta_p2min_norm_s', 'Main_eeg_betaabs_s', 'Main_eeg_betaabs_c7min_norm_s', 'Main_eeg_betaabs_p2min_norm_s', 'Main_eeg_db_s', 'Main_eeg_db_c7min_norm_s', 'Main_eeg_

2024-11-21 17:20:46,292 - INFO - Processing file: C:\dev\play\brainwave-data\nonnight\2024-07-16-07-17-07\raw.fif
2024-11-21 17:20:46,292 - INFO - Processing file: C:\dev\play\brainwave-data\nonnight\2024-07-17-08-17-29\raw.fif
2024-11-21 17:20:46,297 - INFO - Processing file: C:\dev\play\brainwave-data\nonnight\2024-07-18-09-10-29\raw.fif
2024-11-21 17:20:46,298 - INFO - Processing file: C:\dev\play\brainwave-data\nonnight\2024-07-22-09-14-52\raw.fif
2024-11-21 17:20:46,298 - INFO - Processing file: C:\dev\play\brainwave-data\nonnight\2024-07-27-09-40-07\raw.fif
2024-11-21 17:20:46,298 - INFO - Processing file: C:\dev\play\brainwave-data\nonnight\2024-07-28-22-29-49\raw.fif
2024-11-21 17:20:46,298 - INFO - Skipping 6 of 12: C:\dev\play\brainwave-data\nonnight\2024-07-28-22-29-49\raw.fif
2024-11-21 17:20:46,298 - INFO - Processing file: C:\dev\play\brainwave-data\nonnight\2024-07-29-08-39-51\raw.fif
2024-11-21 17:20:46,298 - INFO - Processing file: C:\dev\play\brainwave-data\nonnight\2

# Upload to GCS

In [None]:
from upload import upload_dir_to_gcs_skipping_existing
import os

errors = []
dataframes = []

for root, dirs, files in os.walk(input_dir):
    for dir_name in reversed(dirs):
        input_file = os.path.join(root, dir_name, "raw.fif")
        full_dir_name = os.path.join(root, dir_name)
        try:
            upload_dir_to_gcs_skipping_existing(log, 'examined-life-derived-eeg', full_dir_name, dir_name)
        except Exception as e:
            log("Error processing file: " + input_dir)
            errors.append("Error processing file: " + input_file + " - " + str(e))
            log(e)

for error in errors:
    log(error)

log("All uploaded")

# Check if can delete Brainwave files that are safely backed up

In [None]:

import os

errors = []
can_delete = []
cannot_delete = []

for root, dirs, files in os.walk(input_dir):
    for idx, file_name in enumerate(files):
        full_input_filename = os.path.join(root, file_name)
        if full_input_filename.endswith(".brainflow.csv"):
            compressed_full_output_filename = get_brainflow_compressed_filename(full_input_filename)
                
            if os.path.exists(compressed_full_output_filename):
                can_delete.append({
                    'backed_up': compressed_full_output_filename,
                    'full_filename': full_input_filename
                })
            else:
                cannot_delete.append(full_input_filename)
    

In [None]:
can_delete_df = pd.DataFrame(can_delete)
can_delete_df

In [None]:
cannot_delete

In [None]:
can_delete_df['full_filename']

# Check if can delete Cyton files that are safely backed up

In [None]:
import webserver
import bz2
import shutil
import os
import time


errors = []
can_delete = []
cannot_delete = []

for root, dirs, files in os.walk(input_dir):
    for idx, file_name in enumerate(files):
        full_input_filename = os.path.join(root, file_name)
        if file_name.startswith("OBCI_") and file_name.endswith(".TXT.bz2"):
            can_delete.append({
                'file_name': file_name.removesuffix(".bz2"),
                'full_filename': full_input_filename
            })

In [None]:
can_delete

In [None]:
cyton_file_locations = [input_dir, "d:", "e:", "x:"]

for cyton_file_location in cyton_file_locations:
    for root, dirs, files in os.walk(cyton_file_location):
        for idx, file in enumerate(files):
            matching_record = next((f for f in can_delete if f['file_name'] == file), None)
            if matching_record:
                full_filename = os.path.join(root, file)
                log(f"Could delete {full_filename} as backed up in {matching_record['full_filename']}")

In [None]:
import ctypes

ctypes.windll.user32.MessageBoxW(0, "Compression is complete!", "Alert", 0x40 | 0x1)