In [1]:
# Autoreload possibly interferes with IntelliJ debugging
%reload_ext autoreload
%autoreload 2
import logging
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)


def log(msg):
    """Emit ``msg`` at INFO level through the root logger configured above."""
    logging.info(msg)


# Full pipeline (multiple files)

In [2]:
import pandas as pd
import os

# Root folder of the recordings; the later cells walk this tree with os.walk.
input_dir = "C:\\dev\\play\\brainwave-data"

# Scaling statistics; the "Recalculate scalings" section rewrites this same file.
stats_df = pd.read_csv(os.path.join(input_dir, "stats.csv"))

# Find Brainflow files that haven't been copied over

In [3]:
import pytz
from tqdm import tqdm
import paramiko
import re
from datetime import datetime, timedelta
import os
import dotenv
dotenv.load_dotenv()

# Define the time window
# NOTE(review): not referenced by any cell visible in this notebook; kept in case it is used elsewhere.
time_window = timedelta(minutes=10)

# Define the remote server details (read from the environment / .env file loaded above)
hostname = os.getenv('SSH_HOST')
username = os.getenv('SSH_USERNAME')
password = os.getenv('SSH_PASSWORD')
remote_dir = '/home/graham/dev/Brainwave-Data'
local_dir = input_dir

# Fail fast with a clear message: os.getenv returns None for an unset variable,
# and passing that to connect() would not reach the intended server.
if not hostname:
    raise RuntimeError("SSH_HOST is not set; define it in the environment or the .env file")

# Create an SSH client
ssh = paramiko.SSHClient()
# NOTE(review): AutoAddPolicy accepts unknown host keys without verification;
# consider loading known hosts instead if this ever runs on an untrusted network.
ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
ssh.connect(hostname, username=username, password=password, compress=True)

# List files in the remote directory
stdin, stdout, stderr = ssh.exec_command(f'ls {remote_dir}')
remote_listing = stdout.read().decode()
ls_exit_status = stdout.channel.recv_exit_status()
if ls_exit_status != 0:
    # Without this check a failed listing looks exactly like "no remote files".
    ls_error = stderr.read().decode().strip()
    ssh.close()
    raise RuntimeError(f"Listing {remote_dir} failed with exit status {ls_exit_status}: {ls_error}")
remote_brainflow_files = [f for f in remote_listing.splitlines() if f.endswith(".brainflow.csv")]


2024-11-11 09:41:04,909 - INFO - Connected (version 2.0, client OpenSSH_9.6)
2024-11-11 09:41:05,103 - INFO - Authentication (publickey) failed.
2024-11-11 09:41:05,159 - INFO - Authentication (password) successful!


In [4]:
remote_brainflow_files

['2024-09-30-21-27-49.brainflow.csv',
 '2024-10-01-21-36-11.brainflow.csv',
 '2024-10-02-21-20-34.brainflow.csv',
 '2024-10-02-21-21-15.brainflow.csv',
 '2024-10-04-21-13-39.brainflow.csv',
 '2024-10-05-21-09-47.brainflow.csv',
 '2024-10-06-21-29-18.brainflow.csv',
 '2024-10-07-21-35-18.brainflow.csv',
 '2024-10-08-21-21-34.brainflow.csv',
 '2024-10-09-21-22-04.brainflow.csv',
 '2024-10-11-21-16-27.brainflow.csv',
 '2024-10-15-22-01-52.brainflow.csv',
 '2024-10-20-20-09-48.brainflow.csv',
 '2024-10-21-21-30-19.brainflow.csv',
 '2024-10-22-21-32-45.brainflow.csv',
 '2024-10-23-21-24-03.brainflow.csv',
 '2024-10-24-21-35-33.brainflow.csv',
 '2024-10-29-20-42-12.brainflow.csv',
 '2024-10-30-21-26-55.brainflow.csv',
 '2024-11-01-21-32-22.brainflow.csv',
 '2024-11-02-21-29-03.brainflow.csv',
 '2024-11-03-21-20-37.brainflow.csv',
 '2024-11-04-21-20-51.brainflow.csv',
 '2024-11-05-21-34-15.brainflow.csv',
 '2024-11-07-21-26-18.brainflow.csv',
 '2024-11-08-21-16-53.brainflow.csv',
 '2024-11-09

In [5]:
# Names (with the ".bz2" suffix stripped) of every compressed Brainflow
# recording found anywhere under input_dir.
local_brainflow_names = set()

for root, dirs, files in os.walk(input_dir):
    for file_name in files:
        # Test the file name's suffix rather than searching the whole path, so a
        # directory whose name happens to contain the pattern cannot match.
        if not file_name.endswith(".brainflow.csv.bz2"):
            continue
        full_input_filename = os.path.join(root, file_name)
        file_name_without_bz2 = file_name.removesuffix(".bz2")
        already_have = file_name_without_bz2 in remote_brainflow_files
        print(f"Already have {full_input_filename}: {already_have}")
        local_brainflow_names.add(file_name_without_bz2)

# Remote recordings without a local compressed copy still need downloading.
# Building the list from a set (instead of list.remove) cannot raise when the
# same recording exists in more than one local folder, and keeps remote order.
files_to_copy = [name for name in remote_brainflow_files if name not in local_brainflow_names]



Already have C:\dev\play\brainwave-data\08-07-2024--22-51-16\08-07-2024--22-51-16.brainflow.csv.bz2: False
Already have C:\dev\play\brainwave-data\09-07-2024--22-52-25\09-07-2024--22-52-25.brainflow.csv.bz2: False
Already have C:\dev\play\brainwave-data\2024-07-15-09-10-07\2024-07-15-09-10-07.brainflow.csv.bz2: False
Already have C:\dev\play\brainwave-data\2024-07-16-23-14-52\2024-07-16-23-14-52.brainflow.csv.bz2: False
Already have C:\dev\play\brainwave-data\2024-07-24-18-57-03\2024-07-24-18-57-03.brainflow.csv.bz2: False
Already have C:\dev\play\brainwave-data\2024-09-03-21-10-58\2024-09-03-21-10-58.brainflow.csv.bz2: False
Already have C:\dev\play\brainwave-data\2024-09-04-21-44-08\2024-09-04-21-44-08.brainflow.csv.bz2: False
Already have C:\dev\play\brainwave-data\2024-09-08-21-01-39\2024-09-08-21-01-39.brainflow.csv.bz2: False
Already have C:\dev\play\brainwave-data\2024-09-10-21-22-21\2024-09-10-21-22-21.brainflow.csv.bz2: False
Already have C:\dev\play\brainwave-data\2024-09-11-

In [6]:
files_to_copy

['2024-11-09-21-27-51.brainflow.csv']

In [7]:
import os
import bz2
from tqdm import tqdm

# Download every missing Brainflow recording from the remote server into input_dir.
sftp = ssh.open_sftp()
try:
    for file in files_to_copy:
        remote_file_path = remote_dir + "/" + file
        local_file_path = os.path.join(input_dir, file)
        # Download under a temporary name: the conversion cell only picks up
        # names ending in ".brainflow.csv", so an interrupted transfer can never
        # be compressed or converted as if it were a complete recording.
        partial_file_path = local_file_path + ".part"
        log(f"Copying Brainflow backup {remote_file_path} to {local_file_path}")

        # Get the file size
        remote_file_size = sftp.stat(remote_file_path).st_size

        with tqdm(total=remote_file_size, unit='B', unit_scale=True, desc=file, ascii=True) as pbar:

            def callback(transferred_so_far, total_to_transfer):
                # The callback receives a running total; tqdm expects an increment.
                pbar.update(transferred_so_far - pbar.n)

            try:
                with open(partial_file_path, 'wb') as local_file:
                    sftp.getfo(remote_file_path, local_file, callback=callback)
            except BaseException:
                # Includes KeyboardInterrupt: discard the incomplete download.
                if os.path.exists(partial_file_path):
                    os.remove(partial_file_path)
                raise

        # Only a fully transferred file gets the real name.
        os.replace(partial_file_path, local_file_path)
finally:
    # Always release the SFTP channel, even when a transfer fails.
    sftp.close()

2024-11-11 09:41:05,834 - INFO - [chan 1] Opened sftp connection (server version 3)
2024-11-11 09:41:05,834 - INFO - Copying Brainflow backup /home/graham/dev/Brainwave-Data/2024-11-09-21-27-51.brainflow.csv to C:\dev\play\brainwave-data\2024-11-09-21-27-51.brainflow.csv
2024-11-09-21-27-51.brainflow.csv: 100%|##########| 2.34G/2.34G [01:58<00:00, 19.8MB/s] 
2024-11-11 09:43:04,055 - INFO - [chan 1] sftp session closed.


In [8]:
# Close the SSH connection that was opened for the remote listing and download cells.
ssh.close()


## Convert Brainflow files to FIF

In [9]:
def get_brainflow_compressed_filename(full_input_filename: str) -> str:
    """Return the path of the ``.bz2`` backup belonging to a Brainflow recording.

    The backup keeps the recording's own file name with ``.bz2`` appended and
    is placed in the directory reported by ``webserver.output_dirname``.

    Args:
        full_input_filename: Full path of the uncompressed Brainflow file.

    Returns:
        Full path of the compressed copy (nothing is created or checked here).
    """
    target_dir = webserver.output_dirname(full_input_filename)
    uncompressed_target = os.path.join(target_dir, os.path.basename(full_input_filename))
    return str(uncompressed_target) + '.bz2'

In [10]:
from datetime import datetime

import webserver
import convert
# import zstandard as zstd
import os
import bz2
import time
import shutil

# Per-run bookkeeping filled by the conversion loop below; both lists are
# displayed in the cells that follow it.
errors = []
processed = []

# Could get these working later
# Recordings whose path contains one of these substrings are not converted.
skip_list = ['2024-09-10-21-22-21']

def compress_bz2(input_file, output_file):
    """Compress ``input_file`` into a bzip2 archive at ``output_file``.

    The archive is first written to ``output_file + '.tmp'`` and renamed to
    ``output_file`` only once it is complete. An interrupted run therefore
    never leaves a truncated archive under the final name, which matters
    because other cells treat the mere existence of that file as proof that
    the recording is backed up.

    Args:
        input_file: Path of the file to compress.
        output_file: Path of the finished archive; an existing file is replaced.

    Returns:
        Tuple ``(elapsed_seconds, compressed_size_in_bytes)``.

    Raises:
        OSError: If the input cannot be read or the output cannot be written.
    """
    start_time = time.time()
    temp_output_file = os.fspath(output_file) + '.tmp'
    try:
        with open(input_file, 'rb') as f_in, bz2.open(temp_output_file, 'wb', compresslevel=9) as f_out:
            shutil.copyfileobj(f_in, f_out)
        # Publish the archive under its real name only after it is fully written.
        os.replace(temp_output_file, output_file)
    except BaseException:
        # BaseException so that a KeyboardInterrupt also cleans up the partial file.
        if os.path.exists(temp_output_file):
            os.remove(temp_output_file)
        raise
    end_time = time.time()
    return end_time - start_time, os.path.getsize(output_file)

# For every uncompressed Brainflow recording under input_dir: make sure a .bz2
# backup exists next to its outputs, then convert it to raw.fif unless that
# file already exists or the recording is on skip_list.
for root, dirs, files in os.walk(input_dir):
    for file_name in files:
        full_input_filename = os.path.join(root, file_name)
        try:
            if not full_input_filename.endswith(".brainflow.csv"):
                continue

            full_output_dirname = webserver.output_dirname(full_input_filename)
            full_output_filename = str(os.path.join(full_output_dirname, 'raw.fif'))
            compressed_full_output_filename = get_brainflow_compressed_filename(full_input_filename)

            if not os.path.exists(compressed_full_output_filename):
                log(f"Compressing file {full_input_filename} to " + compressed_full_output_filename)
                processed.append("Compressing " + full_input_filename)
                # exist_ok tolerates an existing folder while still surfacing real
                # failures (permissions, bad path) instead of swallowing them.
                compressed_dir = os.path.dirname(compressed_full_output_filename)
                if compressed_dir:
                    os.makedirs(compressed_dir, exist_ok=True)
                compress_bz2(full_input_filename, compressed_full_output_filename)

            if os.path.exists(full_output_filename):
                log(f"Skipping file {full_input_filename} as {full_output_filename} and {compressed_full_output_filename} already exist")
                continue

            # Substring match against the full path; logged once per file.
            if any(s in full_input_filename for s in skip_list):
                log(f"Skipping file {full_input_filename}")
                continue

            log(f"Processing file {full_input_filename}")
            processed.append("Processing " + full_input_filename)

            # The file name encodes the recording start; the channel layout is
            # only defined for recordings made after 1 September 2024.
            channels = None
            date_time_str = os.path.basename(full_input_filename).removesuffix(".brainflow.csv")
            date_time_obj = datetime.strptime(date_time_str, '%Y-%m-%d-%H-%M-%S')
            if date_time_obj > datetime(2024, 9, 1, 0, 0, 0):
                channels = ['Fpz-M1']

            if channels is not None:
                log(f"Processing file {full_input_filename} with channels {channels}")
                convert.convert_and_save_brainflow_file(log, full_input_filename, full_output_filename, channels)
            else:
                # Previously silent: make it visible that nothing was converted.
                log(f"Not converting {full_input_filename}: no channels configured for recordings dated {date_time_obj}")

        except Exception as e:
            msg = "Error processing file: " + full_input_filename
            log(msg)
            log(e)
            errors.append(msg)


2024-11-11 09:43:15,973 - INFO - Skipping file C:\dev\play\brainwave-data\2024-10-02-21-20-34.brainflow.csv as C:\dev\play\brainwave-data\2024-10-02-21-20-34\raw.fif and C:\dev\play\brainwave-data\2024-10-02-21-20-34\2024-10-02-21-20-34.brainflow.csv.bz2 already exist
2024-11-11 09:43:15,975 - INFO - Skipping file C:\dev\play\brainwave-data\2024-10-06-21-29-18.brainflow.csv as C:\dev\play\brainwave-data\2024-10-06-21-29-18\raw.fif and C:\dev\play\brainwave-data\2024-10-06-21-29-18\2024-10-06-21-29-18.brainflow.csv.bz2 already exist
2024-11-11 09:43:15,976 - INFO - Skipping file C:\dev\play\brainwave-data\2024-10-08-21-21-34.brainflow.csv as C:\dev\play\brainwave-data\2024-10-08-21-21-34\raw.fif and C:\dev\play\brainwave-data\2024-10-08-21-21-34\2024-10-08-21-21-34.brainflow.csv.bz2 already exist
2024-11-11 09:43:15,978 - INFO - Skipping file C:\dev\play\brainwave-data\2024-10-09-21-22-04.brainflow.csv as C:\dev\play\brainwave-data\2024-10-09-21-22-04\raw.fif and C:\dev\play\brainwave-d

In [11]:
errors

[]

In [12]:
processed

['Compressing C:\\dev\\play\\brainwave-data\\2024-11-09-21-27-51.brainflow.csv',
 'Processing C:\\dev\\play\\brainwave-data\\2024-11-09-21-27-51.brainflow.csv']

## Run pipeline on FIF files

In [10]:
import pandas as pd
import run_feature_pipeline
import os

# Run the cached feature pipeline on the raw.fif of every recording directory
# and combine the per-recording frames into `all`.
errors = []
dataframes = []

# Could get these working later
# NOTE: this rebinds skip_list; the post-human loop further down reads
# whichever value was assigned most recently.
skip_list = ['2024-07-23-22-40-25', '2024-07-28-22-29-49', '2024-09-18-21-25-08', '2024-09-18-21-28-11', '2024-09-19-21-29-42']

# Directories visited across the whole walk, for the summary line.
total_dirs = 0

for root, dirs, files in os.walk(input_dir):
    for idx, dir_name in enumerate(dirs):
        total_dirs += 1
        input_file = os.path.join(root, dir_name, "raw.fif")
        if dir_name in skip_list:
            log(f"Skipping {idx} of {len(dirs)}: " + input_file)
            continue
        try:
            log(f"Processing file {idx} of {len(dirs)}: " + input_file)

            # Directories without a raw.fif are silently ignored.
            if os.path.exists(input_file):
                yasa_df = run_feature_pipeline.cached_pipeline(log, input_file, stats_df)
                dataframes.append(yasa_df)
        except Exception as e:
            msg = f"Error processing file {idx} of {len(dirs)}: " + input_file + " - " + str(e)
            log(msg)
            errors.append(msg)
            log(e)

for error in errors:
    log(error)

# NOTE: `all` shadows the builtin of the same name; the name is kept because
# the scaling cells below read it.
all = pd.concat(dataframes)
# len(all) counts rows of the combined frame, not files, so report both.
log(f"Finished processing, have {len(all)} rows from {len(dataframes)} files of {total_dirs} directories")


2024-11-11 14:08:41,622 - INFO - Processing file 0 of 79: C:\dev\play\brainwave-data\08-07-2024--22-51-16\raw.fif
2024-11-11 14:08:41,622 - INFO - Processing file 1 of 79: C:\dev\play\brainwave-data\09-07-2024--22-52-25\raw.fif
2024-11-11 14:08:41,622 - INFO - Processing file 2 of 79: C:\dev\play\brainwave-data\2024-07-08-22-51-16\raw.fif
2024-11-11 14:08:41,622 - INFO - Loading cached file C:\dev\play\brainwave-data\2024-07-08-22-51-16\raw.with_features.csv
2024-11-11 14:08:41,658 - INFO - Processing file 3 of 79: C:\dev\play\brainwave-data\2024-07-11-22-46-18\raw.fif
2024-11-11 14:08:41,658 - INFO - Loading cached file C:\dev\play\brainwave-data\2024-07-11-22-46-18\raw.with_features.csv
2024-11-11 14:08:41,826 - INFO - Processing file 4 of 79: C:\dev\play\brainwave-data\2024-07-12-22-38-58\raw.fif
2024-11-11 14:08:41,826 - INFO - Loading cached file C:\dev\play\brainwave-data\2024-07-12-22-38-58\raw.with_features.csv
2024-11-11 14:08:41,938 - INFO - Processing file 5 of 79: C:\dev\pl

KeyboardInterrupt: 

In [14]:
errors

['Error processing file 52 of 79: C:\\dev\\play\\brainwave-data\\2024-10-02-21-20-34\\raw.fif - Sliding window size may not exceed size of selected axis']

In [15]:
# yasa_df

# Recalculate scalings
**N.B.** This can be run frequently, but the regenerated stats are only picked up by new pipeline runs. It may be worth occasionally regenerating all old files.
For new features, the pipeline first has to be rerun on everything, then the stats regenerated here, and then the pipeline rerun on everything again so that the new features are scaled with those stats.

In [16]:
from scaling import only_eeg

# Column names of the frame returned by only_eeg for the combined frame `all`
# (built by the feature pipeline cell).
only_eeg_cols = list(only_eeg(all).columns)
# Sanity check: fail early if no column name starts with "Main".
assert any(col.startswith("Main") for col in only_eeg_cols), "No column starting with 'Main' found in only_eeg_cols"

In [17]:
import scaling

# Recompute the scaling statistics from the combined feature frame.
stats = scaling.stats(all)
# Validate before persisting, so a bad result can never overwrite the
# stats.csv that the pipeline cells load.
assert any(stats['Column'].str.startswith("Main")), "No row starting with 'Main' found in column_name"
# Same file the notebook reads into stats_df at the top; stats_df itself is not
# refreshed here, so only later runs see the new values.
stats.to_csv(os.path.join(input_dir, "stats.csv"))
stats

  return umr_sum(a, axis, dtype, out, keepdims, initial, where)
  diff_b_a = subtract(b, a)
  diff_b_a = subtract(b, a)
  return umr_sum(a, axis, dtype, out, keepdims, initial, where)
  return umr_sum(a, axis, dtype, out, keepdims, initial, where)
  diff_b_a = subtract(b, a)
  diff_b_a = subtract(b, a)
  return umr_sum(a, axis, dtype, out, keepdims, initial, where)
  return umr_sum(a, axis, dtype, out, keepdims, initial, where)
  diff_b_a = subtract(b, a)
  diff_b_a = subtract(b, a)
  return umr_sum(a, axis, dtype, out, keepdims, initial, where)
  return umr_sum(a, axis, dtype, out, keepdims, initial, where)
  diff_b_a = subtract(b, a)
  diff_b_a = subtract(b, a)
  return umr_sum(a, axis, dtype, out, keepdims, initial, where)
  return umr_sum(a, axis, dtype, out, keepdims, initial, where)
  diff_b_a = subtract(b, a)
  diff_b_a = subtract(b, a)
  return umr_sum(a, axis, dtype, out, keepdims, initial, where)
  return umr_sum(a, axis, dtype, out, keepdims, initial, where)
  diff_b_a = sub

Unnamed: 0,Column,Mean,P10,P90,Min,Max,StdDev
0,F8-M1_eeg_abspow,1.921968e-07,4.772690e-11,9.516579e-10,8.534915e-35,0.000777,0.000009
1,F8-M1_eeg_abspow_c7min_norm,4.070216e+01,-8.968809e-02,5.672409e-01,-2.093740e-01,47832.670000,848.154728
2,F8-M1_eeg_abspow_p2min_norm,4.897505e+01,-6.837685e-02,5.577247e-01,-2.503299e-01,79046.375000,1356.728153
3,F8-M1_eeg_alpha,6.848900e-02,1.244090e-02,1.253551e-01,8.034852e-05,0.398397,0.048033
4,F8-M1_eeg_alpha_c7min_norm,-3.206412e-02,-5.190218e-01,3.788195e-01,-7.588421e-01,1.428529,0.332931
...,...,...,...,...,...,...,...
1579,F4-M1_eeg_thetaabsaa_c7min_norm_s,3.248161e+01,-2.789859e-01,4.042708e-01,-4.454707e-01,12827.550000,561.684263
1580,F4-M1_eeg_thetaabsaa_p2min_norm_s,2.166549e+01,-2.292130e-01,3.533578e-01,-3.893398e-01,16487.691000,595.064412
1581,F4-M1_eeg_thetaabsab_s,1.733905e-07,4.676057e-11,9.035479e-10,1.705335e-11,0.000386,0.000008
1582,F4-M1_eeg_thetaabsab_c7min_norm_s,1.610264e+01,-8.746347e-02,5.996732e-01,-1.146235e-01,6310.061000,275.667415


# Post-human processing
This requires user interaction first to provide sleep times etc.

In [3]:
from sleep_events import load_days_data
from sleep_events import pimp_my_days_data

# Per-day data handed to cached_post_human_pipeline in the cells below.
# NOTE(review): the meaning of the positional True is not visible here — confirm in sleep_events.
days_data = load_days_data(True)

In [12]:
from models.eeg_states.eeg_states import load_and_prepare_eeg_state_events

# EEG state events handed to cached_post_human_pipeline in the cells below.
eeg_state_events = load_and_prepare_eeg_state_events()

  return events
  


In [13]:
from run_post_human_pipeline import cached_post_human_pipeline
import run_feature_pipeline

# Run the post-human pipeline for one recording as a quick check before the
# batch loop below.
dir_name = "2024-11-01-21-32-22"
# Derive the path from input_dir, as the batch loops do, instead of repeating
# the absolute folder here.
input_file = os.path.join(input_dir, dir_name, "raw.fif")
yasa_df = run_feature_pipeline.cached_pipeline(log, input_file, stats_df)
post_human_df = cached_post_human_pipeline(log, dir_name, input_file, stats_df, days_data, yasa_df, eeg_state_events)
post_human_df

2024-11-11 14:20:59,680 - INFO - Loading cached file C:\dev\play\brainwave-data\2024-11-01-21-32-22\raw.with_features.csv
2024-11-11 14:20:59,723 - INFO - Loading cached file C:\dev\play\brainwave-data\2024-11-01-21-32-22\raw.post_human.csv
2024-11-11 14:20:59,763 - INFO - Cached file C:\dev\play\brainwave-data\2024-11-01-21-32-22\raw.post_human.csv is missing TiredVsWired, rebuilding
100%|██████████| 1026/1026 [00:00<00:00, 1454.91it/s]


Unnamed: 0_level_0,Stage,Confidence,Epoch,Timestamp,Source,Fpz-M1_Stage,Fpz-M1_Confidence,StageInt,Fpz-M1_eeg_abspow,Fpz-M1_eeg_abspow_c7min_norm,...,30MinsBeforeReadyToSleep,60MinsBeforeReadyToSleep,DuringReadyToSleep,minsSinceReadyToSleep,minsSinceGotIntoBed,minsSinceLEP,minsUntilWake,epoch_type,matched_event,TiredVsWired
epoch,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,W,0.998100,0,2024-11-01 21:32:22.923768044+00:00,Fpz-M1,W,0.998100,0,2.781803e-09,0.711881,...,0,1,0,-53.117937,-1.717937,4.382063,412.501271,,,0.998791
1,W,0.999757,1,2024-11-01 21:32:52.923768044+00:00,Fpz-M1,W,0.999757,0,2.910033e-09,0.693499,...,0,1,0,-52.617937,-1.217937,4.882063,412.001271,,,0.999277
2,W,0.998000,2,2024-11-01 21:33:22.923768044+00:00,Fpz-M1,W,0.998000,0,3.211455e-09,0.670065,...,0,1,0,-52.117937,-0.717937,5.382063,411.501271,,,0.999200
3,W,0.997628,3,2024-11-01 21:33:52.923768044+00:00,Fpz-M1,W,0.997628,0,2.587089e-09,0.642071,...,0,1,0,-51.617937,-0.217937,5.882063,411.001271,,,0.999200
4,W,0.998508,4,2024-11-01 21:34:22.923768044+00:00,Fpz-M1,W,0.998508,0,2.020690e-09,0.616458,...,0,1,0,-51.117937,0.282063,6.382063,410.501271,,,0.999300
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1021,W,0.938725,1021,2024-11-02 06:02:52.923768044+00:00,Fpz-M1,W,0.938725,0,3.007278e-10,0.024891,...,0,0,0,457.382063,508.782063,514.882063,-97.998729,,,
1022,W,0.990655,1022,2024-11-02 06:03:22.923768044+00:00,Fpz-M1,W,0.990655,0,6.342755e-10,0.029997,...,0,0,0,457.882063,509.282063,515.382063,-98.498729,,,
1023,W,0.986483,1023,2024-11-02 06:03:52.923768044+00:00,Fpz-M1,W,0.986483,0,6.380328e-10,0.034665,...,0,0,0,458.382063,509.782063,515.882063,-98.998729,,,
1024,W,0.982666,1024,2024-11-02 06:04:22.923768044+00:00,Fpz-M1,W,0.982666,0,5.229804e-10,0.036512,...,0,0,0,458.882063,510.282063,516.382063,-99.498729,,,


In [15]:
# Batch version of the cell above: run the post-human pipeline for every
# recording directory that has a raw.fif and save the result beside it.
errors = []

for root, dirs, files in os.walk(input_dir):
    for idx, dir_name in enumerate(dirs):
        input_file = os.path.join(root, dir_name, "raw.fif")
        try:
            log("Processing file: " + input_file)
            # Path without the ".fif" extension, used to name the output CSV.
            input_file_without_ext, _extension = os.path.splitext(input_file)

            if dir_name in skip_list:
                log(f"Skipping {idx} of {len(dirs)}: " + input_file)
                continue

            # Nothing to do for directories that hold no raw.fif.
            if not os.path.exists(input_file):
                continue

            yasa_df = run_feature_pipeline.cached_pipeline(log, input_file, stats_df)
            post_human_df = cached_post_human_pipeline(log, dir_name, input_file, stats_df, days_data, yasa_df, eeg_state_events)

            output_csv_file = input_file_without_ext + ".post_human.csv"
            log("Saving to: " + output_csv_file)
            post_human_df.to_csv(output_csv_file, index=False)

        except Exception as exc:
            failure = "Error processing file: " + input_file
            log(failure)
            errors.append(failure + " - " + str(exc))
            log(exc)

# Repeat all failures at the end so they are easy to find in the log.
for err in errors:
    log(err)


2024-11-11 14:21:39,024 - INFO - Processing file: C:\dev\play\brainwave-data\08-07-2024--22-51-16\raw.fif
2024-11-11 14:21:39,024 - INFO - Processing file: C:\dev\play\brainwave-data\09-07-2024--22-52-25\raw.fif
2024-11-11 14:21:39,024 - INFO - Processing file: C:\dev\play\brainwave-data\2024-07-08-22-51-16\raw.fif
2024-11-11 14:21:39,031 - INFO - Loading cached file C:\dev\play\brainwave-data\2024-07-08-22-51-16\raw.with_features.csv
2024-11-11 14:21:39,070 - INFO - Loading cached file C:\dev\play\brainwave-data\2024-07-08-22-51-16\raw.post_human.csv
2024-11-11 14:21:39,111 - INFO - Saving to: C:\dev\play\brainwave-data\2024-07-08-22-51-16\raw.post_human.csv
2024-11-11 14:21:39,289 - INFO - Processing file: C:\dev\play\brainwave-data\2024-07-11-22-46-18\raw.fif
2024-11-11 14:21:39,289 - INFO - Loading cached file C:\dev\play\brainwave-data\2024-07-11-22-46-18\raw.with_features.csv
2024-11-11 14:21:39,400 - INFO - Loading cached file C:\dev\play\brainwave-data\2024-07-11-22-46-18\raw.p

# Upload to GCS

In [None]:
from upload import upload_dir_to_gcs_skipping_existing
import os

# Upload every recording directory under input_dir to the GCS bucket; the
# helper's name indicates that objects already present are skipped.
errors = []

for root, dirs, files in os.walk(input_dir):
    # Reverse order, so the last-listed directories are uploaded first.
    for dir_name in reversed(dirs):
        input_file = os.path.join(root, dir_name, "raw.fif")
        full_dir_name = os.path.join(root, dir_name)
        try:
            upload_dir_to_gcs_skipping_existing(log, 'examined-life-derived-eeg', full_dir_name, dir_name)
        except Exception as e:
            # Name the item that failed (previously this logged input_dir, which
            # is identical for every failure).
            log("Error processing file: " + input_file)
            errors.append("Error processing file: " + input_file + " - " + str(e))
            log(e)

for error in errors:
    log(error)

# Do not claim success when some uploads failed.
if errors:
    log(f"Upload finished with {len(errors)} error(s)")
else:
    log("All uploaded")

# Check whether Brainflow files that are safely backed up can be deleted

In [None]:

import os

# Sort every uncompressed Brainflow recording under input_dir into "has a .bz2
# backup" (can_delete) or "has none" (cannot_delete). Nothing is deleted here.
errors, can_delete, cannot_delete = [], [], []

for root, dirs, files in os.walk(input_dir):
    for file_name in files:
        full_input_filename = os.path.join(root, file_name)
        if not full_input_filename.endswith(".brainflow.csv"):
            continue

        compressed_full_output_filename = get_brainflow_compressed_filename(full_input_filename)
        if not os.path.exists(compressed_full_output_filename):
            cannot_delete.append(full_input_filename)
            continue

        can_delete.append(dict(
            backed_up=compressed_full_output_filename,
            full_filename=full_input_filename,
        ))
    

In [None]:
can_delete_df = pd.DataFrame(can_delete)
can_delete_df

In [None]:
cannot_delete

In [None]:
can_delete_df['full_filename']

# Check if can delete Cyton files that are safely backed up

In [None]:
import webserver
import bz2
import shutil
import os
import time


errors = []
cannot_delete = []

# One record per compressed Cyton recording (OBCI_*.TXT.bz2) under input_dir:
# 'file_name' is the name without ".bz2", 'full_filename' is the path of the
# compressed copy. Note this rebinds can_delete with a different record layout
# than the Brainflow check above.
can_delete = [
    {
        'file_name': name.removesuffix(".bz2"),
        'full_filename': os.path.join(folder, name),
    }
    for folder, _subdirs, names in os.walk(input_dir)
    for name in names
    if name.startswith("OBCI_") and name.endswith(".TXT.bz2")
]

In [None]:
can_delete

In [None]:
# Places to search for uncompressed Cyton recordings. Drive roots are spelled
# with a trailing backslash: on Windows a bare "d:" means the current directory
# on that drive, not its root.
cyton_file_locations = [input_dir, "d:\\", "e:\\", "x:\\"]

# Index the backup records by original file name so each file is a dictionary
# lookup instead of a scan of the whole list. setdefault keeps the first record
# for a name, matching the previous first-match behaviour.
# Expects can_delete to hold the Cyton records ('file_name' / 'full_filename').
backup_by_file_name = {}
for record in can_delete:
    backup_by_file_name.setdefault(record['file_name'], record)

for cyton_file_location in cyton_file_locations:
    # os.walk yields nothing for a location that does not exist.
    for root, dirs, files in os.walk(cyton_file_location):
        for file in files:
            matching_record = backup_by_file_name.get(file)
            if matching_record:
                full_filename = os.path.join(root, file)
                log(f"Could delete {full_filename} as backed up in {matching_record['full_filename']}")

In [None]:
import ctypes

# Win32 MessageBox style flags (same values as the former inline literals).
MB_OKCANCEL = 0x1
MB_ICONINFORMATION = 0x40

# Windows-only: show a dialog once the preceding cells have finished.
ctypes.windll.user32.MessageBoxW(0, "Compression is complete!", "Alert", MB_ICONINFORMATION | MB_OKCANCEL)