# Convert raw Cyton files

In [1]:
from typing_extensions import List
%reload_ext autoreload
%autoreload 2
import logging
import os
def log(msg):
    """Emit ``msg`` on the root logger at INFO level."""
    return logging.info(msg)


# Timestamped INFO-level logging for every cell in the notebook.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
)

In [2]:
# Alternative input file, kept for reference:
# input_file = "C:\\dev\\play\\brainwave-data\\OBCI_1A.TXT"
# Root folder under which a per-recording output directory is created later on.
output_dir = "C:\\dev\\play\\brainwave-data"
# Raw Cyton SD-card text file to convert.
input_file = "D:\\OBCI_22.TXT"
# Maps channel label -> zero-based column index into each parsed sample row.
channels = {'Fpz':0}
# Paste the two relevant recorder log lines below; they can be found with:
# grep "Starting recording to SD\|Starting stream" output*.txt
logs = """
output-20240926121707.txt:2024-09-30 21:24:50,925 - INFO - Starting recording to SD
output-20240926121707.txt:2024-09-30 21:24:52,205 - INFO - Starting stream
"""
# Input path with its extension stripped.
input_file_without_ext = os.path.splitext(input_file)[0]

In [3]:
import pytz
import re
from datetime import datetime, timedelta

def _parse_log_timestamp(log_line, pattern):
    """Return ``(raw_text, parsed_datetime)`` for the first timestamp in ``log_line``.

    Raises ValueError when the line contains no timestamp matching ``pattern``.
    """
    match = re.search(pattern, log_line)
    if match is None:
        raise ValueError(f"No timestamp found in log line: {log_line!r}")
    raw_text = match.group(1)
    # The pattern accepts either ',' or '.' before the milliseconds, but strptime
    # needs one fixed separator, so normalise to ',' before parsing.
    parsed = datetime.strptime(raw_text.replace('.', ','), '%Y-%m-%d %H:%M:%S,%f')
    return raw_text, parsed


# Split the pasted log text once; the first two lines are the ones of interest.
log_lines = logs.strip().splitlines()
if len(log_lines) < 2:
    raise ValueError("`logs` must contain at least two lines (recording start and stream start)")
log_line1 = log_lines[0]
log_line2 = log_lines[1]

print(log_line1)

# Regular expression to extract timestamps (optionally wrapped in square brackets)
timestamp_pattern = r'\[?(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}[.,]\d{3})\]?'

# Extract and parse timestamps
timestamp1_str, timestamp1 = _parse_log_timestamp(log_line1, timestamp_pattern)
timestamp2_str, timestamp2 = _parse_log_timestamp(log_line2, timestamp_pattern)

# Calculate the halfway point between the two events and tag it as Europe/London local time
halfway_point = pytz.timezone('Europe/London').localize(timestamp1 + (timestamp2 - timestamp1) / 2)

print("Timestamp 1:", timestamp1)
print("Timestamp 2:", timestamp2)
print("Halfway point:", halfway_point)

output-20240926121707.txt:2024-09-30 21:24:50,925 - INFO - Starting recording to SD
Timestamp 1: 2024-09-30 21:24:50.925000
Timestamp 2: 2024-09-30 21:24:52.205000
Halfway point: 2024-09-30 21:24:51.565000+01:00


In [4]:
#2024-10-02-21-21-15.brainflow.csv
# Manual override of the recording start time (naive: no timezone attached).
# NOTE(review): the file name in the comment above does not match the date set here - confirm which is intended.
halfway_point = datetime(
    year=2024, month=10, day=4,
    hour=21, minute=13, second=39,
)

In [5]:
# This is adapted from https://github.com/roflecopter/openbci-session/tree/main
# Full credit to the original author, and this file is under the MIT license.
#
# MIT License
#
# Copyright (c) 2023 roflecopter
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

#!pip install pyEDFlib

import numpy as np
import scipy as sp
import datetime
import math
import os
import json
import csv
import pandas as pd
import sqlite3
import re

from pyedflib import highlevel
from contextlib import closing
from scipy.interpolate import interp1d, CubicSpline, PchipInterpolator, Akima1DInterpolator


# Largest positive code of the ADS1299's signed 24-bit output (2**23 - 1)
ADS1299_BITS = (2**23-1)
# Default gain, i.e. what the ADS1299 multiplies its input signal by
ADS1299_GAIN = 24
V_Factor = 1000000 # Converts volts to uV
def adc_v_bci(signal, ADS1299_VREF = 4.5, gain = ADS1299_GAIN):
    """Convert raw ADS1299 counts to microvolts.

    Parameters
    ----------
    signal : number or numpy array
        Signed ADC count(s).
    ADS1299_VREF : float
        Reference voltage of the ADS1299, in volts.
    gain : number
        Amplifier gain the channel was recorded with; defaults to ADS1299_GAIN.

    Returns
    -------
    ``signal`` multiplied by the microvolts-per-count scale factor.
    """
    k = ADS1299_VREF / ADS1299_BITS / gain * V_Factor
    return signal * k

def interpret24bitAsInt32(hex_str):
    """Decode a 6-character hex string as a signed, big-endian 24-bit integer.

    Returns the value as a Python int in the range -8388608..8388607.
    Returns ``np.nan`` when the field cannot be decoded: wrong length,
    non-hex characters, or embedded whitespace, so one corrupt field does
    not abort a whole file.
    """
    if len(hex_str) != 6:
        return np.nan
    try:
        raw = bytes.fromhex(hex_str)
    except ValueError:
        # Non-hex characters in the field.
        return np.nan
    if len(raw) != 3:
        # bytes.fromhex skips whitespace, so 6 characters can yield fewer than 3 bytes.
        return np.nan
    # Two's-complement decoding performs the sign extension from bit 23 directly.
    return int.from_bytes(raw, byteorder='big', signed=True)

# def interpret16bitAsInt32(hex_str):
#     if len(hex_str) == 4:
#         # Convert the hex string to a byte array
#         byte_array = bytes.fromhex(hex_str)
# 
#         # Convert the bytes to a 16-bit integer
#         new_int = (byte_array[0] << 8) | byte_array[1]
# 
#         # Check if the 16th bit is set (negative number in 2's complement)
#         if new_int & 0x00008000:
#             new_int |= 0xFFFF0000  # Extend the sign bit to 32 bits
#         else:
#             new_int &= 0x0000FFFF  # Ensure the number is positive
# 
#         # Adjust for Python's handling of integers larger than 32 bits
#         if new_int & 0x80000000:
#             new_int -= 0x100000000
# 
#         return new_int
#     else:
#         return np.nan


def process_line(split_line, n_ch, n_acc):
    """Decode the channel fields of one comma-split data line.

    Fields 1..n_ch (field 0 is skipped) are each decoded with
    interpret24bitAsInt32. ``n_acc`` is accepted but not used here.
    Returns the decoded values as a list of length ``n_ch``.
    """
    channel_fields = range(1, n_ch + 1)
    return [interpret24bitAsInt32(split_line[field]) for field in channel_fields]

def process_file(file_path, n_ch = 8, n_acc = 3, sf = 250, verbose=False):
    """Parse a Cyton SD-card text file into rows of decoded channel values.

    Parameters
    ----------
    file_path : str
        Path of the text file to read.
    n_ch : int
        Number of channel fields to decode from each data line.
    n_acc : int
        Number of trailing accelerometer fields; only used to bound the
        accepted field count of a data line.
    sf : int
        Sampling frequency; only used to pace the progress messages.
    verbose : bool
        When True, also print a progress message every minute of lines
        (a message every ten minutes is always printed).

    Returns
    -------
    (result, stops, stops_at)
        result   - list of per-line lists of decoded channel values
        stops    - line indices of single-field lines starting with '%'
        stops_at - values decoded from the line directly following such a marker
    """
    result = []
    stops = []
    stops_at = []
    i = 0
    # One context-managed handle only: the file used to be opened a second time
    # inside the `with`, and that second handle was never closed.
    with open(file_path, 'r') as file:
        while True:
            line = file.readline()

            # Echo the first few lines so the file format can be eyeballed.
            if (i < 3):
                print(line)

            if not line:
                print(f'EOF, no line at {i}')
                break  # End of file
            split_line = line.strip().split(',')
            if split_line[0].startswith('%Total time'):
                print(f'recording full at {i} / {line}')
                break # SD recording complete
            if len(split_line) == 1 and split_line[0].startswith('%'):
                stops.append(i)
            elif len(split_line) == 1:
                # Guard against a bare line appearing before any '%' marker,
                # which used to raise IndexError on stops[-1].
                if stops and stops[-1] == i - 1:
                    print(f'stopped at {i} / {line}')
                    # NOTE(review): `line` still carries its newline here, and the sample
                    # file shows an 8-digit value, so this looks like it can never be
                    # 6 characters long and would always decode to NaN - confirm intent.
                    stops_at.append(interpret24bitAsInt32('00' + line))
            elif (len(split_line) > 3) and (len(split_line) <= 8 + n_acc + 1):
                values = process_line(split_line, n_ch, n_acc)
                result.append(values)
            i += 1
            # `last` is None until a sample has been parsed; indexing result[-1]
            # directly used to raise IndexError in that case.
            if i % (sf*60)== 0:
                if verbose:
                    last = result[-1] if result else None
                    print(f"Processing... {i/(sf*60)}m, n_samples: {len(result)}, last:{last}")
            if i % (sf*600)== 0:
                last = result[-1] if result else None
                print(f'Processing... {round(i/(sf*60))}m, n_samples: {len(result)}, last:{last}')
    return result, stops, stops_at

def obci_bdf(bci_signals, sf, channels, user, gender, dts, birthday, gain, electrode = 'Gold Cup OpenBCI, Ten20', activity="sleep", device="Cyton"):
    """Build the header, per-channel signal headers and signals for a BDF export.

    Parameters
    ----------
    bci_signals : sequence of rows
        Decoded samples, one row per sample and one column per channel.
    sf : int
        Sampling frequency; the data is truncated to a whole number of seconds.
    channels : dict
        Maps channel label -> column index in ``bci_signals``. Labels containing
        'ACC' are skipped.
    user, gender, dts, birthday
        Passed to ``highlevel.make_header``; ``birthday`` is a string formatted
        as '%Y_%m_%d-%H_%M_%S'.
    gain
        Currently unused: the conversion goes through ``adc_v_bci`` with its defaults.
    electrode, activity, device
        Free-text metadata written into the headers.

    Returns
    -------
    list
        ``[header, signal_headers, signals, processed]`` where ``processed`` is
        the number of samples kept for the last channel visited (0 when
        ``channels`` is empty).
    """
    # Imported locally under an alias: other cells in this notebook rebind the global
    # name `datetime` to the class (`from datetime import datetime`), which would make
    # `datetime.datetime` fail here.
    import datetime as _dt

    header = highlevel.make_header(patientname=user, gender=gender, equipment=device + ', ' + activity,
                                   startdate = dts, birthdate = _dt.datetime.strptime(birthday, '%Y_%m_%d-%H_%M_%S'))
    # Keep only whole seconds of data.
    whole_seconds = math.floor(len(bci_signals) / sf)
    usable_samples = whole_seconds * sf
    signals = []; signal_headers = []
    bci_signals = np.array(bci_signals)
    processed = 0  # stays 0 when there are no channels (previously an unbound local)
    for channel in channels:
        ch_i = channels[channel]
        channel_data = bci_signals[:usable_samples, ch_i]
        if re.search('ACC',channel) is  None:
            # EEG
            # https://openbci.com/forum/index.php?p=/discussion/comment/8122
            ch_dig_min = -8388608; ch_dig_max = 8388607
            ch_ph_min = -187500; ch_ph_max = 187500
            # adc_v_bci is a plain multiplication, so it applies to the whole array at once;
            # np.vectorize would run a Python-level loop over every sample instead.
            channel_data = adc_v_bci(channel_data)
            # Clamp to the declared physical range so the writer accepts the data.
            channel_data = np.clip(channel_data, ch_ph_min, ch_ph_max)
            signals.append(channel_data)
            signal_headers.append({"label": channel, "dimension": "uV", "sample_rate": sf, "sample_frequency": sf, 'physical_max': ch_ph_max, 'physical_min': ch_ph_min, 'digital_max': ch_dig_max, 'digital_min': ch_dig_min, 'transducer': electrode, 'prefilter': ''})
        processed = len(channel_data)
    return([header, signal_headers, signals, processed])



In [6]:
# Parse the SD-card file, decoding one field per configured channel.
bci_signals, stops, stops_at = process_file(
    input_file,
    n_ch=len(channels),
    n_acc=3,
    sf=250,
    verbose=True,
)


%STOP AT

00014729

stopped at 1 / 00014729

00,EEAA16,000000,000000,000000,000000,000000,000000,800000,F6B0,0030,0C40

Processing... 1.0m, n_samples: 14998, last:[-903947]
Processing... 2.0m, n_samples: 29998, last:[-730832]
Processing... 3.0m, n_samples: 44998, last:[-588967]
Processing... 4.0m, n_samples: 59998, last:[-470472]
Processing... 5.0m, n_samples: 74998, last:[-366686]
Processing... 6.0m, n_samples: 89998, last:[-285250]
Processing... 7.0m, n_samples: 104998, last:[-224066]
Processing... 8.0m, n_samples: 119998, last:[-159524]
Processing... 9.0m, n_samples: 134998, last:[-98851]
Processing... 10.0m, n_samples: 149998, last:[-53152]
Processing... 10m, n_samples: 149998, last:[-53152]
Processing... 11.0m, n_samples: 164998, last:[-25559]
Processing... 12.0m, n_samples: 179998, last:[17733]
Processing... 13.0m, n_samples: 194998, last:[9543]
Processing... 14.0m, n_samples: 209998, last:[52743]
Processing... 15.0m, n_samples: 224998, last:[70501]
Processing... 16.0m, n_samples

In [7]:
# Recording length in minutes, assuming 250 samples per second.
mins_of_data = len(bci_signals) / 250 / 60
mins_of_data

542.0918

In [8]:
    
def counts_to_volts(count):
    """Scale a raw ADC count to volts (4.5 V reference, 2**23 full-scale codes, 24x gain)."""
    v_ref = 4.5
    full_scale = 2**23
    gain = 24
    return count * v_ref / full_scale / gain

def counts_to_microvolts(count):
    """Scale a raw ADC count to microvolts by way of counts_to_volts."""
    volts = counts_to_volts(count)
    return volts * 1_000_000

import mne
# Convert raw ADC values to microvolts
# bci_signals_copy = np.vectorize(adc_v_bci)(bci_signals) * 15686 / 8388607

# Alternatively, scale the raw signals directly
# bci_signals_copy2 = bci_signals * 0.001869917   

# Ensure bci_signals_copy2 is used consistently in the rest of the code
# For example, if you need to use bci_signals_copy2 for further processing:
# scaled = bci_signals_copy2 / 1_000_000_000  # Convert to volts if needed
import numpy as np

# Assuming bci_signals is a list of lists with shape (n_samples, n_channels)
# and channels is a dictionary with channel names and their indices

# Convert bci_signals to a numpy array

# https://openbci.com/forum/index.php?p=/discussion/comment/8122
# bci_signals_copy = np.vectorize(adc_v_bci)(bci_signals) * 15686 / 8388607
# bci_signals_copy2 = bci_signals * 0.001869917
# bci_signals_copy3 = bci_signals * 15686 / 8388607
# bci_signals_copy4 = np.array(bci_signals) * 0.02235
# random_ass_scale_factor = 1000
# Convert raw ADC counts to volts, the unit MNE expects for EEG data.
# counts_to_volts is plain arithmetic, so it is applied to the whole array at once;
# np.vectorize would run a Python-level loop over every sample instead.
bci_signals_copy5 = counts_to_volts(np.asarray(bci_signals))

# Define the sampling frequency
sfreq = 250  # Hz

# Create info structure
info = mne.create_info(ch_names=list(channels.keys()), sfreq=sfreq, ch_types='eeg')

# Transpose to match MNE's expected shape (n_channels, n_times)
bci_signals_copy = bci_signals_copy5.T

# Already in volts, so no further scaling is applied
scaled = bci_signals_copy



In [9]:
from datetime import timezone

# A naive halfway_point (such as the manual override earlier in the notebook) carries no
# zone, and astimezone() would silently interpret it in this machine's local zone.
# Treat it as Europe/London instead, matching how the log timestamps are localised.
# NOTE(review): assumes the manually entered time is London wall-clock time - confirm.
if halfway_point.tzinfo is None:
    halfway_point_local = pytz.timezone('Europe/London').localize(halfway_point)
else:
    halfway_point_local = halfway_point
halfway_point_utc = halfway_point_local.astimezone(timezone.utc)
info.set_meas_date(halfway_point_utc)

# Create Raw object
raw = mne.io.RawArray(scaled, info)

# raw.filter(0.3, 35) 
# raw.notch_filter(freqs=[50,100])

# Print the Raw object to verify
# print(raw)

Creating RawArray with float64 data, n_channels=1, n_times=8131377
    Range : 0 ... 8131376 =      0.000 ... 32525.504 secs
Ready.


In [10]:
# The output folder is named after the recording start time.
halfway_time_str = halfway_point.strftime('%Y-%m-%d-%H-%M-%S')

# Create the directory (a no-op when it already exists)
directory_path = os.path.join(output_dir, halfway_time_str)
os.makedirs(directory_path, exist_ok=True)

# Write the recording into that folder, replacing any earlier export
raw.save(os.path.sep.join([directory_path, 'raw.fif']), overwrite=True)

Writing C:\dev\play\brainwave-data\2024-10-04-21-13-39\raw.fif
Closing C:\dev\play\brainwave-data\2024-10-04-21-13-39\raw.fif
[done]


# Look for matching Brainflow data files to back up

In [None]:
from tqdm import tqdm
import paramiko
import re
from datetime import datetime, timedelta
import os
import dotenv
dotenv.load_dotenv()

import os
import bz2
from tqdm import tqdm

# Define the time window: remote files whose name timestamp lies within this
# distance of the recording start are treated as belonging to this recording
time_window = timedelta(minutes=10)

# Define the remote server details (credentials come from the environment / .env)
hostname = os.getenv('SSH_HOST')
username = os.getenv('SSH_USERNAME')
password = os.getenv('SSH_PASSWORD')
remote_dir = '/home/graham/dev/Brainwave-Data'
local_dir = directory_path

# File-name timestamps are naive, so compare them against a naive copy of
# halfway_point; subtracting an aware datetime from a naive one raises TypeError.
# NOTE(review): assumes file names use the same wall-clock zone as halfway_point - confirm.
halfway_point_naive = halfway_point.replace(tzinfo=None)

# Regular expression to match the file pattern
file_pattern = re.compile(r'(\d{4}-\d{2}-\d{2}-\d{2}-\d{2}-\d{2})\.brainflow\.csv')

# Create an SSH client
ssh = paramiko.SSHClient()
# NOTE(review): AutoAddPolicy accepts unknown host keys without verification, which
# allows man-in-the-middle attacks; prefer load_system_host_keys() with the default
# reject policy if the server's key can be pinned.
ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
try:
    ssh.connect(hostname, username=username, password=password, compress=True)

    # List files in the remote directory
    stdin, stdout, stderr = ssh.exec_command(f'ls {remote_dir}')
    files = stdout.read().decode().splitlines()
    log(files)

    # Filter files within the time window
    filtered_files = []
    for file in files:
        match = file_pattern.match(file)
        if match:
            file_time_str = match.group(1)
            file_time = datetime.strptime(file_time_str, '%Y-%m-%d-%H-%M-%S')
            if abs(file_time - halfway_point_naive) <= time_window:
                filtered_files.append(file)

    # Copy the filtered files to the local directory, compressing on the fly
    sftp = ssh.open_sftp()
    try:
        for file in filtered_files:
            remote_file_path = remote_dir + "/" + file
            local_file_path = os.path.join(directory_path, file + ".bz2")

            # Skip before touching the server when the archive is already present
            if os.path.exists(local_file_path):
                log(f"Skipping {local_file_path}, compressed file already exists.")
                continue

            log(f"Copying Brainflow backup {remote_file_path} to {local_file_path}")

            # Get the file size for the progress bar
            remote_file_size = sftp.stat(remote_file_path).st_size

            # Download into a temporary name and rename only on success, so an
            # interrupted transfer never leaves a truncated archive that a later
            # run would skip as "already exists".
            partial_file_path = local_file_path + ".part"
            try:
                with tqdm(total=remote_file_size, unit='B', unit_scale=True, desc=file, ascii=True) as pbar:
                    def callback(transferred_so_far, total_to_transfer):
                        # paramiko reports cumulative bytes; tqdm wants the increment
                        pbar.update(transferred_so_far - pbar.n)

                    # The bz2 file object is itself a writable file-like object,
                    # so it can be handed to getfo directly.
                    with bz2.open(partial_file_path, 'wb') as bz2_file:
                        sftp.getfo(remote_file_path, bz2_file, callback=callback)
            except BaseException:
                # Also covers KeyboardInterrupt, the usual way a notebook cell is aborted
                if os.path.exists(partial_file_path):
                    os.remove(partial_file_path)
                raise
            os.replace(partial_file_path, local_file_path)
    finally:
        sftp.close()
finally:
    # Close the connection even when listing or copying fails
    ssh.close()

2024-10-05 10:35:58,890 - INFO - Connected (version 2.0, client OpenSSH_9.6)
2024-10-05 10:35:59,074 - INFO - Authentication (publickey) failed.
2024-10-05 10:35:59,164 - INFO - Authentication (password) successful!
2024-10-05 10:35:59,547 - INFO - ['2024-09-30-21-27-49.brainflow.csv', '2024-10-01-21-36-11.brainflow.csv', '2024-10-02-21-20-34.brainflow.csv', '2024-10-02-21-21-15.brainflow.csv', '2024-10-04-21-13-39.brainflow.csv', 'output-20241003182702.txt']
2024-10-05 10:35:59,603 - INFO - [chan 1] Opened sftp connection (server version 3)
2024-10-05 10:35:59,604 - INFO - Copying Brainflow backup /home/graham/dev/Brainwave-Data/2024-10-04-21-13-39.brainflow.csv to C:\dev\play\brainwave-data\2024-10-04-21-13-39\2024-10-04-21-13-39.brainflow.csv.bz2
2024-10-04-21-13-39.brainflow.csv:   5%|4         | 88.3M/1.95G [00:13<04:29, 6.94MB/s]

# Compress the raw data file

In [33]:
import bz2
import shutil
import os
import time
# Maps compression algorithm name -> (seconds taken, compressed size in bytes)
results = {}

import gzip
import bz2
import lzma
# import zstandard as zstd
import shutil
import os
import time

def compress_bz2(input_file, output_file):
    """Compress ``input_file`` into ``output_file`` with bz2 at the maximum level.

    Returns a tuple ``(elapsed_seconds, compressed_size_in_bytes)``.
    """
    # perf_counter is monotonic, so the measured duration cannot be skewed by
    # wall-clock adjustments the way time.time() differences can.
    start_time = time.perf_counter()
    with open(input_file, 'rb') as f_in, bz2.open(output_file, 'wb', compresslevel=9) as f_out:
        shutil.copyfileobj(f_in, f_out)
    elapsed = time.perf_counter() - start_time
    return elapsed, os.path.getsize(output_file)


# N.b. after comparing the compression ratios, bz2 is the best choice for space vs time.

# The archive sits next to the exported recording and keeps the input's file name.
bz_output_file = os.path.join(directory_path, f"{os.path.basename(input_file)}.bz2")
log(f"Writing to {bz_output_file}")

# Record (seconds taken, compressed size) under the algorithm's name.
results['bz2'] = compress_bz2(input_file, bz_output_file)

# One summary line per algorithm that was run.
for algo, (time_taken, size) in results.items():
    print(f"{algo.upper()} - Time taken: {time_taken:.2f} seconds, Compressed size: {size} bytes")

2024-10-03 17:43:37,437 - INFO - Writing to C:\dev\play\brainwave-data\2024-10-01-21-36-11\OBCI_1F.TXT.bz2


BZ2 - Time taken: 50.69 seconds, Compressed size: 31743438 bytes
