# Discover correct sample rate

In [2]:
import numpy as np
import pandas as pd
import logging
import os
import sys

root_dir = os.path.abspath(os.path.join(os.path.dirname('__file__'), '../..'))
sys.path.append(root_dir)



In [3]:
import sleep_events
db = sleep_events.connect_to_firebase()

In [4]:
docs = db.collection('eegEvents').stream()
records = [doc.to_dict() for doc in docs]
ha_events = pd.DataFrame(records)


In [5]:
ha_events.head()

Unnamed: 0,type,timestampCreated,timestampFor,dayAndNightOf,timestampUpdated
0,brainflow_recording_start_attempted,2024-10-03 17:26:28.581000+00:00,2024-10-03 17:26:27+00:00,2024-10-03,2024-10-03 17:26:28.581000+00:00
1,brainflow_recording_start_failed,2024-10-03 17:26:28.586000+00:00,2024-10-03 17:26:28+00:00,2024-10-03,2024-10-03 17:26:28.586000+00:00
2,brainflow_recording_start_attempted,2024-10-03 17:28:17.710000+00:00,2024-10-03 17:28:16+00:00,2024-10-03,2024-10-03 17:28:17.710000+00:00
3,brainflow_recording_start_failed,2024-10-03 17:28:17.713000+00:00,2024-10-03 17:28:17+00:00,2024-10-03,2024-10-03 17:28:17.713000+00:00
4,brainflow_recording_start_attempted,2024-10-05 20:09:49.652000+00:00,2024-10-05 20:09:36+00:00,2024-10-05,2024-10-05 20:09:49.652000+00:00


In [6]:
ha_events['type'].value_counts()


type
brainflow_recording_start_attempted    164
brainflow_recording_pre_file_start     139
brainflow_recording_post_file_start    139
brainflow_recording_file_stop          126
brainflow_recording_start_failed        24
Name: count, dtype: int64

In [7]:
# ... existing code ...
stop_events = ha_events[ha_events['type'] == 'brainflow_recording_file_stop']
start_events = ha_events[ha_events['type'] == 'brainflow_recording_post_file_start']

grouped = pd.DataFrame()
for day in stop_events['dayAndNightOf'].unique():
    day_stops = stop_events[stop_events['dayAndNightOf'] == day]
    day_starts = start_events[start_events['dayAndNightOf'] == day]
    
    last_stop = day_stops['timestampFor'].max() if not day_stops.empty else None
    last_start = day_starts['timestampFor'].max() if not day_starts.empty else None
    
    grouped = pd.concat([grouped, pd.DataFrame({
        'dayAndNightOf': [day],
        'RecordedStop': [last_stop],
        'RecordedStart': [last_start]
    })], ignore_index=True)

grouped['dayAndNightOf'] = grouped['dayAndNightOf'].astype(str)

grouped.head()
# ... existing code ...

  grouped = pd.concat([grouped, pd.DataFrame({


Unnamed: 0,dayAndNightOf,RecordedStop,RecordedStart
0,2024-10-06,2024-10-07 05:30:58+00:00,2024-10-06 20:29:18+00:00
1,2024-10-07,2024-10-08 04:42:13+00:00,2024-10-07 20:35:18+00:00
2,2024-10-08,2024-10-09 04:31:16+00:00,2024-10-08 20:21:34+00:00
3,2024-10-09,2024-10-10 06:18:35+00:00,2024-10-09 20:22:04+00:00
4,2024-10-11,2024-10-12 08:07:17+00:00,2024-10-11 20:16:27+00:00


In [23]:
import convert
from datetime import timedelta, datetime
from notebooks.Util.DayAndNightOfFinder import day_and_night_of_dir

input_dir = "C:\\dev\\play\\brainwave-data"

df = pd.DataFrame(columns=['DayAndNightOf', 'EEGStart', 'EEGStop', 'Samples'])

for day in list(grouped['dayAndNightOf'])[-1:]:
    try:
        dir, _ = day_and_night_of_dir(input_dir, day)
        input_file = os.path.join(dir, "raw.fif")
        raw, _, _ = convert.load_mne_file(print, input_file)
        start_date = raw.info['meas_date']
        duration = timedelta(seconds=float(raw.times[-1]))
        samples = raw.get_data().shape[1]
        end_date = start_date + duration
        print(f"file {input_file} Start date: {start_date}, End date: {end_date}, Samples: {samples}")
        df = pd.concat([df, pd.DataFrame({'DayAndNightOf': [day], 'EEGStart': [start_date], 'EEGStop': [end_date], 'Samples': [samples]})], ignore_index=True)
    except Exception as e:
        print(f"Error loading file {input_file}: {e}")

df


Reading file C:\dev\play\brainwave-data\2025-03-20-21-22-08\raw.fif
Opening raw data file C:\dev\play\brainwave-data\2025-03-20-21-22-08\raw.fif...
Isotrak not found
    Range : 0 ... 7979121 =      0.000 ... 31916.484 secs
Ready.
Reading 0 ... 7979121  =      0.000 ... 31916.484 secs...
Finished reading file C:\dev\play\brainwave-data\2025-03-20-21-22-08\raw.fif
file C:\dev\play\brainwave-data\2025-03-20-21-22-08\raw.fif Start date: 2025-03-20 21:22:08.886214+00:00, End date: 2025-03-21 06:14:05.370214+00:00, Samples: 7979122


  df = pd.concat([df, pd.DataFrame({'DayAndNightOf': [day], 'EEGStart': [start_date], 'EEGStop': [end_date], 'Samples': [samples]})], ignore_index=True)


Unnamed: 0,DayAndNightOf,EEGStart,EEGStop,Samples
0,2025-03-20,2025-03-20 21:22:08.886214+00:00,2025-03-21 06:14:05.370214+00:00,7979122


In [22]:
df['DayAndNightOf'] = df['DayAndNightOf'].astype(str)

# Then use merge instead of join with the correct column names

joined = pd.merge(df, grouped, left_on='DayAndNightOf', right_on='dayAndNightOf', how='left')
joined['BetweenRecordings'] = (joined['RecordedStop'] - joined['RecordedStart']).abs()
joined['StopDiff'] = (joined['RecordedStop'] - joined['EEGStop']).abs()
joined['StartDiff'] = (joined['RecordedStart'] - joined['EEGStart']).abs()
joined['SampleRate'] = joined['Samples'] / joined['BetweenRecordings'].dt.total_seconds()
joined



Unnamed: 0,DayAndNightOf,EEGStart,EEGStop,Samples,dayAndNightOf,RecordedStop,RecordedStart,BetweenRecordings,StopDiff,StartDiff,SampleRate
0,2025-03-18,2025-03-18 21:40:28.301187+00:00,2025-03-19 06:38:51.681187+00:00,8075846,2025-03-18,2025-03-19 06:37:56+00:00,2025-03-18 21:40:28+00:00,0 days 08:57:28,0 days 00:00:55.681187,0 days 00:00:00.301187,250.42936
1,2025-03-19,2025-03-19 21:45:19.286570+00:00,2025-03-20 06:10:13.582570+00:00,7573575,2025-03-19,2025-03-20 06:09:25+00:00,2025-03-19 21:45:19+00:00,0 days 08:24:06,0 days 00:00:48.582570,0 days 00:00:00.286570,250.399226
2,2025-03-20,2025-03-20 21:22:08.886214+00:00,2025-03-21 06:14:05.370214+00:00,7979122,2025-03-20,2025-03-21 06:13:12+00:00,2025-03-20 21:22:08+00:00,0 days 08:51:04,0 days 00:00:53.370214,0 days 00:00:00.886214,250.411813


In [20]:
# ... existing code ...
# Calculate Q1, Q3, and IQR for SampleRate
Q1 = joined['SampleRate'].quantile(0.1)
Q3 = joined['SampleRate'].quantile(0.9)
IQR = Q3 - Q1

# Define bounds for outliers
lower_bound = Q1 - 1.5 * IQR
upper_bound = Q3 + 1.5 * IQR

# Filter out the outliers
filtered_sample_rate = joined['SampleRate'][(joined['SampleRate'] >= lower_bound) & 
                                           (joined['SampleRate'] <= upper_bound)]

display(filtered_sample_rate)
# Calculate mean without outliers
mean_sample_rate = filtered_sample_rate.mean()
mean_sample_rate
# ... existing code ...

0      250.41636
1     250.428094
2     250.468735
4     250.468796
5     250.483321
6     250.437843
7     250.438039
8     250.412954
9     250.274351
10    250.438405
11    250.439659
12    250.436267
13    250.412936
14    250.404737
15    250.284372
16     250.42936
17    250.399226
18    250.411813
Name: SampleRate, dtype: object

250.4158482144943

In [None]:
# This rate will be used from  night of 20th March 2025 onwards.


# Test new rate

In [27]:
import convert
from importlib import reload
reload(convert)

full_input_filename = "C:\\dev\\play\\brainwave-data\\2025-03-20-21-22-08.brainflow.csv"
channels = ['Fpz-M1']
convert.convert_and_save_brainflow_file_with_gap_filling(print, full_input_filename, "C:\\dev\\play\\brainwave-data\\2025-03-20\\raw.250.41.fif", channels, 250.4158482144943)
convert.convert_and_save_brainflow_file_with_gap_filling(print, full_input_filename, "C:\\dev\\play\\brainwave-data\\2025-03-20\\raw.250.fif", channels, 250)

Memory Usage: 580.49 MB GC to 580.49 MB
Reading Brainflow file C:\dev\play\brainwave-data\2025-03-20-21-22-08.brainflow.csv
Finished reading Brainflow file
Memory Usage: 1999.99 MB GC to 1999.99 MB
EEG channels: [1, 2, 3, 4, 5, 6, 7, 8]


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  idx_and_eeg_channels_and_timestamp['datetime'] = pd.to_datetime(idx_and_eeg_channels_and_timestamp['timestamp'],unit="s").dt.tz_localize('UTC')
Processing rows: 100%|██████████| 7936687/7936687 [00:06<00:00, 1322238.47it/s]


Gaps:  datetime
False    7931224
True          27
Name: count, dtype: int64


Processing rows:   1%|          | 55606/7931250 [00:00<01:18, 100396.94it/s]

Gap 36394 start time:  2025-03-20 21:24:34.284627914+00:00
Gap 36394 end time:  2025-03-20 21:24:35.209925890+00:00
Gap 36883 start time:  2025-03-20 21:24:37.253129959+00:00
Gap 36883 end time:  2025-03-20 21:24:38.136048079+00:00


Processing rows:   7%|▋         | 590148/7931250 [00:05<01:16, 96090.48it/s] 

Gap 578640 start time:  2025-03-20 22:00:41.503294945+00:00
Gap 578640 end time:  2025-03-20 22:00:43.636815071+00:00
Gap 578642 start time:  2025-03-20 22:00:43.636892080+00:00
Gap 578642 end time:  2025-03-20 22:00:44.421345949+00:00
Gap 578644 start time:  2025-03-20 22:00:44.443382025+00:00
Gap 578644 end time:  2025-03-20 22:00:47.545509100+00:00
Gap 578788 start time:  2025-03-20 22:00:48.183696032+00:00
Gap 578788 end time:  2025-03-20 22:01:26.058260918+00:00
Gap 578789 start time:  2025-03-20 22:01:26.058260918+00:00
Gap 578789 end time:  2025-03-20 22:01:27.061367989+00:00
Gap 578790 start time:  2025-03-20 22:01:27.061367989+00:00
Gap 578790 end time:  2025-03-20 22:01:30.143058062+00:00
Gap 578808 start time:  2025-03-20 22:01:30.281250954+00:00
Gap 578808 end time:  2025-03-20 22:01:31.081108093+00:00
Gap 580957 start time:  2025-03-20 22:01:39.872884035+00:00
Gap 580957 end time:  2025-03-20 22:01:40.780877113+00:00


Processing rows:  14%|█▎        | 1075379/7931250 [00:09<00:52, 131669.78it/s]

Gap 1053301 start time:  2025-03-20 22:33:06.818521023+00:00
Gap 1053301 end time:  2025-03-20 22:33:07.838494062+00:00


Processing rows:  16%|█▌        | 1274363/7931250 [00:10<01:04, 103257.10it/s]

Gap 1259671 start time:  2025-03-20 22:46:51.836566925+00:00
Gap 1259671 end time:  2025-03-20 22:46:52.856838942+00:00


Processing rows:  28%|██▊       | 2248770/7931250 [00:19<01:30, 62627.08it/s] 

Gap 2246593 start time:  2025-03-20 23:52:33.657413960+00:00
Gap 2246593 end time:  2025-03-20 23:52:34.564877987+00:00
Gap 2246663 start time:  2025-03-20 23:52:34.993335962+00:00
Gap 2246663 end time:  2025-03-20 23:52:35.911988974+00:00
Gap 2246718 start time:  2025-03-20 23:52:36.247479916+00:00
Gap 2246718 end time:  2025-03-20 23:52:37.103400946+00:00
Gap 2246732 start time:  2025-03-20 23:52:37.475651026+00:00
Gap 2246732 end time:  2025-03-20 23:53:29.751147985+00:00
Gap 2246733 start time:  2025-03-20 23:53:29.751147985+00:00
Gap 2246733 end time:  2025-03-20 23:53:34.398108006+00:00
Gap 2247507 start time:  2025-03-20 23:53:37.626188993+00:00
Gap 2247507 end time:  2025-03-20 23:53:38.498730898+00:00


Processing rows:  40%|████      | 3207506/7931250 [00:27<00:42, 110833.19it/s]

Gap 3188748 start time:  2025-03-21 00:56:16.739051104+00:00
Gap 3188748 end time:  2025-03-21 00:56:17.760585070+00:00


Processing rows:  55%|█████▌    | 4389133/7931250 [00:37<00:28, 124583.90it/s]

Gap 4376416 start time:  2025-03-21 02:15:20.257642031+00:00
Gap 4376416 end time:  2025-03-21 02:15:21.202254057+00:00
Gap 4382297 start time:  2025-03-21 02:15:44.751677036+00:00
Gap 4382297 end time:  2025-03-21 02:15:45.747037888+00:00


Processing rows:  67%|██████▋   | 5350325/7931250 [00:45<00:23, 110076.12it/s]

Gap 5339187 start time:  2025-03-21 03:19:26.667718887+00:00
Gap 5339187 end time:  2025-03-21 03:19:27.720694065+00:00


Processing rows:  81%|████████  | 6428309/7931250 [00:54<00:14, 106352.64it/s]

Gap 6414472 start time:  2025-03-21 04:31:01.626135111+00:00
Gap 6414472 end time:  2025-03-21 04:31:02.645756960+00:00


Processing rows:  95%|█████████▍| 7504832/7931250 [01:04<00:04, 97346.44it/s] 

Gap 7489734 start time:  2025-03-21 05:42:36.550658941+00:00
Gap 7489734 end time:  2025-03-21 05:42:37.632085085+00:00


Processing rows:  99%|█████████▉| 7854878/7931250 [01:07<00:00, 111714.11it/s]

Gap 7860464 start time:  2025-03-21 06:07:18.242873907+00:00
Gap 7860464 end time:  2025-03-21 06:08:24.651217937+00:00
Gap 7860465 start time:  2025-03-21 06:08:24.651217937+00:00
Gap 7860465 end time:  2025-03-21 06:08:28.663851023+00:00


Processing rows: 100%|█████████▉| 7892001/7931250 [01:08<00:00, 91243.28it/s] 

Gap 7860466 start time:  2025-03-21 06:08:28.663851023+00:00
Gap 7860466 end time:  2025-03-21 06:08:29.551333904+00:00


Processing rows: 100%|██████████| 7931250/7931250 [01:08<00:00, 115311.95it/s]
  copied.fillna('', inplace=True)


Initial timestamp: 2025-03-20 21:22:08.886214972+00:00 from 1742505728.886215
Creating RawArray with float64 data, n_channels=1, n_times=7979204
    Range : 0 ... 7979203 =      0.000 ... 31863.810 secs
Ready.
Info <Info | 7 non-empty values
 bads: []
 ch_names: Fpz-M1
 chs: 1 EEG
 custom_ref_applied: False
 highpass: 0.0 Hz
 lowpass: 125.2 Hz
 meas_date: 2025-03-20 21:22:08 UTC
 nchan: 1
 projs: []
 sfreq: 250.4 Hz
>
Memory Usage: 2683.81 MB GC to 2683.81 MB
Saving to C:\dev\play\brainwave-data\2025-03-20\raw.250.41.fif
Writing C:\dev\play\brainwave-data\2025-03-20\raw.250.41.fif


  log(f"Saving to {output_file}")


Closing C:\dev\play\brainwave-data\2025-03-20\raw.250.41.fif
[done]
Memory Usage: 501.29 MB GC to 501.29 MB
Reading Brainflow file C:\dev\play\brainwave-data\2025-03-20-21-22-08.brainflow.csv
Finished reading Brainflow file
Memory Usage: 1954.54 MB GC to 1954.54 MB
EEG channels: [1, 2, 3, 4, 5, 6, 7, 8]


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  idx_and_eeg_channels_and_timestamp['datetime'] = pd.to_datetime(idx_and_eeg_channels_and_timestamp['timestamp'],unit="s").dt.tz_localize('UTC')
Processing rows: 100%|██████████| 7936687/7936687 [00:06<00:00, 1276325.02it/s]


Gaps:  datetime
False    7931224
True          27
Name: count, dtype: int64


Processing rows:   1%|          | 49666/7931250 [00:00<01:12, 108896.51it/s]

Gap 36394 start time:  2025-03-20 21:24:34.284627914+00:00
Gap 36394 end time:  2025-03-20 21:24:35.209925890+00:00
Gap 36883 start time:  2025-03-20 21:24:37.253129959+00:00
Gap 36883 end time:  2025-03-20 21:24:38.136048079+00:00


Processing rows:   7%|▋         | 580326/7931250 [00:05<01:29, 82501.99it/s] 

Gap 578640 start time:  2025-03-20 22:00:41.503294945+00:00
Gap 578640 end time:  2025-03-20 22:00:43.636815071+00:00
Gap 578642 start time:  2025-03-20 22:00:43.636892080+00:00
Gap 578642 end time:  2025-03-20 22:00:44.421345949+00:00
Gap 578644 start time:  2025-03-20 22:00:44.443382025+00:00
Gap 578644 end time:  2025-03-20 22:00:47.545509100+00:00
Gap 578788 start time:  2025-03-20 22:00:48.183696032+00:00
Gap 578788 end time:  2025-03-20 22:01:26.058260918+00:00
Gap 578789 start time:  2025-03-20 22:01:26.058260918+00:00
Gap 578789 end time:  2025-03-20 22:01:27.061367989+00:00
Gap 578790 start time:  2025-03-20 22:01:27.061367989+00:00
Gap 578790 end time:  2025-03-20 22:01:30.143058062+00:00
Gap 578808 start time:  2025-03-20 22:01:30.281250954+00:00
Gap 578808 end time:  2025-03-20 22:01:31.081108093+00:00
Gap 580957 start time:  2025-03-20 22:01:39.872884035+00:00
Gap 580957 end time:  2025-03-20 22:01:40.780877113+00:00


Processing rows:  14%|█▎        | 1071582/7931250 [00:09<01:04, 106214.75it/s]

Gap 1053301 start time:  2025-03-20 22:33:06.818521023+00:00
Gap 1053301 end time:  2025-03-20 22:33:07.838494062+00:00


Processing rows:  16%|█▌        | 1275945/7931250 [00:11<01:01, 109011.29it/s]

Gap 1259671 start time:  2025-03-20 22:46:51.836566925+00:00
Gap 1259671 end time:  2025-03-20 22:46:52.856838942+00:00


Processing rows:  28%|██▊       | 2254492/7931250 [00:19<01:11, 79426.54it/s] 

Gap 2246593 start time:  2025-03-20 23:52:33.657413960+00:00
Gap 2246593 end time:  2025-03-20 23:52:34.564877987+00:00
Gap 2246663 start time:  2025-03-20 23:52:34.993335962+00:00
Gap 2246663 end time:  2025-03-20 23:52:35.911988974+00:00
Gap 2246718 start time:  2025-03-20 23:52:36.247479916+00:00
Gap 2246718 end time:  2025-03-20 23:52:37.103400946+00:00
Gap 2246732 start time:  2025-03-20 23:52:37.475651026+00:00
Gap 2246732 end time:  2025-03-20 23:53:29.751147985+00:00
Gap 2246733 start time:  2025-03-20 23:53:29.751147985+00:00
Gap 2246733 end time:  2025-03-20 23:53:34.398108006+00:00
Gap 2247507 start time:  2025-03-20 23:53:37.626188993+00:00
Gap 2247507 end time:  2025-03-20 23:53:38.498730898+00:00


Processing rows:  40%|████      | 3206297/7931250 [00:27<00:46, 101489.80it/s]

Gap 3188748 start time:  2025-03-21 00:56:16.739051104+00:00
Gap 3188748 end time:  2025-03-21 00:56:17.760585070+00:00


Processing rows:  55%|█████▌    | 4389072/7931250 [00:38<00:33, 104511.65it/s]

Gap 4376416 start time:  2025-03-21 02:15:20.257642031+00:00
Gap 4376416 end time:  2025-03-21 02:15:21.202254057+00:00
Gap 4382297 start time:  2025-03-21 02:15:44.751677036+00:00
Gap 4382297 end time:  2025-03-21 02:15:45.747037888+00:00


Processing rows:  68%|██████▊   | 5360080/7931250 [00:46<00:22, 112614.21it/s]

Gap 5339187 start time:  2025-03-21 03:19:26.667718887+00:00
Gap 5339187 end time:  2025-03-21 03:19:27.720694065+00:00


Processing rows:  81%|████████  | 6427465/7931250 [00:56<00:14, 103854.56it/s]

Gap 6414472 start time:  2025-03-21 04:31:01.626135111+00:00
Gap 6414472 end time:  2025-03-21 04:31:02.645756960+00:00


Processing rows:  95%|█████████▍| 7506171/7931250 [01:05<00:03, 136908.59it/s]

Gap 7489734 start time:  2025-03-21 05:42:36.550658941+00:00
Gap 7489734 end time:  2025-03-21 05:42:37.632085085+00:00


Processing rows:  99%|█████████▉| 7858851/7931250 [01:08<00:00, 123917.21it/s]

Gap 7860464 start time:  2025-03-21 06:07:18.242873907+00:00
Gap 7860464 end time:  2025-03-21 06:08:24.651217937+00:00


Processing rows:  99%|█████████▉| 7871400/7931250 [01:08<00:00, 61394.32it/s] 

Gap 7860465 start time:  2025-03-21 06:08:24.651217937+00:00
Gap 7860465 end time:  2025-03-21 06:08:28.663851023+00:00
Gap 7860466 start time:  2025-03-21 06:08:28.663851023+00:00
Gap 7860466 end time:  2025-03-21 06:08:29.551333904+00:00


Processing rows: 100%|██████████| 7931250/7931250 [01:09<00:00, 114813.67it/s]
  copied.fillna('', inplace=True)


Initial timestamp: 2025-03-20 21:22:08.886214972+00:00 from 1742505728.886215
Creating RawArray with float64 data, n_channels=1, n_times=7979122
    Range : 0 ... 7979121 =      0.000 ... 31916.484 secs
Ready.
Info <Info | 7 non-empty values
 bads: []
 ch_names: Fpz-M1
 chs: 1 EEG
 custom_ref_applied: False
 highpass: 0.0 Hz
 lowpass: 125.0 Hz
 meas_date: 2025-03-20 21:22:08 UTC
 nchan: 1
 projs: []
 sfreq: 250.0 Hz
>
Memory Usage: 2673.93 MB GC to 2673.93 MB
Saving to C:\dev\play\brainwave-data\2025-03-20\raw.250.fif
Writing C:\dev\play\brainwave-data\2025-03-20\raw.250.fif


  log(f"Saving to {output_file}")


Closing C:\dev\play\brainwave-data\2025-03-20\raw.250.fif
[done]


Unnamed: 0,General,General.1
,MNE object type,RawArray
,Measurement date,2025-03-20 at 21:22:08 UTC
,Participant,Unknown
,Experimenter,Unknown
,Acquisition,Acquisition
,Duration,08:51:57 (HH:MM:SS)
,Sampling frequency,250.00 Hz
,Time points,7979122
,Channels,Channels
,EEG,1


In [31]:
raw, _, mne_filtered = convert.load_mne_file(print, "C:\\dev\\play\\brainwave-data\\2025-03-20\\raw.250.41.fif")
start_date = raw.info['meas_date']
duration = timedelta(seconds=float(raw.times[-1]))
samples = raw.get_data().shape[1]
end_date = start_date + duration
print(f"file {input_file} Start date: {start_date}, End date: {end_date}, Samples: {samples}")


Reading file C:\dev\play\brainwave-data\2025-03-20\raw.250.41.fif
Opening raw data file C:\dev\play\brainwave-data\2025-03-20\raw.250.41.fif...
Isotrak not found


  log(f"Reading file {input_file}")


    Range : 0 ... 7979203 =      0.000 ... 31863.810 secs
Ready.
Reading 0 ... 7979203  =      0.000 ... 31863.810 secs...
Finished reading file C:\dev\play\brainwave-data\2025-03-20\raw.250.41.fif
file C:\dev\play\brainwave-data\2025-03-20-21-22-08\raw.fif Start date: 2025-03-20 21:22:08.886214+00:00, End date: 2025-03-21 06:13:12.696236+00:00, Samples: 7979204


In [29]:
raw, _, _ = convert.load_mne_file(print, "C:\\dev\\play\\brainwave-data\\2025-03-20\\raw.250.fif")
start_date = raw.info['meas_date']
duration = timedelta(seconds=float(raw.times[-1]))
samples = raw.get_data().shape[1]
end_date = start_date + duration
print(f"file {input_file} Start date: {start_date}, End date: {end_date}, Samples: {samples}")


Reading file C:\dev\play\brainwave-data\2025-03-20\raw.250.fif
Opening raw data file C:\dev\play\brainwave-data\2025-03-20\raw.250.fif...
Isotrak not found
    Range : 0 ... 7979121 =      0.000 ... 31916.484 secs
Ready.


  log(f"Reading file {input_file}")


Reading 0 ... 7979121  =      0.000 ... 31916.484 secs...
Finished reading file C:\dev\play\brainwave-data\2025-03-20\raw.250.fif
file C:\dev\play\brainwave-data\2025-03-20-21-22-08\raw.fif Start date: 2025-03-20 21:22:08.886214+00:00, End date: 2025-03-21 06:14:05.370214+00:00, Samples: 7979122


In [34]:
import mne

# Calculate the number of samples needed for exact divisibility
raw_copy = mne_filtered.copy()
# duration = raw.times[-1]

# # Calculate how many samples we need for whole seconds
# needed_duration = np.ceil(duration)
# current_samples = raw.n_times
# target_samples = int(needed_duration * raw.info['sfreq'])

# # Pad with zeros if needed to get an exact number of seconds
# if target_samples > current_samples:
#     pad_samples = target_samples - current_samples
#     pad_seconds = pad_samples / raw_copy.info['sfreq']
#     print(f"Padding EDF with {pad_samples} samples ({pad_seconds:.4f} seconds)")
#     padding = np.zeros((raw_copy.info['nchan'], pad_samples))
#     raw_copy._data = np.hstack((raw_copy._data, padding))
# else:
#     print("No padding needed for EDF export")

mne.export.export_raw("C:\\dev\\play\\brainwave-data\\2025-03-20\\raw.250.41.edf", raw_copy, overwrite=True)


  mne.export.export_raw("C:\\dev\\play\\brainwave-data\\2025-03-20\\raw.250.41.edf", raw_copy, overwrite=True)


ValueError: Signal duration of 31863.802168624s is not exactly divisible by data_record_duration of 0.998339s

In [36]:
import pyedflib

# Get the data from MNE (in microvolts)
data = mne_filtered.get_data(units=dict(eeg="uV"))
channel_names = mne_filtered.info['ch_names']
sfreq = mne_filtered.info['sfreq']
n_channels = len(channel_names)

# Create output file path
output_file =  "test.edf"

# Create EDF writer
writer = pyedflib.EdfWriter(output_file, n_channels=n_channels, file_type=pyedflib.FILETYPE_EDFPLUS)

# Prepare channel information
channel_info = []
for ch_name in channel_names:
    ch_dict = {
        'label': ch_name,
        'dimension': 'uV',
        'sample_rate': sfreq,
        'physical_min': -1500,
        'physical_max': 1500,
        'digital_min': -32768,
        'digital_max': 32767,
        'transducer': '',
        'prefilter': ''
    }
    channel_info.append(ch_dict)

writer.setSignalHeaders(channel_info)
writer.setTechnician('MNE to EDF export')

# Get the start time
if mne_filtered.info['meas_date'] is not None:
    start_dt = mne_filtered.info['meas_date']
    writer.setStartdatetime(start_dt.replace(tzinfo=None))

# Write the data
try:
    print(f"Writing {data.shape[1]} samples for {n_channels} channels to EDF")
    writer.writeSamples(data)
    print("Successfully wrote EDF file")
except Exception as e:
    print(f"Error writing EDF: {str(e)}")
    raise
finally:
    writer.close()
    print(f"Closed EDF file: {output_file}")


AssertionError: cannot accurately represent sampling frequencies with data record durations between 1-60s: [250.4158477783203]

In [40]:
import json
import datetime
import numpy as np
import os
import struct

def save_recording_to_binary_format(data, channel_names, sfreq, output_base_path, meas_date=None):
    """
    Save recording data to a binary format with a JSON file for metadata.
    
    Parameters:
    -----------
    data : numpy.ndarray
        EEG data with shape (n_channels, n_samples)
    channel_names : list
        List of channel names
    sfreq : float
        Sampling frequency in Hz
    output_base_path : str
        Base path for output files (without extension)
    meas_date : datetime or None
        Measurement start date/time
    """
    # Create metadata
    metadata = {
        "sampling_frequency": float(sfreq),
        "channels": channel_names,
        "n_channels": len(channel_names),
        "n_samples": data.shape[1],
        "data_format": "int16",
        "byte_order": "little-endian"
    }
    
    # Add timing information
    if meas_date is not None:
        start_time = meas_date.isoformat()
        end_time = (meas_date + datetime.timedelta(seconds=data.shape[1]/sfreq)).isoformat()
        metadata["start_time"] = start_time
        metadata["end_time"] = end_time
    
    # Save metadata to JSON
    json_path = f"{output_base_path}.json"
    with open(json_path, 'w') as f:
        json.dump(metadata, f, indent=4)
    
    # Prepare binary data file
    bin_path = f"{output_base_path}.bin"
    
    # Convert data to int16 (2 bytes per sample) and clip values
    int16_data = np.clip(data, -32768, 32767).astype(np.int16)
    
    # Write binary data (samples are stored channel by channel)
    with open(bin_path, 'wb') as f:
        int16_data.tofile(f)
    
    print(f"Saved metadata to {json_path}")
    print(f"Saved recording data to {bin_path}")
    print(f"File size: {os.path.getsize(bin_path) / (1024*1024):.2f} MB")
    
    return json_path, bin_path

def read_binary_recording(metadata_path):
    """
    Read recording data from binary format and metadata JSON.
    
    Parameters:
    -----------
    metadata_path : str
        Path to the JSON metadata file
    
    Returns:
    --------
    data : numpy.ndarray
        EEG data with shape (n_channels, n_samples)
    metadata : dict
        Recording metadata
    """
    # Load metadata
    with open(metadata_path, 'r') as f:
        metadata = json.load(f)
    
    # Derive binary data path
    bin_path = metadata_path.replace('.json', '.bin')
    
    # Read binary data
    data = np.fromfile(bin_path, dtype=np.int16)
    
    # Reshape data based on metadata
    data = data.reshape(metadata["n_channels"], -1)
    
    return data, metadata

# Example usage
output_base_path = "recording"
if mne_filtered.info['meas_date'] is not None:
    start_dt = mne_filtered.info['meas_date']
else:
    start_dt = None

# Get the data from MNE (in microvolts)
data = mne_filtered.get_data(units=dict(eeg="uV"))
channel_names = mne_filtered.info['ch_names']
sfreq = mne_filtered.info['sfreq']

# Save to binary format
json_path, bin_path = save_recording_to_binary_format(
    data, 
    channel_names, 
    sfreq, 
    output_base_path, 
    meas_date=start_dt
)

Saved metadata to recording.json
Saved recording data to recording.bin
File size: 15.22 MB
