If a given feature shows unexpected values, perhaps only for a particular day, this notebook helps track down where the issue is introduced (unless it is on the UI side).

In [7]:
import logging
import os
import sys

logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')


def log(msg):
    """Emit ``msg`` at INFO level through the root logger configured above."""
    logging.info(msg)


# NOTE: '__file__' is a quoted string literal here, not the module attribute, so
# os.path.dirname() returns '' and root_dir resolves to two levels above the
# current working directory.
root_dir = os.path.abspath(os.path.join(os.path.dirname('__file__'), '../..'))
# Guard the append so that re-running this cell does not pile up duplicate entries.
if root_dir not in sys.path:
    sys.path.append(root_dir)


In [8]:
import os

import pandas as pd

# Root folder of the local data; stats.csv is read directly from it.
input_dir = "C:\\dev\\play\\brainwave-data"
stats_path = os.path.join(input_dir, "stats.csv")
stats_df = pd.read_csv(stats_path)

In [9]:
# Project-local loader; the cells below treat its result as a DataFrame (.columns, column selection).
from sleep_events import load_days_data

# NOTE(review): the meaning of the positional True flag is not visible here — confirm against load_days_data.
day_data = load_days_data(True)

In [10]:
# Names of the day_data columns that contain the given substring.
needle = 'R:sdeltaabs'
[name for name in day_data.columns if needle in name]

['night:yasaExtended:R:sdeltaabs:mean',
 'night:yasaExtended:R:sdeltaabs_s:mean']

In [29]:
# Day-level feature under investigation; the cells below summarise it and rank days by it.
bad_feat = 'night:yasaExtended:W:alphaabs:mean'

# Find days where UI data is bad


In [30]:
# Summary statistics of the suspect feature over every loaded day.
feature_values = day_data[bad_feat]
feature_values.describe()

count    116.000000
mean       0.024999
std        0.100769
min        0.007544
25%        0.011305
50%        0.013581
75%        0.015776
max        1.092700
Name: night:yasaExtended:W:alphaabs:mean, dtype: float64

In [49]:
# Ten days with the largest values of the suspect feature; rows missing either column are dropped first.
ranked_days = (
    day_data[['dayAndNightOf', bad_feat]]
    .dropna()
    .sort_values(by=bad_feat, ascending=False)
)
ranked_days.head(10)

Unnamed: 0,dayAndNightOf,night:yasaExtended:W:alphaabs:mean
262,2024-09-03,1.0927
79,2024-08-20,0.137212
171,2024-12-16,0.036965
215,2024-08-25,0.034695
295,2025-02-09,0.031797
200,2025-01-19,0.030773
306,2025-03-07,0.029247
318,2025-02-26,0.026924
319,2025-02-27,0.024168
301,2025-02-16,0.024122


In [35]:
from notebooks.Util.DayAndNightOfFinder import day_and_night_of_dir

# One day showing the outlier value and one day used as the comparison baseline.
bad_day, good_day = '2024-09-03', '2025-01-07'

# Resolve each day to its recording folder under input_dir.
bad_folder, bad_folder_name = day_and_night_of_dir(input_dir, bad_day)
good_folder, good_folder_name = day_and_night_of_dir(input_dir, good_day)


# Data files

In [36]:
def files_for_folder(folder_name: str, base_dir=None):
    """List the CSV files directly inside one recording folder.

    Args:
        folder_name: Folder to scan; it is joined onto ``base_dir``.
        base_dir: Directory that ``folder_name`` is joined onto. When omitted,
            the notebook-level ``input_dir`` is used.

    Returns:
        A DataFrame with one row per ``.csv`` file and the columns
        ``file_name``, ``full_path`` and ``modification_time`` (the file mtime
        converted with ``pd.to_datetime(..., unit='s')``). Rows are ordered by
        file name. A folder without CSV files yields an empty frame that still
        has these three columns.
    """
    if base_dir is None:
        base_dir = input_dir
    folder_path = os.path.join(base_dir, folder_name)

    file_info = []
    # os.listdir() returns entries in arbitrary order; sort for a stable listing.
    for file_name in sorted(os.listdir(folder_path)):
        if not file_name.endswith('.csv'):
            continue
        full_path = os.path.join(folder_path, file_name)
        file_info.append({
            'file_name': file_name,
            'full_path': full_path,
            'modification_time': os.path.getmtime(full_path),
        })

    # Naming the columns up front keeps them present when no CSV was found;
    # without this the conversion below raises KeyError on an empty frame.
    df_files = pd.DataFrame(file_info, columns=['file_name', 'full_path', 'modification_time'])
    df_files['modification_time'] = pd.to_datetime(
        df_files['modification_time'].astype('float64'), unit='s'
    )
    return df_files

# CSV listing for the folder resolved for bad_day; the bare name on the last line displays it.
df_files = files_for_folder(bad_folder)
df_files

Unnamed: 0,file_name,full_path,modification_time
0,raw.artifacts.csv,C:\dev\play\brainwave-data\2024-09-03-21-10-58...,2025-03-11 19:12:48.419289589
1,raw.final_wake_model.csv,C:\dev\play\brainwave-data\2024-09-03-21-10-58...,2024-12-19 17:25:16.000000000
2,raw.final_wake_model_post_human.csv,C:\dev\play\brainwave-data\2024-09-03-21-10-58...,2024-12-19 17:25:18.000000000
3,raw.microwakings.csv,C:\dev\play\brainwave-data\2024-09-03-21-10-58...,2025-01-08 11:17:52.000000000
4,raw.night_events.csv,C:\dev\play\brainwave-data\2024-09-03-21-10-58...,2024-12-02 03:27:54.000000000
5,raw.physical_features.1s.csv,C:\dev\play\brainwave-data\2024-09-03-21-10-58...,2025-03-20 07:42:17.572556257
6,raw.physical_features.csv,C:\dev\play\brainwave-data\2024-09-03-21-10-58...,2025-01-10 18:32:38.000000000
7,raw.post_human.csv,C:\dev\play\brainwave-data\2024-09-03-21-10-58...,2025-03-08 09:45:22.384139061
8,raw.post_yasa.csv,C:\dev\play\brainwave-data\2024-09-03-21-10-58...,2025-03-11 19:53:10.121223688
9,raw.sleep_stages.csv,C:\dev\play\brainwave-data\2024-09-03-21-10-58...,2025-01-10 17:54:52.000000000


In [37]:
# This is what the UI uses
with_features_name = 'raw.with_features.csv'
with_features_matches = df_files.loc[df_files['file_name'] == with_features_name, 'full_path']
if with_features_matches.empty:
    # Name the missing file instead of failing with a bare IndexError on an empty selection.
    raise FileNotFoundError(f"{with_features_name} not found in {bad_folder}")
file_path = with_features_matches.iloc[0]
df = pd.read_csv(file_path)

In [38]:
# Names of the columns in df that contain the given substring.
needle = 'betaabs'
[name for name in df.columns if needle in name]

['Main_eeg_betaabs',
 'Main_eeg_betaabs_c7min_norm',
 'Main_eeg_betaabs_p2min_norm',
 'Main_eeg_betaabs_s',
 'Main_eeg_betaabs_c7min_norm_s',
 'Main_eeg_betaabs_p2min_norm_s']

In [39]:
# Per-epoch column inspected in the cells below.
# NOTE(review): bad_feat above is an 'alphaabs' feature while this is 'betaabs' — confirm which band is intended.
bad_feat_eeg = 'Main_eeg_betaabs'

In [42]:
post_yasa_name = 'raw.post_yasa.csv'
post_yasa_matches = df_files.loc[df_files['file_name'] == post_yasa_name, 'full_path']
if post_yasa_matches.empty:
    # Name the missing file instead of failing with a bare IndexError on an empty selection.
    raise FileNotFoundError(f"{post_yasa_name} not found in {bad_folder}")
file_path = post_yasa_matches.iloc[0]
post_yasa_df = pd.read_csv(file_path)
# Rows whose Stage is 'W'. The variable name R is kept because a later cell reads it.
R = post_yasa_df[post_yasa_df['Stage'] == 'W']
R[bad_feat_eeg].describe()

count    884.000000
mean       1.498085
std        2.176180
min        0.001469
25%        0.050718
50%        1.907079
75%        2.032990
max       41.115040
Name: Main_eeg_betaabs, dtype: float64

In [43]:
# Per-epoch view of the suspect column for the rows selected into R.
epoch_columns = ['Epoch', 'Stage', 'Timestamp', bad_feat_eeg]
R[epoch_columns]

Unnamed: 0,Epoch,Stage,Timestamp,Main_eeg_betaabs
0,0,W,2024-09-03 21:10:58.197338104+01:00,0.068979
1,1,W,2024-09-03 21:11:28.197338104+01:00,0.124669
2,2,W,2024-09-03 21:11:58.197338104+01:00,0.049194
3,3,W,2024-09-03 21:12:28.197338104+01:00,0.061551
4,4,W,2024-09-03 21:12:58.197338104+01:00,0.203375
...,...,...,...,...
1775,1775,W,2024-09-04 11:58:28.197338104+01:00,2.009964
1776,1776,W,2024-09-04 11:58:58.197338104+01:00,2.006756
1777,1777,W,2024-09-04 11:59:28.197338104+01:00,2.098285
1778,1778,W,2024-09-04 11:59:58.197338104+01:00,2.142219


# Compare to days where UI data is fine

In [44]:
good_files = files_for_folder(good_folder)

# This is what the UI uses
good_with_features_name = 'raw.with_features.csv'
good_with_features_matches = good_files.loc[
    good_files['file_name'] == good_with_features_name, 'full_path'
]
if good_with_features_matches.empty:
    # Name the missing file instead of failing with a bare IndexError on an empty selection.
    raise FileNotFoundError(f"{good_with_features_name} not found in {good_folder}")
good_with_features_file_path = good_with_features_matches.iloc[0]
good_df = pd.read_csv(good_with_features_file_path)
good_df[bad_feat_eeg].describe()

count    1.263000e+03
mean     1.063741e-02
std      5.513943e-02
min      5.147581e-07
25%      2.251533e-03
50%      2.776523e-03
75%      4.222165e-03
max      1.163897e+00
Name: Main_eeg_betaabs, dtype: float64

In [46]:
good_post_yasa_name = 'raw.post_yasa.csv'
good_post_yasa_matches = good_files.loc[good_files['file_name'] == good_post_yasa_name, 'full_path']
if good_post_yasa_matches.empty:
    # Name the missing file instead of failing with a bare IndexError on an empty selection.
    raise FileNotFoundError(f"{good_post_yasa_name} not found in {good_folder}")
file_path = good_post_yasa_matches.iloc[0]
good_post_yasa_df = pd.read_csv(file_path)
# Rows whose Stage is 'W'. The variable name good_R is kept because a later cell reads it.
good_R = good_post_yasa_df[good_post_yasa_df['Stage'] == 'W']
good_R[bad_feat_eeg].describe()

count    2.240000e+02
mean     4.510523e-02
std      1.251861e-01
min      5.147581e-07
25%      8.017677e-03
50%      1.624881e-02
75%      3.218697e-02
max      1.163897e+00
Name: Main_eeg_betaabs, dtype: float64

In [47]:
# Per-epoch view of the suspect column for the rows selected into good_R.
good_epoch_columns = ['Epoch', 'Stage', 'Timestamp', bad_feat_eeg]
good_R[good_epoch_columns]

Unnamed: 0,Epoch,Stage,Timestamp,Main_eeg_betaabs
0,0,W,2025-01-07 21:12:58.465799093+00:00,0.300296
1,1,W,2025-01-07 21:13:28.465799093+00:00,0.113829
2,2,W,2025-01-07 21:13:58.465799093+00:00,0.164073
3,3,W,2025-01-07 21:14:28.465799093+00:00,0.072593
4,4,W,2025-01-07 21:14:58.465799093+00:00,0.216930
...,...,...,...,...
1266,1266,W,2025-01-08 07:45:58.465799093+00:00,0.008161
1267,1267,W,2025-01-08 07:46:28.465799093+00:00,0.014268
1268,1268,W,2025-01-08 07:46:58.465799093+00:00,0.015100
1269,1269,W,2025-01-08 07:47:28.465799093+00:00,0.016128
