If a feature shows unexpected values, for example on a particular day, this notebook helps track down where in the data pipeline the problem is introduced (unless it originates on the UI side).

In [1]:
import logging
# Emit INFO-and-above records, prefixed with timestamp and level name.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')


def log(msg):
    """Log ``msg`` at INFO level through the root logger."""
    logging.info(msg)


In [2]:
import pandas as pd
import os

# Root data folder: holds stats.csv and the timestamp-named folders read further down.
# Set the BRAINWAVE_DATA_DIR environment variable to point the notebook at another location.
input_dir = os.environ.get("BRAINWAVE_DATA_DIR", "C:\\dev\\play\\brainwave-data")
stats_df = pd.read_csv(os.path.join(input_dir, "stats.csv"))

In [3]:
from sleep_events import load_days_data

# Load the table that the day-level checks below query (it has a 'dayAndNightOf' column).
# NOTE(review): the meaning of the positional True flag is defined in sleep_events — confirm.
day_data = load_days_data(True)

In [4]:
# Names of every day-level column whose label contains 'R:sdeltaabs'.
day_data.filter(like='R:sdeltaabs').columns.tolist()

['night:yasaExtended:R:sdeltaabs_s:mean',
 'night:yasaExtended:R:sdeltaabs:mean']

In [6]:
# Day-level feature under investigation (one of the matching columns listed above).
bad_feat = 'night:yasaExtended:R:sdeltaabs:mean'

# Find days where UI data is bad


In [7]:
# Summary statistics of the suspect feature over all loaded days.
day_data.loc[:, bad_feat].describe()

count    84.000000
mean      0.051364
std       0.085321
min       0.018287
25%       0.023069
50%       0.025542
75%       0.027835
max       0.439318
Name: night:yasaExtended:R:sdeltaabs:mean, dtype: float64

In [8]:
# Date and feature value, keeping only rows where both are present.
day_data.loc[:, ['dayAndNightOf', bad_feat]].dropna(axis='index', how='any')

Unnamed: 0,dayAndNightOf,night:yasaExtended:R:sdeltaabs:mean
119,2024-08-18,0.026505
120,2024-08-19,0.027335
121,2024-08-20,0.018287
122,2024-08-21,0.019832
123,2024-08-22,0.022546
...,...,...
277,2025-01-23,0.101339
281,2025-01-27,0.405882
282,2025-01-28,0.077918
284,2025-01-30,0.398514


In [29]:
# Recording with the suspicious values: its date and its data folder.
bad_day = '2025-01-31'
bad_folder = '2025-01-31-21-45-23'
# Reference recording used for comparison.
# The date carries no trailing spaces, so it matches date strings and the folder-name prefix exactly.
good_day = '2025-01-07'
good_folder = '2025-01-07-21-12-58'

# Data files

In [33]:
def files_for_folder(folder_name: str, base_dir=None):
    """List the CSV files in one data folder.

    Args:
        folder_name: Name of the folder, relative to ``base_dir``.
        base_dir: Directory that contains ``folder_name``. When omitted, the
            notebook-level ``input_dir`` is used.

    Returns:
        DataFrame with one row per ``.csv`` file and the columns
        ``file_name``, ``full_path`` and ``modification_time`` (the file's
        mtime in seconds converted to a datetime). When the folder holds no
        CSV files the frame is empty but still has these three columns.

    Raises:
        OSError: If the folder cannot be listed (for example, it does not exist).
    """
    if base_dir is None:
        base_dir = input_dir
    folder_path = os.path.join(base_dir, folder_name)

    file_info = []
    for file_name in os.listdir(folder_path):
        if not file_name.endswith('.csv'):
            continue
        full_path = os.path.join(folder_path, file_name)
        file_info.append({
            'file_name': file_name,
            'full_path': full_path,
            'modification_time': os.path.getmtime(full_path),
        })

    # Naming the columns explicitly keeps the schema stable for a folder with
    # no CSV files; without it the conversion below fails with KeyError.
    df_files = pd.DataFrame(file_info, columns=['file_name', 'full_path', 'modification_time'])
    df_files['modification_time'] = pd.to_datetime(
        df_files['modification_time'].astype('float64'), unit='s'
    )
    return df_files

# List the CSV files in the bad recording's folder and display the table.
df_files = files_for_folder(bad_folder)
df_files

Unnamed: 0,file_name,full_path,modification_time
0,raw.microwakings.csv,C:\dev\play\brainwave-data\2025-01-31-21-45-23...,2025-02-01 14:08:55.712017298
1,raw.physical_features.csv,C:\dev\play\brainwave-data\2025-01-31-21-45-23...,2025-02-01 14:07:31.228470087
2,raw.post_human.csv,C:\dev\play\brainwave-data\2025-01-31-21-45-23...,2025-02-03 07:57:56.651539803
3,raw.post_yasa.csv,C:\dev\play\brainwave-data\2025-01-31-21-45-23...,2025-02-01 14:08:12.456092119
4,raw.sleep_stages.csv,C:\dev\play\brainwave-data\2025-01-31-21-45-23...,2025-02-01 14:07:12.195030451
5,raw.sleep_stages.Fpz-M1.csv,C:\dev\play\brainwave-data\2025-01-31-21-45-23...,2025-02-01 14:06:56.863138676
6,raw.with_features.csv,C:\dev\play\brainwave-data\2025-01-31-21-45-23...,2025-02-01 14:10:14.104509592
7,raw.yasa.csv,C:\dev\play\brainwave-data\2025-01-31-21-45-23...,2025-02-01 14:07:28.430262566


In [11]:
# raw.with_features.csv is the file the UI reads.
file_path = df_files.loc[df_files['file_name'] == 'raw.with_features.csv', 'full_path'].iloc[0]
df = pd.read_csv(file_path)

In [12]:
# Names of every column in the with_features file whose label contains 'sdeltaabs'.
df.filter(like='sdeltaabs').columns.tolist()

['Main_eeg_sdeltaabs',
 'Main_eeg_sdeltaabs_c7min_norm',
 'Main_eeg_sdeltaabs_p2min_norm',
 'Main_eeg_sdeltaabs_s',
 'Main_eeg_sdeltaabs_c7min_norm_s',
 'Main_eeg_sdeltaabs_p2min_norm_s']

In [13]:
# Column inspected in the per-recording CSV files (one of the matches listed above).
# NOTE(review): presumably the source of the day-level `bad_feat` aggregate — confirm.
bad_feat_eeg = 'Main_eeg_sdeltaabs'

In [40]:
# Read raw.post_yasa.csv and summarise the feature over rows whose Stage is 'R'.
file_path = df_files.loc[df_files['file_name'] == 'raw.post_yasa.csv', 'full_path'].iloc[0]
post_yasa_df = pd.read_csv(file_path)
R = post_yasa_df.loc[post_yasa_df['Stage'] == 'R']
R.loc[:, bad_feat_eeg].describe()

count    298.000000
mean       7.725530
std       53.645499
min        0.006301
25%        0.013400
50%        0.032253
75%        0.125037
max      709.547670
Name: Main_eeg_sdeltaabs, dtype: float64

In [22]:
# Per-row view of the Stage 'R' rows: epoch, stage, timestamp and the feature value.
R.loc[:, ['Epoch', 'Stage', 'Timestamp', bad_feat_eeg]]

Unnamed: 0,Epoch,Stage,Timestamp,Main_eeg_sdeltaabs
398,398,R,2025-02-01 01:04:23.727721930+00:00,10.058379
399,399,R,2025-02-01 01:04:53.727721930+00:00,0.067572
400,400,R,2025-02-01 01:05:23.727721930+00:00,0.046309
401,401,R,2025-02-01 01:05:53.727721930+00:00,0.027815
402,402,R,2025-02-01 01:06:23.727721930+00:00,0.064676
...,...,...,...,...
1204,1204,R,2025-02-01 07:47:23.727721930+00:00,0.043711
1205,1205,R,2025-02-01 07:47:53.727721930+00:00,0.010118
1206,1206,R,2025-02-01 07:48:23.727721930+00:00,0.012985
1207,1207,R,2025-02-01 07:48:53.727721930+00:00,0.012762


# Compare to days where UI data is fine

In [34]:
good_files = files_for_folder(good_folder)

# raw.with_features.csv is the file the UI reads.
good_with_features_file_path = good_files.loc[
    good_files['file_name'] == 'raw.with_features.csv', 'full_path'
].iloc[0]
good_df = pd.read_csv(good_with_features_file_path)
good_df.loc[:, bad_feat_eeg].describe()

count     1271.000000
mean        43.193170
std        979.545555
min          0.000001
25%          0.024653
50%          0.061637
75%          0.281348
max      29615.360000
Name: Main_eeg_sdeltaabs, dtype: float64

In [41]:
# Read the good recording's raw.post_yasa.csv and summarise the feature over Stage 'R' rows.
file_path = good_files.loc[good_files['file_name'] == 'raw.post_yasa.csv', 'full_path'].iloc[0]
good_post_yasa_df = pd.read_csv(file_path)
good_R = good_post_yasa_df.loc[good_post_yasa_df['Stage'] == 'R']
good_R.loc[:, bad_feat_eeg].describe()

count    292.000000
mean       0.047768
std        0.080415
min        0.006128
25%        0.015164
50%        0.023577
75%        0.044886
max        0.888901
Name: Main_eeg_sdeltaabs, dtype: float64

In [42]:
# Per-row view of the good recording's Stage 'R' rows.
good_R.loc[:, ['Epoch', 'Stage', 'Timestamp', bad_feat_eeg]]

Unnamed: 0,Epoch,Stage,Timestamp,Main_eeg_sdeltaabs
283,283,R,2025-01-07 23:34:28.465799093+00:00,0.025504
288,288,R,2025-01-07 23:36:58.465799093+00:00,0.018964
289,289,R,2025-01-07 23:37:28.465799093+00:00,0.024894
290,290,R,2025-01-07 23:37:58.465799093+00:00,0.013525
291,291,R,2025-01-07 23:38:28.465799093+00:00,0.019112
...,...,...,...,...
1221,1221,R,2025-01-08 07:23:28.465799093+00:00,0.013356
1222,1222,R,2025-01-08 07:23:58.465799093+00:00,0.016545
1223,1223,R,2025-01-08 07:24:28.465799093+00:00,0.016897
1224,1224,R,2025-01-08 07:24:58.465799093+00:00,0.012112
