In [2]:
import pandas as pd 
import os
import matplotlib.pyplot as plt
import plotly.graph_objects as go
import json
import mne
import numpy as np
from tqdm import tqdm
import re
from sklearn.metrics import silhouette_score, calinski_harabasz_score, davies_bouldin_score
from multiprocessing import Pool
import warnings
from collections import defaultdict
from typing import Dict, Tuple, Any, List
os.chdir('../../..')
from scipy import stats
from statsmodels.stats.power import TTestIndPower

from Scripts.Spectral_Analysis.Spectrum_Filter import filter_spectras
warnings.filterwarnings("ignore")



In [3]:
def _aggregate_by_subject(results):
    # subj_band_vals[(ch, band)][sid] -> [vals_per_trial...]
    subj_band_vals = defaultdict(lambda: defaultdict(list))
    for power, phase, s_id, t_id, gender, handiness, age, label, img, task_type in results:
        s_id = int(s_id)
        vecs = (power.mean(axis=2) if power.ndim == 3 else power).astype(np.float32, copy=False)  # (C,F)
        for ch in range(n_channels):
            for band, idx in band_cols.items():
                if idx.size == 0: 
                    continue
                val_trial = float(np.nanmean(vecs[ch, idx]))
                subj_band_vals[(ch, band)][s_id].append(val_trial)
    # среднее по триалам → массивы
    subj_vecs = {}
    for ch in range(n_channels):
        for band in bands.keys():
            ids, vals = [], []
            d = subj_band_vals[(ch, band)]
            for sid, arr in d.items():
                if arr:
                    ids.append(sid)
                    vals.append(float(np.nanmean(arr)))
            subj_vecs[(ch, band)] = {'ids': ids, 'vals': np.array(vals, float)}
    return subj_vecs

def build_subject_vectors(
    day_subj: Dict[Tuple[int, str], Dict[str, Any]],
    night_subj: Dict[Tuple[int, str], Dict[str, Any]],
    bands: Dict[str, Tuple[float, float]],
    n_channels: int,
) -> Dict[Tuple[int, str], Dict[str, Any]]:
    """
    Собирает subject_vectors для каждого (канал, бэнд) без парного выравнивания.

    Parameters
    ----------
    day_subj, night_subj : dict
        {(ch, band): {'ids': List[int], 'vals': np.ndarray}, ...}
        — результат aggregate_by_subject(...)
    bands : dict
        Словарь бэндов; используются только ключи (имена бэндов).
    n_channels : int
        Количество каналов.

    Returns
    -------
    subject_vectors : dict
        {(ch, band): {
            'day': np.ndarray, 'night': np.ndarray,
            'day_ids': List[int], 'night_ids': List[int]
        }}
    """
    subject_vectors: Dict[Tuple[int, str], Dict[str, Any]] = {}

    for ch in range(n_channels):
        for band in bands.keys():
            key = (ch, band)
            d = day_subj.get(key, {'ids': [], 'vals': np.array([], dtype=float)})
            n = night_subj.get(key, {'ids': [], 'vals': np.array([], dtype=float)})

            d_ids: List[int] = list(d.get('ids', []))
            n_ids: List[int] = list(n.get('ids', []))
            d_vals = np.asarray(d.get('vals', np.array([], float)), dtype=float)
            n_vals = np.asarray(n.get('vals', np.array([], float)), dtype=float)

            subject_vectors[key] = {
                'day'      : d_vals,
                'night'    : n_vals,
                'day_ids'  : d_ids,
                'night_ids': n_ids,
            }

    return subject_vectors

def hedges_g(x, y) -> float:
    """
    Hedges' g для двух независимых выборок (Day − Night), с поправкой J.
    """
    x = np.asarray(x, float); y = np.asarray(y, float)
    nx, ny = len(x), len(y)
    if nx < 2 or ny < 2:
        return np.nan
    vx, vy = np.var(x, ddof=1), np.var(y, ddof=1)
    # обе дисперсии нулевые
    if vx == 0 and vy == 0:
        return float(np.sign(np.nanmean(x) - np.nanmean(y)) * 0.0)
    # объединённая (пул) СКО
    sp2 = ((nx - 1) * vx + (ny - 1) * vy) / (nx + ny - 2)
    if sp2 <= 0:
        return np.nan
    d = (np.nanmean(x) - np.nanmean(y)) / np.sqrt(sp2)  # Day − Night
    J = 1 - 3 / (4 * (nx + ny) - 9) if (nx + ny) > 2 else 1.0
    return float(J * d)

def effect_size_d(x, y):
    # Cohen's d: разница средних / std
    nx, ny = len(x), len(y)
    sp = np.sqrt(((nx - 1) * np.var(x, ddof=1) + (ny - 1) * np.var(y, ddof=1)) / (nx + ny - 2))
    return (np.mean(x) - np.mean(y)) / sp


def build_band_table(subject_vectors: Dict[Tuple[int, str], Dict[str, Any]],
                     alpha: float = 0.05) -> pd.DataFrame:
    """
    Строит сводную таблицу по всем (канал, бэнд) из subject_vectors:
      Welch t-test, p-value, Hedges' g, примерная мощность (TTestIndPower).
    """
    power_calc = TTestIndPower()
    rows = []

    for (ch, band), vecs in subject_vectors.items():
        x = np.asarray(vecs.get('day',   []), float)   # day
        y = np.asarray(vecs.get('night', []), float)   # night
        n1, n2 = len(x), len(y)
        if n1 == 0 or n2 == 0:
            continue

        # Welch t-test (устойчивей при разн. дисперсиях)
        try:
            t_stat, p_val = stats.ttest_ind(x, y, equal_var=False, nan_policy='omit')
        except Exception:
            t_stat, p_val = np.nan, np.nan

        # Эффект (Hedges' g), знак Day − Night
        #g = hedges_g(x, y)
        g = effect_size_d(x,y)

        # Прикидка мощности (допущение равных дисперсий в модели мощности)
        try:
            ratio = n2 / max(n1, 1)
            es = abs(g)
            power = power_calc.solve_power(effect_size=es, nobs1=n1, ratio=ratio,
                                           alpha=alpha, alternative='two-sided')
        except Exception:
            power = np.nan

        rows.append({
            'channel': ch,
            'band': band,
            'n_day': n1,
            'n_night': n2,
            'mean_day': float(np.nanmean(x)) if n1 else np.nan,
            'mean_night': float(np.nanmean(y)) if n2 else np.nan,
            'delta_day_minus_night': float(np.nanmean(x) - np.nanmean(y)) if (n1 and n2) else np.nan,
            't_stat': float(t_stat) if np.isfinite(t_stat) else np.nan,
            'p_value': float(p_val) if np.isfinite(p_val) else np.nan,
            'hedges_g': float(g) if np.isfinite(g) else np.nan,
            'power': float(power) if np.isfinite(power) else np.nan,
            'sig_alpha_0.05': bool((p_val <= 0.05) if np.isfinite(p_val) else False),
            'sig_alpha_0.01': bool((p_val <= 0.01) if np.isfinite(p_val) else False),
            'sig_and_power': bool((p_val <= 0.05) and (power >= 0.8)) if (np.isfinite(p_val) and np.isfinite(power)) else False,
        })

    df = pd.DataFrame(rows)
    if not df.empty:
        df = df.sort_values(['p_value', 'power'],
                            ascending=[True, True]).reset_index(drop=True)
    return df

# --- пример использования ---
# band_table = build_band_table(subject_vectors, alpha=0.05)
# print(band_table.head(12))




In [4]:
exec_spec_path     = r'./Generated/Spectrums/psds_array_morlet.npz'
day_time_meta_path = r"./Supplementary/Experiment_Metadata.xlsx"

f = np.linspace(2, 40, 40*2)

bands = {'Delta':(1,4), 'Tetta':(4,7), 'Alpha':(7,13), 'Beta':(13,30)}
band_cols = {name: np.where((f >= lo) & (f < hi))[0] for name,(lo,hi) in bands.items()}


In [6]:
results_day   = filter_spectras(exec_spec_path, day_time_meta_path=day_time_meta_path, gender='f')
results_night = filter_spectras(exec_spec_path, day_time_meta_path=day_time_meta_path, gender='m')


In [7]:
first_power = (results_day or results_night)[0][0]
n_channels, n_freqs = first_power.shape[:2]

In [8]:
day_subj   = _aggregate_by_subject(results_day)
night_subj = _aggregate_by_subject(results_night)

In [9]:
subject_vectors = build_subject_vectors(day_subj, night_subj, bands, n_channels)

In [10]:
band_table_r = build_band_table(subject_vectors, alpha=0.05)

In [11]:
print(band_table_r.head(12))

    channel   band  n_day  n_night  mean_day  mean_night  \
0        39   Beta      8        8  0.079190    0.039274   
1        58   Beta      8        8  0.103180    0.046096   
2        28   Beta      8        8  0.103799    0.044466   
3        35  Tetta      8        8  0.286991    0.147817   
4        21   Beta      8        8  0.055815    0.032785   
5         7   Beta      8        8  0.048271    0.027848   
6        26   Beta      8        8  0.074063    0.041728   
7        61   Beta      8        8  0.065080    0.040416   
8        48   Beta      8        8  0.085606    0.058988   
9        60   Beta      8        8  0.110848    0.040080   
10        8   Beta      8        8  0.186452    0.069899   
11       12   Beta      8        8  0.044176    0.032610   

    delta_day_minus_night    t_stat   p_value  hedges_g     power  \
0                0.039916  2.895621  0.012404  1.447810  0.768142   
1                0.057084  2.842530  0.021782  1.421265  0.752884   
2           

In [29]:
results_day   = filter_spectras(exec_spec_path, day_time="Day",     day_time_meta_path=day_time_meta_path)
results_night = filter_spectras(exec_spec_path, day_time="Evening", day_time_meta_path=day_time_meta_path)
first_power = (results_day or results_night)[0][0]
n_channels, n_freqs = first_power.shape[:2]
day_subj   = _aggregate_by_subject(results_day)
night_subj = _aggregate_by_subject(results_night)

subject_vectors = build_subject_vectors(day_subj, night_subj, bands, n_channels)

band_table_r = build_band_table(subject_vectors, alpha=0.05)
print(band_table_r.head(12))


    channel  band  n_day  n_night  mean_day  mean_night  \
0         1  Beta      9        7  0.069974    0.036717   
1        15  Beta      9        7  0.086674    0.058647   
2        39  Beta      9        7  0.072947    0.041600   
3        28  Beta      9        7  0.094699    0.047690   
4        20  Beta      9        7  0.112791    0.068724   
5        61  Beta      9        7  0.062034    0.040809   
6         9  Beta      9        7  0.176116    0.068118   
7         8  Beta      9        7  0.168995    0.075693   
8        60  Beta      9        7  0.098793    0.045471   
9        10  Beta      9        7  0.073309    0.047539   
10       53  Beta      9        7  0.096006    0.051217   
11       44  Beta      9        7  0.079342    0.056365   

    delta_day_minus_night    t_stat   p_value  hedges_g     power  \
0                0.033257  2.610242  0.027214  1.171853  0.580983   
1                0.028027  2.426645  0.029371  1.173201  0.581951   
2                0.031347

In [32]:
results_day   = filter_spectras(exec_spec_path, day_time="Day",     day_time_meta_path=day_time_meta_path, gender="m")
results_night = filter_spectras(exec_spec_path, day_time="Evening", day_time_meta_path=day_time_meta_path, gender="m")
day_subj   = _aggregate_by_subject(results_day)
night_subj = _aggregate_by_subject(results_night)

subject_vectors = build_subject_vectors(day_subj, night_subj, bands, n_channels)

band_table_r = build_band_table(subject_vectors, alpha=0.05)
print(band_table_r.head(12))


    channel   band  n_day  n_night  mean_day  mean_night  \
0         1   Beta      3        5  0.076885    0.034154   
1        60  Tetta      3        5  0.176139    0.119474   
2        54  Alpha      3        5  0.101243    0.081528   
3        22   Beta      3        5  0.020894    0.026680   
4         2   Beta      3        5  0.034602    0.048447   
5        23   Beta      3        5  0.031701    0.044847   
6         1  Alpha      3        5  0.154606    0.104846   
7        15   Beta      3        5  0.088190    0.053515   
8        54  Tetta      3        5  0.133551    0.095487   
9        16  Tetta      3        5  0.428475    0.172061   
10       16  Delta      3        5  0.782157    0.482899   
11       52  Alpha      3        5  0.113440    0.085348   

    delta_day_minus_night    t_stat   p_value  hedges_g     power  \
0                0.042732  1.945783  0.180449  1.855504  0.566951   
1                0.056665  1.527229  0.182646  1.029726  0.222152   
2           

In [33]:
results_day   = filter_spectras(exec_spec_path, day_time="Day",     day_time_meta_path=day_time_meta_path, gender="f")
results_night = filter_spectras(exec_spec_path, day_time="Evening", day_time_meta_path=day_time_meta_path, gender="f")
day_subj   = _aggregate_by_subject(results_day)
night_subj = _aggregate_by_subject(results_night)

subject_vectors = build_subject_vectors(day_subj, night_subj, bands, n_channels)

band_table_r = build_band_table(subject_vectors, alpha=0.05)
print(band_table_r.head(12))


    channel  band  n_day  n_night  mean_day  mean_night  \
0        20  Beta      6        2  0.130033    0.063544   
1        28  Beta      6        2  0.122171    0.048681   
2         6  Beta      6        2  0.066720    0.036079   
3        61  Beta      6        2  0.072326    0.043342   
4        33  Beta      6        2  0.065068    0.037147   
5        31  Beta      6        2  0.306475    0.077555   
6        50  Beta      6        2  0.038004    0.053001   
7        60  Beta      6        2  0.127978    0.059458   
8         2  Beta      6        2  0.168085    0.064437   
9         9  Beta      6        2  0.154085    0.072241   
10       51  Beta      6        2  0.042351    0.052044   
11       32  Beta      6        2  0.167989    0.061301   

    delta_day_minus_night    t_stat   p_value  hedges_g     power  \
0                0.066489  2.939751  0.027977  1.360748  0.290296   
1                0.073490  2.748929  0.036060  1.422480  0.312106   
2                0.030641

In [34]:
results_day   = filter_spectras(exec_spec_path, day_time="Day",     day_time_meta_path=day_time_meta_path, gender="m")
results_night = filter_spectras(exec_spec_path, day_time="Evening", day_time_meta_path=day_time_meta_path, gender="f")
day_subj   = _aggregate_by_subject(results_day)
night_subj = _aggregate_by_subject(results_night)

subject_vectors = build_subject_vectors(day_subj, night_subj, bands, n_channels)

band_table_r = build_band_table(subject_vectors, alpha=0.05)
print(band_table_r.head(12))


    channel   band  n_day  n_night  mean_day  mean_night  \
0        58   Beta      3        2  0.042103    0.091890   
1         3   Beta      3        2  0.050200    0.081822   
2         1  Tetta      3        2  0.233812    0.194038   
3        34  Alpha      3        2  0.104816    0.156338   
4        27   Beta      3        2  0.025847    0.036748   
5        52   Beta      3        2  0.029128    0.046602   
6        58  Alpha      3        2  0.092459    0.156193   
7        50   Beta      3        2  0.044253    0.053001   
8        22   Beta      3        2  0.020894    0.035160   
9        32   Beta      3        2  0.033534    0.061301   
10       12   Beta      3        2  0.034307    0.048745   
11        1   Beta      3        2  0.076885    0.043124   

    delta_day_minus_night    t_stat   p_value  hedges_g     power  \
0               -0.049787 -4.969531  0.018197 -3.792054  0.782753   
1               -0.031623 -2.828825  0.096779 -2.027029  0.339759   
2           

In [35]:
results_day   = filter_spectras(exec_spec_path, day_time="Day",     day_time_meta_path=day_time_meta_path, gender="f")
results_night = filter_spectras(exec_spec_path, day_time="Evening", day_time_meta_path=day_time_meta_path, gender="m")
day_subj   = _aggregate_by_subject(results_day)
night_subj = _aggregate_by_subject(results_night)

subject_vectors = build_subject_vectors(day_subj, night_subj, bands, n_channels)

band_table_r = build_band_table(subject_vectors, alpha=0.05)
print(band_table_r.head(12))


    channel   band  n_day  n_night  mean_day  mean_night  \
0        39   Beta      6        5  0.086879    0.035790   
1        28   Beta      6        5  0.122171    0.047294   
2        17   Beta      6        5  0.084696    0.056801   
3        15   Beta      6        5  0.085915    0.053515   
4        61   Beta      6        5  0.072326    0.039796   
5        10   Beta      6        5  0.083991    0.044052   
6        20   Beta      6        5  0.130033    0.070796   
7        16   Beta      6        5  0.072346    0.046254   
8        21   Beta      6        5  0.058761    0.032234   
9        58  Tetta      6        5  0.216594    0.141203   
10        8   Beta      6        5  0.208179    0.057461   
11       60   Beta      6        5  0.127978    0.039876   

    delta_day_minus_night    t_stat   p_value  hedges_g     power  \
0                0.051089  3.575347  0.007473  2.046814  0.851783   
1                0.074877  3.118163  0.020450  1.736087  0.723406   
2           

In [36]:
import numpy as np
from scipy import stats

def var_checks(x, y):
    r = np.var(x, ddof=1) / np.var(y, ddof=1)
    lev_p = stats.levene(x, y, center='median').pvalue   # Brown–Forsythe
    return r, lev_p  # r>>1 или r<<1 — сильная гетероскедастичность

for (ch, band), vecs in subject_vectors.items():
        x = np.asarray(vecs.get('day',   []), float)   # day
        y = np.asarray(vecs.get('night', []), float)   # night
        n1, n2 = len(x), len(y)
        if n1 == 0 or n2 == 0:
            continue

print(var_checks(x, y))


(np.float64(21.943761164678996), np.float64(0.38720674244120207))
