In [1]:
import h5py
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import scipy.io as sio

In [2]:
# Load the study spreadsheet (one sheet) into a DataFrame
file_name = 'Data_4_Import_REST.xlsx'  # Replace with your file name
excel_sheet_name = 'Depression Rest'
file_path = os.path.join("Depression_Study", "depression_data", file_name)
df_raw = pd.read_excel(file_path, sheet_name=excel_sheet_name)

# Column 0 supplies the values collected below; column 1 is the grouping code.
# Rows whose code equals 99 go to the healthy list, every other row to the
# depressed list (note: this includes rows where the code is missing).
subject_ids = df_raw.iloc[:, 0]
group_code = df_raw.iloc[:, 1]
healthy_sample = subject_ids[group_code == 99].tolist()
depressed_sample = subject_ids[group_code != 99].tolist()

# Show both lists
print("Healthy sample:", healthy_sample)
print("Depressed sample:", depressed_sample)

Healthy sample: [507, 508, 509, 510, 511, 512, 513, 514, 515, 516, 517, 518, 519, 520, 521, 522, 523, 524, 525, 526, 527, 528, 529, 530, 531, 532, 533, 534, 535, 536, 537, 538, 539, 540, 541, 542, 543, 544, 545, 546, 547, 548, 549, 550, 551, 552, 553, 554, 555, 556, 557, 560, 562, 563, 564, 566, 568, 569, 570, 573, 574, 575, 576, 577, 578, 579, 580, 581, 582, 583, 584, 585, 588, 589, 590, 593, 596, 599, 600, 601, 603, 609, 611, 612, 615, 617, 619, 620, 621, 623]
Depressed sample: [558, 559, 561, 565, 567, 571, 572, 586, 587, 591, 592, 594, 595, 597, 598, 602, 604, 605, 606, 607, 608, 610, 613, 614, 616, 618, 622, 624, 625, 626, 627, 628]


In [3]:
# Open the EEG source file read-only through h5py.
# NOTE(review): the file handle is never closed. The items looked up from it
# are only converted to NumPy arrays in a later cell, so the handle presumably
# has to stay open until then — confirm, and close `eeg_source` once done.
# NOTE(review): `filename` is resolved relative to the current working
# directory, unlike the Excel path above — verify this is intended.
filename = 'eeg_source.mat'
eeg_source = h5py.File(filename, 'r')
# Top-level entries for the two recording conditions
# (presumably eyes-open / eyes-closed rest — verify against the file).
eeg_source_open = eeg_source['eeg_source_open']
eeg_source_closed = eeg_source['eeg_source_closed']

def get_eeg_list(eeg_source, sample_list):
    """Look up the entries of ``eeg_source`` that belong to the given samples.

    Each sample is mapped to the key ``x<sample>``. Samples whose key is not
    present in ``eeg_source`` are skipped without warning, so the result may
    be shorter than ``sample_list``.

    Args:
        eeg_source: Mapping-like object supporting ``keys()`` and ``[]``.
        sample_list: Iterable of sample identifiers.

    Returns:
        List of the matching entries, in the order of ``sample_list``.
    """
    available_keys = eeg_source.keys()
    selected = []
    for sample in sample_list:
        key = f'x{sample}'
        if key in available_keys:
            selected.append(eeg_source[key])
    return selected

# Build one list of per-subject entries for every condition/group combination.
# Subjects without a matching `x<id>` key in the file are silently dropped by
# get_eeg_list, so these lists can be shorter than the ID lists they come from.
eeg_source_open_healthy = get_eeg_list(eeg_source_open, healthy_sample)
eeg_source_open_depressed = get_eeg_list(eeg_source_open, depressed_sample)
eeg_source_closed_healthy = get_eeg_list(eeg_source_closed, healthy_sample)
eeg_source_closed_depressed = get_eeg_list(eeg_source_closed, depressed_sample)

In [5]:
def lzc_prep(eeg_source, n_sources=60, epoch_seconds=4, sampling_rate=500):
    """Binarize and epoch every source signal of every subject.

    For each subject and each source index, the data of that source is
    flattened to 1-D, thresholded at its own median (values strictly above
    the median become 1, all others 0) and cut into consecutive,
    non-overlapping epochs. Trailing samples that do not fill a whole epoch
    are discarded.

    Args:
        eeg_source: Sequence of per-subject 3-D arrays (anything accepted by
            ``np.array``), with the source index on axis 1.
        n_sources: Number of sources to process per subject (indices
            ``0 .. n_sources - 1``). Defaults to 60.
        epoch_seconds: Epoch duration in seconds. Defaults to 4.
        sampling_rate: Sampling rate in Hz. Defaults to 500.

    Returns:
        Nested list indexed as ``[subject][source][epoch]``; each epoch is a
        1-D integer array of 0/1 values with ``epoch_seconds * sampling_rate``
        elements.

    Raises:
        ValueError: If the resulting epoch length is not a positive number of
            samples.
    """
    # Epoch length in samples is loop-invariant, so compute it once.
    segment_length = int(epoch_seconds * sampling_rate)
    if segment_length <= 0:
        raise ValueError("epoch_seconds * sampling_rate must be a positive number of samples")

    all_subjects = []

    for i, subj_data in enumerate(eeg_source):
        if i % 10 == 0:
            print(f"Processing {i+1}/{len(eeg_source)}")
        # Materialize the subject's data as an in-memory array.
        # NOTE(review): the original comment gave the shape as
        # (samples, sources, trials). The row-major flatten below only places
        # trials back-to-back if the actual layout is (trials, sources, samples)
        # — confirm the axis order of the loaded data.
        subj_data = np.array(subj_data)
        subj_srcs = []

        for src in range(n_sources):
            # Flatten the two non-source axes into a single 1-D signal
            data_1d = subj_data[:, src, :].reshape(-1)

            # Binarize by median thresholding (ties with the median map to 0)
            threshold = np.median(data_1d)
            binary_seq = (data_1d > threshold).astype(int)

            # Split into whole epochs; the incomplete tail is dropped
            n_segments = len(binary_seq) // segment_length
            subj_segments = [
                binary_seq[start:start + segment_length]
                for start in range(0, n_segments * segment_length, segment_length)
            ]

            subj_srcs.append(subj_segments)

        all_subjects.append(subj_srcs)

    print()
    return all_subjects


# Binarize and epoch each condition/group list. Every call prints its own
# progress lines followed by a blank line.
prep_open_healthy = lzc_prep(eeg_source_open_healthy)
prep_open_depressed = lzc_prep(eeg_source_open_depressed)   
prep_closed_healthy = lzc_prep(eeg_source_closed_healthy)   
prep_closed_depressed = lzc_prep(eeg_source_closed_depressed)   

Processing 1/89
Processing 11/89
Processing 21/89
Processing 31/89
Processing 41/89
Processing 51/89
Processing 61/89
Processing 71/89
Processing 81/89

Processing 1/30
Processing 11/30
Processing 21/30

Processing 1/89
Processing 11/89
Processing 21/89
Processing 31/89
Processing 41/89
Processing 51/89
Processing 61/89
Processing 71/89
Processing 81/89

Processing 1/30
Processing 11/30
Processing 21/30



In [None]:
# NOTE: a full run of this cell took 588 minutes (about 9.8 hours).

from lz76.lz76 import LZ76

# def calc_er(X):
#     lz = LZ76(X) # Compute Lempel-Ziv complexity
#     er = lz*np.log2(len(X))/len(X) # Normalize using log2(length)
#     return er

def compute_lz76(prep_data):
    """
    Average the LZ76 value of every segment at three levels.

    ``prep_data`` is indexed as [subject][source][segment]. ``LZ76`` is
    applied to each non-empty segment and its result is used as-is (no
    normalization is applied here); NaN values are ignored when averaging.

    Returns a 3-tuple:
        lz76_per_subject_source: list of lists, mean over segments for each
            (subject, source)
        lz76_per_subject: list, mean over sources for each subject
        lz76_overall: float, mean over subjects
    """
    source_level = []
    subject_level = []

    for i, subj in enumerate(prep_data):
        means_for_subject = []

        for j, src in enumerate(subj):
            # Progress line for every tenth source
            if j % 10 == 0:
                print(f"Processing {i+1}/{len(prep_data)} subject's {j+1}/{len(subj)} source")

            # One LZ76 value per non-empty segment of this source
            segment_values = []
            for seg in src:
                if len(seg) > 0:
                    segment_values.append(LZ76(seg))

            # Collapse the segments of this source into a single mean
            means_for_subject.append(np.nanmean(segment_values))

        source_level.append(means_for_subject)
        # Collapse the sources of this subject into a single mean
        subject_level.append(np.nanmean(means_for_subject))

    # Grand mean across subjects
    grand_mean = np.nanmean(subject_level)
    print()

    return source_level, subject_level, grand_mean


# Run the LZ76 computation for each condition/group combination.
# Each result is a 3-tuple: (per-subject-per-source means, per-subject means,
# overall mean).
lz76_open_healthy = compute_lz76(prep_open_healthy)
lz76_open_depressed = compute_lz76(prep_open_depressed)
lz76_closed_healthy = compute_lz76(prep_closed_healthy)
lz76_closed_depressed = compute_lz76(prep_closed_depressed)

Processing 1/89 subject's 10/60 source
Processing 1/89 subject's 20/60 source
Processing 1/89 subject's 30/60 source
Processing 1/89 subject's 40/60 source
Processing 1/89 subject's 50/60 source
Processing 1/89 subject's 60/60 source
Processing 2/89 subject's 10/60 source
Processing 2/89 subject's 20/60 source
Processing 2/89 subject's 30/60 source
Processing 2/89 subject's 40/60 source
Processing 2/89 subject's 50/60 source
Processing 2/89 subject's 60/60 source
Processing 3/89 subject's 10/60 source
Processing 3/89 subject's 20/60 source
Processing 3/89 subject's 30/60 source
Processing 3/89 subject's 40/60 source
Processing 3/89 subject's 50/60 source
Processing 3/89 subject's 60/60 source
Processing 4/89 subject's 10/60 source
Processing 4/89 subject's 20/60 source
Processing 4/89 subject's 30/60 source
Processing 4/89 subject's 40/60 source
Processing 4/89 subject's 50/60 source
Processing 4/89 subject's 60/60 source
Processing 5/89 subject's 10/60 source
Processing 5/89 subject's