In [2]:
import json
import os

import mne
import numpy as np
from lempel_ziv_complexity import lempel_ziv_complexity
from scipy.sparse.linalg import eigs
from tqdm.notebook import tqdm

%matplotlib notebook

In [26]:
class NumpyEncoder(json.JSONEncoder):
    def default(self, obj):
        if isinstance(obj, np.integer):
            return int(obj)
        elif isinstance(obj, np.floating):
            return float(obj)
        elif isinstance(obj, np.str_):
            return str(obj)
        elif isinstance(obj, np.ndarray):
            return obj.tolist()
        else:
            return super().default(obj)

In [4]:
BANDS = ["delta", "theta", "alpha", "beta", "gamma"]
RATIOS_TWO_BANDS = [
    ("theta", "delta"),
    ("alpha", "delta"),
    ("alpha", "theta"),
    ("beta", "delta"),
    ("beta", "theta"),
    ("beta", "alpha"),
    ("gamma", "delta"),
    ("gamma", "theta"),
    ("gamma", "alpha"),
    ("gamma", "beta"),
]
RATIOS_THREE_BANDS = [
    ("alpha", "delta", "theta"),
    ("beta", "delta", "theta"),
    ("beta", "theta", "alpha"),
    ("gamma", "delta", "theta"),
    ("gamma", "theta", "alpha"),
    ("gamma", "alpha", "beta"),
]
POSSIBLE_N_BINS_LZC = ["median", "3", "4", "5", "6"]
POSSIBLE_N_BINS_S_LAMBDA = ["temporal", "4", "6", "10", "14", "20"]

In [5]:
def spatial_encoding(signal, n_bins="median"):
    norm_signal = (signal - np.min(signal)) / (np.max(signal) - np.min(signal))

    if n_bins == "median":
        bins = [0, np.median(norm_signal)]
    else:
        bins = np.linspace(0, 1, int(n_bins) + 1)[:-1]

    symbols = np.digitize(norm_signal, bins) - 1
    symbols = list(map(
        lambda x: x if x < 10 else chr(ord("a") + (x - 10)),
        symbols
    ))

    return ''.join(map(str, symbols))

In [6]:
TEMPORAL_ENCODING_DICTIONARY = {
    (0, 1, 2): "a",
    (0, 2, 1): "b",
    (1, 0, 2): "c",
    (1, 2, 0): "d",
    (2, 0, 1): "e",
    (2, 1, 0): "f",
}


def temporal_encoding(signal):
    symbols = []

    for i in range(len(signal) - 2):
        window = signal[i:i + 3]
        order = tuple(np.argsort(window))
        symbols.append(
            TEMPORAL_ENCODING_DICTIONARY[order]
        )

    return "".join(symbols)

In [7]:
def lzc(epochs, n_bins="median"):
    result = []
    for epoch in epochs:
        symbols = spatial_encoding(epoch, n_bins)
        result.append(lempel_ziv_complexity(symbols))
    return np.array(result)

In [8]:
def create_transition_matrix(symbols, m=1):
    transitions = []
    for i in range(len(symbols) - m):
        transitions.append(
            (tuple(symbols[i:i + m]), symbols[i + m])
        )

    state_index = {state: idx for idx, state in enumerate(set([s for s, _ in transitions]))}
    N = len(state_index)
    matrix = np.zeros((N, N))

    for from_state, to_state in transitions:
        i = state_index[from_state]
        j = state_index.get(tuple(list(from_state[1:]) + [to_state]), None)
        if j is not None:
            matrix[i, j] += 1

    matrix = matrix / np.maximum(matrix.sum(axis=1, keepdims=True), 1e-12)

    return matrix

In [9]:
def compute_S_and_Lambda(transition_matrix):
    vals, vecs = eigs(transition_matrix.T, k=1, which='LM')
    rho = np.real(vecs[:, 0])
    rho = rho / rho.sum()

    L1 = -transition_matrix * np.log(np.maximum(transition_matrix, 1e-12))
    L2 = transition_matrix * np.log(np.maximum(transition_matrix, 1e-12)) ** 2

    S = np.sum(rho @ L1)

    L_square = np.sum(rho @ L2)
    Lambda = L_square - S ** 2

    return S, Lambda

In [30]:
def get_features(folder, channels_list, size=30):
    for filename in os.listdir(f"preprocessed/{folder}"):
        results_filename = f"{filename[:-8]}_part2.json"
        if results_filename in os.listdir(f"features/{folder}"):
            print(filename, "skip")
            continue

        pbar = tqdm(total=8, desc=f"{filename}", unit="step")

        resulting_features = {
            "lzc": {},
            "psd_ratio": {},
            "S": {n_bins: {} for n_bins in POSSIBLE_N_BINS_S_LAMBDA},
            "Lambda": {n_bins: {} for n_bins in POSSIBLE_N_BINS_S_LAMBDA},
            "annotations": {"main": []},
            "coh": {},
            "plv": {},
        }

        raw = mne.io.read_raw_fif(f"preprocessed/{folder}/{filename}", verbose=False)
        epochs = mne.make_fixed_length_epochs(raw, duration=size, preload=True, verbose=False)

        pbar.set_description(f"{filename} -> psd")
        pbar.update(1)
        psd_table = epochs.compute_psd(n_jobs=-1, verbose=False).get_data().mean(axis=2).T
        resulting_features["psd"] = {
            "_".join(ch_name.rsplit("-", 1)): psd_array
            for ch_name, psd_array in zip(raw.ch_names, psd_table)
        }

        pbar.set_description(f"{filename} -> psd ratios")
        pbar.update(1)
        for channel in channels_list:
            for dividend, divisor in RATIOS_TWO_BANDS:
                resulting_features["psd_ratio"][f"{channel}_{dividend}/{divisor}"] = (
                    resulting_features["psd"][f"{channel}_{dividend}"] / resulting_features["psd"][f"{channel}_{divisor}"]
                )
            for dividend, divisor_1, divisor_2 in RATIOS_THREE_BANDS:
                resulting_features["psd_ratio"][f"{channel}_{dividend}/({divisor_1}+{divisor_2})"] = (
                    resulting_features["psd"][f"{channel}_{dividend}"] / (
                        resulting_features["psd"][f"{channel}_{divisor_1}"] + resulting_features["psd"][f"{channel}_{divisor_2}"]
                    )
                )

        pbar.set_description(f"{filename} -> lzc")
        pbar.update(1)
        for n_bins in POSSIBLE_N_BINS_LZC:
            resulting_features["lzc"][n_bins] = {
                channel_name: lzc(epochs.get_data(copy=False)[:, channel_n], n_bins=n_bins)
                for channel_n, channel_name in enumerate(raw.ch_names)
            }

        pbar.set_description(f"{filename} -> s, lambda")
        pbar.update(1)
        for epoch in epochs:
            for channel_epoch_data, channel_name in zip(epoch, raw.ch_names):
                for n_bins in POSSIBLE_N_BINS_S_LAMBDA:
                    if n_bins == "temporal":
                        symbols = temporal_encoding(channel_epoch_data)
                    else:
                        symbols = spatial_encoding(channel_epoch_data, n_bins)

                    transition_matrix = create_transition_matrix(symbols)
                    S, Lambda = compute_S_and_Lambda(transition_matrix)

                    if channel_name not in resulting_features["S"][n_bins]:
                        resulting_features["S"][n_bins][channel_name] = []
                        resulting_features["Lambda"][n_bins][channel_name] = []

                    resulting_features["S"][n_bins][channel_name].append(S)
                    resulting_features["Lambda"][n_bins][channel_name].append(Lambda)

        pbar.set_description(f"{filename} -> annotations")
        pbar.update(1)
        annotators_number = max(map(len, epochs.get_annotations_per_epoch()))
        if annotators_number != 1:
            for annotator in range(annotators_number):
                resulting_features["annotations"][str(annotator + 1)] = []
        for annotations in epochs.get_annotations_per_epoch():
            if len(annotations) == 1:
                for annotator in resulting_features["annotations"]:
                    resulting_features["annotations"][annotator].append(annotations[0][2])
            else:
                for annotation in annotations:
                    resulting_features["annotations"][annotation[2][-1:]].append(annotation[2][:1])
                resulting_features["annotations"]["main"].append(None)

        pbar.set_description(f"{filename} -> coherence, plv")
        pbar.update(1)
        with open(f"features/{folder}/{filename[:-8]}_part1.json", mode="r", encoding="utf-8") as file:
            coh_plv_data = json.loads(file.read())
            for key_upper in ["coh", "plv"]:
                for key_lower in coh_plv_data[key_upper]:
                    resulting_features[key_upper][key_lower] = np.array(
                        coh_plv_data[key_upper][key_lower]
                    ).reshape(len(epochs), 6).mean(axis=1)

        pbar.set_description(f"{filename} -> saving")
        pbar.update(1)
        with open(f"features/{folder}/{results_filename}", mode="w", encoding="utf-8") as file:
            file.write(json.dumps(resulting_features, cls=NumpyEncoder, indent=None))

        pbar.set_description(f"{filename} -> DONE")
        pbar.update(1)
        pbar.close()

In [31]:
get_features(
    "isruc-sleep",
    ["F3", "C3", "O1", "F4", "C4", "O2"]
)

s1_10_eeg.fif skip
s1_1_eeg.fif skip
s1_2_eeg.fif skip
s1_3_eeg.fif skip
s1_4_eeg.fif skip
s1_5_eeg.fif skip
s1_6_eeg.fif skip


s1_7_eeg.fif:   0%|          | 0/8 [00:00<?, ?step/s]

s1_8_eeg.fif:   0%|          | 0/8 [00:00<?, ?step/s]

s1_9_eeg.fif:   0%|          | 0/8 [00:00<?, ?step/s]

s3_10_eeg.fif:   0%|          | 0/8 [00:00<?, ?step/s]

s3_1_eeg.fif:   0%|          | 0/8 [00:00<?, ?step/s]

s3_2_eeg.fif:   0%|          | 0/8 [00:00<?, ?step/s]

s3_3_eeg.fif:   0%|          | 0/8 [00:00<?, ?step/s]

s3_4_eeg.fif:   0%|          | 0/8 [00:00<?, ?step/s]

s3_5_eeg.fif:   0%|          | 0/8 [00:00<?, ?step/s]

s3_6_eeg.fif:   0%|          | 0/8 [00:00<?, ?step/s]

s3_7_eeg.fif:   0%|          | 0/8 [00:00<?, ?step/s]

s3_8_eeg.fif:   0%|          | 0/8 [00:00<?, ?step/s]

s3_9_eeg.fif:   0%|          | 0/8 [00:00<?, ?step/s]

In [32]:
get_features(
    "sleep_edf_database_expanded",
    ["Fpz-Cz", "Pz-Oz"]
)

0_eeg.fif:   0%|          | 0/8 [00:00<?, ?step/s]

10_eeg.fif:   0%|          | 0/8 [00:00<?, ?step/s]

11_eeg.fif:   0%|          | 0/8 [00:00<?, ?step/s]

12_eeg.fif:   0%|          | 0/8 [00:00<?, ?step/s]

13_eeg.fif:   0%|          | 0/8 [00:00<?, ?step/s]

14_eeg.fif:   0%|          | 0/8 [00:00<?, ?step/s]

15_eeg.fif:   0%|          | 0/8 [00:00<?, ?step/s]

1_eeg.fif:   0%|          | 0/8 [00:00<?, ?step/s]

2_eeg.fif:   0%|          | 0/8 [00:00<?, ?step/s]

3_eeg.fif:   0%|          | 0/8 [00:00<?, ?step/s]

4_eeg.fif:   0%|          | 0/8 [00:00<?, ?step/s]

5_eeg.fif:   0%|          | 0/8 [00:00<?, ?step/s]

6_eeg.fif:   0%|          | 0/8 [00:00<?, ?step/s]

7_eeg.fif:   0%|          | 0/8 [00:00<?, ?step/s]

8_eeg.fif:   0%|          | 0/8 [00:00<?, ?step/s]

9_eeg.fif:   0%|          | 0/8 [00:00<?, ?step/s]

In [33]:
get_features(
    "eegmat",
    ["F3", "C3", "O1", "F4", "C4", "O2"]
)

0_eeg.fif:   0%|          | 0/8 [00:00<?, ?step/s]

10_eeg.fif:   0%|          | 0/8 [00:00<?, ?step/s]

11_eeg.fif:   0%|          | 0/8 [00:00<?, ?step/s]

12_eeg.fif:   0%|          | 0/8 [00:00<?, ?step/s]

13_eeg.fif:   0%|          | 0/8 [00:00<?, ?step/s]

14_eeg.fif:   0%|          | 0/8 [00:00<?, ?step/s]

15_eeg.fif:   0%|          | 0/8 [00:00<?, ?step/s]

16_eeg.fif:   0%|          | 0/8 [00:00<?, ?step/s]

17_eeg.fif:   0%|          | 0/8 [00:00<?, ?step/s]

18_eeg.fif:   0%|          | 0/8 [00:00<?, ?step/s]

19_eeg.fif:   0%|          | 0/8 [00:00<?, ?step/s]

1_eeg.fif:   0%|          | 0/8 [00:00<?, ?step/s]

20_eeg.fif:   0%|          | 0/8 [00:00<?, ?step/s]

21_eeg.fif:   0%|          | 0/8 [00:00<?, ?step/s]

22_eeg.fif:   0%|          | 0/8 [00:00<?, ?step/s]

23_eeg.fif:   0%|          | 0/8 [00:00<?, ?step/s]

24_eeg.fif:   0%|          | 0/8 [00:00<?, ?step/s]

25_eeg.fif:   0%|          | 0/8 [00:00<?, ?step/s]

26_eeg.fif:   0%|          | 0/8 [00:00<?, ?step/s]

27_eeg.fif:   0%|          | 0/8 [00:00<?, ?step/s]

28_eeg.fif:   0%|          | 0/8 [00:00<?, ?step/s]

29_eeg.fif:   0%|          | 0/8 [00:00<?, ?step/s]

2_eeg.fif:   0%|          | 0/8 [00:00<?, ?step/s]

30_eeg.fif:   0%|          | 0/8 [00:00<?, ?step/s]

31_eeg.fif:   0%|          | 0/8 [00:00<?, ?step/s]

32_eeg.fif:   0%|          | 0/8 [00:00<?, ?step/s]

33_eeg.fif:   0%|          | 0/8 [00:00<?, ?step/s]

34_eeg.fif:   0%|          | 0/8 [00:00<?, ?step/s]

35_eeg.fif:   0%|          | 0/8 [00:00<?, ?step/s]

3_eeg.fif:   0%|          | 0/8 [00:00<?, ?step/s]

4_eeg.fif:   0%|          | 0/8 [00:00<?, ?step/s]

5_eeg.fif:   0%|          | 0/8 [00:00<?, ?step/s]

6_eeg.fif:   0%|          | 0/8 [00:00<?, ?step/s]

7_eeg.fif:   0%|          | 0/8 [00:00<?, ?step/s]

8_eeg.fif:   0%|          | 0/8 [00:00<?, ?step/s]

9_eeg.fif:   0%|          | 0/8 [00:00<?, ?step/s]

In [34]:
get_features(
    "spis",
    ["F3", "C3", "O1", "F4", "C4", "O2"]
)

0_eeg.fif:   0%|          | 0/8 [00:00<?, ?step/s]

1_eeg.fif:   0%|          | 0/8 [00:00<?, ?step/s]

2_eeg.fif:   0%|          | 0/8 [00:00<?, ?step/s]

3_eeg.fif:   0%|          | 0/8 [00:00<?, ?step/s]

4_eeg.fif:   0%|          | 0/8 [00:00<?, ?step/s]

5_eeg.fif:   0%|          | 0/8 [00:00<?, ?step/s]

6_eeg.fif:   0%|          | 0/8 [00:00<?, ?step/s]

7_eeg.fif:   0%|          | 0/8 [00:00<?, ?step/s]

8_eeg.fif:   0%|          | 0/8 [00:00<?, ?step/s]

9_eeg.fif:   0%|          | 0/8 [00:00<?, ?step/s]

In [28]:
# MNIST

CHANNELS_MNIST = ["F3", "F4", "O1", "O2", "P7", "P8"]

resulting_features = None
results_filename = f"mnist_part2.json"
for iter_n, filename in enumerate(tqdm(os.listdir(f"preprocessed/mnist"))):
    if resulting_features is None:
        resulting_features = {
            "psd": {},
            "lzc": {n_bins: {} for n_bins in POSSIBLE_N_BINS_LZC},
            "psd_ratio": {},
            "S": {n_bins: {} for n_bins in POSSIBLE_N_BINS_S_LAMBDA},
            "Lambda": {n_bins: {} for n_bins in POSSIBLE_N_BINS_S_LAMBDA},
            "annotations": {"main": []},
        }
    raw = mne.io.read_raw_fif(f"preprocessed/mnist/{filename}", verbose=False)
    epochs = mne.make_fixed_length_epochs(raw, duration=2, preload=True, verbose=False)

    if len(resulting_features["psd"]) == 0:
        for ch_name in raw.ch_names:
            resulting_features["psd"][
                "_".join(ch_name.rsplit("-", 1))
            ] = []
            for n_bins in POSSIBLE_N_BINS_LZC:
                resulting_features["lzc"][n_bins][ch_name] = []
            for n_bins in POSSIBLE_N_BINS_S_LAMBDA:
                resulting_features["S"][n_bins][ch_name] = []
                resulting_features["Lambda"][n_bins][ch_name] = []

    psd_table = epochs.compute_psd(n_jobs=-1, verbose=False).get_data().mean(axis=2).T
    for ch_name, psd_array in zip(raw.ch_names, psd_table):
        resulting_features["psd"][
            "_".join(ch_name.rsplit("-", 1))
        ].extend(psd_array)

    for n_bins in POSSIBLE_N_BINS_LZC:
        for channel_n, channel_name in enumerate(raw.ch_names):
            resulting_features["lzc"][n_bins][channel_name].extend(
                lzc(epochs.get_data(copy=False)[:, channel_n], n_bins=n_bins)
            )

    for epoch in epochs:
        for channel_epoch_data, channel_name in zip(epoch, raw.ch_names):
            for n_bins in POSSIBLE_N_BINS_S_LAMBDA:
                if n_bins == "temporal":
                    symbols = temporal_encoding(channel_epoch_data)
                else:
                    symbols = spatial_encoding(channel_epoch_data, n_bins)

                transition_matrix = create_transition_matrix(symbols)
                S, Lambda = compute_S_and_Lambda(transition_matrix)

                resulting_features["S"][n_bins][channel_name].append(S)
                resulting_features["Lambda"][n_bins][channel_name].append(Lambda)

    resulting_features["annotations"]["main"].append(
        epochs.get_annotations_per_epoch()[0][0][2]
    )

    if len(resulting_features["annotations"]["main"]) == 1000:
        for channel in CHANNELS_MNIST:
            for dividend, divisor in RATIOS_TWO_BANDS:
                resulting_features["psd_ratio"][f"{channel}_{dividend}/{divisor}"] = (
                    np.array(
                        resulting_features["psd"][f"{channel}_{dividend}"]
                    ) / np.array(
                    resulting_features["psd"][f"{channel}_{divisor}"]
                )
                )
            for dividend, divisor_1, divisor_2 in RATIOS_THREE_BANDS:
                resulting_features["psd_ratio"][f"{channel}_{dividend}/({divisor_1}+{divisor_2})"] = (
                    np.array(resulting_features["psd"][f"{channel}_{dividend}"]) / (
                        np.array(
                            resulting_features["psd"][f"{channel}_{divisor_1}"]
                        ) + np.array(
                            resulting_features["psd"][f"{channel}_{divisor_2}"]
                        )
                    )
                )

        with open(f"features/mnist/{iter_n // 1000}_part2.json", mode="w", encoding="utf-8") as file:
            file.write(json.dumps(resulting_features, cls=NumpyEncoder, indent=None))
        resulting_features = None

  0%|          | 0/65033 [00:00<?, ?it/s]

  psd_table = epochs.compute_psd(n_jobs=-1, verbose=False).get_data().mean(axis=2).T
  norm_signal = (signal - np.min(signal)) / (np.max(signal) - np.min(signal))
  vals, vecs = eigs(transition_matrix.T, k=1, which='LM')
  psd_table = epochs.compute_psd(n_jobs=-1, verbose=False).get_data().mean(axis=2).T
  norm_signal = (signal - np.min(signal)) / (np.max(signal) - np.min(signal))
  vals, vecs = eigs(transition_matrix.T, k=1, which='LM')
  psd_table = epochs.compute_psd(n_jobs=-1, verbose=False).get_data().mean(axis=2).T
  norm_signal = (signal - np.min(signal)) / (np.max(signal) - np.min(signal))
  vals, vecs = eigs(transition_matrix.T, k=1, which='LM')
  np.array(
  np.array(resulting_features["psd"][f"{channel}_{dividend}"]) / (
  psd_table = epochs.compute_psd(n_jobs=-1, verbose=False).get_data().mean(axis=2).T
  norm_signal = (signal - np.min(signal)) / (np.max(signal) - np.min(signal))
  vals, vecs = eigs(transition_matrix.T, k=1, which='LM')
  psd_table = epochs.compute_psd(n_jo