In [2]:
from glob import glob
from pathlib import Path

import neurokit2 as nk
import numpy as np
import pandas as pd
import scipy
from scipy.io import savemat
from sklearn.preprocessing import StandardScaler

from utils.utils import gadf_transform

# Sampling rate handed to the neurokit cleaners (`sampling_rate=FS`) and used
# to turn the cleaning window into sample indices.
FS = 400
# Number of trials iterated per subject directory.
trials = 30
# One entry per match of <raw>/<group>/<subject>. glob() returns matches in a
# filesystem-dependent order, so the list is sorted to make the order of the
# samples in the saved arrays reproducible across machines. (`recursive=True`
# was dropped: it only affects patterns containing "**".)
files = sorted(glob("../data/raw/*/*"))
# Atlas table shipped with the data (tab-separated).
regions = pd.read_table("../data/Supplementary/BN_atlas.txt")
# Labels for the non-"Train" subjects; the processing loop filters this table
# on its "Participant" and "Trial" columns and reads "CLASS" and "LEVEL".
phase2_labels = pd.read_csv("../data/phase2_test_labels.csv")

In [3]:
%%capture
# Accumulators filled by the processing loop: every key maps to its own,
# initially empty, list. Key order is kept exactly as before.
_TRAIN_FIELDS = (
    "fMRI", "RSP", "PPG", "RSP_raw", "PPG_raw",
    "class", "level", "subject", "group", "indices",
)
_TEST_FIELDS = (
    "fMRI", "RSP", "PPG", "RSP_raw", "PPG_raw",
    "subject", "class", "level", "group",
)

# Samples whose group folder is "Train".
result_train = {field: [] for field in _TRAIN_FIELDS}

# Samples from every other group folder.
result_test = {field: [] for field in _TEST_FIELDS}

# ---------------------------------------------------------------------------
# Per-trial feature extraction.
#
# For every subject directory in `files` this loads the fMRI, respiration and
# PPG arrays, builds the features of each of the `trials` trials and appends
# them to `result_train` (group folder "Train", labels read from labels.npz)
# or to `result_test` (any other group folder, labels looked up in
# `phase2_labels`).
# ---------------------------------------------------------------------------

CLEAN_WINDOW = (5, 20)       # start/end of the cleaned window; multiplied by FS to get sample indices
CLEAN_FALLBACK_LEN = 10000   # length of the all-zero stand-in used when cleaning fails
RAW_STEP = 4                 # the raw traces keep every 4th sample
RAW_LEN = 2500               # required length of a decimated raw trace
GAF_SIZE = 64                # image_size passed to gadf_transform

# One (subject, trial index, stage, error) tuple per trial that fell back to
# zeros. The cell runs under %%capture, so inspect this list afterwards rather
# than relying on printed output.
failed_trials = []


def _load_data(path):
    """Return the "data" array of an .npz archive, closing the archive afterwards."""
    with np.load(path) as archive:
        return archive["data"]


def _group_and_subject(path):
    """Return the last two components of `path` (group folder, subject folder).

    Taking the components from the end works with both "/" and "\\"
    separators, unlike splitting on a hard-coded backslash.
    """
    parts = Path(path).parts
    return parts[-2], parts[-1]


def _standardize(values):
    """Scale a 1-D array with StandardScaler and return it as a 1-D array."""
    return StandardScaler().fit_transform(values.reshape(-1, 1)).reshape(-1)


def _clean_and_scale(cleaner, trace):
    """Cut the cleaning window out of `trace`, run `cleaner` on it, then standardize."""
    start, stop = (edge * FS for edge in CLEAN_WINDOW)
    return _standardize(cleaner(trace[start:stop], sampling_rate=FS))


def _gaf_image(signal):
    """Stack the "difference" and "summation" gadf_transform outputs along the third axis."""
    return np.dstack([
        gadf_transform(signal, image_size=GAF_SIZE, sample_range=(-1, 1), method=method)
        for method in ("difference", "summation")
    ])


for file in files:
    subject_dir = Path(file)
    if not subject_dir.is_dir():
        # Stray files matched by the glob pattern cannot hold the .npz inputs.
        continue
    group, subject = _group_and_subject(file)

    fmri = _load_data(subject_dir / "fMRI_data.npz")
    rsp = _load_data(subject_dir / "RESP_data.npz")
    ppg = _load_data(subject_dir / "PPG_data.npz")

    is_train = group == "Train"
    if is_train:
        labels = _load_data(subject_dir / "labels.npz")

    for i in range(trials):
        # Cleaned + standardized physiological signals. Best effort: when
        # either signal fails, both are replaced by zeros, and the failure is
        # recorded instead of being silently dropped. `except Exception`
        # (not a bare `except`) keeps Ctrl-C able to stop the loop.
        try:
            rsp_clean = _clean_and_scale(nk.rsp_clean, rsp[i])
            ppg_clean = _clean_and_scale(nk.ppg_clean, ppg[i])
        except Exception as err:
            failed_trials.append((subject, i, "clean", repr(err)))
            rsp_clean = np.zeros(CLEAN_FALLBACK_LEN)
            ppg_clean = np.zeros(CLEAN_FALLBACK_LEN)

        # Decimated raw signals. Both traces must have the expected length;
        # checking only one of them would let a short/long trace through and
        # produce a ragged array when the lists are converted later.
        try:
            rsp_raw = rsp[i][::RAW_STEP]
            ppg_raw = ppg[i][::RAW_STEP]
            if len(rsp_raw) != RAW_LEN or len(ppg_raw) != RAW_LEN:
                raise ValueError(
                    f"raw lengths {len(rsp_raw)}/{len(ppg_raw)}, expected {RAW_LEN}"
                )
        except Exception as err:
            failed_trials.append((subject, i, "raw", repr(err)))
            rsp_raw = np.zeros(RAW_LEN)
            ppg_raw = np.zeros(RAW_LEN)

        # Standardize every row of this trial's fMRI block in place. The row
        # count comes from the array itself so that everything appended below
        # has been scaled.
        for k in range(fmri.shape[1]):
            fmri[i, k] = _standardize(fmri[i, k])

        # Resolve destination and labels before appending anything, so a
        # missing label cannot leave the per-key lists with unequal lengths.
        if is_train:
            target = result_train
            trial_class, trial_level = labels[i, 0], labels[i, 1]
        else:
            target = result_test
            # Trials are numbered from 1 in the label table.
            phase2_select = phase2_labels[
                (phase2_labels["Participant"] == subject) & (phase2_labels["Trial"] == i + 1)
            ]
            if phase2_select.empty:
                raise IndexError(
                    f"no phase-2 label for participant {subject!r}, trial {i + 1}"
                )
            trial_class = phase2_select["CLASS"].values[0]
            trial_level = phase2_select["LEVEL"].values[0]

        target["fMRI"].append(fmri[i, :, :])
        target["RSP"].append(_gaf_image(rsp_clean))
        target["PPG"].append(_gaf_image(ppg_clean))
        target["RSP_raw"].append(rsp_raw)
        target["PPG_raw"].append(ppg_raw)
        target["class"].append(trial_class)
        target["level"].append(trial_level)
        target["subject"].append(subject)
        target["group"].append(group)

# Turn the per-trial lists into numpy arrays. Only the nine keys that both
# dictionaries share are converted; result_train["indices"] is left as the
# list it already is.
_ARRAY_FIELDS = (
    "fMRI", "RSP", "PPG", "RSP_raw", "PPG_raw",
    "class", "level", "subject", "group",
)

for _dataset in (result_train, result_test):
    for _field in _ARRAY_FIELDS:
        _dataset[_field] = np.array(_dataset[_field])

# Each dictionary is written as a single .npy file.
for _path, _dataset in (
    ("../data/processed/train_img.npy", result_train),
    ("../data/processed/test_img.npy", result_test),
):
    np.save(_path, _dataset)