In [7]:
import pandas as pd
import os.path as op
from os import sep
import nibabel as nb
import numpy as np
import json
import trimesh
import open3d as o3d
import matplotlib.pylab as plt
from matplotlib import cm, colors
from utilities import files
import new_files
import tqdm.auto as tqdm
from copy import copy
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler, RobustScaler, minmax_scale
from sklearn.manifold import MDS
from scipy.spatial.distance import euclidean
from brain_tools import *
import pickle

In [11]:
def plot_csd(smooth_csd, list_ROI_vertices, bb_path, times, ax, cb=True, cmap="RdBu_r", vmin_vmax=None):
    """
    Draw a CSD image on `ax` and overlay the layer boundaries read from `bb_path`.

    Parameters
    ----------
    smooth_csd : array-like
        2-D data handed to ``ax.imshow`` (presumably depth x time - confirm
        against the caller).
    list_ROI_vertices : sequence of int or index array
        Vertex indices used to select entries from every per-layer list
        stored in the JSON file.
    bb_path : str
        Path to a JSON file holding a dict whose values are per-vertex
        sequences, one entry per layer, in file order.
    times : sequence of float
        Time axis; only the first and last values are used (image extent and
        label placement).
    ax : matplotlib.axes.Axes
        Axes to draw on.
    cb : bool, default True
        Add a colorbar for the image to the figure owning `ax`.
    cmap : str, default "RdBu_r"
        Colormap name passed to ``imshow``.
    vmin_vmax : sequence of two floats or None, default None
        Explicit ``(vmin, vmax)`` colour limits. When None the limits are
        symmetric around zero at the largest absolute value of `smooth_csd`.

    Notes
    -----
    The colour scale is a ``TwoSlopeNorm`` centred on 0, so the limits must
    satisfy ``vmin < 0 < vmax``.
    """
    layer_labels = ["I", "II", "III", "IV", "V", "VI"]
    with open(bb_path, "r") as fp:
        bb = json.load(fp)

    # Keep only the ROI vertices of every layer, then summarise each layer.
    roi_layers = [np.array(values)[list_ROI_vertices] for values in bb.values()]
    bb_mean = [np.mean(layer) for layer in roi_layers]
    bb_std = [np.std(layer) for layer in roi_layers]

    # `is None` (not `== None`): an array-valued vmin_vmax would otherwise be
    # compared element-wise and make the `if` raise.
    if vmin_vmax is None:
        max_smooth = np.max(np.abs(smooth_csd))
        vmin, vmax = -max_smooth, max_smooth
    else:
        vmin, vmax = vmin_vmax[0], vmin_vmax[1]
    divnorm = colors.TwoSlopeNorm(vmin=vmin, vcenter=0, vmax=vmax)

    extent = [times[0], times[-1], 1, 0]
    csd_imshow = ax.imshow(
        smooth_csd, norm=divnorm, origin="lower",
        aspect="auto", extent=extent,
        cmap=cmap
    )
    ax.set_ylim(1, 0)

    # One dashed line per cumulative layer boundary, with a +/- 1 SD band.
    for l_ix, th in enumerate(np.cumsum(bb_mean)):
        ax.axhline(th, linestyle=(0, (5, 5)), c="black", lw=0.5)
        ax.axhspan(th - bb_std[l_ix], th + bb_std[l_ix], alpha=0.05, color="black", lw=0)
        # Only six labels exist; extra entries in the file still get a line.
        if l_ix < len(layer_labels):
            ax.annotate(layer_labels[l_ix], [times[0] + 0.01, th - 0.01], size=15)

    # Operate on the figure that owns `ax`, not on whichever figure happens to
    # be current in pyplot.
    if cb:
        ax.figure.colorbar(csd_imshow, ax=ax)
    ax.figure.tight_layout()

In [2]:
# Root directory that is searched for the CSD arrays and the per-subject info files.
dataset_location = "/home/common/bonaiuto/multiburst/derivatives/processed"

# Per epoch type: [time axis, [start, end] of the window fed to the PCA,
# upper limit of the baseline period]. The processing loop unpacks the three
# entries in exactly this order.
epoch_types = dict(
    visual=[np.linspace(-0.2, 0.8, num=601), [0.0, 0.2], -0.01],
    motor=[np.linspace(-0.5, 0.5, num=601), [-0.2, 0.2], -0.2],
)

In [4]:
# File-search helper from the project-local `new_files` module; its
# `get_files` method is what the following cell uses to locate inputs.
dir_search = new_files.Files()

In [5]:
# Locate the inputs under `dataset_location`: the CSD arrays (*.npy) and the
# info files (*.json), each selected by its file-name prefix.
csd_files = dir_search.get_files(dataset_location, "*.npy", prefix="time_CSD_autoreject")
json_files = dir_search.get_files(dataset_location, "*.json", prefix="info")

# Index every info file by the third-from-last component of its path
# (presumably the subject directory - verify against the dataset layout).
info_dict = dict()
for info_path in json_files:
    subject_id = info_path.split(sep)[-3]
    with open(info_path, "r") as handle:
        info_dict[subject_id] = json.load(handle)

In [29]:
# Per-file pipeline. For every CSD file:
#   1. pick the ROIs that match the epoch type and collect their vertices,
#   2. plot the raw and the outlier-cleaned CSD traces inside the PCA window,
#   3. run a PCA per ROI and plot explained variance and the leading eigenvectors,
#   4. for each leading PC, average the smoothed CSD inside 5-percentile bins of
#      the PC scores, baseline-correct it and plot it, plus a score histogram.
# A failure in one file is reported and the loop moves on to the next file.

# Destination of every figure written below.
output_dir = "/home/mszul/git/DANC_multilayer_laminar/output"
n_plot_pcs = 4                               # leading components that get figures
n_bins = 100                                 # histogram / colour-map resolution
prc = np.linspace(0, 100, num=21)            # percentile edges in steps of 5
prc_bounds = list(zip(prc[:-1], prc[1:]))    # (lower, upper) percentile pairs

# NOTE(review): the first five files are skipped - this looks like a leftover
# from resuming an interrupted run; confirm before relying on a full re-run.
for csd_file in csd_files[5:]:
    try:
        epoch_type = [key for key in epoch_types.keys() if key in csd_file][0]
        subject = csd_file.split(sep)[-4]
        core_name = csd_file.split(sep)[-1].split("_")[-1].split(".")[0]
        info = info_dict[subject]
        atlas = pd.read_csv(info["atlas"])
        atlas_labels = np.load(info["atlas_colors_path"])
        visual_ROI = atlas.loc[(atlas.PRIMARY_SECTION == 1)].USED_LABEL.values
        visual_ROI = np.hstack([visual_ROI, [lab for lab in atlas.USED_LABEL.values if "_MT_" in lab]])
        sensorimotor_ROI = ["L_4_ROI", "R_4_ROI"]
        labels_xxx = {
            "visual": visual_ROI,
            "motor": sensorimotor_ROI
        }
        ROI_labels = labels_xxx[epoch_type]
        vertex_num = np.arange(atlas_labels.shape[0])
        # Decode the per-vertex byte labels once instead of once per ROI.
        decoded_labels = np.array([al.decode("utf-8") for al in atlas_labels], dtype=str)
        ROI_vertices = {roi: vertex_num[decoded_labels == roi] for roi in ROI_labels}
        times, pca_sel, baseline_lim = epoch_types[epoch_type]

        csd_data = np.load(csd_file)
        # One stacked array per ROI; the first axis runs over the ROI vertices.
        true_CSD = {roi: csd_data[ROI_vertices[roi], :, :] for roi in ROI_labels}

        pca_time_sel = np.where((times >= pca_sel[0]) & (times <= pca_sel[1]))[0]
        # Flatten everything but the vertex axis -> one feature row per vertex.
        pca_csd_dataset = {
            roi: true_CSD[roi][:, :, pca_time_sel].reshape(true_CSD[roi].shape[0], -1)
            for roi in ROI_labels
        }

        # ---- raw traces -------------------------------------------------
        # ceil + squeeze=False: an odd ROI count still gets a panel per ROI.
        rows = int(np.ceil(len(ROI_labels) / 2))
        f, ax = plt.subplots(rows, 2, figsize=(15, 3*rows), facecolor="white", squeeze=False)
        f.suptitle("RAW")
        ax = ax.flatten()
        for roi_ix, roi in enumerate(ROI_labels):
            ax[roi_ix].set_title("{}; {} vertices".format(roi, ROI_vertices[roi].shape[0]))
            ax[roi_ix].plot(pca_csd_dataset[roi].T)
        f.tight_layout()
        f.savefig(op.join(output_dir, "{}_raw_csd.png".format(core_name)), dpi=300)
        plt.close(f)

        # ---- traces after removing NaN / high-variance vertices ----------
        f, ax = plt.subplots(rows, 2, figsize=(15, 3*rows), facecolor="white", squeeze=False)
        f.suptitle("CLEANED")
        ax = ax.flatten()
        ROI_outlier_map = {}
        for roi_ix, roi in enumerate(ROI_labels):
            metric = pca_csd_dataset[roi].std(axis=1)
            nan_map = np.isnan(metric)
            # Vertices whose SD exceeds the 99.995th percentile count as outliers.
            upper_limit = np.percentile(metric[~nan_map], 99.995)
            outlier_map = metric > upper_limit
            unwanted_map = nan_map | outlier_map
            ax[roi_ix].set_title(
                "{}; {} vertices; {} NaNs and {} outliers removed".format(
                    roi, ROI_vertices[roi].shape[0], nan_map.sum(), outlier_map.sum()
                )
            )
            ax[roi_ix].plot(pca_csd_dataset[roi][~unwanted_map].T)
            ROI_outlier_map[roi] = unwanted_map
        f.tight_layout()
        f.savefig(op.join(output_dir, "{}_cleaned_csd.png".format(core_name)), dpi=300)
        plt.close(f)

        # ---- PCA per ROI ---------------------------------------------------
        # Stored as [scores, components, explained variance ratio].
        PCA_results = {}
        for roi in ROI_labels:
            ds = pca_csd_dataset[roi][~ROI_outlier_map[roi]]
            ds = RobustScaler().fit_transform(ds)
            # PCA cannot return more components than min(n_samples, n_features).
            pca = PCA(n_components=min(30, min(ds.shape)))
            ds_pca = pca.fit_transform(ds)
            PCA_results[roi] = [ds_pca, pca.components_, pca.explained_variance_ratio_]

        f, ax = plt.subplots(rows, 2, figsize=(20, 4*rows), facecolor="white", squeeze=False)
        f.suptitle("VAR_EXP_RATIOS")
        ax = ax.flatten()
        for roi_ix, roi in enumerate(ROI_labels):
            var_ratio = PCA_results[roi][2]
            ax[roi_ix].bar(np.arange(1, var_ratio.shape[0]+1), var_ratio)
            ax[roi_ix].set_title(roi)
        f.tight_layout()
        f.savefig(op.join(output_dir, "{}_var_exp_ratio.png".format(core_name)), dpi=300)
        plt.close(f)

        # ---- leading eigenvectors shown as CSD images ----------------------
        rows = len(ROI_labels)
        f, ax = plt.subplots(rows, n_plot_pcs, figsize=(20, 4*rows), facecolor="white", squeeze=False)
        for column in range(n_plot_pcs):
            ax[0, column].set_title("PC {}".format(column+1))
        for row, roi in enumerate(ROI_labels):
            ax[row, 0].set_ylabel(roi)
            for column in range(n_plot_pcs):
                # Undo the flattening: split the component into n_surf rows.
                eigenvector_csd = smooth_csd(
                    np.array(np.split(PCA_results[roi][1][column], info["n_surf"])),
                    info["n_surf"]
                )
                plot_csd(
                    eigenvector_csd, ROI_vertices[roi], info["big_brain_layers_path"],
                    times[pca_time_sel], ax[row, column], cmap="viridis"
                )
        f.tight_layout()
        f.savefig(op.join(output_dir, "{}_CSD_smooth_eigenvectors.png".format(core_name)), dpi=300)
        plt.close(f)

        # ---- smoothed CSD of every retained vertex --------------------------
        SMOOTH_CSD = {}
        for roi in ROI_labels:
            smoothed = [smooth_csd(csd_data[rv, :, :], info["n_surf"]) for rv in ROI_vertices[roi]]
            SMOOTH_CSD[roi] = np.array(smoothed)[~ROI_outlier_map[roi]]

        baseline_sel = times < baseline_lim
        last_bin = len(prc_bounds) - 1

        for pc_comp in range(n_plot_pcs):
            PRC_ROI_CSD = {}
            COLOR_MAP_ROI = {}
            SC_LOG_ROI = {}
            for roi in ROI_labels:
                scores = PCA_results[roi][0][:, pc_comp]
                # Median-centred log10 magnitude of the scores (histogram input).
                sc_log = np.log10(np.abs(scores))
                SC_LOG_ROI[roi] = sc_log - np.median(sc_log)

                csd_bounds = []
                for ix, pb in enumerate(prc_bounds):
                    lower, upper = np.percentile(scores, pb)
                    if ix == last_bin:
                        # Closed upper edge so the maximum score lands in a bin.
                        in_bin = (scores >= lower) & (scores <= upper)
                    else:
                        in_bin = (scores >= lower) & (scores < upper)
                    mean_smooth_csd = np.mean(SMOOTH_CSD[roi][in_bin], axis=0)
                    baseline = mean_smooth_csd[:, baseline_sel].mean(axis=1, keepdims=True)
                    csd_bounds.append(mean_smooth_csd - baseline)
                PRC_ROI_CSD[roi] = np.array(csd_bounds)
                COLOR_MAP_ROI[roi] = data_to_rgb(
                    scores, n_bins, "afmhot_r", np.percentile(scores, 50),
                    np.percentile(scores, 100), vcenter=np.percentile(scores, 75)
                )

            for roi in ROI_labels:
                # nan-aware limits: an empty bin averages to NaN and must not
                # poison the colour scale shared by the 20 panels.
                v_mm = [np.nanmin(PRC_ROI_CSD[roi]), np.nanmax(PRC_ROI_CSD[roi])]
                f, ax = plt.subplots(5, 4, figsize=(18, 20), facecolor="white")
                ax = ax.flatten()
                for ix, pb in enumerate(prc_bounds):
                    ax[ix].set_title("{} - {} percentile".format(*pb))
                    plot_csd(
                        PRC_ROI_CSD[roi][ix], ROI_vertices[roi], info["big_brain_layers_path"],
                        times, ax[ix], vmin_vmax=v_mm
                    )
                f.suptitle(roi, y=1.01)
                f.tight_layout()
                f.savefig(
                    op.join(
                        output_dir,
                        "{}_PC_{}_{}_CSD_mean_baseline.png".format(core_name, str(pc_comp+1).zfill(2), roi)
                    ),
                    dpi=300
                )
                plt.close(f)

            # ---- histogram of the log scores with the percentile edges --------
            f, ax = plt.subplots(
                len(ROI_labels), 1, figsize=(10, 4*len(ROI_labels)),
                facecolor="white", squeeze=False
            )
            ax = ax.flatten()
            for xix, roi in enumerate(ROI_labels):
                sc_log = SC_LOG_ROI[roi]
                datacolors, mappable = data_to_rgb(
                    sc_log, n_bins, "afmhot_r", np.percentile(sc_log, 75),
                    np.percentile(sc_log, 100), vcenter=np.percentile(sc_log, 95), ret_map=True
                )
                hist, bins, barlist = ax[xix].hist(sc_log, bins=n_bins, edgecolor='black', linewidth=0.5)
                # Colour every bar by the value at its right edge.
                for ix, bar in enumerate(barlist):
                    plt.setp(bar, "facecolor", mappable.to_rgba(bins[ix+1]))

                for edge in prc:
                    ax[xix].axvline(np.percentile(sc_log, edge), lw=0.5, c="red")

                ax[xix].set_title("ROI: {}".format(roi))
            f.tight_layout()
            f.savefig(
                op.join(output_dir, "{}_PC_{}_colormap.png".format(core_name, str(pc_comp+1).zfill(2))),
                dpi=300
            )
            plt.close(f)

            # filename = "/home/mszul/git/DANC_multilayer_laminar/output/{}_PC_{}_csd_means.pickle".format(core_name, str(pc_comp+1).zfill(2))
            # with open(filename, 'wb') as handle:
            #     pickle.dump(PRC_ROI_CSD, handle, protocol=pickle.HIGHEST_PROTOCOL)
    except Exception as err:
        # Best effort per file: report the failure instead of hiding it, and
        # release any figure left open. KeyboardInterrupt is no longer swallowed.
        plt.close("all")
        print("Skipping {}: {!r}".format(csd_file, err))

In [27]:
# Display the full list of CSD files found by the file search.
csd_files

['/home/common/bonaiuto/multiburst/derivatives/processed/sub-001/multilayer_11/inverted/time_CSD_autoreject-sub-001-ses-01-001-motor-epo.npy',
 '/home/common/bonaiuto/multiburst/derivatives/processed/sub-001/multilayer_11/inverted/time_CSD_autoreject-sub-001-ses-01-001-visual-epo.npy',
 '/home/common/bonaiuto/multiburst/derivatives/processed/sub-001/multilayer_11/inverted/time_CSD_autoreject-sub-001-ses-01-002-motor-epo.npy',
 '/home/common/bonaiuto/multiburst/derivatives/processed/sub-001/multilayer_11/inverted/time_CSD_autoreject-sub-001-ses-01-002-visual-epo.npy',
 '/home/common/bonaiuto/multiburst/derivatives/processed/sub-001/multilayer_11/inverted/time_CSD_autoreject-sub-001-ses-01-003-motor-epo.npy',
 '/home/common/bonaiuto/multiburst/derivatives/processed/sub-001/multilayer_11/inverted/time_CSD_autoreject-sub-001-ses-01-003-visual-epo.npy',
 '/home/common/bonaiuto/multiburst/derivatives/processed/sub-002/multilayer_11/inverted/time_CSD_autoreject-sub-002-ses-03-001-motor-epo.np

In [28]:
# Display the loop variable left behind by the processing cell, i.e. the last
# file the loop visited.
csd_file

'/home/common/bonaiuto/multiburst/derivatives/processed/sub-002/multilayer_11/inverted/time_CSD_autoreject-sub-002-ses-03-001-visual-epo.npy'