In [50]:
from src.features.discriminability import discr_stat

import h5py
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
%matplotlib inline

from scipy import stats

from pathlib import Path
import os
import re
import pandas as pd

from tqdm import tqdm

In [5]:
# Name of the HDF5 dataset that is read from each file
h5_key = 'latent'

## Directory layout, expressed relative to the parent of the working directory
basedir = Path('..')
datadir = basedir.joinpath('data')
rawdir = datadir.joinpath('raw')
gccadir = datadir.joinpath('interim', 'gcca')

In [6]:
def get_files(path,
              level='(e|n)',
              subject='([0-9]{3})',
              task='(.+?)',
              filetype='h5',
              flag=''):
    """List the files in ``path`` whose names match the session-1 naming scheme.

    The expected filename layout is
    ``<level>_sub-<subject>_ses-1_task-<task><flag>.<filetype>``.
    ``level``, ``subject``, ``task``, ``flag`` and ``filetype`` are inserted
    into a regular expression verbatim, so each may be either a literal value
    or a regex fragment; the defaults are capturing groups.

    Parameters
    ----------
    path : str or path-like
        Directory to scan (non-recursive).
    level, subject, task : str
        Regex fragments for the corresponding filename fields.
    filetype : str
        File extension, without the leading dot.
    flag : str
        Optional suffix that follows the task name (e.g. ``'_gcca'``).

    Returns
    -------
    list of (str, tuple)
        One ``(filename, groups)`` pair per matching file, sorted by
        filename. ``groups`` holds the regex capture groups, so its length
        depends on how many of the arguments contain a capturing group.
    """
    # Raw f-string: in a non-raw literal `\.` is an invalid escape sequence.
    # NOTE: the pattern is anchored at the start only, so trailing characters
    # after the extension are still accepted.
    pattern = re.compile(
        rf'^{level}_sub-{subject}_ses-1_task-{task}{flag}\.{filetype}'
    )

    files = []
    # os.listdir returns entries in arbitrary order; sort for reproducibility.
    for fname in sorted(os.listdir(path)):
        match = pattern.search(fname)
        if match:
            files.append((fname, match.groups()))

    return files

In [7]:
# Task names, as they appear after `task-` in the filenames
tasks = [
    'restingstate',
    'openmonitoring',
    'compassion',
]

# Level prefixes of the filenames (presumably experienced / novice)
levels = [
    'e',
    'n',
]

In [124]:
## Get filenames for each task, novice vs. experienced
## Load a single set of latents (column 0 of the `h5_key` dataset) per file

latents = []
labels_lt = []  # combined '<level>_<task>' label per file
labels_l = []   # level label per file
labels_t = []   # task label per file

# Not used by the loop below, which always reads column 0 only.
n_components = 1

for level in levels:
    for task in tasks:
        paths = get_files(path=gccadir, level=level, task=task, flag='_gcca')

        # Lower this to load only a subset of the files per group.
        n_load = len(paths)

        for path, subj in tqdm(paths[:n_load]):
            # The context manager closes the file even if the read fails.
            with h5py.File(gccadir / path, 'r') as h5f:
                # Slice the dataset directly so only column 0 is read.
                latent = h5f[h5_key][:, 0]

            latents.append(latent)
            labels_lt.append(f'{level}_{task}')
            labels_l.append(level)
            labels_t.append(task)

labels_lt = np.array(labels_lt)
labels_t = np.array(labels_t)
labels_l = np.array(labels_l)
latents = np.array(latents)

100%|██████████| 29/29 [00:02<00:00, 14.38it/s]
100%|██████████| 29/29 [00:01<00:00, 16.73it/s]
100%|██████████| 29/29 [00:00<00:00, 33.29it/s]
100%|██████████| 47/47 [00:01<00:00, 27.66it/s]
100%|██████████| 47/47 [00:02<00:00, 18.83it/s]
100%|██████████| 47/47 [00:02<00:00, 21.64it/s]


In [122]:
# Discriminability of all loaded latents with respect to the level labels.
# With return_rdfs=True, discr_stat returns a second value alongside the index.
discr_index, rdfs = discr_stat(
    latents,
    labels_l,
    return_rdfs=True,
)

In [132]:
# For each task, compute the discriminability index over the samples of that
# task only, using the combined '<level>_<task>' labels.
for task in tasks:
    ls = [f'{level}_{task}' for level in levels]
    # np.hstack needs a sequence, not a generator (deprecated, and an error in
    # newer NumPy). Indices stay grouped in the order of `ls`, as before.
    idx = np.hstack([np.flatnonzero(labels_lt == l) for l in ls])
    discr_index = discr_stat(latents[idx], labels_lt[idx])
    print(f'{", ".join(ls)}: Discr Index={discr_index}')

  This is separate from the ipykernel package so we can avoid doing imports until


e_restingstate, n_restingstate: Discr Index=0.5086847074252225
e_openmonitoring, n_openmonitoring: Discr Index=0.5280508846293704


  This is separate from the ipykernel package so we can avoid doing imports until
  This is separate from the ipykernel package so we can avoid doing imports until


e_compassion, n_compassion: Discr Index=0.4805657345317525


In [133]:
# For each level, compute the discriminability index over the samples of that
# level only, using the combined '<level>_<task>' labels.
for level in levels:
    ls = [f'{level}_{task}' for task in tasks]
    # np.hstack needs a sequence, not a generator (deprecated, and an error in
    # newer NumPy). Indices stay grouped in the order of `ls`, as before.
    idx = np.hstack([np.flatnonzero(labels_lt == l) for l in ls])
    discr_index = discr_stat(latents[idx], labels_lt[idx])
    # Report every label that went into the index, not just the first two.
    print(f'{", ".join(ls)}: Discr Index={discr_index}')

  This is separate from the ipykernel package so we can avoid doing imports until


e_restingstate, e_openmonitoring: Discr Index=0.49485448162618195


  This is separate from the ipykernel package so we can avoid doing imports until


n_restingstate, n_openmonitoring: Discr Index=0.49765780305863366
