In [15]:
from src.features.discriminability import discr_stat

import h5py
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
%matplotlib inline

from scipy import stats

from pathlib import Path
import os
import re
import pandas as pd

from tqdm import tqdm

from sklearn.metrics import euclidean_distances
import itertools

In [16]:
# Name of the HDF5 dataset that is read from every embedding file.
h5_key = 'latent'

# Directory layout, expressed relative to the current working directory.
basedir = Path('..')
datadir = basedir.joinpath('data')
rawdir = datadir.joinpath('raw')
gccadir = datadir.joinpath('interim', 'gcca250')

In [17]:
def get_files(path,
              level='(e|n)',
              subject='([0-9]{3})',
              task='(.+?)',
              filetype='h5',
              flag=''):
    """List the files in ``path`` whose names follow the session-1 naming scheme.

    A file name is accepted when it *starts with*
    ``{level}_sub-{subject}_ses-1_task-{task}{flag}.{filetype}``.  Every
    argument except ``path`` is spliced into the regular expression verbatim,
    so each one may be either a literal value (``level='e'``) or a regex
    fragment (the defaults, which are capture groups).

    Parameters
    ----------
    path : str or os.PathLike
        Directory to scan (not recursive).
    level, subject, task : str
        Regex fragments for the corresponding parts of the file name.
    filetype : str
        File extension without the leading dot.
    flag : str
        Optional text expected between the task name and the extension.

    Returns
    -------
    list of (str, tuple)
        ``(file_name, match.groups())`` for every matching file, sorted by
        file name.  The groups tuple only contains the fragments that were
        passed as capture groups.
    """
    # Raw f-string so that ``\.`` reaches the regex engine as an escaped dot
    # instead of relying on Python passing through an invalid string escape.
    pattern = re.compile(
        rf'^{level}_sub-{subject}_ses-1_task-{task}{flag}\.{filetype}'
    )

    files = []
    # os.listdir() yields entries in arbitrary order; sorting keeps the sample
    # order (and everything derived from it) reproducible across machines.
    for fname in sorted(os.listdir(path)):
        match = pattern.search(fname)
        if match:
            files.append((fname, match.groups()))

    return files

In [18]:
# Task names; each one is substituted into the file-name pattern when the
# embedding files are looked up.
tasks = ['restingstate', 'openmonitoring', 'compassion']
# Group codes used as the file-name prefix
# (presumably 'e' = experienced, 'n' = novice -- confirm against the dataset).
levels = ['e', 'n']

In [19]:
## Get filenames for each task, novice vs. experienced
## Load a single set of latents (the first column of each file's dataset)

# Parallel per-file accumulators: the loaded column plus three labelings of
# the same file (combined "level_task", level only, task only).
latents = []
labels_lt = []
labels_l = []
labels_t = []

# NOTE(review): not used below -- only column 0 of each dataset is read.
n_components = 1

for level in levels:
    for task in tasks:
        paths = get_files(path=gccadir, level=level, task=task, flag='_gcca')

        # Lower this to load only the first few files while experimenting.
        n_load = len(paths)

        for path, subj in tqdm(paths[:n_load]):
            # The context manager closes the file even if the read raises,
            # and slicing the dataset directly avoids materialising every
            # column just to keep the first one.
            with h5py.File(gccadir / path, 'r') as h5f:
                latent = h5f[h5_key][:, 0]

            latents.append(latent)
            labels_lt.append(f'{level}_{task}')
            labels_l.append(level)
            labels_t.append(task)

# Convert to arrays so the labels can be used for boolean / fancy indexing.
labels_lt = np.array(labels_lt)
labels_t = np.array(labels_t)
labels_l = np.array(labels_l)
latents = np.array(latents)

100%|██████████| 29/29 [00:00<00:00, 62.13it/s]
100%|██████████| 29/29 [00:00<00:00, 67.61it/s]
100%|██████████| 29/29 [00:00<00:00, 58.89it/s]
100%|██████████| 47/47 [00:00<00:00, 78.35it/s]
100%|██████████| 47/47 [00:00<00:00, 80.09it/s]
100%|██████████| 47/47 [00:00<00:00, 89.03it/s]


In [6]:
# Discriminability statistic of the loaded latents with respect to the
# level-only labels.  With return_rdfs=True the helper returns a second value
# (`rdfs`); NOTE(review): its meaning is defined in
# src.features.discriminability and is not visible from this notebook.
discr_index, rdfs = discr_stat(latents,labels_l, return_rdfs=True)

In [132]:
# For each task, compute the discriminability statistic over the samples of
# that task only, using the combined "level_task" labels as classes.
for task in tasks:
    ls = [f'{level}_{task}' for level in levels]
    # Build the index from a list of arrays: handing np.hstack a generator is
    # deprecated (it is what triggered the warning this cell used to emit).
    # Indices stay grouped in the order of `ls`, as before.
    idx = np.concatenate([np.flatnonzero(np.isin(labels_lt, l)) for l in ls])
    discr_index = discr_stat(latents[idx],labels_lt[idx])
    # Joining all labels gives the same text as before for two levels.
    print(f'{", ".join(ls)}: Discr Index={discr_index}')

  This is separate from the ipykernel package so we can avoid doing imports until


e_restingstate, n_restingstate: Discr Index=0.5086847074252225
e_openmonitoring, n_openmonitoring: Discr Index=0.5280508846293704


  This is separate from the ipykernel package so we can avoid doing imports until
  This is separate from the ipykernel package so we can avoid doing imports until


e_compassion, n_compassion: Discr Index=0.4805657345317525


In [133]:
# For each level, compute the discriminability statistic over the samples of
# that level only, using the combined "level_task" labels as classes.
for level in levels:
    ls = [f'{level}_{task}' for task in tasks]
    # Build the index from a list of arrays: handing np.hstack a generator is
    # deprecated (it is what triggered the warning this cell used to emit).
    # Indices stay grouped in the order of `ls`, as before.
    idx = np.concatenate([np.flatnonzero(np.isin(labels_lt, l)) for l in ls])
    discr_index = discr_stat(latents[idx],labels_lt[idx])
    # Report every class that entered the statistic; the previous message
    # named only the first two even though all tasks were included.
    print(f'{", ".join(ls)}: Discr Index={discr_index}')

  This is separate from the ipykernel package so we can avoid doing imports until


e_restingstate, e_openmonitoring: Discr Index=0.49485448162618195


  This is separate from the ipykernel package so we can avoid doing imports until


n_restingstate, n_openmonitoring: Discr Index=0.49765780305863366


## Save Distance Matrices for R

In [20]:
# Output directory for the distance-matrix / label CSV files written below.
distancedir = datadir / 'interim' / 'gcca250_distances'

In [28]:
def get_save_classes(labels, level1='',task1='',level2='',task2='',task3=''):
    """Save a distance matrix and class labels for two or three label groups.

    Groups are selected by substring match: a sample belongs to a group when
    the group's key occurs anywhere in its label.  The keys are
    ``f'{level1}_{task1}'``, ``f'{level2}_{task2}'`` and, only when ``task3``
    is non-empty, ``f'_{task3}'``.  Leaving a level or a task empty therefore
    widens the match (e.g. ``'e_'`` or ``'_compassion'``).  A sample whose
    label contains more than one key is included once per matching group.

    Reads the module-level ``latents`` array, indexed by position in
    ``labels``, and writes two header-less, index-less CSV files into the
    module-level ``distancedir`` (created if missing):

    * ``<keys joined by '_'>_distances.csv`` -- Euclidean distances between
      the selected rows of ``latents``, ordered group 1, group 2[, group 3];
    * ``<keys joined by '_'>_labels.csv`` -- one row per selected sample
      holding its group number (1, 2 or 3).

    The size of every group is printed.

    Parameters
    ----------
    labels : sequence of str
        One label per row of ``latents``.
    level1, task1, level2, task2 : str
        Parts of the keys for the first and second group.
    task3 : str
        Task of the optional third group; ``''`` disables it.

    Raises
    ------
    ValueError
        If no label matches any of the keys.
    """
    ## Create search keys
    keys = [f'{level1}_{task1}', f'{level2}_{task2}']
    if task3 != '':
        keys.append(f'_{task3}')

    ## Get the indices of every group
    groups = []
    for key in keys:
        members = [i for i, label in enumerate(labels) if key in label]
        print(f'Len of {key}: {len(members)}')
        groups.append(members)

    # Force an integer dtype: stacking an empty group would otherwise yield a
    # float array, which NumPy refuses to use as an index.
    idxs = np.concatenate([np.asarray(members, dtype=int) for members in groups])
    if idxs.size == 0:
        raise ValueError(f'No labels matched any of the keys {keys}')

    ## Get relevant stuff
    distances = euclidean_distances(latents[idxs])
    labels2 = [str(number)
               for number, members in enumerate(groups, start=1)
               for _ in members]

    ## Save relevant stuff
    distancedir.mkdir(parents=True, exist_ok=True)
    stem = '_'.join(keys)
    pd.DataFrame(distances).to_csv(distancedir / f'{stem}_distances.csv', header=False, index=False)
    pd.DataFrame(labels2).to_csv(distancedir / f'{stem}_labels.csv', header=False, index=False)

In [78]:
## Inter task (3)
## For each task, export the levels[0] samples of that task (class 1) against
## the levels[1] samples of the same task (class 2): one comparison per task.
for task in tasks:
    get_save_classes(labels=labels_lt, level1=levels[0], level2=levels[1], task1=task, task2=task)

Len of e_restingstate: 29
Len of n_restingstate: 47
Len of e_openmonitoring: 29
Len of n_openmonitoring: 47
Len of e_compassion: 29
Len of n_compassion: 47


In [79]:
## Inter experience (2)
## Within each level, export every unordered pair of tasks
## (len(levels) * C(len(tasks), 2) comparisons).
for level in levels:
    # Qualified name: only the `itertools` module is imported, so a bare
    # `combinations` raises NameError on a fresh kernel.
    for t1,t2 in itertools.combinations(tasks, 2):
        get_save_classes(labels=labels_lt, level1=level, level2=level, task1=t1, task2=t2)

Len of e_restingstate: 29
Len of e_openmonitoring: 29
Len of e_restingstate: 29
Len of e_compassion: 29
Len of e_openmonitoring: 29
Len of e_compassion: 29
Len of n_restingstate: 47
Len of n_openmonitoring: 47
Len of n_restingstate: 47
Len of n_compassion: 47
Len of n_openmonitoring: 47
Len of n_compassion: 47


In [80]:
## Pairwise, across levels and across tasks
## For every unordered pair of distinct tasks, export levels[0] on one task
## against levels[1] on the other, in both assignments
## (2 * C(len(tasks), 2) comparisons; same-task pairs are not produced here).
# Qualified name: only the `itertools` module is imported, so a bare
# `combinations` raises NameError on a fresh kernel.
for t1,t2 in itertools.combinations(tasks, 2):
    get_save_classes(labels=labels_lt, level1=levels[0], level2=levels[1], task1=t1, task2=t2)
    get_save_classes(labels=labels_lt, level1=levels[0], level2=levels[1], task1=t2, task2=t1)

Len of e_restingstate: 29
Len of n_openmonitoring: 47
Len of e_openmonitoring: 29
Len of n_restingstate: 47
Len of e_restingstate: 29
Len of n_compassion: 47
Len of e_compassion: 29
Len of n_restingstate: 47
Len of e_openmonitoring: 29
Len of n_compassion: 47
Len of e_compassion: 29
Len of n_openmonitoring: 47


In [11]:
## Novice vs. Expert (1)
## Tasks are left empty, so the keys are just '<level>_' and each class pools
## every label containing that prefix, regardless of task: a single comparison.
get_save_classes(labels=labels_lt, level1=levels[0], level2=levels[1])

Len of e_: 87
Len of n_: 141


In [23]:
## Inter-trait (3)
## Levels are left empty, so the keys are '_<task>' and each class pools every
## label containing that task, regardless of level: one comparison per
## unordered pair of tasks.
for t1,t2 in itertools.combinations(tasks, 2):
    get_save_classes(labels=labels_lt, task1=t1, task2=t2)

Len of _restingstate: 76
Len of _openmonitoring: 76
Len of _restingstate: 76
Len of _compassion: 76
Len of _openmonitoring: 76
Len of _compassion: 76


In [29]:
## Triplet inter-trait (1)
## All three tasks at once, levels left empty: classes 1, 2 and 3 are the
## labels containing tasks[0], tasks[1] and tasks[2] respectively.
get_save_classes(labels=labels_lt, task1=tasks[0], task2=tasks[1], task3=tasks[2])

Len of _restingstate: 76
Len of _openmonitoring: 76
Len of _compassion: 76
