In [None]:
# Preprocessing

In [None]:
# General stuff
import scipy.io as sio
import numpy as np
import matplotlib.pyplot as plt
import os,re
import time
import nibabel as nib
from subprocess import Popen, PIPE
from os.path import join as opj
import math
from IPython.core.debugger import Tracer

# sklearn stuff
from sklearn.decomposition import TruncatedSVD,FastICA,CanICA,DictLearning
from sklearn.preprocessing import scale
import sklearn.metrics.cluster as metrics
from sklearn.cluster import AgglomerativeClustering
from sklearn.cluster.hierarchical import _hc_cut # Internal function to cut ward tree, helps speed up things a lot
from sklearn.utils import resample
from sklearn.model_selection import KFold

# personal functions
from importlib import reload
import myutils as mu
reload(mu)

# Machine-specific locations. Each entry is
# (marker directory, code directory, data directory, FreeSurfer subjects directory).
# Entries are probed in order and the first one whose marker directory exists is used.
_known_locations=(
    # On NRU server
    ('/data1/vbeliveau/',
     '/data1/vbeliveau/5HT_clustering',
     '/data1/vbeliveau/5HT_clustering_data',
     '/usr/local/nru/freesurfer/subjects'),
    # On laptop
    ('C:/Users/vbeliveau/',
     'C:/Users/vbeliveau/Documents/GitHub/5HT_clustering',
     'C:/Users/vbeliveau/Documents/5HT_clustering_data',
     'C:/Users/vbeliveau/Downloads/'),
)
_location=next((loc for loc in _known_locations if os.path.exists(loc[0])),None)
if _location is None:
    # None of the known machines was recognised
    raise ValueError('Unknown location')
cluster_code,cluster_data,subjects_dir=_location[1:]
    
# Analysis directories (all located under cluster_data)
surf_data=opj(cluster_data,'surf_data')
BPnd_data=opj(cluster_data,'BPnd')
MFA_data=opj(cluster_data,'MFA')
MFA_preproc=opj(MFA_data,'preproc')
# mu.assert_dir presumably makes sure the directory exists -- see myutils.
# Parent (MFA_data) is handled before its child (MFA_preproc).
for _dir in (surf_data,BPnd_data,MFA_data,MFA_preproc):
    mu.assert_dir(_dir)

# Default hemispheres and tracers; individual cells below redefine them as needed
hemi_type=['lh','rh']
tracers=['cumi','dasb','sb','az','C36']

# Subject information stored in a MATLAB file next to the code
info_path=opj(cluster_code,'info_alltracers_base_healthy_hrrt.mat')
info=sio.loadmat(info_path)

# info['petID'] is iterated three levels deep to obtain a flat list of IDs
petID=[
    pet_id
    for row in info['petID']
    for cell in row
    for pet_id in cell
]

In [None]:
# Create cortex masks

hemi_type=['lh','rh']
targ_list=['fsaverage','fsaverage5']

# One call per (target surface, hemisphere); the output path handed to the helper
# is <surf_data>/mask.<targ>.<hemi>
for targ in targ_list:
    for hemi in hemi_type:
        mask_out=opj(surf_data,'.'.join(['mask',targ,hemi]))
        mu.fs_create_cortex_mask(subjects_dir,targ,hemi,
                                 validate=True,verbose=True,save_out=mask_out)

In [None]:
# Create neighbor lists

hemi_type=['lh','rh']
targ_list=['fsaverage5']
out_list=['matrix','list']

for targ in targ_list:
    for hemi in hemi_type:
        # Inputs: pial surface of the target subject and the cortex mask saved in surf_data
        surf_file=opj(subjects_dir,targ,'surf','%s.pial' % hemi)
        fmask=opj(surf_data,'mask.%s.%s' % (targ,hemi))
        # NOTE(review): the same save_out is used for every out_type -- presumably
        # mu.fs_surf_neighborhood derives a distinct file name per out_type; confirm,
        # otherwise the second output overwrites the first.
        save_out=opj(surf_data,'neigh.%s.%s' % (targ,hemi))
        for out_type in out_list:
            mu.fs_surf_neighborhood(surf_file,mask=fmask,verbose=True,
                                    out_type=out_type,save_out=save_out)

In [None]:
# Convert already existing BPnd maps

tracers=['cumi','dasb','sb','az','C36']
# tracers=['cumi']
# hemi_type=['lh','rh']
hemi_type=['lh']
targ='fsaverage5'
sm='10'

convert=True        # Resample the BPnd maps
convert_mean=False  # Resample the mean BPnd maps

# Directory holding the source maps sampled on fsaverage
bpnd_src_dir='/data1/vbeliveau/atlas/analyses/bmax_maps/data.nopvc.surf/'

maps_dest=opj(BPnd_data,'maps')
mu.assert_dir(maps_dest)


def _surf2surf_bpnd(tracer,hemi,targ,sm,surf_dest,log_dest,prefix=''):
    """Resample one BPnd map from fsaverage to `targ` with mri_surf2surf.

    Parameters
    ----------
    tracer : str
        Tracer name, used to build the source file name.
    hemi : str
        Hemisphere, passed as both --srchemi and --trghemi.
    targ : str
        Target subject, passed as --trgsubject.
    sm : str
        Smoothing label appearing in the source and target file names.
    surf_dest : str
        Directory receiving the resampled map.
    log_dest : str
        Directory receiving the log (stdout and stderr of the command).
    prefix : str
        '' for the regular maps, 'mean.' for the mean maps. It is inserted in the
        source, target and log file names.

    Returns
    -------
    int
        Exit code of mri_surf2surf.
    """
    sval=(bpnd_src_dir + tracer + '.' + prefix + 'bpnd.mrtm2.nopvc.fsaverage.' +
          hemi + '.sm' + sm + '.nii.gz')
    tval=opj(surf_dest,prefix + 'mrtm2.nopvc.' + targ + '.' + hemi + '.sm' + sm + '.nii.gz')
    log_path=opj(log_dest,'mri_surf2surf.' + prefix + hemi + '.log')
    cmd=['mri_surf2surf','--srcsubject','fsaverage','--trgsubject',targ,
         '--srchemi',hemi,'--trghemi',hemi,'--sval',sval,'--tval',tval]

    # The context manager closes the log even when the command cannot be launched
    with open(log_path,'w') as log_file:
        p=Popen(cmd, stdout=log_file, stderr=log_file)
        p.communicate() # Wait for the command to finish before moving on

    # Do not let a failed conversion go unnoticed
    if p.returncode != 0:
        print('WARNING: mri_surf2surf exited with code ' + str(p.returncode) +
              ', see ' + log_path)
    return p.returncode


for tracer in tracers:

    print(tracer)

    surf_dest=maps_dest + '/' + tracer
    mu.assert_dir(surf_dest)

    # The log directory is only needed when something is actually converted
    if convert or convert_mean:
        log_dest=opj(surf_dest,'log')
        mu.assert_dir(log_dest)

    # If BPnd was not preprocessed (e.g. fsaverage5), do it here.
    # Regular maps first, then mean maps, each for all requested hemispheres.
    for enabled,prefix in ((convert,''),(convert_mean,'mean.')):
        if not enabled:
            continue
        for hemi in hemi_type:
            # Convert BPnd from fsaverage to target
            _surf2surf_bpnd(tracer,hemi,targ,sm,surf_dest,log_dest,prefix=prefix)

In [None]:
# Concatenate mean BPnd maps

tracers=['cumi','dasb','sb','az','C36']
hemi_type=['lh','rh']
targ='fsaverage5'
sm='10'

for hemi in hemi_type:
    fmask=opj(surf_data,'mask.'+targ+'.'+hemi)
    # File name tail shared by the per-tracer inputs and the concatenated output
    suffix=targ + '.' + hemi + '.sm' + sm + '.nii.gz'

    # One slot per tracer, filled with the column-wise (axis=0) scaled mean map
    img=np.ndarray(len(tracers),dtype=object)
    for nt,tracer in enumerate(tracers):
        fname=opj(BPnd_data,'maps',tracer,'mean.mrtm2.nopvc.' + suffix)
        img[nt]=scale(mu.fs_load_surf_data(fname,mask=fmask),axis=0)

    # Tracers become the columns of the saved map
    fout=opj(BPnd_data,'maps','mean.scaled.concat.' + suffix)
    mu.fs_save_surf_data(np.column_stack(img),fout,mask=fmask)

In [None]:
# Sample TACs to fsaverage surface

tracers=['cumi','dasb','sb','az','C36']
# tracers=['cumi']
hemi_type=['lh','rh']
targ='fsaverage5'
smooth=['0','5','10']

import_pet='/data1/vbeliveau/atlas/import/PET'
proc_pet='/data2/FSproc/PET'

# Root directory of the sampled TACs (one sub-directory per tracer)
dest=opj(MFA_data,'surf_tacs')
mu.assert_dir(dest)

for tracer in tracers:

    tracer_dest=opj(dest,tracer)
    mu.assert_dir(tracer_dest)
    log_dest=opj(tracer_dest,'log')
    mu.assert_dir(log_dest)

    # Scans of this tracer: PET IDs starting with the tracer name.
    # The name is escaped so that it is always matched literally.
    subjlist=[item for item in petID if re.search('^'+re.escape(tracer),item) is not None]

    for subj in subjlist:

        # Input volume and registration only depend on the subject
        mov=opj(import_pet,subj,'tac.realigned.nii.gz')
        reg=opj(proc_pet,subj,'tac.realigned.wavg.GD.lta')

        for hemi in hemi_type:
            for sm in smooth:

                out=opj(tracer_dest,subj + '.' + hemi + '.' + targ + '.sm' + sm + '.nii.gz')
                cmd=['mri_vol2surf','--mov',mov,'--reg',reg,'--hemi',hemi,'--o',out,
                     '--trgsubject',targ,'--projfrac','0.5','--cortex']
                if sm != '0':
                    # Surface smoothing is only requested for non-zero FWHM
                    cmd.extend(['--surf-fwhm',sm])

                log_path=opj(log_dest,'mri_vol2surf.' + subj + '.' + hemi + '.sm' +  sm + '.log')
                # The context manager closes the log even when the command cannot be launched
                with open(log_path,'w') as log_file:
                    p=Popen(cmd, stdout=log_file, stderr=log_file)
                    p.communicate() # This makes sure we wait for command to be executed before moving on

                # Do not let a failed sampling go unnoticed
                if p.returncode != 0:
                    print('WARNING: mri_vol2surf exited with code ' + str(p.returncode) +
                          ', see ' + log_path)

In [None]:
# Preprocess the raw data for MFA

tracers=['cumi','dasb','sb','az','C36']
# tracers=['cumi']
targ='fsaverage5'
smooth=['0','5','10']
hemi_type=['lh','rh'] # Note that this order is important when accessing MFA preproc data

# A frame (column) is kept only if its sum over all rows exceeds this value.
# The value (10e5, i.e. 1e6) is a little random, any better test?
frame_sum_thresh=10e5

# Cortex mask file of each hemisphere (independent of tracer, subject and smoothing)
hemi_mask={hemi: opj(surf_data,'mask.'+targ+'.'+hemi) for hemi in hemi_type}

for tracer in tracers:

    # Scans of this tracer: PET IDs starting with the tracer name (matched literally)
    subjlist=[item for item in petID if re.search('^'+re.escape(tracer),item) is not None]

    for sm in smooth:

        # Fresh containers for every smoothing level, so that nothing from a
        # previous level can end up in the saved file
        data_scaled=np.ndarray([len(subjlist)],dtype=object)
        eig1=np.empty(len(subjlist))

        for ns,subj in enumerate(subjlist):
            # Load the hemispheres and stack them row-wise, in hemi_type order
            data=np.vstack([
                mu.fs_load_surf_data(opj(MFA_data,'surf_tacs',tracer,subj + '.' +
                                 hemi + '.' + targ + '.sm' + sm + '.nii.gz'),mask=hemi_mask[hemi])
                for hemi in hemi_type])

            # Identify non-empty frames from the column sums of the stacked data
            frames=np.sum(data,axis=0)>frame_sum_thresh

            # Select valid frames, scale data (mean=0 and var=1, row-wise) and keep the
            # largest singular value (np.linalg.svd returns them in descending order)
            data_scaled[ns]=scale(data[:,frames],axis=1)
            eig1[ns]=np.linalg.svd(data_scaled[ns],compute_uv=False)[0]

        # Save out results. Arrays are passed positionally, so they are stored as
        # 'arr_0' (data_scaled) and 'arr_1' (eig1).
        np.savez(opj(MFA_preproc,tracer + '.' + targ + '.sm' + sm), data_scaled, eig1)

# TODO: The following warnings were issued the first time this cell was run, but did not
# reappear when rerunning it. This needs to be investigated further.
# /data1/vbeliveau/software/anaconda3/lib/python3.5/site-packages/sklearn/preprocessing/data.py:160: UserWarning: Numerical issues were encountered when centering the data and might not be solved. Dataset may contain too large values. You may need to prescale your features.
#   warnings.warn("Numerical issues were encountered "
# /data1/vbeliveau/software/anaconda3/lib/python3.5/site-packages/sklearn/preprocessing/data.py:177: UserWarning: Numerical issues were encountered when scaling the data and might not be solved. The standard deviation of the data is probably very close to 0. 
#   warnings.warn("Numerical issues were encountered "