# Build a map from wellid to Glasser parcel

In [1]:
import os
import scipy
import pandas as pd
import numpy as np
import pickle
from pygest import algorithms
from statistics import mean, StatisticsError


In [2]:
""" Data inputs """

# Root directories the input files are resolved against.
PYGEST_DATA = "/data"
CODEDIR = "/home/mike/Dropbox/projects/GE-Conn/HCP_FC/Glasser_GroupAvg"
DATADIR = "/home/mike/projects/AHBAProcessing/proc_data"

# Label file: 360 Glasser parcel labels, 180 for each hemisphere.
glasser_label_file = os.path.join(
    CODEDIR,
    "HCP_GroupAvg_Glasser_Labels.txt",
)

# Coordinate file: a parcel number plus x, y, z for each of the 1280 wellids
# that can be mapped to Glasser parcels.
wellid_coord_file = os.path.join(
    DATADIR,
    "ROIxGene_HCP_INT.mat.SampleCoordinates.csv",
)

# Pickled dataframe with all AHBA-provided information on each sample (wellid).
all_ahba_sample_dataframe = os.path.join(
    PYGEST_DATA,
    "cache/all-samples.df",
)

In [3]:
def glasser_parcel_map():
    """ Build the int-to-label lookup for one hemisphere's 180 Glasser parcels.

    Reads `glasser_label_file` (no header row) and keeps the first column of
    every row from position 180 onward, re-keyed as 1..180.

    Returns a dict {1: label, ..., 180: label}.
    Raises ValueError (from pandas) if that slice does not hold exactly 180 rows.

    NOTE(review): this half of the file is treated as the left hemisphere by
    the rest of the notebook -- confirm against the label file's ordering.
    """

    label_table = pd.read_csv(glasser_label_file, header=None)

    # Second half of the 360-row table, first (and presumably only) column.
    hemisphere_labels = label_table.iloc[180:][0]
    hemisphere_labels.index = range(1, 181)

    return hemisphere_labels.to_dict()


def aurina_coordinates_to_dataframe():
    """ Load per-sample coordinates and also average them within each parcel.

    Reads `wellid_coord_file` (no header row; columns are taken to be
    parcel, x, y, z in that order).

    Returns a 2-tuple:
      * per-parcel dataframe: mean x, y, z for each parcel plus an 'mni_xyz'
        tuple column, indexed by the label from glasser_parcel_map()
        (parcel numbers missing from that map become NaN in the index);
      * per-sample dataframe: the rows as read, with an added 'mni_xyz'
        tuple column.
    """

    axes = ['x', 'y', 'z']

    per_sample = pd.read_csv(wellid_coord_file, header=None)
    per_sample.columns = ['parcel'] + axes
    per_sample['mni_xyz'] = list(map(tuple, per_sample[axes].values))

    # Centroid of each parcel's samples.
    per_parcel = per_sample[['parcel'] + axes].groupby(['parcel']).mean()
    per_parcel['mni_xyz'] = list(map(tuple, per_parcel[axes].values))

    # Swap the numeric parcel ids in the index for their text labels.
    per_parcel.index = per_parcel.index.map(glasser_parcel_map())

    return per_parcel, per_sample


def dist_3d(to_triple, from_triple):
    """ Return the Euclidean distance between two 3D vectors.

    Each argument is any indexable holding at least three numbers; only items
    0, 1 and 2 are used. The result is symmetric in its two arguments.
    """

    # Import the submodule explicitly. The file only does `import scipy`, and
    # on older SciPy releases that alone does not load `scipy.spatial`, so
    # `scipy.spatial.distance` only resolved if something else had imported it.
    from scipy.spatial.distance import euclidean

    return euclidean(
        [to_triple[0], to_triple[1], to_triple[2]],
        [from_triple[0], from_triple[1], from_triple[2]],
    )


def wellid_from_xyz(series_of_triples, samples, tolerance=0.500):
    """ Return the wellid from its x,y,z coordinates.

    :param series_of_triples: one (x, y, z) triple (despite the name); only
        items 0, 1 and 2 are read.
    :param samples: dataframe with an 'mni_xyz' column of (x, y, z) triples.
        Its index labels are what gets returned, so it is presumably indexed
        by wellid -- verify against the cached dataframe.
    :param tolerance: a sample matches when its distance to the triple is
        strictly less than this value (same units as the coordinates).
        Defaults to the original hard-coded 0.500.
    :return: the index label of the single matching sample, or 0.0 if there
        were no matches or more than one (a message is printed either way).
    """

    target = (series_of_triples[0], series_of_triples[1], series_of_triples[2])
    dists = samples['mni_xyz'].apply(dist_3d, args=(target, ))

    # For every coordinate, exactly one sample should lie within tolerance of
    # it. Filter once and reuse the result rather than re-filtering per branch.
    matches = dists[dists < tolerance]
    match_count = len(matches)

    if match_count == 1:
        return matches.index[0]

    if match_count < 1:
        print("No mapping for " + str(series_of_triples))
    else:
        print("Mapping isn't 1-to-1; {} has {} matches.".format(
            str(series_of_triples), match_count
        ))

    # Sentinel kept as 0.0 for existing callers. Every failed lookup shares
    # this value, so they collapse to one key if used as dict keys.
    return 0.0
    
    
def sample_to_parcel_map(parcels):
    """ Build a dict from wellid to Glasser parcel label.

    :param parcels: dataframe with one row per sample, holding a numeric
        'parcel' column and an 'mni_xyz' coordinate-triple column.
    :return: dict keyed by the id wellid_from_xyz() finds for each row's
        coordinates, valued by that row's parcel label.

    Only samples whose "side" is "L" in the cached dataframe are candidates.
    NOTE(review): rows that wellid_from_xyz() cannot resolve all come back as
    0.0, so they share a single 0.0 key in the returned dict.
    """

    with open(all_ahba_sample_dataframe, 'rb') as cache_file:
        all_samples = pickle.load(cache_file)
    left_samples = all_samples[all_samples["side"] == "L"]

    # Parcel number -> label for every row, then re-key the rows by wellid.
    labels_by_wellid = parcels['parcel'].map(glasser_parcel_map())
    labels_by_wellid.index = parcels['mni_xyz'].apply(
        wellid_from_xyz,
        args=(left_samples, ),
    )

    return labels_by_wellid.to_dict()



In [4]:
# samp_glasser: coordinates averaged per Glasser parcel, indexed by parcel label.
# samp_preglasser: the per-sample rows (parcel, x, y, z, mni_xyz) before averaging.
samp_glasser, samp_preglasser = aurina_coordinates_to_dataframe()
# Match each per-sample coordinate to a wellid and pair it with its parcel label.
map_wellid_to_parcel = sample_to_parcel_map(samp_preglasser)


In [5]:
# Pickle the wellid -> Glasser parcel label dict into the current working directory.
# 1,280 wellid keys are expected when every coordinate row matched exactly one sample;
# rows that did not match share a single 0.0 key instead.
with open("./wellid_to_glasser.map", "wb") as f:
    pickle.dump(map_wellid_to_parcel, f)
    