In [1]:
from cfg import *
import PrimaryLibrary as PL

In [2]:
# Base URL shared by every Allen Brain Atlas API query built below.
API_PATH = "http://api.brain-map.org/api/v2/data"
# Structure graph id interpolated into STRUCTURES_URL.
GRAPH_ID = 1
PLANE_ID = 1 # coronal
MOUSE_PRODUCT_ID = 1 # aba

# Query for connectivity SectionDataSets: not failed, product id 5, rAAV green
# channel, injection age between 54 and 58 days, restricted to PLANE_ID.
# The product id (5) is hard-coded in the string rather than taken from a constant.
DATA_CON_SET_QUERY_URL = ("%s/SectionDataSet/query.json" +\
                          "?criteria=[failed$eqfalse]" +\
                          ",products[id$in5]" +\
                          ",[green_channel$eqrAAV]" +\
                          #",specimen(donor[transgenic_mouse_id$eqall donors])" +\
                          ",specimen(stereotaxic_injections(age[days$ge54],[days$le58]))" +\
                          ",plane_of_section[id$eq%d]" +\
                          "&include=specimen(stereotaxic_injections(age,stereotaxic_injection_materials,stereotaxic_injection_coordinates,primary_injection_structure)),specimen(donor(age))") \
                          % (API_PATH, PLANE_ID)

# Format string (not a finished URL): fill with (API_PATH, section_data_set_id).
# Selects the non-injection projection unionizes of one data set.
UNIONIZE_CON_FMT = "%s/ProjectionStructureUnionize/query.json" +\
               "?criteria=[section_data_set_id$eq%d],[is_injection$eqfalse]" +\
               "&include=hemisphere"

# Query for all structures belonging to GRAPH_ID.
STRUCTURES_URL = ("%s/Structure/query.json?" +\
                      "criteria=[graph_id$eq%d]") \
                      % (API_PATH, GRAPH_ID)


# Query for expression SectionDataSets: not failed, with expression, of product
# MOUSE_PRODUCT_ID and plane PLANE_ID; gene records are included in the result.
DATA_EXP_SET_QUERY_URL = ("%s/SectionDataSet/query.json" +\
                          "?criteria=[failed$eq'false'][expression$eq'true']" +\
                          ",products[id$eq%d]" +\
                          ",plane_of_section[id$eq%d],genes" +\
                          "&include=genes") \
                          % (API_PATH, MOUSE_PRODUCT_ID, PLANE_ID)

# Format string (not a finished URL): fill with (API_PATH, section_data_set_id).
# The product id is interpolated here, at definition time; the structure graph
# id is hard-coded to 1 in the string instead of using GRAPH_ID.
UNIONIZE_EXP_FMT = "%s/StructureUnionize/query.json" +\
               "?criteria=[section_data_set_id$eq%d],structure[graph_id$eq1]" +\
               ("&include=section_data_set(products[id$in%d])" % (MOUSE_PRODUCT_ID)) +\
               "&only=id,structure_id,sum_pixels,expression_energy,section_data_set_id"
    

In [3]:
# Make a query to the API via a URL.
def QueryAPI(url):
    """Download every row matching an API query, following pagination.

    The API will not return more than 2000 rows at once, so the query is
    repeated with increasing ``start_row`` until ``total_rows`` rows were read.

    Parameters
    ----------
    url : str
        Query URL. It must already contain a query string, because the paging
        arguments are appended with ``&``.

    Returns
    -------
    list
        The concatenated ``msg`` entries of all pages.

    Raises
    ------
    RuntimeError
        If a response reports ``success`` as false; in that case ``msg``
        carries an error message instead of rows.
    """
    # Function-scope import keeps the cell usable on both Python 2 and 3.
    try:
        from urllib.request import urlopen
    except ImportError:
        from urllib import urlopen

    start_row = 0
    num_rows = 2000
    total_rows = -1
    rows = []

    # The first request always runs; total_rows is only known afterwards.
    while total_rows < 0 or start_row < total_rows:
        pagedUrl = url + '&start_row=%d&num_rows=%d' % (start_row,num_rows)

        print(pagedUrl)
        connection = urlopen(pagedUrl)
        try:
            source = connection.read()
        finally:
            connection.close()

        response = json.loads(source)
        if not response.get('success', True):
            raise RuntimeError('API query failed: {}'.format(response.get('msg')))

        page = response['msg']
        rows += page

        if total_rows < 0:
            total_rows = int(response['total_rows'])

        # An empty page would never advance start_row; stop instead of
        # requesting the same page forever.
        if not page:
            break

        start_row += len(page)

    print('Number of results: {}'.format(total_rows))
    return rows

In [4]:
def DownloadUnionizedData(dataSets, unionize_fmt=None):
    """Download the unionize records of every data set.

    Parameters
    ----------
    dataSets : iterable of dict
        Data set records; only the ``'id'`` entry is read.
    unionize_fmt : str, optional
        Format string taking ``(API_PATH, data_set_id)``. Defaults to
        ``UNIONIZE_CON_FMT`` (projection unionizes); pass ``UNIONIZE_EXP_FMT``
        for expression unionizes.

    Returns
    -------
    list of list
        One list of unionize rows per data set, in input order.
    """
    if unionize_fmt is None:
        unionize_fmt = UNIONIZE_CON_FMT
    return [QueryAPI(unionize_fmt % (API_PATH, d['id'])) for d in dataSets]

In [5]:
# Download the mouse brain structures in a structure graph.
def DownloadStructures():
    """Fetch the structures of the configured graph and index them by id.

    Each record gets its ``structure_id_path`` parsed from a '/'-separated
    string into a list of ints, and a ``num_children`` counter holding the
    number of direct descendants.

    Returns
    -------
    (list, dict)
        The sorted ids of all structures (no leaf filtering is applied here;
        callers filter later) and the dict mapping id -> structure record.
    """
    structHash = {}
    for record in QueryAPI(STRUCTURES_URL):
        record['num_children'] = 0
        path_parts = record['structure_id_path'].split('/')
        record['structure_id_path'] = [int(part) for part in path_parts if part != '']
        structHash[record['id']] = record

    # The second-to-last entry of a path is the direct parent of the node.
    for record in structHash.values():
        lineage = record['structure_id_path']
        if len(lineage) > 1:
            structHash[lineage[-2]]['num_children'] += 1

    return sorted(structHash.keys()), structHash

In [6]:
def CreateConnectivityMatrix(dataSets,structureIds,structHash,unionizes):
    """Build per-hemisphere structure-by-experiment connectivity matrices.

    Parameters
    ----------
    dataSets : sequence of dict
        Experiment records; the position of each ``'id'`` gives the column.
    structureIds : sequence
        Structure ids; the position of each id gives the row.
    structHash : dict
        Maps structure id -> structure record. Records are updated in place
        with ``'volume'`` and ``'coordinates'`` taken from the unionizes.
    unionizes : sequence of sequence of dict
        One list of projection unionize records per experiment.

    Returns
    -------
    (numpy.ndarray, numpy.ndarray)
        ``connectivityL`` and ``connectivityR``, both of shape
        ``(len(structureIds), len(unionizes))``, holding
        ``normalized_projection_volume`` for hemisphere ids 1 and 2
        respectively. Cells without a record stay NaN.
    """
    nstructs = len(structureIds)
    ndata = len(unionizes)
    print('ndata {} ndatasets {}'.format(ndata,len(dataSets)))

    sidHash = dict((sid, row) for row, sid in enumerate(structureIds))
    didHash = dict((d['id'], col) for col, d in enumerate(dataSets))

    connectivityL = np.full([nstructs, ndata], np.nan)
    connectivityR = np.full([nstructs, ndata], np.nan)

    # NOTE(review): these per-metric matrices are filled for hemisphere 2 only
    # and are not returned; confirm whether they are still needed.
    metric_names = ['projection_density', 'projection_intensity', 'projection_energy',
                    'projection_volume', 'normalized_projection_volume']
    connectivityDict = dict((key, np.full([nstructs, ndata], np.nan)) for key in metric_names)

    for i, us in enumerate(unionizes):
        for j, u in enumerate(us):
            sid = u['structure_id']
            did = u['section_data_set_id']

            struct = structHash[sid]
            struct['volume'] = u['sum_pixels']
            struct['coordinates'] = u['max_voxel_x'], u['max_voxel_y'], u['max_voxel_z']

            # Show one sample record for inspection.
            if i == 0 and j == 0:
                print(u)

            if sid in sidHash and did in didHash:
                row, col = sidHash[sid], didHash[did]
                # Compare by value: identity ("is") on integers only works by
                # accident of small-int caching and fails for numpy integers.
                hemisphere = u['hemisphere_id']
                if hemisphere == 1:
                    connectivityL[row][col] = u['normalized_projection_volume']
                elif hemisphere == 2:
                    connectivityR[row][col] = u['normalized_projection_volume']
                    for key in connectivityDict:
                        connectivityDict[key][row][col] = u[key]
                # hemisphere 3 is just the average value of L+R and is ignored
            else:
                print("ERROR: structure {}/injection {} skipped.".format(sid,did))

    return connectivityL, connectivityR

**Read_v2_Data function**  
*Author*: Nestor Timonidis  
*Description*:  
    This function parses the voxelized, gridded data of the Allen Brain Atlas version 2,  
    obtained from the repository provided by Grange et al. in [1].  
    The data contain the in situ hybridization gene expression of 4104 genes.  
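*Input*:  
VoxelDict -> dictionary whose 'D' entry holds the voxelized gridded gene expression data,  
Ref -> the nested reference-atlas structure that holds the voxel and gene annotations  
*Output*:  
VoxelData -> the voxelized gridded gene expression data,  
voxel_annots ->  the annotations regarding the id of each voxel,  
gene_annots ->   the annotations regarding the name of each gene,  
allenGeneIds ->  the annotations regarding the id of each gene,  
dims ->  the size of the 3D brain grid along each of its dimensions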

**Note**: the voxelized gridded data are parsed from two files   
kept in the Storage section: 'refAtlas.mat' and 'ExpEnergy.mat'.  
This is because downloading the files automatically from their repository takes a long time.  
For archiving reasons, though, the commented-out code in the following block shows how the data were automatically downloaded  
and extracted for usage.  


In [1]:
def Read_v2_Data(VoxelDict, Ref):
    """Extract expression data and annotations from the loaded atlas structures.

    Parameters
    ----------
    VoxelDict : mapping
        Its ``'D'`` entry is returned unchanged as the voxel data.
    Ref : nested structure
        Indexed as ``Ref['Ref']['Coronal'][0][0][...]``.
        # presumably the result of loading 'refAtlas.mat' - verify against caller

    Returns
    -------
    VoxelData
        ``VoxelDict['D']``.
    voxel_annots : list
        First element of every row of the voxel index array (as int32).
    gene_annots : numpy.ndarray of str
        Names of the genes whose 1-based position is in the gene filter.
    allenGeneIds : numpy.ndarray of str
        Ids of the same filtered genes, converted to strings.
    dims
        The ``size`` entry of the coronal reference.
    """
    #clients = get_hbp_service_client()
    #collab_path = get_collab_storage_path()
    #clients.storage.download_file(collab_path + '/ExpEnergy.mat','/tmp/ExpEnergy.mat')
    #clients.storage.download_file(collab_path + '/refAtlas.mat','/tmp/refAtlas.mat')

    coronal = Ref['Ref']['Coronal'][0][0]
    genes = coronal['Genes'][0][0]

    VoxelData = VoxelDict['D']
    voxel_annots = coronal['Filters'][0][0]['idxArray'][0][0][0][1]
    voxel_annots = np.asarray(voxel_annots,dtype = 'int32')
    voxel_annots = [val[0] for val in voxel_annots]

    # The filter stores 1-based gene positions; a set makes each lookup O(1).
    gene_filtered_ids = genes['Filters'][0][0]['idxArray'][0][0][0][7]
    kept_positions = set(val[0] for val in gene_filtered_ids)

    gene_annots = genes['allenNames'][0][0][0][0]
    gene_annots = [val[0][0] for idx,val in enumerate(gene_annots) if (idx+1) in kept_positions]
    # dtype=str is the portable spelling of the old 'string' alias.
    gene_annots = np.asarray(gene_annots,dtype = str)

    allenGeneIds = genes['allenGeneIds'][0][0][0][0]
    allenGeneIds = [val[0] for idx,val in enumerate(allenGeneIds) if (idx+1) in kept_positions]
    allenGeneIds = np.asarray(allenGeneIds,dtype = str)

    dims = coronal['size'][0][0][0]

    return VoxelData,voxel_annots,gene_annots,allenGeneIds,dims

In [None]:
def CustomUnionization(InputMat, InjMat, InputMeta, resolution = 100):
    """Unionize voxel-wise projection volumes into structure-level matrices.

    For every structure listed in ``structures.csv`` that occurs in the
    annotation volume, the structure's voxels are split at the midpoint of the
    third annotation axis and, per injection, two measures are computed for
    each half: the mean signal and the count of non-zero voxels, both divided
    by the mean of the injection volume.

    Parameters
    ----------
    InputMat : numpy.ndarray
        4-D array; the first three axes are indexed with annotation voxel
        coordinates and the fourth axis enumerates injections.
    InjMat : numpy.ndarray
        4-D array indexed like ``InputMat``; its per-injection mean is the
        normalisation factor.
    InputMeta : sequence of dict
        Per-injection metadata; only ``'structure-abbrev'`` is read (logging).
    resolution : int
        Selects the annotation file ``annotation_<resolution>.nrrd``.

    Returns
    -------
    structural_conn : numpy.ndarray
        Mean-based matrix, mirrored to ``(2 * n_structs, 2 * n_inj)`` and
        divided by its NaN-ignoring maximum. Rows of structures absent from
        the annotation are NaN.
    structural_conn2 : numpy.ndarray
        Same layout, based on non-zero voxel counts.
        # NOTE(review): rows of absent structures stay 0 here, not NaN.
    struct_idx_dict_2 : OrderedDict
        Row position -> acronym for the structures present in the annotation.
    """
    print('Custom Unionization script initialization!!')

    annotation = nrrd.read('../25 3 2019/annotation/ccf_2017/annotation_{}.nrrd'.format(resolution))
    annot_vol = annotation[0]
    all_ids = np.unique(np.ndarray.flatten(annot_vol))
    all_ids = np.delete(all_ids,0)   # drop the smallest label
    present_ids = set(int(label) for label in all_ids)
    with open('../25 3 2019/structures.csv','rb') as fp:
        structure = PL.readtable(fp)

    struct_idx_dict = OrderedDict([(val,idx) for idx,val in enumerate(structure['id'])])

    inj_len = np.shape(InputMat)[3]
    str_len = len(struct_idx_dict)
    middle_point = np.shape(annot_vol)[2] // 2
    structural_conn = np.zeros((str_len*2,inj_len))
    structural_conn2 = np.zeros((str_len*2,inj_len))
    print(structural_conn.shape)

    # Normalisation factor of every injection, computed once.
    injection_vols = [np.mean(InjMat[:,:,:,source]) for source in range(inj_len)]

    # Structures form the outer loop so that each voxel mask is computed once
    # instead of once per injection.
    for annot,pos in struct_idx_dict.items():
        if int(annot) not in present_ids:
            structural_conn[pos, :] = np.nan
            structural_conn[pos+str_len, :] = np.nan
            continue

        xs, ys, zs = np.where(annot_vol == int(annot))
        first_half = zs < middle_point
        second_half = zs >= middle_point
        ipsi_idx = (xs[first_half], ys[first_half], zs[first_half])
        contra_idx = (xs[second_half], ys[second_half], zs[second_half])

        for source in range(inj_len):
            print('{} {} {}'.format(InputMeta[source]['structure-abbrev'], source,
                                    structure['acronym'][pos]))
            injection_vol = injection_vols[source]
            if len(ipsi_idx[2]) > 0:
                ipsi_subset = InputMat[ipsi_idx[0], ipsi_idx[1], ipsi_idx[2], source]
                structural_conn2[pos, source] = len(np.nonzero(ipsi_subset)[0])/(1.0*injection_vol)
                structural_conn[pos, source] = np.mean(ipsi_subset)/(1.0*injection_vol)
            if len(contra_idx[2]) > 0:
                contra_subset = InputMat[contra_idx[0], contra_idx[1], contra_idx[2], source]
                structural_conn2[pos+str_len, source] = len(np.nonzero(contra_subset)[0])/(1.0*injection_vol)
                structural_conn[pos+str_len, source] = np.mean(contra_subset)/(1.0*injection_vol)

    def _mirror_and_normalize(mat):
        # Append a copy with the two row halves swapped as extra columns
        # (covers both left and right injections), then scale by the maximum.
        swapped = np.concatenate((mat[str_len:, :], mat[:str_len, :]), axis=0)
        full = np.concatenate((mat, swapped), axis=1)
        return full / (np.nanmax(full))

    structural_conn = _mirror_and_normalize(structural_conn)
    structural_conn2 = _mirror_and_normalize(structural_conn2)

    struct_idx_dict_2 = OrderedDict([(pos,structure['acronym'][pos]) for annot,pos in struct_idx_dict.items()
                                     if int(annot) in present_ids])

    return structural_conn, structural_conn2, struct_idx_dict_2

In [None]:
def UnionizePerSource(OldMat,Meta, level = 'fine'):
    """Average injection columns that share a source structure and mirror them.

    Parameters
    ----------
    OldMat : numpy.ndarray
        2-D matrix with one column per injection. The rows are treated as two
        stacked halves when mirroring.
    Meta : sequence of dict
        Per-injection metadata, in column order; ``'structure-abbrev'`` names
        the source structure of each injection.
    level : str
        ``'full'`` groups by the distinct ``'structure-abbrev'`` values of
        ``Meta``; ``'coarse'`` groups by the acronyms of the structure ids
        stored in ``ids_for_tvmb.pkl`` (looked up in ``structures.csv``).

    Returns
    -------
    fullNewMat : numpy.ndarray
        The averaged matrix followed, column-wise, by a copy with its two row
        halves swapped; divided by its maximum.
    inj_clusters : OrderedDict
        Source name -> list of injection column indices, in column order of
        the averaged matrix.

    Raises
    ------
    ValueError
        If ``level`` is neither ``'full'`` nor ``'coarse'``.
        # NOTE(review): the default 'fine' has no branch; confirm what it
        # should select.
    """
    if level == 'full':
        unique_sources = np.unique([inj['structure-abbrev'] for inj in Meta])
    elif level == 'coarse':
        with open('structures.csv','rb') as fp:
            structure       = PL.readtable(fp)
        # pickle can execute arbitrary code: only load trusted files here.
        with open('ids_for_tvmb.pkl','rb') as fp:
            tvmb_ids = pk.load(fp)
        unique_sources = [structure['acronym'][id2] for idx in tvmb_ids
                          for id2 in range(len(structure['id'])) if structure['id'][id2] == idx]
    else:
        raise ValueError("unsupported level {!r}: expected 'full' or 'coarse'".format(level))

    # Gathering clusters of injections to be averaged later on
    inj_clusters = OrderedDict()
    for source in unique_sources:
        members = [i for i,inj in enumerate(Meta) if inj['structure-abbrev'] == source]
        if len(members) > 0:
            inj_clusters.setdefault(source, []).extend(members)

    # Sources without any injection.
    # NOTE(review): their positions in unique_sources are used as ROW indices
    # of OldMat - confirm that the rows follow the order of unique_sources.
    black_sheeps = [idx for idx,val in enumerate(unique_sources) if val not in inj_clusters]
    if black_sheeps:
        OldMat = np.delete(OldMat, black_sheeps,axis = 0)

    # For each cluster of injections, average their values and store it in the
    # respective source column (one column per cluster, no empty padding).
    NewMat = np.zeros((len(OldMat),len(inj_clusters)))
    for col,members in enumerate(inj_clusters.values()):
        NewMat[:,col] = np.mean(OldMat[:,members], axis = 1)

    # Mirror projections at the source level, assuming that projections
    # from the left hemisphere are the same as the right one
    half = NewMat.shape[0] // 2
    right_target_hemi = NewMat[:half,:]
    left_target_hemi = NewMat[half:,:]
    sc_down = np.concatenate((left_target_hemi, right_target_hemi), axis = 0)
    fullNewMat = np.concatenate((NewMat, sc_down), axis = 1)
    fullNewMat = fullNewMat / (np.amax(fullNewMat))  # normalize the matrix

    return fullNewMat, inj_clusters

In [None]:
# Creates an xml file with information related to the Allen CCF v3 annotation volume
# to be used by fsl
import xml.etree.ElementTree as ET
def CustomXmlMaker(annot_path = '../25 3 2019', resolution = 200, outfile = 'saves/allen_annotation_atlas.xml'):
    """Write an atlas XML file with one ``label`` element per annotation voxel.

    The elements are appended to the ``data`` node of ``template_atlas.xml``.
    Each element carries the voxel position in its ``x``/``y``/``z``
    attributes, a constant ``index`` of ``'1'`` and, as text, either
    ``'background'`` (label 0) or a structure name taken from the ancestor
    list of the voxel's structure: the second entry when there is one,
    otherwise the first.

    Parameters
    ----------
    annot_path : str
        Directory holding ``annotation_<resolution>.nrrd``.
    resolution : int
        Resolution suffix of the annotation file.
    outfile : str
        Path of the XML file to write.

    Raises
    ------
    KeyError
        If a non-zero voxel label has no entry in the ancestor list.
    """
    mcc = MouseConnectivityCache(manifest_file='connectivity/mouse_connectivity_manifest.json')
    mask_path =  '{}/annotation_{}.nrrd'.format(annot_path,resolution)
    annot_vol = nrrd.read(mask_path)[0]
    unique_ids = np.unique(annot_vol)
    structure_tree = mcc.get_structure_tree()
    # The first unique label is skipped when asking for ancestors.
    ancestry_trg_dict = structure_tree.ancestors(unique_ids[1:len(unique_ids)])

    # Resolve the name of every label once instead of searching the whole
    # ancestor list for each voxel; the first match wins, as in a linear scan.
    label_names = {}
    for lineage in ancestry_trg_dict:
        name = lineage[1]['name'] if len(lineage) > 1 else lineage[0]['name']
        label_names.setdefault(lineage[0]['id'], name)

    tree = ET.parse('template_atlas.xml')
    root = tree.getroot()
    data = root.find('data')
    # ndenumerate walks the volume in the same x, y, z nesting order as
    # three nested range() loops.
    for (x, y, z), label_id in np.ndenumerate(annot_vol):
        kid = ET.SubElement(data, "label")
        if label_id == 0:
            kid.text = 'background'
        else:
            kid.text = label_names[int(label_id)]
            print(kid.text)
        kid.attrib['index'] = str(1) #str(annot_200[0][x,y,z])
        kid.attrib['x'] = str(x)
        kid.attrib['y'] = str(y)
        kid.attrib['z'] = str(z)

    tree = ET.ElementTree(root)
    tree.write(outfile)

In [7]:
def CreateExpressionMatrix(dataSets,structureIds,structHash,unionizes):
    """Build the structure-by-data-set expression energy matrix.

    Parameters
    ----------
    dataSets : sequence of dict
        Data set records; the position of each ``'id'`` gives the column.
    structureIds : sequence
        Structure ids; the position of each id gives the row.
    structHash : dict
        Maps structure id -> structure record. Records are updated in place
        with ``'volume'`` (the unionize's ``sum_pixels``).
    unionizes : sequence of sequence of dict
        One list of structure unionize records per data set.

    Returns
    -------
    numpy.ndarray
        Shape ``(len(structureIds), len(unionizes))`` holding
        ``expression_energy``; cells without a record stay NaN.
    """
    nstructs = len(structureIds)
    ndata = len(unionizes)

    sidHash = dict((sid, row) for row, sid in enumerate(structureIds))
    didHash = dict((d['id'], col) for col, d in enumerate(dataSets))

    expression = np.full([nstructs, ndata], np.nan)

    for us in unionizes:
        for u in us:
            sid = u['structure_id']
            did = u['section_data_set_id']

            struct = structHash[sid]
            struct['volume'] = u['sum_pixels']

            # "in" replaces dict.has_key, which no longer exists in Python 3.
            if sid in sidHash and did in didHash:
                expression[sidHash[sid]][didHash[did]] = u['expression_energy']

    return expression

In [8]:
def allChildren(acr,acr2parent):
   """Collect every descendant of ``acr`` in a child -> parent mapping.

   Each direct child is listed first, immediately followed by its own
   descendants; children are visited in the iteration order of the mapping.
   """
   descendants = []
   direct_children = [name for name, parent in acr2parent.items() if parent == acr]
   for child in direct_children:
     descendants += [child] + allChildren(child, acr2parent)
   return descendants

In [9]:
def ReduceToLeafNodes(structure_acronyms,tree_file):
    """Return the given structures that are leaf nodes of the tree hierarchy.

    Parameters
    ----------
    structure_acronyms : sequence
        Acronyms to check.
    tree_file : str
        Path of a JSON file holding a mapping acronym -> parent acronym.

    Returns
    -------
    list of (int, acronym)
        Position and acronym of every input structure that is nobody's
        parent in the tree, in input order.
    """
    with open(tree_file) as fp:
        acr2parent = json.load(fp)

    # A structure is a leaf exactly when it never occurs as a parent, so one
    # pass over the parents replaces building the full descendant list.
    parents = set(acr2parent.values())
    return [(idx, acro) for idx, acro in enumerate(structure_acronyms)
            if acro not in parents]


In [10]:
def GetConUnionizes():
    """Fetch the structure unionizes of all Cre experiments and store them.

    The unionizes of each experiment returned by
    ``mcc.get_experiments(cre=True)`` are collected in a list and written as
    ``'dataset1'`` to ``unionized_connectivity.hdf5``. Nothing is returned.
    """
    mcc = MouseConnectivityCache(resolution = 100)
    experiments = mcc.get_experiments(cre = True, dataframe=True)
    uni_con = []
    for idx,val in enumerate(experiments['id']):
        print(idx)
        uni_con.append(mcc.get_experiment_structure_unionizes(experiment_id = val))
    # uni_con is a plain list, which has no .shape attribute.
    print(len(uni_con))
    # NOTE(review): confirm that h5py can store this list of per-experiment
    # tables as a single dataset.
    with h5py.File('unionized_connectivity.hdf5','w') as fp:
        fp.create_dataset('dataset1',data = uni_con)

In [11]:
def GetCreLines(infile = None):
    """Collect metadata and volumes of the Cre experiments of selected lines.

    Cre lines are read from ``'Supplementary Table 1.csv'`` (';'-separated
    fields; a row whose first field is a number describes one line, named by
    its second field, with layer and cell type in the 8th and 9th fields).
    For every Cre experiment whose transgenic line is in that table, the
    projection density and injection fraction volumes are downloaded and read.

    Parameters
    ----------
    infile : container of str, optional
        Names of the Cre lines to keep. ``None`` keeps every line of the table.

    Returns
    -------
    MetaPerCre : list of dict
        One record per kept experiment with the keys
        ``'injection-coordinates'``, ``'injection_volume'``,
        ``'structure-abbrev'``, ``'transgenic-line'``, ``'id'``, ``'layer'``
        and ``'Cell Type'``.
    ProjPerCre : numpy.ndarray
        Projection density volumes stacked in the same order.

    Side effects
    ------------
    Writes the stacked injection fraction volumes to
    ``temporary_storage/InjPerCre.hdf5`` and overwrites the scratch files
    ``cre_inj_density.nrrd`` and ``cre_pro_density.nrrd``.
    """
    infile1 = 'cre_inj_density.nrrd'
    infile2 = 'cre_pro_density.nrrd'
    mcc = MouseConnectivityCache(resolution = 100, cache = True, manifest_file = 'voxel_model_manifest.json')
    cre_experiments = mcc.get_experiments(cre = True, dataframe=True)

    MetaPerCre      = []
    InjPerCre       = []
    ProjPerCre      = []
    creDict         = {}
    with open('Supplementary Table 1.csv') as fp:
        for row in csv.reader(fp):
            if not row:
                continue
            if len(row) > 1: # concatenate the two rows together - error caused by csv transition
                row[0] =  row[0] + row[1]
            remains = [x for x in row[0].split(';') if x]
            if len(remains) < 2 or not remains[0].isdigit():
                continue
            if infile is not None and remains[1] not in infile:
                continue
            # layer and cell type of the line
            creDict[remains[1]] = [remains[7], remains[8]]

    for val in cre_experiments['id']:
        tmp = cre_experiments['transgenic_line'][val]
        if 'A93' in tmp:
            tmp = 'A93-Tg1-Cre'
        # Skip before downloading: volumes of unselected lines are never used.
        if tmp not in creDict:
            continue

        mcc.get_projection_density(val, infile2)
        mcc.get_injection_fraction(val, infile1)
        #mcc.get_injection_density(val, infile1)

        layer, cell_type = creDict[tmp]
        MetaPerCre.append({
            'injection-coordinates': [cre_experiments['injection_x'][val],
                                      cre_experiments['injection_y'][val],
                                      cre_experiments['injection_z'][val]],
            'injection_volume': cre_experiments['injection_volume'][val],
            'structure-abbrev': cre_experiments['structure_abbrev'][val],
            'transgenic-line': tmp,
            'id': cre_experiments['id'][val],
            'layer': layer,
            'Cell Type': cell_type,
        })
        # read the freshly downloaded volumes into memory
        id_array, id_info = nrrd.read(infile1)
        InjPerCre.append(id_array)
        pd_array, pd_info = nrrd.read(infile2)
        ProjPerCre.append(pd_array)

    InjPerCre = np.asarray(InjPerCre); ProjPerCre = np.asarray(ProjPerCre)
    # The context manager closes (and flushes) the file even on errors.
    with h5py.File('temporary_storage/InjPerCre.hdf5','w') as f2:
        f2.create_dataset('dataset1',data = InjPerCre)
    #f2 = h5py.File('new_saves/ProjPerCre.hdf5','w')
    #f2.create_dataset('dataset1',data = ProjPerCre)
    #pk.dump(MetaPerCre,open('new_saves/MetaPerCre.pkl','wb'))
    return MetaPerCre, ProjPerCre


In [2]:
def ReadConnectivityData():
    """Read projection and injection volumes of all non-Cre experiments.

    For each experiment the projection density is read from a local
    ``experiment_<id>/projection_density_100.nrrd`` when that directory
    exists, and downloaded otherwise. The injection fraction is always
    fetched through the connectivity cache.

    Returns
    -------
    MetaPerInj : list of dict
        One record per experiment holding every column of the experiments
        table.
    ProjPerExp : numpy.ndarray
        float32 array of the stacked projection density volumes.

    Side effects
    ------------
    Writes the stacked injection fraction volumes (float32) to
    ``temporary_storage/InjPerExp.hdf5`` and overwrites the scratch files
    under ``expression_files/``.
    """
    infile1 = 'expression_files/inj_density.nrrd'
    infile2 = 'expression_files/proj_density.nrrd'
    #mca = MouseConnectivityApi()
    mcc = MouseConnectivityCache(resolution = 100, cache = True,
                                 manifest_file = 'voxel_model_manifest.json')
    # get metadata for all non-Cre experiments
    experiments = mcc.get_experiments(cre = False, dataframe=True)
    #experiments = mca.experiment_source_search(injection_structures = 'root', transgenic_lines = 0)
    ProjPerExp = []
    MetaPerInj = []
    InjPerExp  = []

    for idx,val in enumerate(experiments['id']):
        experiment_dir = 'experiment_' + str(val)
        if os.path.isdir(experiment_dir):
            pd_array, pd_info = nrrd.read(experiment_dir + '/projection_density_100.nrrd')
        else:
            mcc.get_projection_density(val, infile2)
            # read it into memory
            pd_array, pd_info = nrrd.read(infile2)

        # Fetched for every experiment, so the injection volume always
        # belongs to the current experiment (also in the cached branch).
        mcc.get_injection_fraction(val, infile1)
        id_array, id_info = nrrd.read(infile1)

        MetaPerInj.append(dict((key, item.iloc[idx]) for key,item in experiments.items()))
        ProjPerExp.append(pd_array)
        InjPerExp.append(id_array)

    InjPerExp = np.asarray(InjPerExp,dtype = 'float32')
    ProjPerExp = np.asarray(ProjPerExp,dtype = 'float32')
    #f2 = h5py.File('ProjPerExp.hdf5','w')
    #f2.create_dataset('dataset1',data = ProjPerExp)
    with h5py.File('temporary_storage/InjPerExp.hdf5','w') as f2:
        f2.create_dataset('dataset1',data = InjPerExp)
    #pk.dump(MetaPerInj,open('new_saves/MetaPerInj.pkl','wb'))
    return MetaPerInj, ProjPerExp

In [None]:
def StructuralDistance(str_cords):
    """Euclidean distances between structure centres, mirrored across halves.

    Parameters
    ----------
    str_cords : numpy.ndarray
        2-D array with one coordinate row per region. The rows are treated as
        two stacked halves of equal size.

    Returns
    -------
    numpy.ndarray
        Matrix whose first ``n // 2`` columns hold the distance of every row
        to each row of the first half, followed by the same block with its
        two row halves swapped.
    """
    n_rows = np.shape(str_cords)[0]
    middle_ground = len(str_cords) // 2
    paired = 2 * middle_ground   # a trailing unpaired row is left at zero

    DistMat_tmp = np.zeros((n_rows, middle_ground), float)
    # Broadcast every paired row against every row of the first half.
    diff = str_cords[:paired, None, :] - str_cords[None, :middle_ground, :]
    DistMat_tmp[:paired, :] = np.sqrt(np.sum(diff**2, axis=2))

    right_target_hemi = DistMat_tmp[:middle_ground,:]
    left_target_hemi = DistMat_tmp[middle_ground:,:]
    DistMat_flip = np.concatenate((left_target_hemi, right_target_hemi), axis = 0)
    DistMat = np.concatenate((DistMat_tmp, DistMat_flip), axis = 1)

    return DistMat

In [None]:
def ConnectomeStorage(folder_name,data_name, weights, distances, labels, centres, hemispheres):
    """Write a connectome to ``<folder_name>/<data_name>.h5``.

    The file receives the datasets ``'hemispheres'`` (boolean), ``'weights'``
    and ``'tract_lengths'`` (float64), ``'region_labels'`` (byte strings of at
    most 58 characters) and ``'centres'`` (float64).

    Parameters
    ----------
    folder_name, data_name : str
        Directory and base name of the output file.
    weights, distances, centres, hemispheres : array-like
        Data stored under the dataset names listed above (``distances`` is
        stored as ``'tract_lengths'``).
    labels : sequence of str
        Region labels.
    """
    labels = np.array(labels, dtype='|S58')
    # The context manager closes the file even if a dataset cannot be written.
    with h5py.File('{}/{}.h5'.format(folder_name,data_name),'w') as fp:
        fp.create_dataset('hemispheres',data = hemispheres, dtype='|b1')
        fp.create_dataset('weights',data = weights, dtype='f8')
        fp.create_dataset('tract_lengths',data = distances, dtype='f8')
        fp.create_dataset('region_labels',data = labels, dtype='|S58')
        fp.create_dataset('centres',data = centres, dtype='|f8')

In [None]:
def rotate_reference(allen):
    """Reorient a 3-D volume and return it as a new float array.

    The second axis is reversed and the axes are permuted so that, for an
    input of shape ``(s0, s1, s2)``, the result has shape ``(s2, s0, s1)``
    with ``result[a, b, c] == allen[b, s1 - 1 - c, a]``.
    """
    flipped = allen[:, ::-1, :]          # reverse the second axis
    reordered = flipped.transpose(2, 0, 1)   # (s0, s1, s2) -> (s2, s0, s1)
    allen_rotate = np.zeros(reordered.shape, dtype=float)
    allen_rotate[...] = reordered
    return allen_rotate

In [None]:
def construct_centres(mcc, order, key_ord, NewMat,structure):
    """Compute region centres and labels for both hemispheres.

    For every source name in ``NewMat[1]`` that matches an acronym of
    ``structure``, the structure mask is rotated, the mean voxel position of
    its first half (along the first axis) is taken as the centre and then
    passed through a fixed affine transform. The centre of the other
    hemisphere is obtained by reflecting the first coordinate
    (``mask.shape[0] - x``) before applying the same transform.

    Parameters
    ----------
    mcc : object
        Must provide ``get_structure_mask(structure_id)`` returning
        ``(mask, info)``.
    order, key_ord :
        Unused; kept so existing callers keep working.
    NewMat : sequence
        ``NewMat[1]`` is a mapping whose keys are the source acronyms.
    structure : mapping
        Provides the parallel sequences ``'id'`` and ``'acronym'``.

    Returns
    -------
    centres : numpy.ndarray
        Shape ``(2 * n, 3)``: the ``n`` 'Right' centres followed by the ``n``
        'Left' centres. A structure without voxels in the mask half uses the
        coordinate (0, 0, 0) before transformation.
    names : list of str
        ``'Right <acronym>'`` for every region followed by
        ``'Left <acronym>'``, in the same order as ``centres``.
    """
    Affinity = np.asarray([[0,0,0.001,-5.7125],
                           [-0.001,0,0,5.3625],
                           [0,-0.001,0,5.1625]])

    # Keep each source name together with the id it matched, so labels and
    # centres cannot drift apart when a name has no matching acronym.
    matched = [(val, structure['id'][idx2]) for val in list(NewMat[1].keys())
               for idx2,val2 in enumerate(structure['acronym']) if val == val2]
    n_regions = len(matched)
    centres = np.zeros((n_regions * 2, 3), dtype=float)

    for row,(name,node_id) in enumerate(matched):
        coord = [0, 0, 0]
        mask, _ = mcc.get_structure_mask(node_id)
        mask = rotate_reference(mask)
        mask_r = mask[:mask.shape[0] // 2, :, :]
        xyz = np.where(mask_r)
        if xyz[0].shape[0] > 0:  # Check if the area is in the annotation volume
            coord[0] = np.mean(xyz[0])
            coord[1] = np.mean(xyz[1])
            coord[2] = np.mean(xyz[2])
        centres[row, :] = Affinity.dot([coord[0],coord[1],coord[2],1])
        # Reflect along the first axis for the opposite hemisphere; the offset
        # matches the allocation above (n_regions rows per hemisphere).
        coord[0] = (mask.shape[0]) - coord[0]
        centres[row + n_regions, :] = Affinity.dot([coord[0],coord[1],coord[2],1])
        print(centres[row,:])

    names = [str('Right ' + name) for name,_ in matched]
    names += [str('Left ' + name) for name,_ in matched]

    return centres, names

In [13]:
def ParseAtlasData():
    """Assemble the per-Cre-line connectivity dictionary and pickle it.

    Steps performed, in order:
      1. load the wild-type injection metadata and the structure tables from
         pickles under ``saves/``;
      2. write ``structures.csv`` from the structure tables;
      3. query the API for the projection unionizes of every wild-type
         experiment and store the left/right connectivity matrices in
         ``conL.hdf5`` / ``conR.hdf5``;
      4. query the API for the expression data sets and their unionizes and
         build the expression matrix (not used afterwards in this function);
      5. build ``CreLineDict`` (one entry per Cre line plus ``'wild_type'``),
         pickle it to ``scrambled_debug/CreLineDict.pkl`` and return it.

    NOTE(review): ``CreMeta`` and ``cre_pr_R`` are read below but never
    assigned in this function - their assignments are commented out or sit
    inside a string literal. Unless they exist as globals in the kernel, the
    function raises NameError at the first use of ``CreMeta``.
    """
    
    # *********** initial download of tracing experiments and brain structures ***********#
    print 'Commencing cre-line mission'
    # *********** Partition tracers based on their cre-line *****#
    cre15      = ['Syt6-Cre_KI148', 'Ntsr1-Cre_GN220', 'Sim1-Cre_KJ18',
                'Efr3a-Cre_NO108', 'Chrna2-Cre_OE25', 'A93-Tg1-Cre',
                'Tlx3-Cre_PL56', 'Rbp4-Cre_KL100', 'Rorb-IRES2-Cre',
                'Scnn1a-Tg3-Cre', 'Nr5a1-Cre', 'Sepw1-Cre_NP39',
                'C57BL/6J', 'Emx1-IRES-Cre', 'Cux2-IRES-Cre']
    #CreMeta, ProjPerCre = GetCreLines(infile = cre15)
    print 'Cre-line parsing has been completed'
    print 'Commencing rAAv mission'
    #WTMeta, ProjPerExp = ReadConnectivityData()
    # Wild-type metadata comes from a previously saved pickle instead of a
    # fresh download. Only load pickles from trusted sources.
    WTMeta = pk.load(open('saves/MetaPerInj.pkl','rb'))
    print 'rAAv parsing has been completed'
    #structureIds,structHash = DownloadStructures() 
    #pk.dump(structureIds, open('saves/structureIds.pkl','wb'))
    #pk.dump(structHash, open('saves/structHash.pkl','wb'))
    structureIds      = pk.load(open('saves/structureIds.pkl','rb'))
    structHash        = pk.load(open('saves/structHash.pkl','rb'))
    # Export the structure table (one row per id in structureIds) as CSV.
    with open('structures.csv', "w") as fp:
        M = []
        for sid in structureIds:
            v = structHash[sid]
            M.append([v['id'], v['acronym'], v['name'],
                      v['parent_structure_id'], v['color_hex_triplet']])
        w = csv.writer(fp, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
        w.writerow(['id', 'acronym', 'name',
                    'parent_structure_id', 'color_hex_triplet'])
        for line in M:
            w.writerow(line)

    #pk.dump(structureIds,open('structureIds.pkl','wb'))
    #pk.dump(structHash,open('structHash.pkl','wb'))
    #*********** wild type-data unionization **************************************#
    # One paged API query per wild-type experiment.
    unionizes_wt_proj = [QueryAPI(UNIONIZE_CON_FMT % (API_PATH,d['id'])) for d in WTMeta]
    #mcc = MouseConnectivityCache(resolution = 100, cache = True, manifest_file = 'voxel_model_manifest.json')
    #unionizes_wt_proj = mcc.get_structure_unionizes([val['id'] for val in WTMeta], is_injection=False)
    #pk.dump(unionizes_wt_proj, open('unionizes_wt_proj.pkl', 'wb'))
    #unionizes_wt_proj = pk.load(open('unionizes_wt_proj.pkl', 'rb'))
    #np.asarray(unionizes_wt_proj[['max_voxel_x','max_voxel_y','max_voxel_z']])
    print 'unionizing of wild-type is complete'
  
    conL, conR = CreateConnectivityMatrix(WTMeta,
                                           structureIds,structHash,
                                           unionizes_wt_proj)
    # NOTE(review): these two HDF5 handles are never closed explicitly.
    fp1 = h5py.File('conL.hdf5', 'w')
    fp2 = h5py.File('conR.hdf5', 'w')
    fp1.create_dataset('dataset1', data = conL)
    fp2.create_dataset('dataset1', data = conR)
    
    # Cre-data unionization *************************************************#
    #unionizes_cre_proj = [QueryAPI(UNIONIZE_CON_FMT % (API_PATH,d['id'])) for d in CreMeta]
    #pk.dump(unionizes_cre_proj, open('unionizes_cre_proj.pkl', 'wb'))
    print 'unionization is complete'
    #cre_pr_L, cre_pr_R = CreateConnectivityMatrix(CreMeta, structureIds, structHash, unionizes_cre_proj)
    #fp1 = h5py.File('cre_pr_L.hdf5', 'w')
    #fp2 = h5py.File('cre_pr_R.hdf5', 'w')
    #fp1.create_dataset('dataset1', data = cre_pr_L)
    #fp2.create_dataset('dataset1', data = cre_pr_R)
    #************************************************************************#


    # Expression Data Parsing
    ExpMeta = QueryAPI(DATA_EXP_SET_QUERY_URL)
    #pk.dump(ExpMeta,open('GeneMeta.pkl','wb'))
    unionizes_exp = [QueryAPI(UNIONIZE_EXP_FMT % (API_PATH,d['id'])) for d in ExpMeta]
    # NOTE(review): gene_expression is computed but neither saved nor returned.
    gene_expression = CreateExpressionMatrix(ExpMeta,structureIds,structHash,unionizes_exp)
    #fp1 = h5py.File('G_Exp.hdf5','w')
    #fp1.create_dataset('dataset1',data = gene_expression)
    
    # The block below is a bare string literal, i.e. disabled code: none of
    # these loads is executed.
    ''' 
    conR              = pk.load(open('saves/conR.pkl','rb'))
    #conL              = pk.load(open('saves/conL.pkl','rb'))
    CreMeta           = pk.load(open('MetaPerCre.pkl','rb'))
    cre_pr_L          = h5py.File('cre_pr_L.hdf5', 'r')['dataset1']
    cre_pr_R          = h5py.File('cre_pr_R.hdf5', 'r')['dataset1']
    #unionizes_wt_proj = pk.load(open('unionizes_wt_proj.pkl', 'rb')) 
    '''

    # NOTE(review): first use of CreMeta, which is not assigned above.
    # creCategories is not used afterwards.
    creCategories = [cre['transgenic-line'] for cre in CreMeta]
    creCategories = list(set(creCategories))

    # 3x4 affine matrix applied to homogeneous [x, y, z, 1] injection coordinates.
    # NOTE(review): source/target coordinate spaces and units are not stated
    # here - confirm with the author.
    Affinity = np.asarray([[0,0,0.001,-5.7125],
                           [-0.001,0,0,5.3625],
                           [0,-0.001,0,5.1625]])

    InjCoo  = []
    InjCoo2 = []
    # NOTE(review): assumes every WTMeta record has an 'injection-coordinates'
    # entry - verify against the contents of saves/MetaPerInj.pkl.
    for idx,injection in enumerate(WTMeta):
      coord = injection['injection-coordinates'];
      coord = np.array([coord[0],coord[1],coord[2],1.0]);
      InjCoo.append( Affinity.dot(coord) )
    InjCoo = np.asarray(InjCoo,dtype = 'float32')      # Wild_type coordinates
    for idx,injection in enumerate(CreMeta):
      coord = injection['injection-coordinates'];
      coord = np.array([coord[0],coord[1],coord[2],1.0]);
      InjCoo2.append( Affinity.dot(coord) )
    InjCoo2 = np.asarray(InjCoo2); InjCoo = np.asarray(InjCoo) 
    
    # One entry per Cre line of cre15 that has at least one experiment in
    # CreMeta. NOTE(review): cre_pr_R is not assigned in this function.
    CreLineDict = OrderedDict()
    for category in cre15:
        cre_members = np.asarray([idx for idx, val in enumerate(CreMeta)\
        if val['transgenic-line'] == category])
        if len(cre_members) > 0:
             
            CreLineDict[category] = \
            {'ConMat' : cre_pr_R[:, cre_members],\
            'structure-abbrev' : [CreMeta[val]['structure-abbrev'] 
                                  for val in cre_members],\
            'layer'       : [CreMeta[val]['layer'] for val in cre_members],\
            'cell-type'   : [CreMeta[val]['Cell Type'] for val in cre_members],\
            'indices'     : cre_members,\
            'id'          : [CreMeta[val]['id'] for val in cre_members],\
            'Coordinates' : InjCoo2[cre_members,:],\
            'injection_volume': [CreMeta[val]['injection_volume']
                                 for val in cre_members]}


    # The wild-type entry uses the right-hemisphere matrix and has no 'indices'
    # key. NOTE(review): it reads 'injection-volume' (hyphen) while the Cre
    # records use 'injection_volume' (underscore) - confirm the key in WTMeta.
    CreLineDict['wild_type'] = {'ConMat' : conR, \
                                'structure-abbrev' : \
                                [val['structure-abbrev'] for val in WTMeta],\
                                'layer'    : ['inspecific' for idx in range(len(WTMeta))],\
                                'cell-type': ['inspecific' for idx in range(len(WTMeta))],\
                                'id'       : [val['id'] for val in WTMeta],\
                                'Coordinates' : InjCoo,\
                                'injection_volume': [val['injection-volume'] for val in WTMeta]}
    
    # pk.dump returns None, so ConDict is always None; the dump is the effect.
    ConDict        = pk.dump(CreLineDict, open('scrambled_debug/CreLineDict.pkl','wb'))
    
    return CreLineDict

### Make an xml file with the Allen anatomical template

### Experimental: Custom Unionization Script

### Final Step - Pack the connectome in an hdf5 file in a format ready to be used by the TVMB pipeline