- Normalize microarray expression for each of the 6 donors and compute the AUROC for the SRP gene list on each donor individually
- Get CHAT expression across the sampled brain structures in each donor
- Write the values to a CSV file that is used to generate the plot in R

In [1]:
import glob
from pathlib import Path
import os
import pandas as pd
from pandas.api.types import is_numeric_dtype
from scipy.stats import zscore


# Root directories of the raw Allen microarray downloads (adult and fetal).
# The dataset builders below look for per-donor `normalized_microarray*`
# folders inside these directories; paths are relative to the working directory.
adult_data_path = Path('./data/raw/allen_HBA')
fetal_data_path = Path('./data/raw/allen_human_fetal_brain')


def read_expression_file(file_name):
    """Load a header-less expression CSV indexed by its first column.

    The first column becomes the index and is labelled ``probe_id``; the
    remaining columns keep the integer labels pandas assigns (1, 2, ...).
    """
    table = pd.read_csv(file_name, header=None, index_col=0)
    table.index.name = 'probe_id'
    return table


def read_samples_file(samples_file):
    """Load the sample annotation CSV with a 1-based ``sample_id`` index.

    Rows are numbered 1..N in file order, which matches the integer column
    labels produced when the expression file is read without a header.
    """
    annotations = pd.read_csv(samples_file)
    # shift the default 0-based row numbers so that the first sample is 1
    annotations.index = annotations.index + 1
    annotations.index.name = 'sample_id'
    return annotations


def get_probes_data(probes_file, probes_strategy='default'):
    """Load probe metadata and optionally re-annotate / QC-filter it.

    Parameters
    ----------
    probes_file : path or file-like
        CSV with probe metadata. Fetal-style column names
        (``probeset_name`` / ``probeset_id``) are renamed to the adult ones.
    probes_strategy : str
        ``'default'``     keep the gene symbols found in the file.
        ``'reannotator'`` replace gene symbols with the re-annotation table
                          (inner merge on ``probe_name``).
        ``'qc_filter'`` / ``'qc_scale'``
                          replace gene symbols with the updated annotation
                          table (left merge), merge in the probe QC table and
                          keep only probes whose ``qc_filter`` flag is True.

    Returns
    -------
    DataFrame indexed by ``probe_id``.
    """
    assert probes_strategy in ['default', 'reannotator', 'qc_filter', 'qc_scale']

    probes_df = pd.read_csv(probes_file)

    # harmonise fetal column names with the adult dataset
    if 'probeset_name' in probes_df.columns:
        probes_df = probes_df.rename(columns={'probeset_name': 'probe_name',
                                              'probeset_id': 'probe_id'})
    probes_df = probes_df.loc[:, ['probe_id', 'probe_name', 'gene_symbol']]

    if probes_strategy == 'reannotator':
        reannotations = get_probe_reannotations(
            './data/raw/gene_symbol_annotations/AllenInstitute_custom_Agilent_Array.txt')
        # swap the original gene_symbol column for the re-annotated symbols
        probes_df = (probes_df.drop('gene_symbol', axis=1)
                              .merge(reannotations, on='probe_name'))

    elif probes_strategy in ['qc_filter', 'qc_scale']:
        # gene symbols updated with the same method as the R spatial
        # transcriptomic analysis
        probes_df = probes_df.drop('gene_symbol', axis=1)
        updated_annotations = pd.read_csv(
            './data/probe_annotations/updated_gene_symbol_annotations.csv')
        probes_df = probes_df.merge(updated_annotations, on='probe_name', how='left')

        qc_filt = get_probe_qc_filter(
            './data/probe_annotations/Miller et al. doi.org_10.1186_1471-2164-15-154 12864_2013_7016_MOESM8_ESM.xlsx')
        probes_df = probes_df.merge(qc_filt, left_on='probe_name', right_on='probe')
        passed_qc = probes_df.qc_filter == True
        probes_df = probes_df[passed_qc]
        # slope / intercept must be numeric for the later scaling step
        assert is_numeric_dtype(probes_df.m)
        assert is_numeric_dtype(probes_df.b)

        print('After getting probes_df which merged qc data, shape is {}'.format(
            probes_df.shape))

    return probes_df.set_index('probe_id')


def get_probe_reannotations(re_annotations_file):
    """Prepare the probe re-annotation table for merging with the probes df.

    Reads the ``#PROBE_ID`` and ``Gene_symbol`` columns of a tab-separated
    file, drops probes without a symbol, and returns a long table with columns
    ``probe_name`` and ``gene_symbol``. A probe annotated with several
    ``;``-separated symbols yields one row per symbol.
    """
    table = pd.read_table(re_annotations_file,
                          usecols=['#PROBE_ID', 'Gene_symbol'])
    table = (table.rename(columns={'#PROBE_ID': 'probe_name'})
                  .dropna()
                  .set_index('probe_name'))

    # one column per symbol, then stack the columns into rows
    symbols_wide = table.Gene_symbol.str.split(';', expand=True)
    symbols_long = symbols_wide.stack().reset_index()
    # 'level_1' is the position of the symbol within its probe; not needed
    symbols_long = symbols_long.drop('level_1', axis=1)

    return symbols_long.rename(columns={0: 'gene_symbol'})


def get_probe_qc_filter(qc_file):
    """Pre-process the probe QC spreadsheet.

    Keeps spreadsheet columns 0-3 and 7, discards the row labelled 0, and
    returns the columns ``probe``, ``m``, ``b`` (both converted to numeric)
    and ``qc_filter``.
    """
    sheet = pd.read_excel(qc_file)
    kept = sheet.iloc[:, [0, 1, 2, 3, 7]].drop(0)
    kept.columns = ['probe', 'gene_symbol', 'm', 'b', 'qc_filter']
    for coefficient in ('m', 'b'):
        kept[coefficient] = pd.to_numeric(kept[coefficient])
    return kept.drop(columns='gene_symbol')


def scale_expression_vals(exp_df, probes_qc):
    """Apply the per-probe linear QC correction ``m * x + b``.

    The global mean of ``exp_df`` is subtracted before the correction and
    added back afterwards.

    Parameters
    ----------
    exp_df : DataFrame
        Expression values, one row per probe (index) and one column per sample.
    probes_qc : DataFrame
        Indexed like ``exp_df``; must provide the slope ``m`` and intercept
        ``b`` columns (anything convertible to float).

    Returns
    -------
    DataFrame
        Scaled expression with the same columns as ``exp_df``. Probes missing
        from ``probes_qc`` (inner join on the index) and rows that contain NaN
        after scaling are dropped.

    Notes
    -----
    The previous implementation wrapped the scaling in a broad
    ``except Exception`` that only printed a message and then returned the
    *unscaled* values. Scaling errors now propagate instead of being hidden.
    """
    print('Expression df shape before scaling: {}'.format(exp_df.shape))
    glob_scale = exp_df.values.flatten().mean()
    centered = exp_df - glob_scale

    merged = centered.merge(probes_qc.loc[:, ['m', 'b']],
                            left_index=True, right_index=True)
    slope = merged['m'].astype(float)
    intercept = merged['b'].astype(float)
    values = merged.drop(['m', 'b'], axis=1)

    # row-wise (axis=0) broadcast: every sample of a probe uses that probe's m, b
    scaled = values.mul(slope, axis=0).add(intercept, axis=0)

    scaled = scaled.dropna()
    print('Expression df shape after scaling: {}'.format(scaled.shape))
    return scaled + glob_scale


def get_donor_data(donor_file_list, probes_strategy):
    """Load the expression, sample and probe tables of one donor.

    Files are recognised by substrings of their stem so that both the adult
    and the fetal naming schemes work; files matching none of the patterns
    are ignored.

    Parameters
    ----------
    donor_file_list : iterable of pathlib.Path
        Files found in the donor's folder.
    probes_strategy : str
        Forwarded to ``get_probes_data``.

    Returns
    -------
    (exp_df, samples_df, probes_df)

    Raises
    ------
    FileNotFoundError
        If the expression, samples or probes file is not in the list
        (previously this surfaced as an ``UnboundLocalError``).
    """
    # to work with both fetal and adult metadata
    probe_file_strings = ('Probes', 'rows_meta')
    samples_file_strings = ('Sample', 'columns_meta')
    expression_file_strings = ('Expression', 'expression')

    exp_df = samples_df = probes_df = None

    for donor_file in donor_file_list:
        stem = donor_file.stem
        if any(tag in stem for tag in probe_file_strings):
            probes_df = get_probes_data(donor_file, probes_strategy=probes_strategy)
        elif any(tag in stem for tag in samples_file_strings):
            samples_df = read_samples_file(donor_file)
        elif any(tag in stem for tag in expression_file_strings):
            exp_df = read_expression_file(donor_file)

    loaded = (('expression', exp_df), ('samples', samples_df), ('probes', probes_df))
    missing = [label for label, table in loaded if table is None]
    if missing:
        raise FileNotFoundError(
            'Donor folder is missing required file(s): {}'.format(', '.join(missing)))

    return exp_df, samples_df, probes_df


def get_mean_expression_by_brain_area(exp_df, samples_df):
    """Average expression over all samples taken from the same brain structure.

    Parameters
    ----------
    exp_df : DataFrame
        Rows are genes, columns are sample ids.
    samples_df : DataFrame
        Indexed by sample id; must contain a ``structure_name`` column.

    Returns
    -------
    DataFrame
        Rows are genes (index named ``gene_symbol``), columns are brain
        structures.
    """
    # one column per sample is expected
    assert exp_df.shape[1] == samples_df.shape[0]

    # merge in metadata (brain area of sample)
    annotated_df = exp_df.T.merge(samples_df[['structure_name']],
                                  left_index=True, right_index=True)

    # get mean expression level for samples within a brain area
    expression_by_structure = annotated_df.groupby('structure_name').mean()

    # Name the index on the frame that is actually returned. The old code
    # renamed the index of a throwaway transpose in place, which only worked
    # when that temporary shared its Index object with the original frame.
    genes_by_structure = expression_by_structure.T
    genes_by_structure.index.name = 'gene_symbol'

    return genes_by_structure


def get_exp_by_genes(exp_df, probes_df):
    """Collapse probe-level expression to one averaged row per gene symbol.

    ``exp_df`` and ``probes_df`` are joined on their indexes (inner join), so
    probes without metadata are dropped; the rows that remain are grouped by
    ``gene_symbol`` and averaged.
    """
    symbols = probes_df[['gene_symbol']]
    with_symbols = exp_df.merge(symbols, left_index=True, right_index=True)
    return with_symbols.groupby('gene_symbol').mean()


def strip_left_right(structure_name):
    """Drop the hemisphere fragment from a comma-separated structure name.

    Fragments that equal ``left`` / ``right`` (or the capitalised forms) once
    surrounding whitespace is ignored are removed; all other fragments are
    kept untouched and re-joined with commas.
    """
    hemisphere_labels = ('left', 'right', 'Left', 'Right')
    return ','.join(fragment
                    for fragment in structure_name.split(',')
                    if fragment.strip() not in hemisphere_labels)


def get_single_donor_tidy_df(exp_df, samples_df, probes_df, donor_id, probes_strategy):
    """Build the long-format, normalised expression table of a single donor.

    Steps: strip hemisphere labels from the structure names, optionally apply
    the QC scaling (``probes_strategy == 'qc_scale'``), average probes per
    gene, average samples per brain area, rank genes within each brain area,
    then z-score each gene's ranks across brain areas.

    Note that ``samples_df.structure_name`` is overwritten on the frame that
    was passed in.

    Returns a DataFrame with columns ``gene_symbol``, ``brain_area``,
    ``value`` and ``donor_id``.
    """
    # remove left/right from brain structure_names
    cleaned_names = samples_df.structure_name.apply(strip_left_right)
    samples_df.structure_name = cleaned_names

    if probes_strategy == 'qc_scale':
        exp_df = scale_expression_vals(exp_df, probes_df)
        print('size of df after scaling: {}'.format(exp_df.shape))

    # probe level -> gene level
    gene_level = get_exp_by_genes(exp_df, probes_df)
    print('size of df after merging probe info, grouping by gene: {}'.format(
        gene_level.shape))

    # sample level -> brain-area level
    area_level = get_mean_expression_by_brain_area(gene_level, samples_df)

    # rank genes inside every brain area, then standardise across areas
    ranks = area_level.rank(ascending=True)
    standardized = pd.DataFrame(zscore(ranks, axis=1),
                                index=ranks.index,
                                columns=ranks.columns)

    tidy = standardized.reset_index().melt(id_vars='gene_symbol',
                                           var_name='brain_area')
    tidy['donor_id'] = donor_id
    return tidy


def generate_HBA_dataset(dataset, probes_strategy, selected_donor_ids=['10021', '9861', '14380', '15697', '15496', '12876']):
    """Build the gene x brain-area expression matrix from the raw donor folders.

    Parameters
    ----------
    dataset : {'adult', 'fetal'}
        Selects which raw data directory is searched.
    probes_strategy : str
        Forwarded to the probe loading / per-donor processing functions.
    selected_donor_ids : list of str
        Donor ids; each must match a ``normalized_microarray*<id>*`` directory
        inside the dataset directory. The default list is never modified.

    Returns
    -------
    DataFrame
        Rows are gene symbols, columns are brain areas. Values from different
        donors are combined by ``pivot_table``'s default aggregation (mean).

    Raises
    ------
    FileNotFoundError
        If no directory is found for one of the requested donors (previously
        an ``IndexError`` from indexing an empty glob result).
    """
    assert dataset in ['adult', 'fetal']
    if dataset == 'adult':
        print('--- Generating adult human HBA dataset ---')
        data_path = adult_data_path
    elif dataset == 'fetal':
        print('--- Generating fetal human HBA dataset ---')
        data_path = fetal_data_path

    # Resolve every donor folder before any processing starts so that a
    # missing download fails fast with a readable message.
    donor_folders = []
    for requested_id in selected_donor_ids:
        pattern = f'normalized_microarray*{requested_id}*'
        # ignore non-directory matches (e.g. an archive next to the folder)
        matches = sorted(p for p in data_path.glob(pattern) if p.is_dir())
        if not matches:
            raise FileNotFoundError(
                f'No donor directory matching {pattern!r} found in {data_path}')
        donor_folders.append(matches[0])

    all_donors = []
    for i, donor_folder in enumerate(donor_folders):
        print(f'Processing donor #{i+1}')
        # last underscore-separated token of the folder name
        donor_id = donor_folder.stem.split('_')[-1]
        print(f'Donor ID: {donor_id}')
        donor_files = list(donor_folder.glob('*.csv'))
        expression, samples, probes = get_donor_data(donor_files,
                                                     probes_strategy)

        tidy_donor = get_single_donor_tidy_df(expression,
                                              samples,
                                              probes,
                                              donor_id=donor_id,
                                              probes_strategy=probes_strategy)
        all_donors.append(tidy_donor)

    all_donors_long = pd.concat(all_donors)

    structure_genes_exp_matrix = pd.pivot_table(all_donors_long,
                                                values='value',
                                                index='gene_symbol',
                                                columns='brain_area')
    return structure_genes_exp_matrix


def get_dataset(dataset, probes_strategy, selected_donor_ids=['10021', '9861', '14380', '15697', '15496', '12876']):
    """Return the gene x brain-area matrix, using an on-disk cache.

    The matrix is cached as a TSV under ``data/processed_HBA``; the file name
    encodes the dataset, the probe strategy and the donor ids in the order
    given. When the cache file exists it is loaded, otherwise the matrix is
    generated from the raw data and written to the cache.
    """
    assert probes_strategy in ['reannotator', 'qc_filter', 'qc_scale', 'default']
    assert dataset in ['adult', 'fetal']

    filename = '{0}_brainarea_vs_genes_exp_{1}_donors_{2}.tsv'.format(
        dataset, probes_strategy, '-'.join(selected_donor_ids))
    HBA_data_out_path = os.path.join('data', 'processed_HBA', filename)

    if not os.path.exists(HBA_data_out_path):
        # cache miss: build from raw data and store the result
        os.makedirs('./data/processed_HBA', exist_ok=True)
        generated = generate_HBA_dataset(dataset,
                                         probes_strategy,
                                         selected_donor_ids=selected_donor_ids)
        print('-- Writing data to ' + HBA_data_out_path + ' -- ')
        generated.to_csv(HBA_data_out_path, sep='\t')
        return generated

    print(('Processed HBA brain dataset found locally. ' +
           'Loading from {}'.format(HBA_data_out_path)))
    return pd.read_csv(HBA_data_out_path, index_col='gene_symbol', sep='\t')


def generate_aggregate_data(probes_strategy='default',
                            donor_ids=['9861','10021', '12876', '14380', '15496', '15697'],
                            min_donors=1):
    """Average (un-normalised) gene expression per brain structure over donors.

    Donor folders are the sub-directories of the adult data directory whose
    name contains ``microarray`` and any of ``donor_ids`` as a substring.
    For each donor, probes are averaged per gene and samples per structure
    (hemisphere labels stripped). Structures sampled in fewer than
    ``min_donors`` donors are discarded; the remaining values are averaged
    per donor and structure, then across donors.

    Returns a DataFrame with genes as rows and brain structures as columns.
    """
    selected_donor_paths = []
    for candidate in adult_data_path.iterdir():
        looks_like_donor = candidate.is_dir() and 'microarray' in candidate.stem
        if looks_like_donor and any(donor_id in candidate.stem for donor_id in donor_ids):
            selected_donor_paths.append(candidate)

    per_donor = []
    for idx, donor_folder in enumerate(selected_donor_paths):
        print(f'Processing donor #{idx+1}')
        print(f'Donor directory: {donor_folder}')
        # the whole folder name serves as the donor label here
        donor_id = donor_folder.stem
        print(f'Donor ID: {donor_id}')

        expression, samples, probes = get_donor_data(list(donor_folder.glob('*')),
                                                     probes_strategy=probes_strategy)
        donor_gene_exp = get_exp_by_genes(expression, probes)
        samples.structure_name = samples.structure_name.apply(strip_left_right)
        # mean of gene expression over the samples of each brain structure;
        # transposed so that structures become the rows
        donor_expression = get_mean_expression_by_brain_area(donor_gene_exp, samples).T
        donor_expression['donor_id'] = donor_id
        per_donor.append(donor_expression)

    concat_data = pd.concat(per_donor)

    # how many distinct donors contributed to every structure
    donors_per_structure = (concat_data.groupby('structure_name')
                                       .donor_id.nunique()
                                       .reset_index())
    well_sampled = donors_per_structure.donor_id >= min_donors
    selected_structures = donors_per_structure[well_sampled].structure_name

    # the index of the concatenated data is the sampled structure_name
    selected_samples = concat_data[concat_data.index.isin(selected_structures)]
    per_donor_means = selected_samples.groupby(['donor_id', 'structure_name']).mean()
    final_matrix = per_donor_means.groupby('structure_name').mean()

    return final_matrix.T

In [5]:
from scipy.stats import mannwhitneyu
import statsmodels.sandbox.stats.multicomp
from sklearn import metrics


def calc_pvalues(exp_series, gene_list):
    """Two-sided Mann-Whitney U p-value: listed genes vs. all other genes.

    ``exp_series`` is indexed by gene symbol; entries whose label is in
    ``gene_list`` form one group, every other entry forms the second group.
    """
    in_list = exp_series.index.isin(gene_list)
    targets = exp_series[in_list]
    background = exp_series[~in_list]
    assert len(targets) + len(background) == len(exp_series)

    return mannwhitneyu(targets, background, alternative='two-sided').pvalue


def calc_AUC(exp_series, gene_list):
    """AUROC for a single brain-area series.

    Genes of ``exp_series`` (indexed by gene symbol) that appear in
    ``gene_list`` are the positive class; the expression values are the scores.
    """
    is_listed = exp_series.index.isin(gene_list)
    return metrics.roc_auc_score(is_listed, exp_series.values)


def generate_stats_table(exp_df, gene_list, verbose=True):
    """
    Summarise, per brain area, how strongly a gene list is enriched.

    Parameters
    ----------
    exp_df : dataframe
        expression matrix: rows->genes ; columns->brain_areas
    gene_list : series
        gene symbols of interest (must be a pandas Series, ``isin`` is used)
    verbose : bool
        print how many of the submitted genes are present in ``exp_df``

    Returns
    -------
    table
        dataframe indexed by brain area with columns ``AUROC``, ``p``
        (two-sided Mann-Whitney U) and ``pFDR`` (Benjamini-Hochberg corrected),
        sorted by descending AUROC
    """
    present = gene_list.isin(exp_df.index)
    count = len(gene_list)
    n_genes_in_matrix = present.sum()
    genes_not_found = gene_list[~present].values

    if verbose:
        print('You submitted a gene list with {} genes.\n\
    {} of those genes are present in the reference dataset.\n\
    Genes not found in our reference data: {}'.format(
            count, n_genes_in_matrix, genes_not_found))

    def per_area_pvalue(col):
        return calc_pvalues(exp_series=col, gene_list=gene_list)

    def per_area_auc(col):
        return calc_AUC(exp_series=col, gene_list=gene_list)

    pvalues = exp_df.apply(per_area_pvalue)
    # element [1] of the multipletests result holds the corrected p-values
    corrected = statsmodels.sandbox.stats.multicomp.multipletests(
        pvalues, method="fdr_bh")[1]
    fdr_corrected = pd.Series(corrected, index=pvalues.index)
    auc = exp_df.apply(per_area_auc)

    table = pd.concat([auc, pvalues, fdr_corrected],
                      keys=['AUROC', 'p', 'pFDR'], axis=1)
    table = table.set_index(exp_df.columns)
    return table.sort_values('AUROC', ascending=False)

### Process all 6 donors at once

In [6]:
# Gene x brain-area matrix for the default set of six donors
# (loaded from the TSV cache when it exists, generated otherwise).
all_donors = get_dataset(dataset='adult', probes_strategy='qc_filter')

Processed HBA brain dataset found locally. Loading from data/processed_HBA/adult_brainarea_vs_genes_exp_qc_filter_donors_10021-9861-14380-15697-15496-12876.tsv


In [7]:
# The gene list file has no header row: one gene symbol per line.
srp_genes = pd.read_csv('./data/gene_lists/GO.SRPdependentTranslationalProteinTargetingMembrane.txt', header=None, names=['gene_symbol'])
# keep it as a Series, which is what generate_stats_table expects
srp_genes = srp_genes.loc[:, 'gene_symbol']

In [8]:
# Per-brain-area AUROC / p / pFDR of the SRP gene list in the six-donor matrix.
all_donors_results = generate_stats_table(exp_df=all_donors, gene_list=srp_genes)

You submitted a gene list with 87 genes.
    87 of those genes are present in the reference dataset.
    Genes not found in our reference data: []


In [9]:
# Ten brain areas with the highest AUROC (the table is sorted by AUROC, descending).
all_donors_results.head(10)

Unnamed: 0,AUROC,p,pFDR
substantia innominata,0.840875,4.66823e-28,2.1660589999999998e-26
nucleus accumbens,0.787368,2.0808679999999998e-20,3.713549e-19
septal nuclei,0.78446,4.981247e-20,8.034374999999999e-19
central gray of the pons,0.777503,3.881691e-19,5.628452e-18
medial parabrachial nucleus,0.77398,1.077122e-18,1.469955e-17
lateral parabrachial nucleus,0.773447,1.255559e-18,1.618276e-17
"substantia nigra, pars reticulata",0.769991,3.369243e-18,3.9083220000000004e-17
"globus pallidus, internal segment",0.765021,1.363611e-17,1.375469e-16
"globus pallidus, external segment",0.764426,1.6095330000000002e-17,1.555882e-16
paraterminal gyrus,0.760793,4.391694e-17,3.918743e-16


In [10]:
# Create the output directory if needed; exist_ok makes this a no-op when it
# is already there (same idiom as used in get_dataset).
os.makedirs('./results/microarray/', exist_ok=True)
all_donors_results.to_csv('./results/microarray/srp_six_brains_regional_enrichment.csv')

In [11]:
# Same matrix restricted to four donors. The cache file name encodes the ids
# in the order given, so a different order would create a separate cache file.
four_donors = get_dataset(dataset='adult', probes_strategy='qc_filter', selected_donor_ids=['14380', '12876', '15496', '15697'])

Processed HBA brain dataset found locally. Loading from data/processed_HBA/adult_brainarea_vs_genes_exp_qc_filter_donors_14380-12876-15496-15697.tsv


In [12]:
# Per-brain-area AUROC / p / pFDR of the SRP gene list in the four-donor matrix.
four_donors_results = generate_stats_table(exp_df=four_donors, gene_list=srp_genes)

You submitted a gene list with 87 genes.
    87 of those genes are present in the reference dataset.
    Genes not found in our reference data: []


In [13]:
# Save the four-donor enrichment table; ./results/microarray/ must already
# exist (it is created in the cell that saves the six-donor table).
four_donors_results.to_csv('./results/microarray/srp_four_brains_regional_enrichment.csv')
four_donors_results.head(10)

Unnamed: 0,AUROC,p,pFDR
medial parabrachial nucleus,0.766002,1.0370700000000001e-17,1.273292e-16
subcallosal cingulate gyrus,0.759053,7.068262e-17,7.81043e-16
VI,0.753385,3.260264e-16,3.275084e-15
head of caudate nucleus,0.752497,4.130169e-16,3.968553e-15
nucleus accumbens,0.750515,6.983407e-16,6.430554e-15
septal nuclei,0.747778,1.432149e-15,1.26602e-14
"substantia nigra, pars reticulata",0.745536,2.565192e-15,2.180413e-14
"IV, lateral hemisphere",0.745119,2.857143e-15,2.338624e-14
"globus pallidus, external segment",0.7435,4.335573e-15,3.304006e-14
"globus pallidus, internal segment",0.738749,1.451444e-14,9.507221e-14


### Process each donor separately

In [14]:
# NOTE(review): donor_ids is reassigned (in a different order) in a later cell
# before it is first read, so this particular ordering is never used.
donor_ids = ['10021', '9861', '14380', '15697', '15496', '12876']

In [15]:
# One gene x brain-area matrix per donor (cached individually by get_dataset).
donor_9861, donor_10021, donor_12876, donor_14380, donor_15496, donor_15697 = (
    get_dataset(dataset='adult', probes_strategy='qc_filter',
                selected_donor_ids=[single_id])
    for single_id in ['9861', '10021', '12876', '14380', '15496', '15697']
)

Processed HBA brain dataset found locally. Loading from data/processed_HBA/adult_brainarea_vs_genes_exp_qc_filter_donors_9861.tsv
Processed HBA brain dataset found locally. Loading from data/processed_HBA/adult_brainarea_vs_genes_exp_qc_filter_donors_10021.tsv
Processed HBA brain dataset found locally. Loading from data/processed_HBA/adult_brainarea_vs_genes_exp_qc_filter_donors_12876.tsv
Processed HBA brain dataset found locally. Loading from data/processed_HBA/adult_brainarea_vs_genes_exp_qc_filter_donors_14380.tsv
Processed HBA brain dataset found locally. Loading from data/processed_HBA/adult_brainarea_vs_genes_exp_qc_filter_donors_15496.tsv
Processed HBA brain dataset found locally. Loading from data/processed_HBA/adult_brainarea_vs_genes_exp_qc_filter_donors_15697.tsv


In [16]:
# CHAT expression per brain structure, computed separately for every donor.
CHAT_9861, CHAT_10021, CHAT_12876, CHAT_14380, CHAT_15496, CHAT_15697 = (
    generate_aggregate_data(probes_strategy='qc_filter',
                            donor_ids=[single_id]).loc['CHAT']
    for single_id in ['9861', '10021', '12876', '14380', '15496', '15697']
)

Processing donor #1
Donor directory: data/raw/allen_HBA/normalized_microarray_donor9861
Donor ID: normalized_microarray_donor9861
After getting probes_df which merged qc data, shape is (31452, 8)
Processing donor #1
Donor directory: data/raw/allen_HBA/normalized_microarray_donor10021
Donor ID: normalized_microarray_donor10021
After getting probes_df which merged qc data, shape is (31452, 8)
Processing donor #1
Donor directory: data/raw/allen_HBA/normalized_microarray_donor12876
Donor ID: normalized_microarray_donor12876
After getting probes_df which merged qc data, shape is (31452, 8)
Processing donor #1
Donor directory: data/raw/allen_HBA/normalized_microarray_donor14380
Donor ID: normalized_microarray_donor14380
After getting probes_df which merged qc data, shape is (31452, 8)
Processing donor #1
Donor directory: data/raw/allen_HBA/normalized_microarray_donor15496
Donor ID: normalized_microarray_donor15496
After getting probes_df which merged qc data, shape is (31452, 8)
Processing d

In [17]:
# These three lists are zipped together in the next cell, so they must list
# the donors in the same order.
individual_donors = [donor_9861, donor_10021, donor_12876, donor_14380, donor_15496, donor_15697]
CHAT_donors = [CHAT_9861, CHAT_10021, CHAT_12876, CHAT_14380, CHAT_15496, CHAT_15697]
donor_ids = ['9861','10021', '12876', '14380', '15496', '15697']

In [18]:
# Per donor: SRP enrichment statistics for every brain structure, plus the
# donor's CHAT expression as a small two-column frame.
srp_results = {}
chat_dfs = []

for donor_id, donor, chat_exp in zip(donor_ids, individual_donors, CHAT_donors):
    stats = generate_stats_table(exp_df=donor, gene_list=srp_genes)
    # rank 1 = structure with the highest AUROC in this donor
    stats['rank'] = stats.AUROC.rank(ascending=False)
    srp_results[donor_id] = stats

    chat_exp = chat_exp.reset_index().assign(donor_id=donor_id)
    chat_dfs.append(chat_exp)

# long tables with matching key columns ('brain_structure', 'donor')
srp_table = (pd.concat(srp_results)
               .rename_axis(['donor', 'brain_structure'])
               .reset_index()
               .rename(columns={'AUROC': 'srp_AUC'})
               .loc[:, ['brain_structure', 'srp_AUC', 'donor']])
chat_table = (pd.concat(chat_dfs)
                .rename(columns={'structure_name': 'brain_structure',
                                 'donor_id': 'donor'}))

You submitted a gene list with 87 genes.
    87 of those genes are present in the reference dataset.
    Genes not found in our reference data: []
You submitted a gene list with 87 genes.
    87 of those genes are present in the reference dataset.
    Genes not found in our reference data: []
You submitted a gene list with 87 genes.
    87 of those genes are present in the reference dataset.
    Genes not found in our reference data: []
You submitted a gene list with 87 genes.
    87 of those genes are present in the reference dataset.
    Genes not found in our reference data: []
You submitted a gene list with 87 genes.
    87 of those genes are present in the reference dataset.
    Genes not found in our reference data: []
You submitted a gene list with 87 genes.
    87 of those genes are present in the reference dataset.
    Genes not found in our reference data: []


### Top 5 ranked brain structures for SRP enrichment for each donor

In [19]:
# Five structures with the highest SRP AUROC for each donor. Selecting the
# column before nlargest avoids groupby.apply and the follow-up drop('donor'),
# which depends on the grouping column being passed to apply (deprecated in
# recent pandas). The result is still indexed by (donor, brain_structure).
srp_table.set_index('brain_structure').groupby('donor')['srp_AUC'].nlargest(5).to_frame()

Unnamed: 0_level_0,Unnamed: 1_level_0,srp_AUC
donor,brain_structure,Unnamed: 2_level_1
10021,central gray of the pons,0.815393
10021,nucleus accumbens,0.804307
10021,amygdalohippocampal transition zone,0.791947
10021,"globus pallidus, internal segment",0.784864
10021,zona incerta,0.777503
12876,"inferior temporal gyrus, lateral bank of gyrus",0.850401
12876,subcallosal cingulate gyrus,0.843944
12876,"cingulate gyrus, frontal part, inferior bank of gyrus",0.817135
12876,"globus pallidus, external segment",0.791497
12876,"temporal pole, medial aspect",0.761181


### Combine SRP and CHAT enrichment data together and save as output for figure

In [20]:
# Inner join: only (brain_structure, donor) pairs present in both tables are kept.
enrichment_table = chat_table.merge(srp_table, on=['brain_structure', 'donor'])

In [21]:
# Write the combined table without the row index; `index` is documented as a
# boolean, so pass False explicitly rather than relying on None being falsy.
enrichment_table.to_csv('./data/processed_HBA/srp_chat_enrichment_table.csv', index=False)