## Imports

In [342]:
import pandas as pd
from plotnine import *
# Lift pandas' column display limit so the wide SNP score tables are shown in full
pd.set_option('display.max_columns', None)

## Cluster to Cell-type Mapping

In [343]:
# Cluster number -> cell-type label. The assignment is written grouped by cell
# type, then flattened and sorted so the dict is keyed in cluster order 1..24.
clust_to_celltype = dict(sorted(
    (cluster, celltype)
    for celltype, clusters in (
        ('excitatory_neurons', (1, 3, 4)),
        ('inhibitory_neurons', (2, 11, 12)),
        ('nigral_neurons', (5, 6)),
        ('unknown_neurons', (7,)),
        ('opcs', (8, 9, 10)),
        ('astrocytes', (13, 14, 15, 16, 17)),
        ('doublets', (18,)),
        ('oligodendrocytes', (19, 20, 21, 22, 23)),
        ('microglia', (24,)),
    )
    for cluster in clusters
))

## Define Scores Dictionary Update Function

In [333]:
def update_dict(snps_dict, snp_row, cluster, celltype, version):
    """Replace the stored record for a SNP with the scores from ``snp_row``.

    The entry ``snps_dict[snp_row['rsid']]`` must already exist. Its allele and
    score fields are overwritten with the values from ``snp_row``, and
    ``cluster`` / ``celltype`` are recorded as ``best_cluster`` /
    ``best_celltype``. If the row has ``confidence > -1`` and neither allele is
    the string ``'NA'``, the cluster and cell type are also added (without
    duplicates) to the entry's ``sig_clusters`` / ``sig_celltypes`` lists.

    Parameters
    ----------
    snps_dict : dict
        Mapping of rsid -> per-SNP record (a dict); mutated in place.
    snp_row : mapping
        Row-like object indexable by field name (e.g. a pandas Series).
    cluster : int
        Cluster number the row came from.
    celltype : str
        Cell-type label of that cluster.
    version : {'new', 'old'}
        Score-file format; 'new' additionally copies ``magnitude_pval`` and
        ``prominence_pval``.

    Raises
    ------
    ValueError
        If ``version`` is neither 'new' nor 'old'.
    KeyError
        If the rsid has no entry in ``snps_dict`` or a field is missing.
    """
    shared_fields = ['effect', 'noneffect', 'explain_pval', 'ism_pval', 'delta_pval']
    if version == 'new':
        update_fields = shared_fields + ['magnitude_pval', 'prominence_pval', 'confidence']
    elif version == 'old':
        update_fields = shared_fields + ['confidence']
    else:
        # Fail before touching snps_dict instead of hitting an unbound local below
        raise ValueError("version must be 'new' or 'old', got {!r}".format(version))

    entry = snps_dict[snp_row['rsid']]
    for field in update_fields:
        entry[field] = snp_row[field]
    entry['best_cluster'] = cluster
    entry['best_celltype'] = celltype

    # Only rows with a non-negative confidence and both alleles known count as significant
    if snp_row['confidence'] > -1 and snp_row['effect'] != 'NA' and snp_row['noneffect'] != 'NA':
        if cluster not in entry['sig_clusters']:
            entry['sig_clusters'].append(cluster)
        if celltype not in entry['sig_celltypes']:
            entry['sig_celltypes'].append(celltype)

## Define Populate Scores Dictionary Function

In [334]:
def _add_unique(values, item):
    """Append ``item`` to the list ``values`` unless it is already present."""
    if item not in values:
        values.append(item)


def _outranks(row, entry, key_pval):
    """Return whether ``row`` beats ``entry``: higher confidence, or equal confidence and smaller ``key_pval``."""
    if row['confidence'] > entry['confidence']:
        return True
    return row['confidence'] == entry['confidence'] and row[key_pval] < entry[key_pval]


def populate_dict(basedir, snps_dict, version):
    """Merge the per-cluster SNP score files under ``basedir`` into ``snps_dict``.

    For every cluster 1..24 the file
    ``basedir + 'Cluster<N>.<celltype>' + suffix`` is read as tab-separated text
    (suffix ``.updated.snp_scores.tsv`` for 'new', ``.snps.csv`` for 'old') and
    reduced to the columns relevant for that version. Each row is then folded
    into ``snps_dict`` (keyed by rsid):

    * An rsid seen for the first time gets a fresh record holding the row's
      columns plus ``best_cluster``, ``best_celltype``, ``sig_clusters``,
      ``sig_celltypes``, ``scored_clusters`` and ``scored_celltypes``.
    * For a known rsid the cluster / cell type are added to the ``scored_*``
      lists. The stored record is replaced via ``update_dict`` when the row
      outranks it (higher confidence, or equal confidence and smaller key
      p-value), or when the stored alleles are ``'NA'`` and the row's are not.
      A row with known alleles that does not outrank a record with known
      alleles still registers its cluster / cell type as significant if its
      confidence is above -1.

    The key p-value is ``prominence_pval`` for 'new' and ``explain_pval`` for 'old'.

    Parameters
    ----------
    basedir : str
        Directory prefix of the score files; it is concatenated directly with
        the file name, so it must end with a path separator.
    snps_dict : dict
        Mapping rsid -> record; mutated in place.
    version : {'new', 'old'}
        Score-file format.

    Raises
    ------
    ValueError
        If ``version`` is neither 'new' nor 'old'.
    """
    if version == 'new':
        suffix = '.updated.snp_scores.tsv'
        columns = ['chr', 'start', 'end', 'rsid',
                   'effect', 'noneffect', 'ref', 'alt',
                   'major', 'minor', 'direction',
                   'locus_num', 'gwas', 'gwas_pval', 'coloc',
                   'explain_pval', 'ism_pval', 'delta_pval',
                   'magnitude_pval', 'prominence_pval', 'confidence']
        key_pval = 'prominence_pval'
    elif version == 'old':
        suffix = '.snps.csv'
        columns = ['chr', 'start', 'end', 'rsid',
                   'effect', 'noneffect', 'direction',
                   'locus_num', 'gwas', 'gwas_pval', 'coloc',
                   'explain_pval', 'ism_pval', 'delta_pval', 'confidence']
        key_pval = 'explain_pval'
    else:
        # Fail before any file is read instead of hitting an unbound local below
        raise ValueError("version must be 'new' or 'old', got {!r}".format(version))

    for cluster in range(1, 25):
        celltype = clust_to_celltype[cluster]
        # keep_default_na=False keeps the literal string 'NA' so the allele checks below work
        all_scores = pd.read_csv(basedir + 'Cluster' + str(cluster) + '.' + celltype + suffix,
                                 sep='\t', keep_default_na=False)
        all_scores = all_scores[columns]

        for _, row in all_scores.iterrows():
            rsid = row['rsid']

            if rsid not in snps_dict:
                # First sighting: seed the record; key order here defines the merged table's column order
                entry = {column: row[column] for column in columns}
                entry['best_cluster'] = ''
                entry['best_celltype'] = ''
                entry['sig_clusters'] = []
                entry['sig_celltypes'] = []
                entry['scored_clusters'] = [cluster]
                entry['scored_celltypes'] = [celltype]
                snps_dict[rsid] = entry
                update_dict(snps_dict, row, cluster, celltype, version)
                continue

            entry = snps_dict[rsid]
            _add_unique(entry['scored_clusters'], cluster)
            _add_unique(entry['scored_celltypes'], celltype)

            stored_alleles_missing = entry['effect'] == 'NA' or entry['noneffect'] == 'NA'
            row_has_alleles = row['effect'] != 'NA' and row['noneffect'] != 'NA'

            if stored_alleles_missing:
                # Any row with known alleles replaces an allele-less record outright
                if row_has_alleles or _outranks(row, entry, key_pval):
                    update_dict(snps_dict, row, cluster, celltype, version)
            elif row_has_alleles:
                if _outranks(row, entry, key_pval):
                    update_dict(snps_dict, row, cluster, celltype, version)
                elif row['confidence'] > -1:
                    # Not the best record, but still significant in this cluster
                    _add_unique(entry['sig_clusters'], cluster)
                    _add_unique(entry['sig_celltypes'], celltype)

## Make DataFrame from Updated Scores

In [335]:
def _report_subset(label, frame):
    """Print ``label`` followed by the row count of ``frame``, then display its first rows."""
    print(label, frame.shape[0])
    print()
    display(frame.head())
    print()


def make_df(basedir, version):
    """Build the merged per-SNP score table and its confidence subsets.

    Calls ``populate_dict`` to merge the per-cluster score files under
    ``basedir``, turns the resulting records into a DataFrame (one row per
    rsid), sorts it ascending by ``prominence_pval`` ('new') or
    ``explain_pval`` ('old'), and prints / displays the size and head of the
    full table and of each subset.

    Parameters
    ----------
    basedir : str
        Directory prefix passed through to ``populate_dict``.
    version : {'new', 'old'}
        Score-file format.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(merged_scores, sig_snps, high_conf_snps, medium_conf_snps,
        low_conf_snps)`` where the subsets are the rows with confidence
        ``> -1``, ``== 2``, ``== 1`` and ``== 0`` respectively.

    Raises
    ------
    ValueError
        If ``version`` is neither 'new' nor 'old', or if no SNP records were loaded.
    """
    sort_columns = {'new': 'prominence_pval', 'old': 'explain_pval'}
    if version not in sort_columns:
        raise ValueError("version must be 'new' or 'old', got {!r}".format(version))

    snps_dict = {}
    populate_dict(basedir, snps_dict, version)
    if not snps_dict:
        # Without any record there are no columns to build or sort by
        raise ValueError('no SNP scores were loaded from {!r}'.format(basedir))

    # Pivot the records into column lists, keeping the key order of the first record
    records = list(snps_dict.values())
    pandas_dict = {key: [record[key] for record in records] for key in records[0]}
    merged_scores = pd.DataFrame.from_dict(pandas_dict)
    merged_scores = merged_scores.sort_values(by=sort_columns[version])
    _report_subset('All Scored SNPs:', merged_scores)

    sig_snps = merged_scores.loc[merged_scores['confidence'] > -1].copy()
    _report_subset('Significant SNPs:', sig_snps)

    high_conf_snps = merged_scores.loc[merged_scores['confidence'] == 2].copy()
    _report_subset('High Confidence SNPs:', high_conf_snps)

    medium_conf_snps = merged_scores.loc[merged_scores['confidence'] == 1].copy()
    _report_subset('Medium Confidence SNPs:', medium_conf_snps)

    low_conf_snps = merged_scores.loc[merged_scores['confidence'] == 0].copy()
    _report_subset('Low Confidence SNPs:', low_conf_snps)

    return merged_scores, sig_snps, high_conf_snps, medium_conf_snps, low_conf_snps

## Get Updated Scores for SNPs

In [336]:
# Merged table and confidence subsets built from the 'new'-format score files
(new_scores,
 new_sig_snps,
 new_high_conf_snps,
 new_medium_conf_snps,
 new_low_conf_snps) = make_df(
    '/oak/stanford/groups/akundaje/projects/alzheimers_parkinsons/updated_snp_scores/',
    'new',
)

All Scored SNPs: 1677



Unnamed: 0,chr,start,end,rsid,effect,noneffect,ref,alt,major,minor,direction,locus_num,gwas,gwas_pval,coloc,explain_pval,ism_pval,delta_pval,magnitude_pval,prominence_pval,confidence,best_cluster,best_celltype,sig_clusters,sig_celltypes,scored_clusters,scored_celltypes
1147,chr15,64453059,64453060,rs143560707,G,C,C,G,C,G,+,43,Alzheimers_Lambert_2013,3.03e-06,True,0.000279,0.000209,0.000212,0.001749,0.000266,2,22,oligodendrocytes,"[14, 15, 22]","[astrocytes, oligodendrocytes]","[14, 15, 22]","[astrocytes, oligodendrocytes]"
1672,chr8,16837908,16837909,rs1717289,T,C,C,T,C,T,-,124,Chang_23andMe_Parkinsons,0.000222,False,0.001125,0.000902,0.00092,0.003577,0.000772,2,24,microglia,[24],[microglia],[24],[microglia]
54,chr11,86103987,86103988,rs1237999,G,A,G,A,A,G,+,16,Alzheimers_Jansen_2018,7.14e-16,False,0.000174,0.000128,0.000133,0.0,0.001521,2,1,excitatory_neurons,"[1, 3, 7, 18, 19, 20, 21, 22, 23]","[excitatory_neurons, unknown_neurons, doublets...","[1, 3, 7, 13, 18, 19, 20, 21, 22, 23]","[excitatory_neurons, unknown_neurons, astrocyt..."
38,chr11,60251676,60251677,rs636317,C,T,C,T,T,C,-,11,Alzheimers_Kunkle_2019,5.91e-15,False,3.2e-05,2.5e-05,2.4e-05,0.0,0.001546,2,20,oligodendrocytes,"[1, 4, 19, 20, 24]","[excitatory_neurons, oligodendrocytes, microglia]","[1, 4, 19, 20, 24]","[excitatory_neurons, oligodendrocytes, microglia]"
208,chr17,45896863,45896864,rs62056782,T,C,C,T,C,T,-,54,23andme_PD_hg38,3.94e-22,True,0.000958,0.000744,0.000757,0.000309,0.001546,2,20,oligodendrocytes,"[1, 2, 3, 4, 9, 11, 20]","[excitatory_neurons, inhibitory_neurons, opcs,...","[1, 2, 3, 4, 9, 11, 20]","[excitatory_neurons, inhibitory_neurons, opcs,..."



Significant SNPs: 128



Unnamed: 0,chr,start,end,rsid,effect,noneffect,ref,alt,major,minor,direction,locus_num,gwas,gwas_pval,coloc,explain_pval,ism_pval,delta_pval,magnitude_pval,prominence_pval,confidence,best_cluster,best_celltype,sig_clusters,sig_celltypes,scored_clusters,scored_celltypes
1147,chr15,64453059,64453060,rs143560707,G,C,C,G,C,G,+,43,Alzheimers_Lambert_2013,3.03e-06,True,0.000279,0.000209,0.000212,0.001749,0.000266,2,22,oligodendrocytes,"[14, 15, 22]","[astrocytes, oligodendrocytes]","[14, 15, 22]","[astrocytes, oligodendrocytes]"
1672,chr8,16837908,16837909,rs1717289,T,C,C,T,C,T,-,124,Chang_23andMe_Parkinsons,0.000222,False,0.001125,0.000902,0.00092,0.003577,0.000772,2,24,microglia,[24],[microglia],[24],[microglia]
54,chr11,86103987,86103988,rs1237999,G,A,G,A,A,G,+,16,Alzheimers_Jansen_2018,7.14e-16,False,0.000174,0.000128,0.000133,0.0,0.001521,2,1,excitatory_neurons,"[1, 3, 7, 18, 19, 20, 21, 22, 23]","[excitatory_neurons, unknown_neurons, doublets...","[1, 3, 7, 13, 18, 19, 20, 21, 22, 23]","[excitatory_neurons, unknown_neurons, astrocyt..."
38,chr11,60251676,60251677,rs636317,C,T,C,T,T,C,-,11,Alzheimers_Kunkle_2019,5.91e-15,False,3.2e-05,2.5e-05,2.4e-05,0.0,0.001546,2,20,oligodendrocytes,"[1, 4, 19, 20, 24]","[excitatory_neurons, oligodendrocytes, microglia]","[1, 4, 19, 20, 24]","[excitatory_neurons, oligodendrocytes, microglia]"
208,chr17,45896863,45896864,rs62056782,T,C,C,T,C,T,-,54,23andme_PD_hg38,3.94e-22,True,0.000958,0.000744,0.000757,0.000309,0.001546,2,20,oligodendrocytes,"[1, 2, 3, 4, 9, 11, 20]","[excitatory_neurons, inhibitory_neurons, opcs,...","[1, 2, 3, 4, 9, 11, 20]","[excitatory_neurons, inhibitory_neurons, opcs,..."



High Confidence SNPs: 36



Unnamed: 0,chr,start,end,rsid,effect,noneffect,ref,alt,major,minor,direction,locus_num,gwas,gwas_pval,coloc,explain_pval,ism_pval,delta_pval,magnitude_pval,prominence_pval,confidence,best_cluster,best_celltype,sig_clusters,sig_celltypes,scored_clusters,scored_celltypes
1147,chr15,64453059,64453060,rs143560707,G,C,C,G,C,G,+,43,Alzheimers_Lambert_2013,3.03e-06,True,0.000279,0.000209,0.000212,0.001749,0.000266,2,22,oligodendrocytes,"[14, 15, 22]","[astrocytes, oligodendrocytes]","[14, 15, 22]","[astrocytes, oligodendrocytes]"
1672,chr8,16837908,16837909,rs1717289,T,C,C,T,C,T,-,124,Chang_23andMe_Parkinsons,0.000222,False,0.001125,0.000902,0.00092,0.003577,0.000772,2,24,microglia,[24],[microglia],[24],[microglia]
54,chr11,86103987,86103988,rs1237999,G,A,G,A,A,G,+,16,Alzheimers_Jansen_2018,7.14e-16,False,0.000174,0.000128,0.000133,0.0,0.001521,2,1,excitatory_neurons,"[1, 3, 7, 18, 19, 20, 21, 22, 23]","[excitatory_neurons, unknown_neurons, doublets...","[1, 3, 7, 13, 18, 19, 20, 21, 22, 23]","[excitatory_neurons, unknown_neurons, astrocyt..."
38,chr11,60251676,60251677,rs636317,C,T,C,T,T,C,-,11,Alzheimers_Kunkle_2019,5.91e-15,False,3.2e-05,2.5e-05,2.4e-05,0.0,0.001546,2,20,oligodendrocytes,"[1, 4, 19, 20, 24]","[excitatory_neurons, oligodendrocytes, microglia]","[1, 4, 19, 20, 24]","[excitatory_neurons, oligodendrocytes, microglia]"
208,chr17,45896863,45896864,rs62056782,T,C,C,T,C,T,-,54,23andme_PD_hg38,3.94e-22,True,0.000958,0.000744,0.000757,0.000309,0.001546,2,20,oligodendrocytes,"[1, 2, 3, 4, 9, 11, 20]","[excitatory_neurons, inhibitory_neurons, opcs,...","[1, 2, 3, 4, 9, 11, 20]","[excitatory_neurons, inhibitory_neurons, opcs,..."



Medium Confidence SNPs: 40



Unnamed: 0,chr,start,end,rsid,effect,noneffect,ref,alt,major,minor,direction,locus_num,gwas,gwas_pval,coloc,explain_pval,ism_pval,delta_pval,magnitude_pval,prominence_pval,confidence,best_cluster,best_celltype,sig_clusters,sig_celltypes,scored_clusters,scored_celltypes
405,chr2,134719288,134719289,rs62171396,T,C,C,T,C,T,-,78,Nalls_23andMe,0.0103987,False,0.027072,0.019955,0.019626,0.011934,0.051207,1,1,excitatory_neurons,"[1, 4]",[excitatory_neurons],"[1, 2, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 24]","[excitatory_neurons, inhibitory_neurons, nigra..."
458,chr4,960458,960459,rs3733345,T,G,G,"A,T",T,"G,A",-,102,23andme_PD_hg38,3.3500000000000003e-10,True,0.025915,0.020699,0.020442,0.027467,0.052356,1,18,doublets,[18],[doublets],"[1, 4, 18, 21]","[excitatory_neurons, doublets, oligodendrocytes]"
357,chr19,45039212,45039213,rs34034621,T,C,C,T,C,T,+,71,Alzheimers_Jansen_2018,4.190000000000001e-08,True,0.024023,0.018148,0.01771,0.020225,0.052697,1,5,nigral_neurons,"[5, 6]",[nigral_neurons],"[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14...","[excitatory_neurons, inhibitory_neurons, nigra..."
878,chr17,45947185,45947186,rs62062784,G,C,C,G,C,G,-,54,23andme_PD_hg38,3.33e-22,True,0.009359,0.007227,0.006943,0.013671,0.052754,1,9,opcs,[9],[opcs],"[7, 9, 10, 13, 14, 15, 16, 17, 18, 19, 20, 21,...","[unknown_neurons, opcs, astrocytes, doublets, ..."
210,chr17,45897109,45897110,rs80346216,T,G,G,T,G,T,-,54,23andme_PD_hg38,3.94e-22,True,0.019177,0.015755,0.0165,0.007281,0.054593,1,21,oligodendrocytes,"[1, 12, 20, 21]","[excitatory_neurons, inhibitory_neurons, oligo...","[1, 2, 3, 4, 7, 9, 11, 12, 20, 21]","[excitatory_neurons, inhibitory_neurons, unkno..."



Low Confidence SNPs: 52



Unnamed: 0,chr,start,end,rsid,effect,noneffect,ref,alt,major,minor,direction,locus_num,gwas,gwas_pval,coloc,explain_pval,ism_pval,delta_pval,magnitude_pval,prominence_pval,confidence,best_cluster,best_celltype,sig_clusters,sig_celltypes,scored_clusters,scored_celltypes
1210,chr8,11845330,11845331,rs1736081,T,G,T,G,T,G,+,122,Chang_23andMe_Parkinsons,0.000111,False,0.028035,0.023248,0.023554,0.125724,0.106447,0,16,astrocytes,"[14, 15, 16]",[astrocytes],"[14, 15, 16, 20, 21]","[astrocytes, oligodendrocytes]"
145,chr17,45703883,45703884,rs968027,T,C,C,T,C,T,-,54,23andme_PD_hg38,2.07e-22,True,0.006964,0.00556,0.005559,0.085626,0.109651,0,2,inhibitory_neurons,[2],[inhibitory_neurons],"[1, 2]","[excitatory_neurons, inhibitory_neurons]"
242,chr17,45966011,45966012,rs62063291,T,C,T,C,T,C,+,54,23andme_PD_hg38,2.3000000000000003e-22,True,0.007955,0.006149,0.006232,0.166039,0.109662,0,4,excitatory_neurons,"[1, 3, 4, 7, 12]","[excitatory_neurons, unknown_neurons, inhibito...","[1, 3, 4, 7, 11, 12]","[excitatory_neurons, unknown_neurons, inhibito..."
1392,chr17,45825432,45825433,rs3885075,G,A,A,"C,G",A,"C,G",-,54,23andme_PD_hg38,1.3400000000000002e-22,True,0.044006,0.034176,0.034578,0.094839,0.114501,0,21,oligodendrocytes,[21],[oligodendrocytes],"[20, 21, 22]",[oligodendrocytes]
297,chr17,75032747,75032748,rs7218004,A,G,G,A,G,A,+,61,Alzheimers_Kunkle_2019,8.54e-06,True,0.008568,0.006912,0.007055,0.075209,0.126198,0,22,oligodendrocytes,"[14, 15, 19, 20, 21, 22, 23]","[astrocytes, oligodendrocytes]","[1, 2, 3, 11, 12, 14, 15, 19, 20, 21, 22, 23, 24]","[excitatory_neurons, inhibitory_neurons, astro..."





## Get Old Scores for SNPs

In [337]:
# Merged table and confidence subsets built from the 'old'-format score files
(old_scores,
 old_sig_snps,
 old_high_conf_snps,
 old_medium_conf_snps,
 old_low_conf_snps) = make_df(
    '/mnt/lab_data3/soumyak/adpd/sig_snps/',
    'old',
)

All Scored SNPs: 123



Unnamed: 0,chr,start,end,rsid,effect,noneffect,direction,locus_num,gwas,gwas_pval,coloc,explain_pval,ism_pval,delta_pval,confidence,best_cluster,best_celltype,sig_clusters,sig_celltypes,scored_clusters,scored_celltypes
19,chr12,33530906,33530907,rs935534,G,A,-,23,23andme_PD_hg38,1.46067e-07,False,0.001517,0.003705,0.004138,2,2,inhibitory_neurons,[2],[inhibitory_neurons],[2],[inhibitory_neurons]
40,chr2,127128581,127128582,rs13025717,T,C,+,77,Alzheimers_Jansen_2018,9.125833000000001e-17,False,0.002794,0.003049,0.003243,2,8,opcs,"[6, 8, 14, 18, 19, 20, 21, 24]","[nigral_neurons, opcs, astrocytes, doublets, o...","[6, 8, 14, 18, 19, 20, 21, 24]","[nigral_neurons, opcs, astrocytes, doublets, o..."
39,chr19,45039212,45039213,rs34034621,T,C,+,71,Alzheimers_Jansen_2018,4.186092e-08,True,0.003949,0.024156,0.025927,2,5,nigral_neurons,"[5, 6]",[nigral_neurons],"[5, 6]",[nigral_neurons]
0,chr11,86103987,86103988,rs1237999,A,G,+,16,Alzheimers_Jansen_2018,7.143148e-16,False,0.004595,0.000424,0.000511,2,7,unknown_neurons,"[1, 3, 7, 13, 18, 19, 20, 21, 22, 23]","[excitatory_neurons, unknown_neurons, astrocyt...","[1, 3, 7, 13, 18, 19, 20, 21, 22, 23]","[excitatory_neurons, unknown_neurons, astrocyt..."
114,chr8,16837908,16837909,rs1717289,T,C,-,124,Chang_23andMe_Parkinsons,0.000221605,False,0.004829,0.00174,0.001774,2,24,microglia,[24],[microglia],[24],[microglia]



Significant SNPs: 123



Unnamed: 0,chr,start,end,rsid,effect,noneffect,direction,locus_num,gwas,gwas_pval,coloc,explain_pval,ism_pval,delta_pval,confidence,best_cluster,best_celltype,sig_clusters,sig_celltypes,scored_clusters,scored_celltypes
19,chr12,33530906,33530907,rs935534,G,A,-,23,23andme_PD_hg38,1.46067e-07,False,0.001517,0.003705,0.004138,2,2,inhibitory_neurons,[2],[inhibitory_neurons],[2],[inhibitory_neurons]
40,chr2,127128581,127128582,rs13025717,T,C,+,77,Alzheimers_Jansen_2018,9.125833000000001e-17,False,0.002794,0.003049,0.003243,2,8,opcs,"[6, 8, 14, 18, 19, 20, 21, 24]","[nigral_neurons, opcs, astrocytes, doublets, o...","[6, 8, 14, 18, 19, 20, 21, 24]","[nigral_neurons, opcs, astrocytes, doublets, o..."
39,chr19,45039212,45039213,rs34034621,T,C,+,71,Alzheimers_Jansen_2018,4.186092e-08,True,0.003949,0.024156,0.025927,2,5,nigral_neurons,"[5, 6]",[nigral_neurons],"[5, 6]",[nigral_neurons]
0,chr11,86103987,86103988,rs1237999,A,G,+,16,Alzheimers_Jansen_2018,7.143148e-16,False,0.004595,0.000424,0.000511,2,7,unknown_neurons,"[1, 3, 7, 13, 18, 19, 20, 21, 22, 23]","[excitatory_neurons, unknown_neurons, astrocyt...","[1, 3, 7, 13, 18, 19, 20, 21, 22, 23]","[excitatory_neurons, unknown_neurons, astrocyt..."
114,chr8,16837908,16837909,rs1717289,T,C,-,124,Chang_23andMe_Parkinsons,0.000221605,False,0.004829,0.00174,0.001774,2,24,microglia,[24],[microglia],[24],[microglia]



High Confidence SNPs: 44



Unnamed: 0,chr,start,end,rsid,effect,noneffect,direction,locus_num,gwas,gwas_pval,coloc,explain_pval,ism_pval,delta_pval,confidence,best_cluster,best_celltype,sig_clusters,sig_celltypes,scored_clusters,scored_celltypes
19,chr12,33530906,33530907,rs935534,G,A,-,23,23andme_PD_hg38,1.46067e-07,False,0.001517,0.003705,0.004138,2,2,inhibitory_neurons,[2],[inhibitory_neurons],[2],[inhibitory_neurons]
40,chr2,127128581,127128582,rs13025717,T,C,+,77,Alzheimers_Jansen_2018,9.125833000000001e-17,False,0.002794,0.003049,0.003243,2,8,opcs,"[6, 8, 14, 18, 19, 20, 21, 24]","[nigral_neurons, opcs, astrocytes, doublets, o...","[6, 8, 14, 18, 19, 20, 21, 24]","[nigral_neurons, opcs, astrocytes, doublets, o..."
39,chr19,45039212,45039213,rs34034621,T,C,+,71,Alzheimers_Jansen_2018,4.186092e-08,True,0.003949,0.024156,0.025927,2,5,nigral_neurons,"[5, 6]",[nigral_neurons],"[5, 6]",[nigral_neurons]
0,chr11,86103987,86103988,rs1237999,A,G,+,16,Alzheimers_Jansen_2018,7.143148e-16,False,0.004595,0.000424,0.000511,2,7,unknown_neurons,"[1, 3, 7, 13, 18, 19, 20, 21, 22, 23]","[excitatory_neurons, unknown_neurons, astrocyt...","[1, 3, 7, 13, 18, 19, 20, 21, 22, 23]","[excitatory_neurons, unknown_neurons, astrocyt..."
114,chr8,16837908,16837909,rs1717289,T,C,-,124,Chang_23andMe_Parkinsons,0.000221605,False,0.004829,0.00174,0.001774,2,24,microglia,[24],[microglia],[24],[microglia]



Medium Confidence SNPs: 40



Unnamed: 0,chr,start,end,rsid,effect,noneffect,direction,locus_num,gwas,gwas_pval,coloc,explain_pval,ism_pval,delta_pval,confidence,best_cluster,best_celltype,sig_clusters,sig_celltypes,scored_clusters,scored_celltypes
90,chr4,89837895,89837896,rs2619363,T,G,+,103,23andme_PD_hg38,1.42772e-08,True,0.052379,0.028788,0.026183,1,18,doublets,"[18, 23]","[doublets, oligodendrocytes]","[18, 23]","[doublets, oligodendrocytes]"
74,chr8,11845330,11845331,rs1736081,T,G,+,122,Chang_23andMe_Parkinsons,0.000111475,False,0.053279,0.026795,0.027377,1,16,astrocytes,"[14, 15, 16]",[astrocytes],"[14, 15, 16]",[astrocytes]
56,chr2,95057163,95057164,rs872580,C,A,-,85,Nalls_23andMe,0.0403717,False,0.053327,0.037319,0.046819,1,14,astrocytes,"[9, 10, 14, 15]","[opcs, astrocytes]","[9, 10, 14, 15]","[opcs, astrocytes]"
73,chr17,78426790,78426791,rs72914885,C,A,-,62,Nalls_23andMe,0.0446717,False,0.055781,0.033627,0.036038,1,22,oligodendrocytes,"[14, 18, 19, 20, 21, 22]","[astrocytes, doublets, oligodendrocytes]","[14, 18, 19, 20, 21, 22]","[astrocytes, doublets, oligodendrocytes]"
105,chr17,45783750,45783751,rs62057073,T,C,-,54,23andme_PD_hg38,2.15762e-22,True,0.059129,0.04715,0.046331,1,21,oligodendrocytes,[21],[oligodendrocytes],[21],[oligodendrocytes]



Low Confidence SNPs: 39



Unnamed: 0,chr,start,end,rsid,effect,noneffect,direction,locus_num,gwas,gwas_pval,coloc,explain_pval,ism_pval,delta_pval,confidence,best_cluster,best_celltype,sig_clusters,sig_celltypes,scored_clusters,scored_celltypes
111,chr4,959909,959910,rs4690326,C,A,-,102,23andme_PD_hg38,3.52953e-10,True,0.223639,0.024375,0.027923,0,22,oligodendrocytes,[22],[oligodendrocytes],[22],[oligodendrocytes]
26,chr17,46003041,46003042,17_46003042,G,C,+,54,23andme_PD_hg38,2.17786e-22,True,0.235242,0.037891,0.038912,0,2,inhibitory_neurons,"[2, 21]","[inhibitory_neurons, oligodendrocytes]","[2, 21]","[inhibitory_neurons, oligodendrocytes]"
58,chr3,122416407,122416408,rs55911744,T,G,-,86,Nalls_23andMe,0.000498637,False,0.331754,0.01986,0.01949,0,16,astrocytes,"[9, 13, 15, 16, 19, 23, 24]","[opcs, astrocytes, oligodendrocytes, microglia]","[9, 13, 15, 16, 19, 23, 24]","[opcs, astrocytes, oligodendrocytes, microglia]"
59,chr17,4908319,4908320,rs7214776,C,T,+,55,Alzheimers_Lambert_2013,4.266e-05,True,0.467064,0.011118,0.011455,0,9,opcs,"[9, 14, 15]","[opcs, astrocytes]","[9, 14, 15]","[opcs, astrocytes]"
65,chr17,45775254,45775255,rs62055948,T,C,+,54,23andme_PD_hg38,2.65541e-22,True,0.467814,0.023884,0.026665,0,12,inhibitory_neurons,[12],[inhibitory_neurons],[12],[inhibitory_neurons]





## Find Significant SNPs that were gained or lost

In [338]:
# Index both tables by rsid once so every lookup is O(1) instead of a nested
# scan over both frames. Each rsid occurs once per table because make_df
# builds the tables from a dict keyed by rsid.
new_confidence_by_rsid = dict(zip(new_scores['rsid'], new_scores['confidence']))
old_rsids = set(old_scores['rsid'])

# Lost: SNPs in the old table whose confidence in the new table is -1
lost_sig_snps = []
for old_rsid in old_scores['rsid']:
    # Every SNP in the old table is expected to have been re-scored
    assert old_rsid in new_confidence_by_rsid, 'SNP missing from new scores: ' + str(old_rsid)
    if new_confidence_by_rsid[old_rsid] == -1:
        lost_sig_snps.append(old_rsid)

# Gained: SNPs with confidence above -1 in the new table that are absent from the old table
gained_sig_snps = [
    new_rsid
    for new_rsid, new_confidence in zip(new_scores['rsid'], new_scores['confidence'])
    if new_confidence > -1 and new_rsid not in old_rsids
]

print('Lost Significant SNPs: ', lost_sig_snps)
print('Count: ', len(lost_sig_snps))
print('Gained Significant SNPs: ', gained_sig_snps)
print('Count: ', len(gained_sig_snps))

Lost Significant SNPs:  ['rs62057150', 'rs4802235', 'rs755934', 'rs2270425', 'rs181190702', 'rs1532276']
Count:  6
Gained Significant SNPs:  ['rs3772034', 'rs62063842', 'rs893433', 'rs1912151', 'rs34746918', 'rs62060787', 'rs59587437', 'rs12493578', 'rs62054439', 'rs10425982', 'rs4854244']
Count:  11


## Compare SNPs between old and new scoring methods

In [339]:
# Show the merged record of one SNP under the old and then the new scoring
rsid = 'rs2619363'
for heading, scores in (('Old Score:', old_scores), ('New Score:', new_scores)):
    print(heading)
    print()
    display(scores.loc[scores['rsid'] == rsid])
    print()

Old Score:



Unnamed: 0,chr,start,end,rsid,effect,noneffect,direction,locus_num,gwas,gwas_pval,coloc,explain_pval,ism_pval,delta_pval,confidence,best_cluster,best_celltype,sig_clusters,sig_celltypes,scored_clusters,scored_celltypes
90,chr4,89837895,89837896,rs2619363,T,G,+,103,23andme_PD_hg38,1.42772e-08,True,0.052379,0.028788,0.026183,1,18,doublets,"[18, 23]","[doublets, oligodendrocytes]","[18, 23]","[doublets, oligodendrocytes]"



New Score:



Unnamed: 0,chr,start,end,rsid,effect,noneffect,ref,alt,major,minor,direction,locus_num,gwas,gwas_pval,coloc,explain_pval,ism_pval,delta_pval,magnitude_pval,prominence_pval,confidence,best_cluster,best_celltype,sig_clusters,sig_celltypes,scored_clusters,scored_celltypes
513,chr4,89837895,89837896,rs2619363,T,G,G,"C,T",G,"C,T",.,103,23andme_PD_hg38,1.43e-08,True,0.027418,0.025403,0.021808,0.021244,0.0728,1,18,doublets,"[18, 23]","[doublets, oligodendrocytes]","[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 15...","[excitatory_neurons, inhibitory_neurons, nigra..."





## Save merged SNP scores table

In [340]:
# Write the full merged table and its significant subset as tab-separated files without the index
for frame, out_path in (
    (new_scores, '/oak/stanford/groups/akundaje/projects/alzheimers_parkinsons/updated_snp_scores/all.unique.snps.scores.tsv'),
    (new_sig_snps, '/oak/stanford/groups/akundaje/projects/alzheimers_parkinsons/updated_sig_snps/all.unique.sig.snps.scores.tsv'),
):
    frame.to_csv(out_path, sep='\t', index=False)

## Create Cluster Summary Table

In [341]:
# One summary row per cluster, followed by an 'All' row with the totals
# from the merged tables.
summary_dict = {
    'cluster': ['Cluster' + str(number) for number in range(1, 25)],
    'celltype': [clust_to_celltype[number] for number in range(1, 25)],
    'scored': [],
    'significant': [],
    'high_confidence': [],
    'medium_confidence': [],
    'low_confidence': [],
}

for cluster in range(1, 25):
    # Count the merged SNPs that list this cluster as scored / significant
    scored = sum(1 for clusters in new_scores['scored_clusters'] if cluster in clusters)
    significant = sum(1 for clusters in new_scores['sig_clusters'] if cluster in clusters)

    cluster_sig_snps = pd.read_csv(
        '/oak/stanford/groups/akundaje/projects/alzheimers_parkinsons/updated_sig_snps/Cluster'
        + str(cluster) + '.' + clust_to_celltype[cluster] + '.updated.unique.sig_snps.tsv',
        sep='\t', keep_default_na=False)
    # The per-cluster file must agree with the count derived from the merged table
    assert len(cluster_sig_snps) == significant

    cluster_counts = {
        'scored': scored,
        'significant': significant,
        'high_confidence': len(cluster_sig_snps.loc[cluster_sig_snps['confidence'] == 2]),
        'medium_confidence': len(cluster_sig_snps.loc[cluster_sig_snps['confidence'] == 1]),
        'low_confidence': len(cluster_sig_snps.loc[cluster_sig_snps['confidence'] == 0]),
    }
    for column, count in cluster_counts.items():
        summary_dict[column].append(count)

overall_counts = {
    'cluster': 'All',
    'celltype': 'All',
    'scored': len(new_scores),
    'significant': len(new_sig_snps),
    'high_confidence': len(new_high_conf_snps),
    'medium_confidence': len(new_medium_conf_snps),
    'low_confidence': len(new_low_conf_snps),
}
for column, value in overall_counts.items():
    summary_dict[column].append(value)

summary_df = pd.DataFrame.from_dict(summary_dict)
display(summary_df)
print()
print('Shape: ', summary_df.shape)
summary_df.to_csv('/oak/stanford/groups/akundaje/projects/alzheimers_parkinsons/updated_snp_scores/snp_score_summary_by_cluster.tsv', sep='\t', index=False)

Unnamed: 0,cluster,celltype,scored,significant,high_confidence,medium_confidence,low_confidence
0,Cluster1,excitatory_neurons,567,23,8,7,8
1,Cluster2,inhibitory_neurons,456,19,4,7,8
2,Cluster3,excitatory_neurons,356,13,3,4,6
3,Cluster4,excitatory_neurons,391,19,2,9,8
4,Cluster5,nigral_neurons,175,6,0,3,3
5,Cluster6,nigral_neurons,221,13,5,3,5
6,Cluster7,unknown_neurons,339,12,3,7,2
7,Cluster8,opcs,183,5,1,3,1
8,Cluster9,opcs,397,22,5,7,10
9,Cluster10,opcs,209,9,2,3,4



Shape:  (25, 7)
