# Comprehensive comparison between GTEx results and TOPMed results

In [1]:
import glob
import os
import re

import numpy as np
import pandas as pd
import requests

import topmed_manuscript as tm

# Prefix for the output tables this notebook writes under tables/.
PREFIX = 'replication-of-gtex-trans-qtl.'
# When True, the TOPMed lead variant is replaced by its clumped variant before comparison.
USE_CLUMPED = False # TODO: if use clumped, should also adjust slope

def distance_between_variants(variant_1, variant_2):
    """Return the absolute base-pair distance between two variants.

    Variant IDs are expected to be underscore-separated with the chromosome
    and position as the first two fields (e.g. ``chr9_21413704_C_T``); any
    remaining fields are ignored.

    Returns ``np.nan`` when either ID is missing (null) or when the two
    variants are on different chromosomes.
    """
    if pd.isnull(variant_1) or pd.isnull(variant_2):
        return np.nan  # np.NaN alias was removed in NumPy 2.0
    chrom_1, pos_1 = variant_1.split('_')[:2]
    chrom_2, pos_2 = variant_2.split('_')[:2]
    if chrom_1 != chrom_2:
        # Linear distance is undefined across chromosomes.
        return np.nan
    return abs(int(pos_1) - int(pos_2))
    



# see: https://ldlink.nci.nih.gov/?var=rs10000030&pop=GBR&r2_d=r2&tab=ldproxy
# Token for LDLink. Set the LDLINK_TOKEN environment variable to use your own
# token; the hard-coded value is only a fallback kept for backward compatibility.
# TODO: remove the fallback so that each user must provide their own token.
TOKEN = os.environ.get('LDLINK_TOKEN') or '9ed563e03ece'

def rsID_to_buddies(rsid, population='EUR', token=TOKEN, timeout=300):
    """Query the LDlink ``ldproxy`` endpoint for variants in LD with ``rsid``.

    Original note: returns buddies with R2 >= 0.01 and within 500kb (these
    thresholds are applied by LDlink, not by this function).

    Args:
        rsid: Query variant, sent as the ``var`` parameter.
        population: Population code(s), sent as ``pop``. The shortcut
            ``'EUR'`` is expanded to CEU+TSI+FIN+GBR+IBS (URL-encoded).
        token: LDlink API token.
        timeout: Seconds to wait for LDlink before giving up.

    Returns:
        A DataFrame built from the tab-separated response, with ``MAF``,
        ``Dprime`` and ``R2`` cast to float and ``Distance`` cast to int.

    Raises:
        requests.HTTPError: If LDlink answers with an HTTP error status.
        ValueError: If the response is not the expected table (for example
            an error message returned in place of results).
    """
    print(f'Fetching buddies for {rsid}')
    if population == 'EUR':
        # Already URL-encoded ('%2B' is '+'), so it is pasted into the URL as-is.
        population = 'CEU%2BTSI%2BFIN%2BGBR%2BIBS'
    url = (
        'https://ldlink.nci.nih.gov/LDlinkRest/ldproxy'
        f'?var={rsid}&pop={population}&genome_build=grch38&r2_d=r2&token={token}'
    )
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()

    rows = [line.split('\t') for line in response.text.split('\n')]
    header = rows[0]
    missing = {'MAF', 'Distance', 'Dprime', 'R2'} - set(header)
    if missing:
        # Fail with the response text rather than an opaque KeyError later on.
        # The URL is deliberately not included because it contains the token.
        raise ValueError(
            f'Unexpected LDproxy response for {rsid} '
            f'(missing columns {sorted(missing)}): {response.text[:500]!r}'
        )
    # Skip blank/trailing lines, which split into a single empty field.
    body = [row for row in rows[1:] if len(row) > 1]
    df = pd.DataFrame(body, columns=header)
    for column, dtype in (('MAF', float), ('Distance', int), ('Dprime', float), ('R2', float)):
        df[column] = df[column].astype(dtype)
    return df


def ldpair(rsid_1, rsid_2, population='EUR', token=TOKEN, timeout=300):
    """Query the LDlink ``ldpair`` endpoint for a pair of variants.

    Args:
        rsid_1: First variant, either an rsID or ``chrom:pos``. A ``chr``
            prefix is added when the value contains neither ``rs`` nor ``chr``.
        rsid_2: Second variant, same conventions as ``rsid_1``.
        population: Population code(s), sent as ``pop``. The shortcut
            ``'EUR'`` is expanded to CEU+TSI+FIN+GBR+IBS (URL-encoded).
        token: LDlink API token.
        timeout: Seconds to wait for LDlink before giving up.

    Returns:
        The raw response text from LDlink.

    Raises:
        requests.HTTPError: If LDlink answers with an HTTP error status.
    """
    def _with_chr_prefix(variant):
        # rsIDs and already-prefixed coordinates are passed through unchanged.
        if 'rs' not in variant and 'chr' not in variant:
            return 'chr' + variant
        return variant

    rsid_1 = _with_chr_prefix(rsid_1)
    rsid_2 = _with_chr_prefix(rsid_2)
    if population == 'EUR':
        # Already URL-encoded ('%2B' is '+'), so it is pasted into the URL as-is.
        population = 'CEU%2BTSI%2BFIN%2BGBR%2BIBS'
    url = (
        'https://ldlink.nci.nih.gov/LDlinkRest/ldpair'
        f'?var1={rsid_1}&var2={rsid_2}&pop={population}&genome_build=grch38&token={token}'
    )
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    return response.text


def ldpair_to_r2(ldpair_output):
    """Extract the R2 value from the text returned by ``ldpair``.

    Args:
        ldpair_output: Response text containing a field of the form
            ``R2: <decimal number>``.

    Returns:
        The R2 value as a float.

    Raises:
        ValueError: If no ``R2: <decimal number>`` field is present (for
            example when LDlink returned an error message instead).
    """
    # Raw string with an escaped '.', so only a literal decimal point matches.
    match = re.search(r'R2: (\d+\.\d+(?:[eE][-+]?\d+)?)', ldpair_output)
    if match is None:
        raise ValueError(f'No R2 value found in LDpair output: {ldpair_output[:500]!r}')
    return float(match.group(1))

  from pandas import (to_datetime, Int64Index, DatetimeIndex, Period,
  from pandas import (to_datetime, Int64Index, DatetimeIndex, Period,


In [2]:
# Load every per-tissue replication result and tag each table with the tissue
# name captured from its path ('gtex-<tissue>/').
replicated_in_topmed = pd.concat([
    pd.read_csv(f, sep='\t').assign(tissue=re.search('gtex-(.*?)/', f).group(1))
    for f in glob.glob('../work/replicate-trans/gtex-*/results/replicated/results.txt')
])
# Translate directory-style tissue names to the labels used in the other tables.
replicated_in_topmed['tissue'] = replicated_in_topmed['tissue'].map({'whole-blood': 'Whole_blood', 'lung': 'Lung'})
replicated_in_topmed

Unnamed: 0,variant_id,gene_id,slope,p,tissue
0,chr9_21413704_C_T,ENSG00000088827,0.704024,3.749309e-70,Lung
1,chr9_106734349_T_C,ENSG00000114113,-0.591215,1.17975e-48,Lung
0,chr2_60498316_G_C,ENSG00000187017,-0.573497,6.785692e-232,Whole_blood
1,chr19_44435185_A_G,ENSG00000133985,0.457685,3.667865e-138,Whole_blood
2,chr8_105750887_G_A,ENSG00000142185,0.034591,0.1799051,Whole_blood
3,chr10_99490397_C_T,ENSG00000102572,-0.4497,1.43962e-40,Whole_blood
4,chr16_57029582_C_T,ENSG00000204642,-0.215734,3.7546609999999996e-19,Whole_blood
5,chr4_68818235_G_C,ENSG00000184428,0.018577,0.5688853,Whole_blood
6,chr13_81448444_A_G,ENSG00000213088,0.068498,0.04119115,Whole_blood
7,chr7_50360284_G_A,ENSG00000187699,-0.193428,8.410644e-28,Whole_blood


## trans-eQTL

In [3]:
# Path to the GTEx trans-eGene table (tab-separated) loaded in the next cell.
GTEX_TRANS_EQTL_TOP = '/net/topmed11/working/porchard/gtex-preprocessing/data/gtex/GTEx_Analysis_v8_trans_eGenes_fdr05.txt'

In [4]:
# Load the GTEx trans-eGene table and keep only the two tissues compared here.
gtex_trans_eqtl = pd.read_csv(GTEX_TRANS_EQTL_TOP, sep='\t')
# .copy() so the column assignments below act on an independent frame rather
# than on a slice of the full table (avoids SettingWithCopyWarning).
gtex_trans_eqtl = gtex_trans_eqtl[gtex_trans_eqtl.tissue_id.isin(['Whole_Blood', 'Lung'])].copy()
# Harmonise tissue labels with the TOPMed tables ('Whole_Blood' -> 'Whole_blood').
gtex_trans_eqtl['tissue'] = gtex_trans_eqtl.tissue_id.str.replace('Blood', 'blood', regex=False)
# Keep only the part of the gene ID before the first '.'.
gtex_trans_eqtl['gene_id'] = gtex_trans_eqtl.gene_id.str.split('.', expand=True)[0]
# Strip the '_b38' tag so variant IDs have the same form as the TOPMed IDs.
gtex_trans_eqtl['variant_id'] = gtex_trans_eqtl.variant_id.str.replace('_b38', '', regex=False)
gtex_trans_eqtl

Unnamed: 0,tissue_id,gene_id,gene_name,gene_chr,biotype,gene_mappability,variant_id,tissue_af,slope,slope_se,pval_nominal,fdr,tissue
45,Lung,ENSG00000088827,SIGLEC1,chr20,protein_coding,1.0,chr9_21413704_C_T,0.39903,0.27797,0.029672,3.7283999999999997e-19,5.66605e-09,Lung
46,Lung,ENSG00000196569,LAMA2,chr6,protein_coding,1.0,chr1_183096225_T_TA,0.51359,0.19312,0.024741,4.2773e-14,0.000325011,Lung
47,Lung,ENSG00000114113,RBP2,chr3,protein_coding,1.0,chr9_106734349_T_C,0.54757,-0.31058,0.040904,1.8549e-13,0.000939631,Lung
57,Whole_Blood,ENSG00000187017,ESPN,chr1,protein_coding,0.84235,chr2_60498316_G_C,0.4097,-0.31986,0.03747,1.1368e-16,1.5074e-06,Whole_blood
58,Whole_Blood,ENSG00000133985,TTC9,chr14,protein_coding,0.99969,chr19_44435185_A_G,0.42687,0.23137,0.028179,1.3521e-15,8.96442e-06,Whole_blood
59,Whole_Blood,ENSG00000142185,TRPM2,chr21,protein_coding,1.0,chr8_105750887_G_A,0.092537,0.32262,0.042298,9.4441e-14,0.000417429,Whole_blood
60,Whole_Blood,ENSG00000102572,STK24,chr13,protein_coding,0.98115,chr10_99490397_C_T,0.087313,-0.35022,0.046824,2.6651e-13,0.000883481,Whole_blood
61,Whole_Blood,ENSG00000204642,HLA-F,chr6,protein_coding,0.95119,chr16_57029582_C_T,0.19851,-0.31975,0.044537,2.0786e-12,0.00551245,Whole_blood
62,Whole_Blood,ENSG00000184428,TOP1MT,chr8,protein_coding,1.0,chr4_68818235_G_C,0.94627,0.54631,0.077224,4.1984e-12,0.00927846,Whole_blood
63,Whole_Blood,ENSG00000213088,ACKR1,chr1,protein_coding,0.99049,chr13_81448444_A_G,0.053731,0.46846,0.066648,5.6802e-12,0.0107599,Whole_blood


In [5]:
# Load the TOPMed significant trans-eQTL table (with clumping information).
topmed_trans_eqtl = pd.read_csv('../work/clump-trans-variants/clump-trans-signals.significant-trans-eqtl-clumped.tsv', sep='\t')
if USE_CLUMPED:
    # Compare against the clumped variant instead of the original lead variant.
    topmed_trans_eqtl['variant_id'] = topmed_trans_eqtl['clumped_variant_id']
# .copy() so the gene_id assignment below acts on an independent frame rather
# than on a slice of the full table (avoids SettingWithCopyWarning).
topmed_trans_eqtl = topmed_trans_eqtl[topmed_trans_eqtl.tissue.isin(['Lung', 'Whole_blood'])].copy()
# Keep only the part of the phenotype ID before the first '.'.
topmed_trans_eqtl['gene_id'] = topmed_trans_eqtl.phenotype_id.str.split('.', expand=True)[0]
topmed_trans_eqtl.head()

Unnamed: 0,variant_id,phenotype_id,pval,b,b_se,r2,af,gene_mappability,gene_crossmaps_to_gene_near_variant,biotype,...,beta_shape2,true_df,pval_true_df,pval_perm,pval_beta,pval_beta_no_zero,qvalue,tissue,clumped_variant_id,gene_id
0,chr7_50330658_C_T,ENSG00000000938.13,3.5030500000000003e-25,-0.234596,0.022533,0.01669,0.235203,0.999762,False,protein_coding,...,979517.7,6086.276059,4.468785e-24,5e-05,3.457262e-18,3.457262e-18,1.392881e-16,Whole_blood,chr7_50342615_A_G,ENSG00000000938
1,chr1_156302480_C_T,ENSG00000002330.13,1.02777e-13,0.134956,0.018106,0.008625,0.500775,0.999556,False,protein_coding,...,1008630.0,6081.962583,3.926652e-13,5e-05,3.664337e-07,3.664337e-07,5.779733e-06,Whole_blood,chr1_156344836_A_G,ENSG00000002330
2,chr6_144036619_C_A,ENSG00000004059.11,1.98404e-17,0.331091,0.038865,0.011237,0.055934,1.0,False,protein_coding,...,958740.6,6060.399614,1.281332e-16,5e-05,9.188711e-11,9.188711e-11,2.1e-09,Whole_blood,chr6_144036619_C_A,ENSG00000004059
3,chr6_122440739_T_C,ENSG00000004478.8,1.03467e-10,0.126184,0.019495,0.006518,0.706539,0.960425,False,protein_coding,...,1105596.0,6130.610651,2.431723e-10,0.00025,0.0002609707,0.0002609707,0.002773133,Whole_blood,chr6_122440739_T_C,ENSG00000004478
4,chr22_46290431_C_G,ENSG00000004799.8,7.74095e-13,0.207077,0.028839,0.008009,0.10753,1.0,False,protein_coding,...,958740.6,6060.399614,2.939505e-12,5e-05,2.389651e-06,2.389651e-06,3.403684e-05,Whole_blood,chr22_46290431_C_G,ENSG00000004799


In [6]:
# Start from the GTEx trans-eGenes and attach the TOPMed lead variant and slope
# for the same gene in the same tissue. The merge joins on the columns the two
# frames share (tissue and gene_id); GTEx rows without a TOPMed match are kept.
trans_eqtl_replication = (
    gtex_trans_eqtl[['tissue', 'gene_id', 'gene_name', 'variant_id', 'slope']]
    .rename(columns={'variant_id': 'gtex_lead_variant', 'slope': 'gtex_slope'})
    .merge(
        topmed_trans_eqtl[['gene_id', 'tissue', 'variant_id', 'b']]
        .rename(columns={'variant_id': 'topmed_lead_variant', 'b': 'topmed_slope'}),
        how='left',
    )
)
# A gene counts as a TOPMed trans-eGene when the merge found a TOPMed lead variant.
trans_eqtl_replication['is_topmed_trans_egene'] = trans_eqtl_replication['topmed_lead_variant'].notna()
trans_eqtl_replication['same_lead_variant'] = (
    trans_eqtl_replication['gtex_lead_variant'] == trans_eqtl_replication['topmed_lead_variant']
)
# Base-pair distance between the two lead variants (NaN if missing or on different chromosomes).
trans_eqtl_replication['linear_distance'] = list(map(
    distance_between_variants,
    trans_eqtl_replication['gtex_lead_variant'],
    trans_eqtl_replication['topmed_lead_variant'],
))
trans_eqtl_replication.head()

Unnamed: 0,tissue,gene_id,gene_name,gtex_lead_variant,gtex_slope,topmed_lead_variant,topmed_slope,is_topmed_trans_egene,same_lead_variant,linear_distance
0,Lung,ENSG00000088827,SIGLEC1,chr9_21413704_C_T,0.27797,chr9_21413704_C_T,0.704031,True,True,0.0
1,Lung,ENSG00000196569,LAMA2,chr1_183096225_T_TA,0.19312,,,False,False,
2,Lung,ENSG00000114113,RBP2,chr9_106734349_T_C,-0.31058,chr9_106734349_T_C,-0.591215,True,True,0.0
3,Whole_blood,ENSG00000187017,ESPN,chr2_60498316_G_C,-0.31986,chr2_60498316_G_C,-0.573496,True,True,0.0
4,Whole_blood,ENSG00000133985,TTC9,chr19_44435185_A_G,0.23137,chr19_44428797_C_T,0.586932,True,False,6388.0


In [7]:
# LD (R2) between the GTEx and TOPMed lead variants, fetched from LDlink one pair at a time.
r2 = []
for gtex_variant, topmed_variant in zip(trans_eqtl_replication.gtex_lead_variant, trans_eqtl_replication.topmed_lead_variant):
    print(gtex_variant, topmed_variant)
    if pd.isnull(topmed_variant) or gtex_variant.split('_')[0] != topmed_variant.split('_')[0]:
        # No TOPMed lead variant, or the variants are on different chromosomes:
        # no LD query is made. (np.NaN alias was removed in NumPy 2.0.)
        r2.append(np.nan)
    else:
        # Query by 'chrom:pos', built from the first two fields of the variant ID.
        rsid1 = ':'.join(gtex_variant.split('_')[:2])
        rsid2 = ':'.join(topmed_variant.split('_')[:2])
        r2.append(ldpair_to_r2(ldpair(rsid1, rsid2)))
trans_eqtl_replication['r2'] = r2

chr9_21413704_C_T chr9_21413704_C_T
chr1_183096225_T_TA nan
chr9_106734349_T_C chr9_106734349_T_C
chr2_60498316_G_C chr2_60498316_G_C
chr19_44435185_A_G chr19_44428797_C_T
chr8_105750887_G_A chr4_127699285_G_GT
chr10_99490397_C_T nan
chr16_57029582_C_T chr16_57025062_C_T
chr4_68818235_G_C chr5_80208393_C_A
chr13_81448444_A_G chr9_127235634_AGTTTTTTT_A
chr7_50360284_G_A chr7_50360284_G_A
chr3_101477243_C_T chr3_101391301_T_A
chr15_66112666_TAAA_T nan
chr4_60659731_G_A nan
chr13_109907029_C_T nan
chr14_87900975_C_T nan


In [8]:
# Preview the table now that the r2 column has been added.
trans_eqtl_replication.head()

Unnamed: 0,tissue,gene_id,gene_name,gtex_lead_variant,gtex_slope,topmed_lead_variant,topmed_slope,is_topmed_trans_egene,same_lead_variant,linear_distance,r2
0,Lung,ENSG00000088827,SIGLEC1,chr9_21413704_C_T,0.27797,chr9_21413704_C_T,0.704031,True,True,0.0,1.0
1,Lung,ENSG00000196569,LAMA2,chr1_183096225_T_TA,0.19312,,,False,False,,
2,Lung,ENSG00000114113,RBP2,chr9_106734349_T_C,-0.31058,chr9_106734349_T_C,-0.591215,True,True,0.0,1.0
3,Whole_blood,ENSG00000187017,ESPN,chr2_60498316_G_C,-0.31986,chr2_60498316_G_C,-0.573496,True,True,0.0,1.0
4,Whole_blood,ENSG00000133985,TTC9,chr19_44435185_A_G,0.23137,chr19_44428797_C_T,0.586932,True,False,6388.0,0.6269


In [9]:
# Attach the TOPMed slope and p-value measured at the GTEx lead variant itself.
# The merge joins on the columns shared after renaming; rows without a
# replication result are kept.
trans_eqtl_replication = trans_eqtl_replication.merge(
    replicated_in_topmed.rename(columns={
        'variant_id': 'gtex_lead_variant',
        'slope': 'gtex_variant_in_topmed_slope',
        'p': 'gtex_variant_in_topmed_p',
    }),
    how='left',
)

In [10]:
# Display the full trans-eQTL replication table.
trans_eqtl_replication

Unnamed: 0,tissue,gene_id,gene_name,gtex_lead_variant,gtex_slope,topmed_lead_variant,topmed_slope,is_topmed_trans_egene,same_lead_variant,linear_distance,r2,gtex_variant_in_topmed_slope,gtex_variant_in_topmed_p
0,Lung,ENSG00000088827,SIGLEC1,chr9_21413704_C_T,0.27797,chr9_21413704_C_T,0.704031,True,True,0.0,1.0,0.704024,3.749309e-70
1,Lung,ENSG00000196569,LAMA2,chr1_183096225_T_TA,0.19312,,,False,False,,,,
2,Lung,ENSG00000114113,RBP2,chr9_106734349_T_C,-0.31058,chr9_106734349_T_C,-0.591215,True,True,0.0,1.0,-0.591215,1.17975e-48
3,Whole_blood,ENSG00000187017,ESPN,chr2_60498316_G_C,-0.31986,chr2_60498316_G_C,-0.573496,True,True,0.0,1.0,-0.573497,6.785692e-232
4,Whole_blood,ENSG00000133985,TTC9,chr19_44435185_A_G,0.23137,chr19_44428797_C_T,0.586932,True,False,6388.0,0.6269,0.457685,3.667865e-138
5,Whole_blood,ENSG00000142185,TRPM2,chr8_105750887_G_A,0.32262,chr4_127699285_G_GT,-0.270572,True,False,,,0.034591,0.1799051
6,Whole_blood,ENSG00000102572,STK24,chr10_99490397_C_T,-0.35022,,,False,False,,,-0.4497,1.43962e-40
7,Whole_blood,ENSG00000204642,HLA-F,chr16_57029582_C_T,-0.31975,chr16_57025062_C_T,-0.281557,True,False,4520.0,0.14,-0.215734,3.7546609999999996e-19
8,Whole_blood,ENSG00000184428,TOP1MT,chr4_68818235_G_C,0.54631,chr5_80208393_C_A,-0.288034,True,False,,,0.018577,0.5688853
9,Whole_blood,ENSG00000213088,ACKR1,chr13_81448444_A_G,0.46846,chr9_127235634_AGTTTTTTT_A,0.223305,True,False,,,0.068498,0.04119115


In [11]:
# Write the trans-eQTL replication table as a tab-separated file without the index column.
trans_eqtl_replication.to_csv(f'tables/{PREFIX}transeqtl.tsv', sep='\t', index=False)

## trans-sQTL

In [12]:
# Path to the GTEx trans-sGene table (tab-separated) loaded below.
GTEX_TRANS_SQTL_TOP = '/net/topmed11/working/porchard/gtex-preprocessing/data/gtex/GTEx_Analysis_v8_trans_sGenes_fdr05.txt'

In [13]:
# Load the TOPMed significant trans-sQTL table (with clumping information).
topmed_trans_sqtl = pd.read_csv('../work/clump-trans-variants/clump-trans-signals.significant-trans-sqtl-clumped.tsv', sep='\t')
if USE_CLUMPED:
    # Compare against the clumped variant instead of the original lead variant.
    # (Previously this branch rewrote topmed_trans_eqtl by mistake, leaving the
    # sQTL table unchanged.)
    topmed_trans_sqtl['variant_id'] = topmed_trans_sqtl['clumped_variant_id']
# .copy() so the gene_id assignment below acts on an independent frame rather
# than on a slice of the full table (avoids SettingWithCopyWarning).
topmed_trans_sqtl = topmed_trans_sqtl[topmed_trans_sqtl.tissue.isin(['Lung', 'Whole_blood'])].copy()
# Map each splicing phenotype ID to its gene ID, then keep only the part before the first '.'.
topmed_trans_sqtl['gene_id'] = topmed_trans_sqtl.phenotype_id.map(tm.phenotype_id_to_gene_id).str.split('.', expand=True)[0]
topmed_trans_sqtl = topmed_trans_sqtl[topmed_trans_sqtl.qvalue<=0.05]
topmed_trans_sqtl.head()

Unnamed: 0,variant_id,phenotype_id,pval,b,b_se,r2,af,gene_mappability,gene_crossmaps_to_gene_near_variant,biotype,...,pval_true_df,pval_perm,pval_beta,pval_beta_no_zero,phenotypes_tested_for_gene,pval_beta_corrected_across_phenotypes,pval_beta_corrected_across_phenotypes_no_zero,qvalue,tissue,clumped_variant_id
0,chr1_156344313_T_C,chr19:35907762:35908175:clu_21928_-:ENSG000000...,1.36365e-16,0.149414,0.018023,0.010584,0.497753,1.0,False,protein_coding,...,4.74685e-16,5e-05,5.94567e-10,5.94567e-10,8,4.756536e-09,4.756536e-09,6.856111e-07,Whole_blood,chr1_156344313_T_C
1,chr7_50360284_G_A,chrX:48467925:48468305:clu_44487_-:ENSG0000001...,2.93022e-18,-0.153455,0.017557,0.01175,0.498528,0.993406,False,protein_coding,...,1.444039e-17,5e-05,1.439984e-11,1.439984e-11,11,1.583983e-10,1.583983e-10,2.573092e-08,Whole_blood,chr7_50360284_G_A
2,chr3_187117653_G_A,chr4:25256674:25259037:clu_33710_+:ENSG0000003...,1.42395e-10,0.121504,0.018915,0.006382,0.633561,0.997751,False,protein_coding,...,2.838788e-10,0.0004,0.0003450879,0.0003450879,1,0.0003450879,0.0003450879,0.02992907,Whole_blood,chr3_187117653_G_A
3,chr6_163405178_G_A,chr11:63903158:63904786:clu_8082_+:ENSG0000007...,3.75038e-26,0.369453,0.034772,0.017268,0.933065,0.999213,False,protein_coding,...,3.9064330000000003e-25,5e-05,3.5027309999999995e-19,3.5027309999999995e-19,21,7.355734e-18,7.355734e-18,2.150817e-15,Whole_blood,chr6_163408503_T_C
4,chr6_163394158_C_T,chr11:85983973:85990250:clu_7229_-:ENSG0000007...,6.41241e-55,-0.54213,0.034404,0.037209,0.932987,1.0,False,protein_coding,...,1.027886e-52,5e-05,6.956716e-47,6.956716e-47,21,1.46091e-45,1.46091e-45,2.1358509999999998e-42,Whole_blood,chr6_163408503_T_C


In [14]:
# Load the GTEx trans-sGene table and keep only the two tissues compared here.
gtex_trans_sqtl = pd.read_csv(GTEX_TRANS_SQTL_TOP, sep='\t')
# .copy() so the column assignments below act on an independent frame rather
# than on a slice of the full table (avoids SettingWithCopyWarning).
gtex_trans_sqtl = gtex_trans_sqtl[gtex_trans_sqtl.tissue_id.isin(['Whole_Blood', 'Lung'])].copy()
# Harmonise tissue labels with the TOPMed tables ('Whole_Blood' -> 'Whole_blood').
gtex_trans_sqtl['tissue'] = gtex_trans_sqtl.tissue_id.str.replace('Blood', 'blood', regex=False)
# Keep only the part of the gene ID before the first '.'.
gtex_trans_sqtl['gene_id'] = gtex_trans_sqtl.trans_gene_id.str.split('.', expand=True)[0]
# Strip the '_b38' tag so variant IDs have the same form as the TOPMed IDs.
gtex_trans_sqtl['variant_id'] = gtex_trans_sqtl.variant_id.str.replace('_b38', '', regex=False)
gtex_trans_sqtl

Unnamed: 0,tissue_id,trans_gene_id,trans_gene_name,trans_gene_type,trans_gene_chr,trans_phenotype_id,variant_id,maf,pval_nominal,num_phenotypes,...,pval_true_df,pval_perm,pval_beta,pval_beta_k,pval_beta_k_bh,cis_cand_id,cis_cand_name,pp4,tissue,gene_id
7,Lung,ENSG00000130635.15,COL5A1,protein_coding,9,chr9:134830176:134834971:clu_64026:ENSG0000013...,chr13_43507920_A_C,0.235922,1.24577e-40,19,...,2.3162e-39,1e-05,1.31829e-33,2.50475e-32,2.7241600000000003e-28,ENSG00000120658.13,ENOX1,0.881263,Lung,ENSG00000130635
27,Whole_Blood,ENSG00000082074.15,FYB,protein_coding,5,chr5:39124278:39127741:clu_31068:ENSG000000820...,chr6_163381871_T_G,0.064925,6.43146e-15,9,...,2.09461e-14,1e-05,1.60954e-08,1.44858e-07,0.000565455,ENSG00000112531.16,QKI,0.992996,Whole_blood,ENSG00000082074
28,Whole_Blood,ENSG00000158201.9,ABHD3,protein_coding,18,chr18:21651763:21656861:clu_22238:ENSG00000158...,chr9_65074798_G_A,0.363433,2.1659899999999998e-19,9,...,1.15119e-18,1e-05,7.81033e-13,7.0293e-12,5.48777e-08,,,,Whole_blood,ENSG00000158201


In [15]:
# Start from the GTEx trans-sGenes and attach the TOPMed lead variant for the
# same gene in the same tissue. The merge joins on the columns the two frames
# share (tissue and gene_id); GTEx rows without a TOPMed match are kept.
trans_sqtl_replication = (
    gtex_trans_sqtl[['tissue', 'gene_id', 'trans_gene_name', 'variant_id']]
    .rename(columns={'variant_id': 'gtex_lead_variant'})
    .merge(
        topmed_trans_sqtl[['gene_id', 'tissue', 'variant_id']]
        .rename(columns={'variant_id': 'topmed_lead_variant'}),
        how='left',
    )
)
# A gene counts as a TOPMed trans-sGene when the merge found a TOPMed lead variant.
trans_sqtl_replication['is_topmed_trans_sgene'] = trans_sqtl_replication['topmed_lead_variant'].notna()
trans_sqtl_replication['same_lead_variant'] = (
    trans_sqtl_replication['gtex_lead_variant'] == trans_sqtl_replication['topmed_lead_variant']
)
# Base-pair distance between the two lead variants (NaN if missing or on different chromosomes).
trans_sqtl_replication['linear_distance'] = list(map(
    distance_between_variants,
    trans_sqtl_replication['gtex_lead_variant'],
    trans_sqtl_replication['topmed_lead_variant'],
))
trans_sqtl_replication.head()

Unnamed: 0,tissue,gene_id,trans_gene_name,gtex_lead_variant,topmed_lead_variant,is_topmed_trans_sgene,same_lead_variant,linear_distance
0,Lung,ENSG00000130635,COL5A1,chr13_43507920_A_C,chr13_43492750_C_T,True,False,15170
1,Whole_blood,ENSG00000082074,FYB,chr6_163381871_T_G,chr6_163408503_T_C,True,False,26632
2,Whole_blood,ENSG00000158201,ABHD3,chr9_65074798_G_A,chr9_65073699_C_T,True,False,1099


In [16]:
# LD (R2) between the GTEx and TOPMed lead variants, fetched from LDlink one pair at a time.
r2 = []
for gtex_variant, topmed_variant in zip(trans_sqtl_replication.gtex_lead_variant, trans_sqtl_replication.topmed_lead_variant):
    print(gtex_variant, topmed_variant)
    if pd.isnull(topmed_variant) or gtex_variant.split('_')[0] != topmed_variant.split('_')[0]:
        # No TOPMed lead variant, or the variants are on different chromosomes:
        # no LD query is made. (np.NaN alias was removed in NumPy 2.0.)
        r2.append(np.nan)
    else:
        # Query by 'chrom:pos', built from the first two fields of the variant ID.
        rsid1 = ':'.join(gtex_variant.split('_')[:2])
        rsid2 = ':'.join(topmed_variant.split('_')[:2])
        r2.append(ldpair_to_r2(ldpair(rsid1, rsid2)))
trans_sqtl_replication['r2'] = r2
# Write the trans-sQTL replication table as a tab-separated file without the index column.
trans_sqtl_replication.to_csv(f'tables/{PREFIX}transsqtl.tsv', sep='\t', index=False)

chr13_43507920_A_C chr13_43492750_C_T
chr6_163381871_T_G chr6_163408503_T_C
chr9_65074798_G_A chr9_65073699_C_T


In [17]:
# Display the full trans-sQTL replication table.
trans_sqtl_replication

Unnamed: 0,tissue,gene_id,trans_gene_name,gtex_lead_variant,topmed_lead_variant,is_topmed_trans_sgene,same_lead_variant,linear_distance,r2
0,Lung,ENSG00000130635,COL5A1,chr13_43507920_A_C,chr13_43492750_C_T,True,False,15170,0.995
1,Whole_blood,ENSG00000082074,FYB,chr6_163381871_T_G,chr6_163408503_T_C,True,False,26632,0.8592
2,Whole_blood,ENSG00000158201,ABHD3,chr9_65074798_G_A,chr9_65073699_C_T,True,False,1099,0.0283
