In [20]:
import pandas as pd
import numpy as np
import os
import gzip
import time
import seaborn as sns
import matplotlib.pyplot as plt
import gdreg

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [21]:
# Root folder holding one sub-folder of pair-annotation ("pannot") matrices per annotation.
DATA_PATH = "/n/groups/price/martin/data_GDREG/UKBimp_337K_MAF001/pannot"

# Base names of the pair annotations. Each one is stored at
#   DATA_PATH/<name>/<name>@s.chr@.pannot_mat.npz
# where "@s" is a placeholder for one of PANNOT_SUFFIXES and the remaining "@" is a
# placeholder for the chromosome number (filled in by the cells that read the files).
PANNOT_NAMES = [
    "proxy_0_100",
    "proxy_100_1000",
    "proxy_1000_10000",
    "ldp5_proxy_10000",
    "exon",
    "gene",
    "exonic_gene",
    "protein_domain",
    "cS2G_all",
    "cS2G_promoter",
    "cS2G_other",
]
PANNOT_SUFFIXES = ["_common_common", "_common_lf", "_lf_lf"]

# Seed templates keyed by "<name>@s"; both placeholders are still unresolved here.
# Built from PANNOT_NAMES so the name is written once instead of three times per entry.
DIC_PANNOT_SEED = {
    "%s@s" % name: "%s/%s/%s@s.chr@.pannot_mat.npz" % (DATA_PATH, name, name)
    for name in PANNOT_NAMES
}

# Resolve "@s" for every suffix: one entry per (annotation, suffix), in seed order.
DIC_PANNOT = {}
for term, template in DIC_PANNOT_SEED.items():
    for suffix in PANNOT_SUFFIXES:
        DIC_PANNOT[term.replace("@s", suffix)] = template.replace("@s", suffix)
for val in DIC_PANNOT.values():
    print(val)

/n/groups/price/martin/data_GDREG/UKBimp_337K_MAF001/pannot/proxy_0_100/proxy_0_100_common_common.chr@.pannot_mat.npz
/n/groups/price/martin/data_GDREG/UKBimp_337K_MAF001/pannot/proxy_0_100/proxy_0_100_common_lf.chr@.pannot_mat.npz
/n/groups/price/martin/data_GDREG/UKBimp_337K_MAF001/pannot/proxy_0_100/proxy_0_100_lf_lf.chr@.pannot_mat.npz
/n/groups/price/martin/data_GDREG/UKBimp_337K_MAF001/pannot/proxy_100_1000/proxy_100_1000_common_common.chr@.pannot_mat.npz
/n/groups/price/martin/data_GDREG/UKBimp_337K_MAF001/pannot/proxy_100_1000/proxy_100_1000_common_lf.chr@.pannot_mat.npz
/n/groups/price/martin/data_GDREG/UKBimp_337K_MAF001/pannot/proxy_100_1000/proxy_100_1000_lf_lf.chr@.pannot_mat.npz
/n/groups/price/martin/data_GDREG/UKBimp_337K_MAF001/pannot/proxy_1000_10000/proxy_1000_10000_common_common.chr@.pannot_mat.npz
/n/groups/price/martin/data_GDREG/UKBimp_337K_MAF001/pannot/proxy_1000_10000/proxy_1000_10000_common_lf.chr@.pannot_mat.npz
/n/groups/price/martin/data_GDREG/UKBimp_337K_

In [22]:
# Unfinished files: print every (annotation, chromosome) pair whose matrix file does not
# exist yet. Chromosomes 1-22 are checked; "@" in the template is the chromosome placeholder.
for pannot, path_template in DIC_PANNOT.items():
    for CHR in range(1, 23):
        if not os.path.exists(path_template.replace("@", "%d" % CHR)):
            print(pannot, CHR)

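### Check correctness

Spot-check a few SNP pairs from each pair-annotation matrix on one chromosome against the MAF, distance, LD, gene/exon/domain and cS2G data loaded below.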

In [4]:
# Chromosome whose reference data is loaded in this cell.
CHR = 22

# PGEN: SNP table ("pvar") and allele frequencies ("afreq"). The file is read once and both
# parts are taken from the same result instead of calling read_pgen twice.
PGEN_FILE = '/n/scratch3/users/j/jz286/imp_geno/ukb_imp_chr@_v3'
dic_pgen = gdreg.util.read_pgen(PGEN_FILE.replace("@", "%d" % CHR))
df_snp_chr = dic_pgen["pvar"]
df_snp_chr["MAF"] = dic_pgen["afreq"]["MAF"]
df_snp_chr["mbin"] = [gdreg.util.get_mbin(x) for x in df_snp_chr["MAF"]]
# Index by SNP id; positional access to this table must therefore go through .iloc / arrays.
df_snp_chr.index = df_snp_chr['SNP']

# LD
mat_ld, dic_range = gdreg.util.read_ld(
    '/n/scratch3/users/j/jz286/imp_geno.gdreg_ld/ukb_imp_v3.c%d_s0_e10000_ld.npz' % CHR
)

# Gencode: gene and exon tables, each restricted to the rows whose CHR equals "chr<CHR>".
df_gene = pd.read_csv("/n/groups/price/martin/data_GDREG/gene_annotation/ENSG_gene_annot_v41.txt", sep="\t")
df_gene_chr = df_gene.loc[df_gene['CHR'] == 'chr%d' % CHR]
df_exon = pd.read_csv("/n/groups/price/martin/data_GDREG/gene_annotation/ENSE_exon_annot_v41.txt", sep="\t")
df_exon_chr = df_exon.loc[df_exon['CHR'] == 'chr%d' % CHR]

# VEP annotation: map each SNP to the set of entries in its comma-separated 'AN:DOMAINS' field.
file_folder = '/n/groups/price/martin/data_GDREG/UKBimp_337K_MAF001/baseline_annot/vep'
df_pd = gdreg.util.read_annot(file_folder + '/ukb_imp_chr%s_v3.vep.annot.gz' % CHR)
dic_pd = {x:set(y.split(',')) for x,y in zip(df_pd['SNP'], df_pd['AN:DOMAINS'])}

# cS2G: SNP-to-gene scores. The file's 'SNP' column is kept as 'ID' and replaced by the 'RS'
# value that the .info file lists for that ID; a missing ID raises KeyError.
df_cs2g = pd.read_csv(
    '/n/groups/price/martin/data_GDREG/gene_annotation/cS2G/cS2G_UKBB/cS2G.%s.SGscore.gz' % CHR, sep='\t',
)
df_snpmap = pd.read_csv(
    '/n/groups/price/martin/data_GDREG/gene_annotation/cS2G/00_bim/UKBB.%s.info' % CHR, sep=' ',
)
temp_dic = {x:y for x,y in zip(df_snpmap['ID'], df_snpmap['RS'])}
df_cs2g['ID'] = df_cs2g['SNP']
df_cs2g['SNP'] = [temp_dic[x] for x in df_cs2g['ID']]
df_cs2g.index = df_cs2g['SNP']

In [9]:
# NOTE(review): ad-hoc lookup of a single matrix entry. `mat_G`, `i` and `j` are not defined
# by any cell above this one; they appear to be left over from a previous run of the
# spot-check loop cell, so this fails on a fresh kernel. TODO confirm and remove.
mat_G[i,j]

False

In [17]:
# Spot-check every pair-annotation matrix on chromosome CHR. For the first few non-empty
# rows i (with j = first stored column of that row), print MAF, distance and LD of the SNP
# pair, followed by the gene/exon/domain or cS2G records of both SNPs.
N_ROW_SCAN = 10000  # only rows with index below this are inspected
ROW_STEP = 10       # rows skipped after each printed example
N_EXAMPLE = 3       # examples printed per annotation

# Positional views of the SNP table. df_snp_chr is indexed by SNP id, so integer keys on its
# columns only worked through pandas' deprecated positional fallback; plain arrays make the
# positional lookup explicit. Assumes row k of df_snp_chr corresponds to row/column k of
# mat_G and mat_ld -- TODO confirm.
v_maf = df_snp_chr['MAF'].to_numpy()
v_bp = df_snp_chr['BP'].to_numpy()
v_snp = df_snp_chr['SNP'].to_numpy()

for pAN in DIC_PANNOT:
    print(pAN)
    print(DIC_PANNOT[pAN])
    mat_G = gdreg.util.read_pannot_mat(DIC_PANNOT[pAN].replace("@", "%d" % CHR))
    print('    mat_G', mat_G.shape, mat_G.getformat())

    # indptr has shape[0] + 1 entries, so never scan past the last row.
    n_row = min(N_ROW_SCAN, mat_G.shape[0])
    n_print = 0
    i = 0
    while i < n_row:
        row_start, row_end = mat_G.indptr[i], mat_G.indptr[i + 1]
        if row_start == row_end:  # row i has no stored entries
            i += 1
            continue

        j = mat_G.indices[row_start]  # first stored column of row i

        print('    ------------------------------------------------------------------------------')
        print('    MAF = %0.4f, %0.4f,    dist = %d,    LD=%0.4f' % (
            v_maf[i], v_maf[j], v_bp[i] - v_bp[j], mat_ld[i,j]
        ))

        # Gene & Exon & Domain
        if pAN.startswith(('exon', 'gene', 'protein')):
            for idx in [i, j]:
                snp, bp = v_snp[idx], v_bp[idx]
                # Genes / exons whose [START, END] interval contains the SNP position.
                ind_select = (df_gene_chr['START'] <= bp) & (df_gene_chr['END'] >= bp)
                gene = ','.join(df_gene_chr.loc[ind_select, 'GENE_NAME'])
                ind_select = (df_exon_chr['START'] <= bp) & (df_exon_chr['END'] >= bp)
                exon = ','.join(df_exon_chr.loc[ind_select, 'ENSE'])
                domain = dic_pd[snp] if snp in dic_pd else ''
                print('    %s' % snp)
                print('        gene = %s' % gene)
                print('        exon = %s' % exon)
                print('        domain = %s' % domain)

        # cS2G
        if pAN.startswith('cS2G'):
            for idx in [i, j]:
                snp = v_snp[idx]
                if snp in df_cs2g.index:
                    display(df_cs2g.loc[[snp], ['SNP', 'GENE', 'cS2G', 'INFO']])

        i += ROW_STEP
        n_print += 1
        if n_print >= N_EXAMPLE:
            break

    print('==================================================================================')

proxy_0_100_common_common
/n/groups/price/martin/data_GDREG/UKBimp_337K_MAF001/pannot/proxy_0_100/proxy_0_100_common_common.chr@.pannot_mat.npz
    mat_G (199680, 199680) csr
    ------------------------------------------------------------------------------
    MAF = 0.0809, 0.0800,    dist = -67,    LD=0.9956
    ------------------------------------------------------------------------------
    MAF = 0.0543, 0.0543,    dist = -81,    LD=0.9990
    ------------------------------------------------------------------------------
    MAF = 0.0530, 0.0538,    dist = -77,    LD=0.9343
proxy_0_100_common_lf
/n/groups/price/martin/data_GDREG/UKBimp_337K_MAF001/pannot/proxy_0_100/proxy_0_100_common_lf.chr@.pannot_mat.npz
    mat_G (199680, 199680) csr
    ------------------------------------------------------------------------------
    MAF = 0.0221, 0.3130,    dist = -1,    LD=-0.0740
    ------------------------------------------------------------------------------
    MAF = 0.3139, 0.0438,  

    rs2236639
        gene = CCT8L2
        exon = ENSE00001806026
        domain = {'Gene3D:3.50.7.10', 'Pfam:PF00118', 'Superfamily:SSF52029', 'PANTHER:PTHR11353', 'PANTHER:PTHR11353:SF70'}
    ------------------------------------------------------------------------------
    MAF = 0.1219, 0.0493,    dist = -192,    LD=-0.0851
    rs5992598
        gene = GAB4
        exon = ENSE00001543665
        domain = 
    rs9606550
        gene = GAB4
        exon = ENSE00001543665
        domain = 
    ------------------------------------------------------------------------------
    MAF = 0.3707, 0.0323,    dist = -151,    LD=-0.1409
    rs2041607
        gene = GAB4
        exon = ENSE00003829050
        domain = 
    rs61743878
        gene = GAB4
        exon = ENSE00003829050
        domain = 
exon_lf_lf
/n/groups/price/martin/data_GDREG/UKBimp_337K_MAF001/pannot/exon/exon_lf_lf.chr@.pannot_mat.npz
    mat_G (199680, 199680) csr
    -------------------------------------------------------

    rs2236639
        gene = CCT8L2
        exon = ENSE00001806026
        domain = {'Gene3D:3.50.7.10', 'Pfam:PF00118', 'Superfamily:SSF52029', 'PANTHER:PTHR11353', 'PANTHER:PTHR11353:SF70'}
    rs5747988
        gene = CCT8L2
        exon = ENSE00001806026
        domain = {'Pfam:PF00118', 'Gene3D:1.10.560.10', 'Superfamily:SSF48592', 'PANTHER:PTHR11353', 'Low_complexity_(Seg):seg', 'PANTHER:PTHR11353:SF70'}
    ------------------------------------------------------------------------------
    MAF = 0.1397, 0.1387,    dist = -559,    LD=0.9931
    rs5748622
        gene = XKR3
        exon = ENSE00001305313,ENSE00003916355
        domain = {'PANTHER:PTHR14297:SF7', 'PANTHER:PTHR14297'}
    rs5748623
        gene = XKR3
        exon = ENSE00001305313,ENSE00003916355
        domain = {'PANTHER:PTHR14297:SF7', 'PANTHER:PTHR14297', 'Pfam:PF09815'}
    ------------------------------------------------------------------------------
    MAF = 0.0555, 0.1397,    dist = 16257,    LD=-0.0620
  

Unnamed: 0_level_0,SNP,GENE,cS2G,INFO
SNP,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
rs4008588,rs4008588,TPTEP1,1.0,|GTeX_Finemapped=1


Unnamed: 0_level_0,SNP,GENE,cS2G,INFO
SNP,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
rs143558414,rs143558414,TPTEP1,1.0,|GTeX_Finemapped=1


    ------------------------------------------------------------------------------
    MAF = 0.1781, 0.1258,    dist = 1738,    LD=0.8148


Unnamed: 0_level_0,SNP,GENE,cS2G,INFO
SNP,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
rs5747939,rs5747939,TPTEP1,1.0,|GTeX_Finemapped=1


Unnamed: 0_level_0,SNP,GENE,cS2G,INFO
SNP,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
rs4008588,rs4008588,TPTEP1,1.0,|GTeX_Finemapped=1


    ------------------------------------------------------------------------------
    MAF = 0.1229, 0.1258,    dist = 4513,    LD=0.9822


Unnamed: 0_level_0,SNP,GENE,cS2G,INFO
SNP,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
rs16980739,rs16980739,TPTEP1,1.0,|GTeX_Finemapped=1


Unnamed: 0_level_0,SNP,GENE,cS2G,INFO
SNP,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
rs4008588,rs4008588,TPTEP1,1.0,|GTeX_Finemapped=1


cS2G_all_common_lf
/n/groups/price/martin/data_GDREG/UKBimp_337K_MAF001/pannot/cS2G_all/cS2G_all_common_lf.chr@.pannot_mat.npz
    mat_G (199680, 199680) csr
    ------------------------------------------------------------------------------
    MAF = 0.0105, 0.1258,    dist = -134439,    LD=-0.0301


Unnamed: 0_level_0,SNP,GENE,cS2G,INFO
SNP,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
rs146816456,rs146816456,TPTEP1,1.0,|GTeX_Finemapped=1


Unnamed: 0_level_0,SNP,GENE,cS2G,INFO
SNP,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
rs4008588,rs4008588,TPTEP1,1.0,|GTeX_Finemapped=1


    ------------------------------------------------------------------------------
    MAF = 0.0229, 0.2890,    dist = -405059,    LD=-0.0301


Unnamed: 0_level_0,SNP,GENE,cS2G,INFO
SNP,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
rs3888501,rs3888501,CECR5,1.0,|GTeX_Finemapped=1


Unnamed: 0_level_0,SNP,GENE,cS2G,INFO
SNP,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
rs5748756,rs5748756,CECR5,1.0,|GTeX_Finemapped=1


    ------------------------------------------------------------------------------
    MAF = 0.0482, 0.2726,    dist = -447281,    LD=-0.0142


Unnamed: 0_level_0,SNP,GENE,cS2G,INFO
SNP,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
rs370558029,rs370558029,CECR7,1.0,|ABC=1


Unnamed: 0_level_0,SNP,GENE,cS2G,INFO
SNP,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
rs4819941,rs4819941,CECR7,0.798,|GTeX_Finemapped=1
rs4819941,rs4819941,XKR3,0.202,|EpiMap=1


cS2G_all_lf_lf
/n/groups/price/martin/data_GDREG/UKBimp_337K_MAF001/pannot/cS2G_all/cS2G_all_lf_lf.chr@.pannot_mat.npz
    mat_G (199680, 199680) csr
    ------------------------------------------------------------------------------
    MAF = 0.0105, 0.0142,    dist = -105223,    LD=-0.0065


Unnamed: 0_level_0,SNP,GENE,cS2G,INFO
SNP,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
rs146816456,rs146816456,TPTEP1,1.0,|GTeX_Finemapped=1


Unnamed: 0_level_0,SNP,GENE,cS2G,INFO
SNP,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
22:17024887_CAAGG_C,22:17024887_CAAGG_C,TPTEP1,1.0,|ABC=1


    ------------------------------------------------------------------------------
    MAF = 0.0229, 0.0065,    dist = -522494,    LD=-0.0035


Unnamed: 0_level_0,SNP,GENE,cS2G,INFO
SNP,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
rs3888501,rs3888501,CECR5,1.0,|GTeX_Finemapped=1


Unnamed: 0_level_0,SNP,GENE,cS2G,INFO
SNP,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
rs190098544,rs190098544,CECR5,1.0,|EpiMap=1


    ------------------------------------------------------------------------------
    MAF = 0.0482, 0.0478,    dist = -470733,    LD=0.0082


Unnamed: 0_level_0,SNP,GENE,cS2G,INFO
SNP,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
rs370558029,rs370558029,CECR7,1.0,|ABC=1


Unnamed: 0_level_0,SNP,GENE,cS2G,INFO
SNP,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
rs13055424,rs13055424,CECR7,1.0,|Promoter=1|ABC=1


cS2G_promoter_common_common
/n/groups/price/martin/data_GDREG/UKBimp_337K_MAF001/pannot/cS2G_promoter/cS2G_promoter_common_common.chr@.pannot_mat.npz
    mat_G (199680, 199680) csr
    ------------------------------------------------------------------------------
    MAF = 0.1024, 0.1025,    dist = -46,    LD=1.0013


Unnamed: 0_level_0,SNP,GENE,cS2G,INFO
SNP,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
rs5747994,rs5747994,CCT8L2,1.0,|Promoter=1


Unnamed: 0_level_0,SNP,GENE,cS2G,INFO
SNP,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
rs5746662,rs5746662,CCT8L2,1.0,|Promoter=1


    ------------------------------------------------------------------------------
    MAF = 0.3782, 0.3775,    dist = -438,    LD=0.9482


Unnamed: 0_level_0,SNP,GENE,cS2G,INFO
SNP,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
rs112886011,rs112886011,TPTEP1,1.0,|Promoter=1|ABC=1


Unnamed: 0_level_0,SNP,GENE,cS2G,INFO
SNP,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
rs2236640,rs2236640,TPTEP1,1.0,|Promoter=1|GTeX_Finemapped=1|ABC=1|Cicero=1


    ------------------------------------------------------------------------------
    MAF = 0.1401, 0.3782,    dist = 16731,    LD=-0.1499


Unnamed: 0_level_0,SNP,GENE,cS2G,INFO
SNP,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
rs9618554,rs9618554,TPTEP1,1.0,|Promoter=1


Unnamed: 0_level_0,SNP,GENE,cS2G,INFO
SNP,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
rs112886011,rs112886011,TPTEP1,1.0,|Promoter=1|ABC=1


cS2G_promoter_common_lf
/n/groups/price/martin/data_GDREG/UKBimp_337K_MAF001/pannot/cS2G_promoter/cS2G_promoter_common_lf.chr@.pannot_mat.npz
    mat_G (199680, 199680) csr
    ------------------------------------------------------------------------------
    MAF = 0.0217, 0.1024,    dist = -14,    LD=0.4645


Unnamed: 0_level_0,SNP,GENE,cS2G,INFO
SNP,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
rs77689376,rs77689376,TPTEP1,0.07,|GTeX_Finemapped=1
rs77689376,rs77689376,CCT8L2,0.93,|Promoter=1


Unnamed: 0_level_0,SNP,GENE,cS2G,INFO
SNP,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
rs5747994,rs5747994,CCT8L2,1.0,|Promoter=1


    ------------------------------------------------------------------------------
    MAF = 0.0065, 0.3782,    dist = -995,    LD=-0.0486


Unnamed: 0_level_0,SNP,GENE,cS2G,INFO
SNP,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
rs536231265,rs536231265,TPTEP1,1.0,|Promoter=1|ABC=1|Cicero=1


Unnamed: 0_level_0,SNP,GENE,cS2G,INFO
SNP,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
rs112886011,rs112886011,TPTEP1,1.0,|Promoter=1|ABC=1


    ------------------------------------------------------------------------------
    MAF = 0.1401, 0.0065,    dist = 17726,    LD=-0.0250


Unnamed: 0_level_0,SNP,GENE,cS2G,INFO
SNP,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
rs9618554,rs9618554,TPTEP1,1.0,|Promoter=1


Unnamed: 0_level_0,SNP,GENE,cS2G,INFO
SNP,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
rs536231265,rs536231265,TPTEP1,1.0,|Promoter=1|ABC=1|Cicero=1


cS2G_promoter_lf_lf
/n/groups/price/martin/data_GDREG/UKBimp_337K_MAF001/pannot/cS2G_promoter/cS2G_promoter_lf_lf.chr@.pannot_mat.npz
    mat_G (199680, 199680) csr
    ------------------------------------------------------------------------------
    MAF = 0.0065, 0.0101,    dist = -556,    LD=-0.0063


Unnamed: 0_level_0,SNP,GENE,cS2G,INFO
SNP,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
rs536231265,rs536231265,TPTEP1,1.0,|Promoter=1|ABC=1|Cicero=1


Unnamed: 0_level_0,SNP,GENE,cS2G,INFO
SNP,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
rs189605007,rs189605007,TPTEP1,1.0,|Promoter=1|GTeX_Finemapped=1|ABC=1|Cicero=1


    ------------------------------------------------------------------------------
    MAF = 0.0391, 0.0065,    dist = 17808,    LD=-0.0079


Unnamed: 0_level_0,SNP,GENE,cS2G,INFO
SNP,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
rs551935374,rs551935374,TPTEP1,1.0,|Promoter=1


Unnamed: 0_level_0,SNP,GENE,cS2G,INFO
SNP,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
rs536231265,rs536231265,TPTEP1,1.0,|Promoter=1|ABC=1|Cicero=1


    ------------------------------------------------------------------------------
    MAF = 0.0319, 0.0401,    dist = -1374,    LD=-0.0368


Unnamed: 0_level_0,SNP,GENE,cS2G,INFO
SNP,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
rs73147679,rs73147679,GAB4,1.0,|Promoter=1|Cicero=1


Unnamed: 0_level_0,SNP,GENE,cS2G,INFO
SNP,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
rs5746961,rs5746961,GAB4,1.0,|Promoter=1


cS2G_other_common_common
/n/groups/price/martin/data_GDREG/UKBimp_337K_MAF001/pannot/cS2G_other/cS2G_other_common_common.chr@.pannot_mat.npz
    mat_G (199680, 199680) csr
    ------------------------------------------------------------------------------
    MAF = 0.1258, 0.1259,    dist = -229,    LD=0.9995


Unnamed: 0_level_0,SNP,GENE,cS2G,INFO
SNP,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
rs4008588,rs4008588,TPTEP1,1.0,|GTeX_Finemapped=1


Unnamed: 0_level_0,SNP,GENE,cS2G,INFO
SNP,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
rs143558414,rs143558414,TPTEP1,1.0,|GTeX_Finemapped=1


    ------------------------------------------------------------------------------
    MAF = 0.1781, 0.1258,    dist = 1738,    LD=0.8148


Unnamed: 0_level_0,SNP,GENE,cS2G,INFO
SNP,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
rs5747939,rs5747939,TPTEP1,1.0,|GTeX_Finemapped=1


Unnamed: 0_level_0,SNP,GENE,cS2G,INFO
SNP,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
rs4008588,rs4008588,TPTEP1,1.0,|GTeX_Finemapped=1


    ------------------------------------------------------------------------------
    MAF = 0.1229, 0.1258,    dist = 4513,    LD=0.9822


Unnamed: 0_level_0,SNP,GENE,cS2G,INFO
SNP,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
rs16980739,rs16980739,TPTEP1,1.0,|GTeX_Finemapped=1


Unnamed: 0_level_0,SNP,GENE,cS2G,INFO
SNP,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
rs4008588,rs4008588,TPTEP1,1.0,|GTeX_Finemapped=1


cS2G_other_common_lf
/n/groups/price/martin/data_GDREG/UKBimp_337K_MAF001/pannot/cS2G_other/cS2G_other_common_lf.chr@.pannot_mat.npz
    mat_G (199680, 199680) csr
    ------------------------------------------------------------------------------
    MAF = 0.0105, 0.1258,    dist = -134439,    LD=-0.0301


Unnamed: 0_level_0,SNP,GENE,cS2G,INFO
SNP,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
rs146816456,rs146816456,TPTEP1,1.0,|GTeX_Finemapped=1


Unnamed: 0_level_0,SNP,GENE,cS2G,INFO
SNP,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
rs4008588,rs4008588,TPTEP1,1.0,|GTeX_Finemapped=1


    ------------------------------------------------------------------------------
    MAF = 0.0229, 0.2890,    dist = -405059,    LD=-0.0301


Unnamed: 0_level_0,SNP,GENE,cS2G,INFO
SNP,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
rs3888501,rs3888501,CECR5,1.0,|GTeX_Finemapped=1


Unnamed: 0_level_0,SNP,GENE,cS2G,INFO
SNP,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
rs5748756,rs5748756,CECR5,1.0,|GTeX_Finemapped=1


    ------------------------------------------------------------------------------
    MAF = 0.0482, 0.2726,    dist = -447281,    LD=-0.0142


Unnamed: 0_level_0,SNP,GENE,cS2G,INFO
SNP,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
rs370558029,rs370558029,CECR7,1.0,|ABC=1


Unnamed: 0_level_0,SNP,GENE,cS2G,INFO
SNP,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
rs4819941,rs4819941,CECR7,0.798,|GTeX_Finemapped=1
rs4819941,rs4819941,XKR3,0.202,|EpiMap=1


cS2G_other_lf_lf
/n/groups/price/martin/data_GDREG/UKBimp_337K_MAF001/pannot/cS2G_other/cS2G_other_lf_lf.chr@.pannot_mat.npz
    mat_G (199680, 199680) csr
    ------------------------------------------------------------------------------
    MAF = 0.0105, 0.0142,    dist = -105223,    LD=-0.0065


Unnamed: 0_level_0,SNP,GENE,cS2G,INFO
SNP,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
rs146816456,rs146816456,TPTEP1,1.0,|GTeX_Finemapped=1


Unnamed: 0_level_0,SNP,GENE,cS2G,INFO
SNP,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
22:17024887_CAAGG_C,22:17024887_CAAGG_C,TPTEP1,1.0,|ABC=1


    ------------------------------------------------------------------------------
    MAF = 0.0229, 0.0065,    dist = -522494,    LD=-0.0035


Unnamed: 0_level_0,SNP,GENE,cS2G,INFO
SNP,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
rs3888501,rs3888501,CECR5,1.0,|GTeX_Finemapped=1


Unnamed: 0_level_0,SNP,GENE,cS2G,INFO
SNP,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
rs190098544,rs190098544,CECR5,1.0,|EpiMap=1


    ------------------------------------------------------------------------------
    MAF = 0.0482, 0.0300,    dist = -481714,    LD=0.0206


Unnamed: 0_level_0,SNP,GENE,cS2G,INFO
SNP,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
rs370558029,rs370558029,CECR7,1.0,|ABC=1


Unnamed: 0_level_0,SNP,GENE,cS2G,INFO
SNP,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
rs532305349,rs532305349,CECR7,1.0,|ABC=1




### Old code