# GWAS Locus Browser Locus Zoom Scripts
- **Author** - Frank Grenn
- **Date Started** - June 2019
- **Quick Description:** Code to generate the JSON files used by the interactive LocusZoom plots.
- **Data:** 
input files obtained from: [META5](https://www.ncbi.nlm.nih.gov/pubmed/31701892) and [PD Progression](https://movementdisorders.onlinelibrary.wiley.com/doi/full/10.1002/mds.27845)  
[Static Locus Zoom](http://locuszoom.org/)  
[Interactive Locus Zoom](https://github.com/statgen/locuszoom/wiki)


In [None]:
import pandas as pd
import numpy as np

In [None]:
# Root folder of the processed app data; the locus zoom working directory
# is the "locuszoom" subfolder inside it.
DATADIR = "/path/to/AppDataProcessing"
WRKDIR = DATADIR + "/locuszoom"

### Select the GWAS that corresponds to the loaded summary statistics
The code below would need to be modified for the progression loci, because each of the two progression loci reads from a different summary statistics file.

In [None]:
# Name of the GWAS to work on; it is matched against the GWAS column of the
# risk variant table when the loci are selected.
GWAS = "Progression"

In [None]:
# Load the harmonized summary statistics. Only one file is read at a time;
# the other options stay commented out so they can be switched in by hand.
#sumstats_path = f"{DATADIR}/meta5_sumstats_harmonized.csv"
sumstats_path = f"{DATADIR}/prog_hy_sumstats_harmonized.csv"
#sumstats_path = f"{DATADIR}/prog_ins_sumstats_harmonized.csv"
#sumstats_path = f"{DATADIR}/asiangwas_sumstats_harmonized.csv"
data = pd.read_csv(sumstats_path)

In [None]:
# Read the table of GWAS risk variants, then show its dimensions and first rows
risk_variants_path = f"{DATADIR}/gwas_risk_variants.csv"
gwas_risk_vars = pd.read_csv(risk_variants_path)
for preview in (gwas_risk_vars.shape, gwas_risk_vars.head()):
    print(preview)

In [None]:
# keep only the risk variants whose GWAS column matches the selected GWAS
gwas_mask = gwas_risk_vars["GWAS"] == GWAS
loci = gwas_risk_vars[gwas_mask]
loci

In [None]:
#if using progression loci may need to select a specific variant
#(here only the loci on chromosome 9 are kept)
on_chr9 = loci["CHR"] == 9
loci = loci[on_chr9]
loci

In [None]:
# For every selected locus, write a JSON file for the interactive locus zoom
# containing all summary-statistics variants within WINDOW_BP base pairs on
# either side of the locus position (same chromosome, non-missing RSID).
import json

WINDOW_BP = 1000000  # half-width of the region kept around each locus

for _, locus in loci.iterrows():
    rsid = locus['RSID']
    # int() accepts both numpy scalars and plain Python numbers
    chrm = int(locus['CHR'])
    pos = locus['BP']
    start = pos - WINDOW_BP
    end = pos + WINDOW_BP

    #subset by chromosome; copy so the BP cast below works on an independent
    #frame instead of assigning into a slice of `data`
    chrdata = data[data['CHR'] == chrm].copy()
    chrdata['BP'] = chrdata['BP'].astype('int32')

    #and then by position
    rangeddata = chrdata[(chrdata['BP'] >= start) & (chrdata['BP'] <= end)]
    print(len(rangeddata.index))

    #then drop rows with missing RSIDs
    rangeddata = rangeddata.dropna(subset=['RSID'])
    print(len(rangeddata.index))

    #chromosome is emitted as a string; json.dump takes care of quoting and
    #escaping the string fields (chromosome, ref_allele, variant)
    payload = {
        "data": {
            "chromosome": rangeddata['CHR'].astype(str).tolist(),
            "log_pvalue": rangeddata['LOG_P'].tolist(),
            "position": rangeddata['BP'].tolist(),
            "ref_allele": rangeddata['REF'].tolist(),
            "variant": rangeddata['CHR_BP_REF_ALT'].tolist(),
        },
        "lastPage": None,
    }

    print(rsid + " " + str(locus['CHR']) + ":" + str(locus['BP']))
    #the context manager closes the file even if serialization fails
    with open(f"{WRKDIR}/interactive_stats/" + rsid + "_locus.json", "w") as json_file:
        json.dump(payload, json_file)
	
	
	