In [14]:
import os 
import pandas as pd
import subprocess
import glob
import pybedtools as pbt 

pbt.set_bedtools_path('/mnt/BioApps/bedtools/bin/')

os.chdir('/mnt/BioHome/jreyna/jreyna/projects/dchallenge/')

# make the directory to save our data
outdir = 'results/main/gwas_pieqtls/2021_chiou_et_al/2021_chandra_et_al/'
os.makedirs(outdir, exist_ok=True)

## Load Fine Mapped GWAS

In [15]:
gwas = 'results/main/gwas/2021_chiou_et_al/gwas.finemapping.supp_table3.tsv'
gwas_df = pd.read_table(gwas)

gwas_df['Chrom.'] = gwas_df['Chrom.'].astype(int).astype(str) 
gwas_df['Position (hg19)'] = gwas_df['Position (hg19)'].astype(int)
gwas_df['Position (hg38)'] = gwas_df['Position (hg38)'].astype(int)

In [16]:
gwas_bed = gwas_df.iloc[:, [1,2,2]]
#gwas_bed = gwas_df.iloc[:, [1,3,3]]
gwas_bed.columns = ['chrom', 'start', 'end']
gwas_bed['start'] = gwas_bed['start'] - 1

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  gwas_bed['start'] = gwas_bed['start'] - 1


In [17]:
gwas_df.head()

Unnamed: 0,Marker,Chrom.,Position (hg19),Position (hg38),Allele Ref.,Alt.,Signal name,Alt.AF,PPA,Previous studies Index (r2),PMID
0,rs10751776,1,25296743,24970252,A,C,RUNX3 (1:25296743:A:C),0.509915,0.040594,,
1,rs574384,1,36087661,35622060,C,A,PSMB2 (1:36087661:C:A),0.894668,0.195293,,
2,rs12742756,1,38347417,37881745,A,G,INPP5B (1:38347417:A:G),0.428204,0.070499,,
3,rs855330,1,64113889,63648218,T,C,PGM1 (1:64113889:T:C),0.259422,0.16116,,
4,rs150709401,1,114135880,113593258,A,G,PTPN22 (1:114135880:A:G),0.009602,0.257619,,


In [18]:
gwas_bed.head()

Unnamed: 0,chrom,start,end
0,1,25296742,25296743
1,1,36087660,36087661
2,1,38347416,38347417
3,1,64113888,64113889
4,1,114135879,114135880


In [19]:
gwas_pbt = pbt.BedTool.from_dataframe(gwas_bed)

## Intersect Fine Mapped GWAS and pieQTLs

In [20]:
pieqtls = glob.glob('results/main/pieqtls/2021_chandra_et_al/*/proximal.pieqtls.tsv')

In [21]:
pieqtl_data = []
for pieqtl in pieqtls:
    print(pieqtl)
    
    cline = pieqtl.split('/')[-2]
    df = pd.read_table(pieqtl)    
    df['cline'] = cline
    pieqtl_data.append(df) 

pieqtl_df = pd.concat(pieqtl_data)

results/main/pieqtls/2021_chandra_et_al/B-cell_naive/proximal.pieqtls.tsv
results/main/pieqtls/2021_chandra_et_al/NK-cell_naive/proximal.pieqtls.tsv
results/main/pieqtls/2021_chandra_et_al/monocyte_naive/proximal.pieqtls.tsv
results/main/pieqtls/2021_chandra_et_al/CD4_T-cell_naive/proximal.pieqtls.tsv
results/main/pieqtls/2021_chandra_et_al/CD8_T-cell_naive/proximal.pieqtls.tsv


In [22]:
pieqtl_bed = pieqtl_df.iloc[:, [1,2,2,-1]]
pieqtl_bed.columns = ['chrom', 'start', 'end', 'cline']
pieqtl_bed.loc[:, 'chrom'] = pieqtl_bed['chrom'].str.replace('chr', '')
pieqtl_bed.loc[:, 'start'] = pieqtl_bed['start'] - 1

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pieqtl_bed.loc[:, 'chrom'] = pieqtl_bed['chrom'].str.replace('chr', '')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pieqtl_bed.loc[:, 'start'] = pieqtl_bed['start'] - 1


In [23]:
pieqtl_pbt = pbt.BedTool.from_dataframe(pieqtl_bed)
intersect_pbt = gwas_pbt.intersect(pieqtl_pbt, wa=True, wb=True)

In [24]:
intersect_pbt.to_dataframe()

Unnamed: 0,chrom,start,end,name,score,strand,thickStart
0,11,64107734,64107735,11,64107734,64107735,B-cell_naive
1,11,64107734,64107735,11,64107734,64107735,NK-cell_naive


In [25]:
check = pieqtl_df[(pieqtl_df['Chromosome'] == 'chr11') & (pieqtl_df['pieQTL.Position'] == 64107735) ]

In [26]:
check.iloc[:, [0,1,2,3,4,5,6,7,8,9,10, 14, 16]]

Unnamed: 0,pieQTL.ID,Chromosome,pieQTL.Position,Target_geneID,Target_geneName,TSS,pvalue,FDR (DICE),beta,ref,alt,Interaction_type,cline
292,rs663743,chr11,64107735,ENSG00000236935.1,AP003774.1,64096976,2.2800000000000002e-17,8.89e-13,1.11,G,A,Indirect_pieQTL,B-cell_naive
173,rs663743,chr11,64107735,ENSG00000236935.1,AP003774.1,64096976,2.18e-18,3.23e-13,1.16,G,A,Indirect_pieQTL,NK-cell_naive
