In [1]:
import os 
import pandas as pd
import subprocess
import glob
import pybedtools as pbt 
pd.set_option('display.max_columns', None)

pbt.set_bedtools_path('/mnt/BioApps/bedtools/bin/')

os.chdir('/mnt/BioHome/jreyna/jreyna/projects/dchallenge/')

# make the directory to save our data
outdir = 'results/main/gwas_pieqtls/2021_chiou_et_al/2021_chandra_et_al/'
os.makedirs(outdir, exist_ok=True)

## Load Fine Mapped GWAS

In [2]:
gwas = 'results/main/finemapping/T1D_34012112_Gaulton/GRCh37/offset_1000000/Summary/sss/FINAL_top_snp_credible_set.txt'
gwas_df = pd.read_table(gwas)

# loading finemap data into bedtools
gwas_bed = gwas_df.iloc[:, [4,5,5]]
gwas_bed.columns = ['chrom', 'start', 'end']
gwas_bed['start'] = gwas_bed['start'] - 1
gwas_pbt = pbt.BedTool.from_dataframe(gwas_bed)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  gwas_bed['start'] = gwas_bed['start'] - 1


In [3]:
gwas_df.head()

Unnamed: 0,regionID,GWASLoci,index,rsid,chromosome,position,allele1,allele2,maf,beta,se,z,prob,log10bf,mean,sd,mean_incl,sd_incl,pval
0,16,chr10:89512436-90644950,2448,10:90023033,10,90023033,C,T,0.267,-0.146508,0.015935,-9.1941,1.0,13.5355,0.187599,3e-06,0.187599,3e-06,1.0
1,16,chr10:89512436-90644950,2574,10:90051035,10,90051035,T,G,0.251,-0.165103,0.016333,-10.1086,1.0,13.5355,0.187599,3e-06,0.187599,3e-06,1.0
2,16,chr10:89512436-90644950,3644,10:90319220,10,90319220,T,C,0.000864,0.380395,0.258607,1.47094,1.0,13.5355,0.187599,3e-06,0.187599,3e-06,0.070654
3,16,chr10:89512436-90644950,2524,10:90039355,10,90039355,AACAC,A,0.241,-0.138832,0.044625,-3.11108,1.0,13.5355,0.187599,3e-06,0.187599,3e-06,0.999068
4,16,chr10:89512436-90644950,2514,10:90036367,10,90036367,G,C,0.222,-0.155665,0.017116,-9.09471,1.0,13.5355,0.187599,3e-06,0.187599,3e-06,1.0


In [4]:
gwas_pbt = pbt.BedTool.from_dataframe(gwas_bed)

## Intersect Fine Mapped GWAS and pieQTLs

In [5]:
pieqtls = glob.glob('results/main/pieqtls/2021_chandra_et_al/*/proximal.pieqtls.tsv')

In [6]:
pieqtl_data = []
for pieqtl in pieqtls:
    print(pieqtl)
    
    cline = pieqtl.split('/')[-2]
    df = pd.read_table(pieqtl)    
    df['cline'] = cline
    pieqtl_data.append(df) 

pieqtl_df = pd.concat(pieqtl_data)
pieqtl_df['Chromosome'] = pieqtl_df['Chromosome'].str.replace('chr', '').astype(int)

results/main/pieqtls/2021_chandra_et_al/B-cell_naive/proximal.pieqtls.tsv
results/main/pieqtls/2021_chandra_et_al/NK-cell_naive/proximal.pieqtls.tsv
results/main/pieqtls/2021_chandra_et_al/monocyte_naive/proximal.pieqtls.tsv
results/main/pieqtls/2021_chandra_et_al/CD4_T-cell_naive/proximal.pieqtls.tsv
results/main/pieqtls/2021_chandra_et_al/CD8_T-cell_naive/proximal.pieqtls.tsv


In [7]:
pieqtl_bed = pieqtl_df.iloc[:, [1,2,2,-1]]
pieqtl_bed.columns = ['chrom', 'start', 'end', 'cline']
pieqtl_bed.loc[:, 'start'] = pieqtl_bed['start'] - 1

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pieqtl_bed.loc[:, 'start'] = pieqtl_bed['start'] - 1


In [8]:
pieqtl_pbt = pbt.BedTool.from_dataframe(pieqtl_bed)
intersect_pbt = gwas_pbt.intersect(pieqtl_pbt, wa=True, wb=True)

## Summarize

In [9]:
intersect_df = intersect_pbt.to_dataframe()
intersect_df.rename(columns={'thickStart': 'cline'}, inplace=True)
intersect_df = intersect_df.iloc[:, [0,1,2,6]]
intersect_df = intersect_df.merge(gwas_df, left_on=['chrom', 'end'], right_on=['chromosome', 'position'], how='left')
intersect_df = intersect_df.merge(pieqtl_df, left_on=['chrom', 'end'], right_on=['Chromosome', 'pieQTL.Position'], how='left')

In [10]:
intersect_df

Unnamed: 0,chrom,start,end,cline_x,regionID,GWASLoci,index,rsid,chromosome,position,allele1,allele2,maf,beta_x,se,z,prob,log10bf,mean,sd,mean_incl,sd_incl,pval,pieQTL.ID,Chromosome,pieQTL.Position,Target_geneID,Target_geneName,TSS,pvalue,FDR (DICE),beta_y,ref,alt,Mean.TPM.Homozygous.Reference,Mean.TPM.Heterozygous,Mean.TPM.Homozygous.Alternative,Interaction_type,GWAS.Trait,cline_y
0,2,242294912,242294913,B-cell_naive,52,chr2:241778007-242778547,3666,2:242294913,2,242294913,G,A,0.0977,-0.066868,0.062422,-1.07122,1.0,13.6344,-0.33434,0.539277,-0.33434,0.539277,0.857966,rs3755397,2,242294913,ENSG00000168385.13,2020-09-02 00:00:00,242254515,1.7e-09,7.49e-06,1.17,A,G,129.61,160.13,210.54,Direct_pieQTL,Leukemia chronic lymphocytic,B-cell_naive
1,2,242294912,242294913,B-cell_naive,52,chr2:241778007-242778547,3666,2:242294913,2,242294913,G,A,0.0977,-0.066868,0.062422,-1.07122,1.0,13.6344,-0.33434,0.539277,-0.33434,0.539277,0.857966,rs3755397,2,242294913,ENSG00000168385.13,2020-09-02 00:00:00,242254515,2.14e-07,0.000716649,1.03,A,G,175.26,211.63,229.33,Direct_pieQTL,Leukemia chronic lymphocytic,NK-cell_naive
2,2,242294912,242294913,B-cell_naive,52,chr2:241778007-242778547,3666,2:242294913,2,242294913,G,A,0.0977,-0.066868,0.062422,-1.07122,1.0,13.6344,-0.33434,0.539277,-0.33434,0.539277,0.857966,rs3755397,2,242294913,ENSG00000168385.13,2020-09-02 00:00:00,242254515,1.83e-11,1.63e-07,1.28,A,G,144.53,187.66,224.97,Direct_pieQTL,Leukemia chronic lymphocytic,monocyte_naive
3,2,242294912,242294913,B-cell_naive,52,chr2:241778007-242778547,3666,2:242294913,2,242294913,G,A,0.0977,-0.066868,0.062422,-1.07122,1.0,13.6344,-0.33434,0.539277,-0.33434,0.539277,0.857966,rs3755397,2,242294913,ENSG00000168385.13,2020-09-02 00:00:00,242254515,1.16e-12,1.21e-08,1.35,A,G,133.93,197.27,244.58,Direct_pieQTL,Leukemia chronic lymphocytic,CD4_T-cell_naive
4,2,242294912,242294913,B-cell_naive,52,chr2:241778007-242778547,3666,2:242294913,2,242294913,G,A,0.0977,-0.066868,0.062422,-1.07122,1.0,13.6344,-0.33434,0.539277,-0.33434,0.539277,0.857966,rs3755397,2,242294913,ENSG00000168385.13,2020-09-02 00:00:00,242254515,9.46e-13,8.65e-09,1.33,A,G,130.83,183.62,225.51,Direct_pieQTL,Leukemia chronic lymphocytic,CD8_T-cell_naive
5,2,242294912,242294913,NK-cell_naive,52,chr2:241778007-242778547,3666,2:242294913,2,242294913,G,A,0.0977,-0.066868,0.062422,-1.07122,1.0,13.6344,-0.33434,0.539277,-0.33434,0.539277,0.857966,rs3755397,2,242294913,ENSG00000168385.13,2020-09-02 00:00:00,242254515,1.7e-09,7.49e-06,1.17,A,G,129.61,160.13,210.54,Direct_pieQTL,Leukemia chronic lymphocytic,B-cell_naive
6,2,242294912,242294913,NK-cell_naive,52,chr2:241778007-242778547,3666,2:242294913,2,242294913,G,A,0.0977,-0.066868,0.062422,-1.07122,1.0,13.6344,-0.33434,0.539277,-0.33434,0.539277,0.857966,rs3755397,2,242294913,ENSG00000168385.13,2020-09-02 00:00:00,242254515,2.14e-07,0.000716649,1.03,A,G,175.26,211.63,229.33,Direct_pieQTL,Leukemia chronic lymphocytic,NK-cell_naive
7,2,242294912,242294913,NK-cell_naive,52,chr2:241778007-242778547,3666,2:242294913,2,242294913,G,A,0.0977,-0.066868,0.062422,-1.07122,1.0,13.6344,-0.33434,0.539277,-0.33434,0.539277,0.857966,rs3755397,2,242294913,ENSG00000168385.13,2020-09-02 00:00:00,242254515,1.83e-11,1.63e-07,1.28,A,G,144.53,187.66,224.97,Direct_pieQTL,Leukemia chronic lymphocytic,monocyte_naive
8,2,242294912,242294913,NK-cell_naive,52,chr2:241778007-242778547,3666,2:242294913,2,242294913,G,A,0.0977,-0.066868,0.062422,-1.07122,1.0,13.6344,-0.33434,0.539277,-0.33434,0.539277,0.857966,rs3755397,2,242294913,ENSG00000168385.13,2020-09-02 00:00:00,242254515,1.16e-12,1.21e-08,1.35,A,G,133.93,197.27,244.58,Direct_pieQTL,Leukemia chronic lymphocytic,CD4_T-cell_naive
9,2,242294912,242294913,NK-cell_naive,52,chr2:241778007-242778547,3666,2:242294913,2,242294913,G,A,0.0977,-0.066868,0.062422,-1.07122,1.0,13.6344,-0.33434,0.539277,-0.33434,0.539277,0.857966,rs3755397,2,242294913,ENSG00000168385.13,2020-09-02 00:00:00,242254515,9.46e-13,8.65e-09,1.33,A,G,130.83,183.62,225.51,Direct_pieQTL,Leukemia chronic lymphocytic,CD8_T-cell_naive
