In [None]:
# Render matplotlib figures inline, directly below the cell that produces them.
%matplotlib inline

In [None]:
import numpy as np
import pandas as pd

import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
# Use seaborn's 'talk' plotting context for every figure created below.
sns.set_context(context='talk')

## Load data

In [None]:
# Tab-separated disease table; later cells read its `diseaseId`, `is_cancer`
# and `diseaseName` columns.
df = pd.read_csv('results/disgenet_enhanced_hg38.tsv', sep='\t')

# Five random rows as a quick sanity check of the loaded table.
df.sample(5)

In [None]:
df_enr = pd.read_csv('results/TAD_enrichment.csv')

# Lookup tables keyed by disease id, built from `df`. If an id appears on
# several rows of `df`, the value from the last such row is kept.
cancer_by_id = dict(zip(df['diseaseId'], df['is_cancer']))
name_by_id = dict(zip(df['diseaseId'], df['diseaseName']))

# Annotate every enrichment row with its cancer flag and disease name.
# Ids that do not occur in `df` end up as NaN in both new columns.
df_enr['is_cancer'] = df_enr['disease'].map(cancer_by_id)
df_enr['disease_name'] = df_enr['disease'].map(name_by_id)

df_enr.sample(5)

## Tables

### Enriched cancers

In [None]:
# Columns shown in each per-TAD-type summary table.
table_cols = ['disease', 'disease_name', 'pval_boundary', '#snp', '#border_snp']

# One table per TAD type: cancer diseases whose boundary p-value is below
# 0.05, ordered by ascending p-value and truncated to the first five rows.
for tad_type, group in df_enr.groupby('TAD_type'):
    significant_cancer = group['is_cancer'] & (group['pval_boundary'] < .05)

    top_hits = (
        group.loc[significant_cancer]
        .sort_values('pval_boundary')
        .loc[:, table_cols]
        .head()
    )

    display(tad_type, top_hits)

## Plots

In [None]:
# Rescale p-values to -log10(p) so that small p-values become large, easily
# plotted numbers. Work on a copy so `df_enr` keeps the raw p-values.
pval_cols = ['pval_boundary', 'pval_tad', 'pval_none']

df_enr_log = df_enr.copy()

# Vectorized over all three columns at once. A p-value of exactly 0 maps to
# +inf; the resulting divide-by-zero warning is expected and silenced here
# because those entries are converted to NaN immediately below.
with np.errstate(divide='ignore'):
    df_enr_log[pval_cols] = -np.log10(df_enr_log[pval_cols])

# Infinite values cannot be binned in a histogram; treat them as missing.
df_enr_log = df_enr_log.replace([np.inf, -np.inf], np.nan)

### TAD border enrichment

In [None]:
# Significance threshold marked in every panel (on the -log10 scale).
alpha_level = .05

# One panel per TAD type, cancer vs. non-cancer diseases overlaid by colour.
# `height` is the current name of the facet-size argument (formerly `size`).
g = sns.FacetGrid(
    df_enr_log, col='TAD_type', col_wrap=3, hue='is_cancer',
    height=5, aspect=1.2)

# Normalised histograms of -log10(boundary p-value), one per hue level.
g.map(plt.hist, 'pval_boundary', alpha=.5, density=True, bins=30)

# Draw the threshold once per panel, directly on the axes. Routing it through
# `g.map` would draw it once per hue level and attach the hue label to the
# line, which can replace the histogram handles in the legend.
for ax in g.axes.flat:
    ax.axvline(x=-np.log10(alpha_level), color='red', linestyle='dashed')

g.set(xlim=(0, 3))

# The plotted column holds -log10(p), not raw p-values; label it accordingly.
g.set_axis_labels(r'$-\log_{10}$(boundary p-value)', 'density')

g.add_legend()

# Save through the grid so the figure written is this grid's figure and not
# whatever pyplot currently considers the active one.
g.savefig('images/tad_border_enrichment.pdf')