In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# Global seaborn theme for every figure in this notebook: Arial font,
# font sizes scaled 3x, and the 'ticks' axes style.
sns.set(font="Arial", font_scale=3, style='ticks')
# IPython magic: render matplotlib figures inline in the notebook output.
%matplotlib inline

### Input File

In [None]:
# Parse the PMD intervals from the tab-separated BED file (chrom, start, end in
# the first three columns) into a table of interval lengths, indexed by a
# "chrom:start-end" identifier built from the raw field text.
pmd_records = []
with open("AK1_WGBS_PMD.bed", 'r') as pmdf:
    for raw_line in pmdf:
        # Skip blank lines (e.g. a trailing newline at EOF) and BED
        # header/comment lines; both would otherwise crash the field parsing.
        if not raw_line.strip() or raw_line.startswith(('#', 'track', 'browser')):
            continue

        # Remove the full line terminator, including '\r' from CRLF files,
        # so it cannot leak into the ID string.
        fields = raw_line.rstrip('\r\n').split('\t')
        chrom, start, end = fields[:3]

        # The ID keeps the coordinates exactly as written in the file; only
        # the length is computed numerically (end - start).
        pmd_records.append((f'{chrom}:{start}-{end}', int(end) - int(start)))

pmd_from_ak1_wgbs = pd.DataFrame(pmd_records, columns=['ID', 'Length']).set_index('ID')
# The intermediate list is no longer needed once the frame exists.
del pmd_records

# Histogram of PMD lengths (raw counts) with a KDE curve overlaid.
pmd_lengths = pmd_from_ak1_wgbs['Length']
fig, ax = plt.subplots(figsize=(8, 8))
sns.histplot(
    data=pmd_from_ak1_wgbs,
    x='Length',
    stat='count',
    kde=True,
    ax=ax,
)
# Clamp the x-axis to the observed length range and label it.
ax.set(
    xlim=(pmd_lengths.min(), pmd_lengths.max()),
    xlabel="Length of PMD (bp)",
)
sns.despine(ax=ax)


# Load the three tab-separated tables, using the first column of each file as
# the row index.
# NOTE(review): `npc` is read from the "NSC_" file - confirm the naming is intended.
ak1, ipsc, npc = (
    pd.read_table(met_path, index_col=0)
    for met_path in (
        "AK1_AK1_WGBS_PMD_met.txt",
        "iPSC_AK1_WGBS_PMD_met.txt",
        "NSC_AK1_WGBS_PMD_met.txt",
    )
)

### Merging

In [None]:
# Put the three tables side by side as columns; pandas aligns the rows on the
# index (outer join by default), in the order AK1, iPSC, NPC.
merge = pd.concat(
    [ak1, ipsc, npc],
    axis="columns",
)

### Analysis

In [None]:
# Heatmap of the merged table: one row per index entry of `merge`, one column
# per merged table column.
fig, ax = plt.subplots(figsize=(10, 15))
heatmap = sns.heatmap(
    merge,
    vmin=0, vmax=100,  # fixed colour scale spanning 0-100 (values labelled as %)
    xticklabels=True, yticklabels=False,  # show column names, hide per-row labels
    cbar=True,
    cmap='coolwarm',
    ax=ax,
)
# Label the colorbar once, here, so the rotation and padding are applied with it.
heatmap.collections[0].colorbar.set_label(label='DNA methylation (%)', rotation=270, labelpad=30)
# Derive N from the data so the label cannot drift from the rows actually plotted.
ax.set_ylabel(f'PMDs from AK1 WGBS (N={len(merge):,})', labelpad=15)