In [None]:
import os
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from sklearn.decomposition import PCA
from matplotlib.colors import LinearSegmentedColormap

#cmap = matplotlib.colors.LinearSegmentedColormap.from_list("", ["#104e8b", "#ffdab9", "#8b0a50"])
cmap = matplotlib.colors.LinearSegmentedColormap.from_list("", ["#FFFF00", "#000000", "#0066CC"])

sns.set_theme(font="Arial", font_scale=1.15, style='ticks')
plt.rc("axes.spines", top=False, right=False)
%matplotlib
%autoindent

hap_sample_palette = {'AK1_Hap1': '#CC0033', 
                      'AK1_Hap2': '#FF0033',
                      'AK1_Aggregate': '#FF0000', 
                      'AK1_WGBS': '#FF00FF',
                      'HG002_Hap1': '#999999',
                      'HG002_Hap2': '#CCCCCC',
                      'HG002_Aggregate': '#666666',
                      'iPSC_Hap1': '#0066CC', 
                      'iPSC_Hap2': '#0000CC', 
                      'iPSC_Aggregate': '#154360',
                      'NPC_Hap1': '#33CC00', 
                      'NPC_Hap2': '#66FF66', 
                      'NPC_Aggregate': '#229954',
                      'H1': '#40E0D0',
                      'HUES64': '#437299'}

### Haplotype DMR Methylation Difference

In [None]:
# Headerless table of AK1 haplotype DMRs.
df_AK1_Hap = pd.read_table("DMRs_DSS_AK1_Hap_ALL.bed", header=None)

# Column 3 is split on '/'; the second piece, minus a trailing ')', is the
# haplotype methylation difference, stored here as a percentage.
dmr_name_field = df_AK1_Hap.iloc[:, 3]
hap_diff_fraction = dmr_name_field.str.split('/').str[1].str.rstrip(')').astype(float)
df_AK1_Hap['mCG_HapDiff'] = hap_diff_fraction * 100

fig, ax = plt.subplots(figsize=(6,4), constrained_layout=True)
hist = sns.histplot(
    data=df_AK1_Hap,
    x='mCG_HapDiff',
    bins=100,
    stat='count',
    kde=True,
    color='#664E00',
    ax=ax,
)
ax.set_xlabel("Difference in DNA methylation between AK1 Haplotypes (%)")
ax.set_ylabel("The Number of AK1 Haplotype DMRs")

# (line position, colour, label x-position, label text) for the two +/-65 % cutoffs.
cutoff_marks = [
    (65, 'r', 70, '65%'),
    (-65, 'b', -85, '-65%'),
]
for line_x, colour, label_x, label_text in cutoff_marks:
    ax.axvline(line_x, color=colour, linestyle='--')
for line_x, colour, label_x, label_text in cutoff_marks:
    # Labels sit at 90 % of the current y-axis upper limit.
    ax.text(label_x, ax.get_ylim()[1] * 0.9, label_text, color=colour)

plt.savefig('Histogram_AK1_Haplotype_Methylation_Difference.pdf')
plt.clf()

### Variants Inside Candidate AK1 ICR

In [None]:
# Variant summary per candidate ICR; the index is split on ';' into the
# region string (first piece) and the region name (second piece).
var_candidateICR = pd.read_table("Variants_inside_CandidateICR.tab", index_col=0)
var_candidateICR['ICR_region'] = var_candidateICR.index.str.split(';').str[0]
var_candidateICR['Name'] = var_candidateICR.index.str.split(';').str[1]
var_candidateICR['PhasedHet'].describe()


# Bin the PhasedHet count of each region: exact bins for 0-3, then 4-19 and 20+.
PhasedHetCategories = [
    var_candidateICR['PhasedHet'] == 0,
    var_candidateICR['PhasedHet'] == 1,
    var_candidateICR['PhasedHet'] == 2,
    var_candidateICR['PhasedHet'] == 3,
    (var_candidateICR['PhasedHet'] > 3) & (var_candidateICR['PhasedHet'] < 20),
    var_candidateICR['PhasedHet'] >= 20
    ]
PhasedHetCategories_Names = ['0', '1', '2', '3', '4-19', '20+']

# Rows matching none of the conditions (e.g. missing values) fall into 'Other'.
var_candidateICR['PhasedHetCategories'] = np.select(PhasedHetCategories, PhasedHetCategories_Names, default='Other')

PhasedHetCategories_percentage = var_candidateICR['PhasedHetCategories'].value_counts(normalize=True) * 100
# fill_value=0: a bin with no regions becomes a zero-width segment instead of
# NaN, which would otherwise poison the cumulative sum and every later bar.
PhasedHetCategories_percentage_ordered = PhasedHetCategories_percentage.reindex(PhasedHetCategories_Names, fill_value=0)
# Left edge of each segment = cumulative total minus the segment's own width.
# Both operands are the ordered series, so no implicit index re-alignment is needed.
cat_starts = PhasedHetCategories_percentage_ordered.cumsum() - PhasedHetCategories_percentage_ordered

# Larger font for this short, wide figure only; restored at the end of the cell.
sns.set_theme(font="Arial", font_scale=1.3, style='ticks')
plt.rc("axes.spines", top=False, right=False)

colors = LinearSegmentedColormap.from_list(
    'white_to_dark_red', ['#ffffff', '#ff0000']
)
# Reversed so the first category gets the darkest red and the last gets white.
color_range = colors(np.linspace(0,1, len(PhasedHetCategories_percentage_ordered)))[::-1]

fig, ax = plt.subplots(figsize=(9,1.5), constrained_layout=True)
# Series.items() replaces Series.iteritems(), which was removed in pandas 2.0.
for i, (cat, prop) in enumerate(PhasedHetCategories_percentage_ordered.items()):
    if prop == 0:
        # Nothing to draw, and a label here would overlap its neighbour.
        continue
    ax.barh(y=0, width=prop, left=cat_starts[cat], color=color_range[i], edgecolor='black')
    ax.text(x=cat_starts[cat] + prop / 2, y=0, s=str(cat), ha='center', va='center')

ax.set_yticks([])
ax.set_xlim(0,100.05)
ax.set_xlabel('Percentage of phased heterozygous variants in Candidate AK1-ICR')

plt.savefig("Percentage_of_phased_hetvars_in_candidateAK1ICR.pdf")
plt.clf()

# Back to the notebook-wide theme (set_theme resets the spines, so re-apply plt.rc).
sns.set_theme(font="Arial", font_scale=1.15, style='ticks')
plt.rc("axes.spines", top=False, right=False)

### mCG inside Known ICRs from Imprintome (with HG002)

In [None]:
# Known ICR list: the region string "<col0>:<col1>-<col2>" becomes the index,
# and column 3 is kept as the ICR name.
df_known = pd.read_table("Known_ICR_cleaned.bed", header=None)
df_known['ICR_region'] = df_known.iloc[:, 0] + ':' + df_known.iloc[:, 1].astype(str) + '-' + df_known.iloc[:, 2].astype(str)
df_known = df_known.set_index('ICR_region')
df_known = df_known[[3]]
df_known.columns = ['Name']

# Per-sample tables: first column is the region (used as index), second the value
# plotted below as "DNA methylation (%)". Haplotype pairs are joined side by side.
df_ak1_hap1 = pd.read_table("AK1_hap1_KnownICR.tab", index_col=0, header=None)
df_ak1_hap2 = pd.read_table("AK1_hap2_KnownICR.tab", index_col=0, header=None)
df_ak1 = pd.concat([df_ak1_hap1, df_ak1_hap2], axis=1)
df_ak1.index.name = 'ICR_region'
df_ak1.columns = ['AK1_Hap1', 'AK1_Hap2']

df_hg002_hap1 = pd.read_table("HG002_hap1_KnownICR.tab", index_col=0, header=None)
df_hg002_hap2 = pd.read_table("HG002_hap2_KnownICR.tab", index_col=0, header=None)
df_hg002 = pd.concat([df_hg002_hap1, df_hg002_hap2], axis=1)
df_hg002.index.name = 'ICR_region'
df_hg002.columns = ['HG002_Hap1', 'HG002_Hap2']

df_ipsc_hap1 = pd.read_table("iPSC_hap1_KnownICR.tab", index_col=0, header=None)
df_ipsc_hap2 = pd.read_table("iPSC_hap2_KnownICR.tab", index_col=0, header=None)
df_ipsc = pd.concat([df_ipsc_hap1, df_ipsc_hap2], axis=1)
df_ipsc.index.name = 'ICR_region'
df_ipsc.columns = ['iPSC_Hap1', 'iPSC_Hap2']

df_npc_hap1 = pd.read_table("NPC_hap1_KnownICR.tab", index_col=0, header=None)
df_npc_hap2 = pd.read_table("NPC_hap2_KnownICR.tab", index_col=0, header=None)
df_npc = pd.concat([df_npc_hap1, df_npc_hap2], axis=1)
df_npc.index.name = 'ICR_region'
df_npc.columns = ['NPC_Hap1', 'NPC_Hap2']

df_h1 = pd.read_table("H1_MAPQ10_HapMerged_KnownICR.tab", index_col=0, header=None)
df_h1.index.name = 'ICR_region'
df_h1.columns = ['H1']

df_hues64 = pd.read_table("HUES64_MAPQ10_HapMerged_KnownICR.tab", index_col=0, header=None)
df_hues64.index.name = 'ICR_region'
df_hues64.columns = ['HUES64']

# Column order after this concat: Name, AK1 x2, HG002 x2, iPSC x2, NPC x2, H1, HUES64.
df_known_met = pd.concat([df_known, df_ak1, df_hg002, df_ipsc, df_npc, df_h1, df_hues64], axis=1)
# Order rows by the integer after the last '_' in the ICR name.
df_known_met = df_known_met.iloc[np.argsort(df_known_met['Name'].apply(lambda x: int(x.split('_')[-1])))]

# Heatmap matrix: every column except Name, rows sorted by the H1/HUES64 mean
# (highest first). assign() works on a copy, so the temporary sort key is never
# written onto a slice of df_known_met (avoids SettingWithCopyWarning).
df_known_met_hm = (
    df_known_met.iloc[:, 1:]
    .assign(H1_HUES64_mean=lambda d: d[['H1', 'HUES64']].mean(axis=1))
    .sort_values(by='H1_HUES64_mean', ascending=False)
    .drop(columns='H1_HUES64_mean')
)

# Cell labels with one decimal place.
annot = df_known_met_hm.applymap(lambda f: f'{f: .1f}')

fig, ax = plt.subplots(figsize=(8.5, 5), squeeze=False, constrained_layout=True)
# For NA values
# First layer: only the missing cells carry a value (0), so only they are drawn,
# in grey with an "NA" label.
heatmap1 = sns.heatmap(
    np.where(df_known_met_hm.isna(), 0, np.nan),
    ax = ax[0, 0],
    cbar = False,
    yticklabels = True,
    annot = np.full_like(df_known_met_hm, "NA", dtype = object),
    fmt = "",
    annot_kws = {"size": 10, "va": "center_baseline", "color": "black"},
    cmap = ListedColormap(['grey']),
    linewidth = 0)
# For non-NA values
# Second layer on the same axes: the actual values on a fixed 0-100 colour scale.
heatmap2 = sns.heatmap(
    df_known_met_hm,
    ax = ax[0, 0],
    xticklabels = True,
    yticklabels = True,
    annot = annot, 
    fmt = "",
    annot_kws = {"size": 10, "va": "center_baseline"},
    cmap = cmap,
    linewidth = 0.5, 
    linecolor = "black",
    vmin = 0,
    vmax = 100)
ax[0, 0].xaxis.tick_top()
ax[0, 0].set_xticklabels(['AK1\nHap1', 'AK1\nHap2', 'HG002\nHap1', 'HG002\nHap2', 'iPSC\nHap1', 'iPSC\nHap2', 'NPC\nHap1', 'NPC\nHap2', 'H1', 'HUES64'], rotation=0)
ax[0, 0].set_yticklabels(ax[0, 0].get_yticklabels(), fontsize=7)
ax[0, 0].set_ylabel('Known ICR from Imprintome (N = 25)')
# Hide the tick marks themselves; only the labels remain.
ax[0, 0].yaxis.set_tick_params(width=0, length=0)
ax[0, 0].xaxis.set_tick_params(width=0, length=0)
# Slightly enlarge the limits so the outermost grid lines are not clipped.
heatmap1.set_xlim(0, df_known_met_hm.shape[1] + 0.1)
heatmap1.set_ylim(df_known_met_hm.shape[0] + 0.1, 0)
# collections[1] is the mesh of the second heatmap, the one that owns the colorbar.
heatmap2.collections[1].colorbar.set_label(label='DNA methylation (%)', rotation=270, labelpad=10)
plt.savefig('Heatmap_mCG_KnownICRs.png')



### mCG inside Known ICRs from Imprintome (without HG002)

In [None]:
# Reuses df_known_met built by the previous cell (the "with HG002" Known ICR cell).
# The earlier re-read of Known_ICR_cleaned.bed was dropped: its result was never
# used in this cell and df_known already holds the same table.

# Select by column name rather than by position so the selection survives any
# change in the concat order. drop()/assign() return copies, so the temporary
# sort key is not written onto a slice of df_known_met.
df_known_met_hm = (
    df_known_met.drop(columns=['Name', 'HG002_Hap1', 'HG002_Hap2'])
    .assign(H1_HUES64_mean=lambda d: d[['H1', 'HUES64']].mean(axis=1))
    .sort_values(by='H1_HUES64_mean', ascending=False)
    .drop(columns='H1_HUES64_mean')
)

# Cell labels with one decimal place.
annot = df_known_met_hm.applymap(lambda f: f'{f: .1f}')

fig, ax = plt.subplots(figsize=(6.5, 5), squeeze=False, constrained_layout=True)
# For NA values
# First layer: only the missing cells carry a value (0), so only they are drawn,
# in grey with an "NA" label.
heatmap1 = sns.heatmap(
    np.where(df_known_met_hm.isna(), 0, np.nan),
    ax = ax[0, 0],
    cbar = False,
    yticklabels = True,
    annot = np.full_like(df_known_met_hm, "NA", dtype = object),
    fmt = "",
    annot_kws = {"size": 10, "va": "center_baseline", "color": "black"},
    cmap = ListedColormap(['grey']),
    linewidth = 0)
# For non-NA values
# Second layer on the same axes: the actual values on a fixed 0-100 colour scale.
heatmap2 = sns.heatmap(
    df_known_met_hm,
    ax = ax[0, 0],
    xticklabels = True,
    yticklabels = True,
    annot = annot, 
    fmt = "",
    annot_kws = {"size": 10, "va": "center_baseline"},
    cmap = cmap,
    linewidth = 0.5, 
    linecolor = "black",
    vmin = 0,
    vmax = 100)
ax[0, 0].xaxis.tick_top()
ax[0, 0].set_xticklabels(['AK1\nHap1', 'AK1\nHap2', 'iPSC\nHap1', 'iPSC\nHap2', 'NPC\nHap1', 'NPC\nHap2', 'H1', 'HUES64'], rotation=0)
ax[0, 0].set_yticklabels(ax[0, 0].get_yticklabels(), fontsize=7)
ax[0, 0].set_ylabel('Known ICR from Imprintome (N = 25)')
# Hide the tick marks themselves; only the labels remain.
ax[0, 0].yaxis.set_tick_params(width=0, length=0)
ax[0, 0].xaxis.set_tick_params(width=0, length=0)
# Slightly enlarge the limits so the outermost grid lines are not clipped.
heatmap1.set_xlim(0, df_known_met_hm.shape[1] + 0.1)
heatmap1.set_ylim(df_known_met_hm.shape[0] + 0.1, 0)
# collections[1] is the mesh of the second heatmap, the one that owns the colorbar.
heatmap2.collections[1].colorbar.set_label(label='DNA methylation (%)', rotation=270, labelpad=10)
plt.savefig('Heatmap_mCG_KnownICRs_woHG002.png')



### mCG inside Putative ICRs from Imprintome (with HG002)

In [None]:
# Putative ICR list: the region string "<col0>:<col1>-<col2>" becomes the index,
# and column 3 is kept as the ICR name.
df_putative = pd.read_table("Putative_ICR_wochrX.bed", header=None)
df_putative['ICR_region'] = df_putative.iloc[:, 0] + ':' + df_putative.iloc[:, 1].astype(str) + '-' + df_putative.iloc[:, 2].astype(str)
df_putative = df_putative.set_index('ICR_region')
df_putative = df_putative[[3]]
df_putative.columns = ['Name']

# Per-sample tables: first column is the region (used as index), second the value
# plotted below as "DNA methylation (%)". Haplotype pairs are joined side by side.
df_ak1_hap1 = pd.read_table("AK1_hap1_PutativeICR.tab", index_col=0, header=None)
df_ak1_hap2 = pd.read_table("AK1_hap2_PutativeICR.tab", index_col=0, header=None)
df_ak1 = pd.concat([df_ak1_hap1, df_ak1_hap2], axis=1)
df_ak1.index.name = 'ICR_region'
df_ak1.columns = ['AK1_Hap1', 'AK1_Hap2']

df_hg002_hap1 = pd.read_table("HG002_hap1_PutativeICR.tab", index_col=0, header=None)
df_hg002_hap2 = pd.read_table("HG002_hap2_PutativeICR.tab", index_col=0, header=None)
df_hg002 = pd.concat([df_hg002_hap1, df_hg002_hap2], axis=1)
df_hg002.index.name = 'ICR_region'
df_hg002.columns = ['HG002_Hap1', 'HG002_Hap2']

df_ipsc_hap1 = pd.read_table("iPSC_hap1_PutativeICR.tab", index_col=0, header=None)
df_ipsc_hap2 = pd.read_table("iPSC_hap2_PutativeICR.tab", index_col=0, header=None)
df_ipsc = pd.concat([df_ipsc_hap1, df_ipsc_hap2], axis=1)
df_ipsc.index.name = 'ICR_region'
df_ipsc.columns = ['iPSC_Hap1', 'iPSC_Hap2']

df_npc_hap1 = pd.read_table("NPC_hap1_PutativeICR.tab", index_col=0, header=None)
df_npc_hap2 = pd.read_table("NPC_hap2_PutativeICR.tab", index_col=0, header=None)
df_npc = pd.concat([df_npc_hap1, df_npc_hap2], axis=1)
df_npc.index.name = 'ICR_region'
df_npc.columns = ['NPC_Hap1', 'NPC_Hap2']

df_h1 = pd.read_table("H1_MAPQ10_HapMerged_PutativeICR.tab", index_col=0, header=None)
df_h1.index.name = 'ICR_region'
df_h1.columns = ['H1']

df_hues64 = pd.read_table("HUES64_MAPQ10_HapMerged_PutativeICR.tab", index_col=0, header=None)
df_hues64.index.name = 'ICR_region'
df_hues64.columns = ['HUES64']

df_putative_met = pd.concat([df_putative, df_ak1, df_hg002, df_ipsc, df_npc, df_h1, df_hues64], axis=1)
# Order rows by the integer after the last '_' in the ICR name.
df_putative_met = df_putative_met.iloc[np.argsort(df_putative_met['Name'].apply(lambda x: int(x.split('_')[-1])))]

# Heatmap matrix: every column except Name, rows sorted by the H1/HUES64 mean
# (highest first). assign() works on a copy, so the temporary sort key is never
# written onto a slice of df_putative_met (avoids SettingWithCopyWarning).
df_putative_met_hm = (
    df_putative_met.iloc[:, 1:]
    .assign(H1_HUES64_mean=lambda d: d[['H1', 'HUES64']].mean(axis=1))
    .sort_values(by='H1_HUES64_mean', ascending=False)
    .drop(columns='H1_HUES64_mean')
)

# No per-cell annotation table is built here: both heatmaps below pass annot=None,
# so formatting every cell of this frame was wasted work.

fig, ax = plt.subplots(figsize=(7.5, 5), squeeze=False, constrained_layout=True)
# For NA values
# First layer: only the missing cells carry a value (0), so only they are drawn, in grey.
heatmap1 = sns.heatmap(
    np.where(df_putative_met_hm.isna(), 0, np.nan),
    ax = ax[0, 0],
    cbar = False,
    xticklabels = True,
    yticklabels = False,
    annot = None,
    fmt = "",
    cmap = ListedColormap(['grey']),
    linewidth = 0)
# For non-NA values
# Second layer on the same axes: the actual values on a fixed 0-100 colour scale.
heatmap2 = sns.heatmap(
    df_putative_met_hm,
    ax = ax[0, 0],
    xticklabels = True,
    yticklabels = False,
    annot = None, 
    fmt = "",
    cmap = cmap,
    linewidth = 0.001, 
    linecolor = "black",
    vmin = 0,
    vmax = 100)
ax[0, 0].xaxis.tick_top()
ax[0, 0].set_xticklabels(['AK1\nHap1', 'AK1\nHap2', 'HG002\nHap1', 'HG002\nHap2', 'iPSC\nHap1', 'iPSC\nHap2', 'NPC\nHap1', 'NPC\nHap2', 'H1', 'HUES64'], rotation=0)
ax[0, 0].set_ylabel('Putative ICR from Imprintome (N = 1,390)')
# collections[1] is the mesh of the second heatmap, the one that owns the colorbar.
heatmap2.collections[1].colorbar.set_label(label='DNA methylation (%)', rotation=270, labelpad=10)
plt.savefig('Heatmap_mCG_PutativeICRs.png')



### mCG inside Putative ICRs from Imprintome (without HG002)

In [None]:
# DISABLED: everything in this cell is wrapped in one triple-quoted string, so none
# of it executes; the cell merely evaluates a string literal. The text inside is a
# variant of the Putative ICR heatmap that keeps columns 0, 1, 4-9 of
# df_putative_met_hm and uses the "coolwarm" colormap.
'''
df_putative_met_hm = df_putative_met_hm.iloc[:, [0, 1, 4, 5, 6, 7, 8, 9]]

fig, ax = plt.subplots(squeeze=False, constrained_layout=True)
# For NA values
heatmap1 = sns.heatmap(
    np.where(df_putative_met_hm.isna(), 0, np.nan),
    ax = ax[0, 0],
    cbar = False,
    xticklabels = True,
    yticklabels = False,
    annot = None,
    fmt = "",
    cmap = ListedColormap(['grey']),
    linewidth = 0)
# For non-NA values
heatmap2 = sns.heatmap(
    df_putative_met_hm,
    ax = ax[0, 0],
    xticklabels = True,
    yticklabels = False,
    annot = None, 
    fmt = "",
    cmap = "coolwarm",
    linewidth = 0.001, 
    linecolor = "black",
    vmin = 0,
    vmax = 100)
ax[0, 0].xaxis.tick_top()
ax[0, 0].set_ylabel('Putative ICR from Imprintome (N = 1,390)')
heatmap2.collections[1].colorbar.set_label(label='DNA methylation (%)', rotation=270, labelpad=10)
plt.savefig('Heatmap_mCG_PutativeICRs_woHG002.png')
'''


### Basic statistics inside Candidate ICRs

In [None]:
def minMax_normalization(series):
    """Rescale values linearly so the minimum maps to 0 and the maximum to 1.

    Parameters
    ----------
    series : pandas.Series or similar
        Any object that provides ``.min()`` / ``.max()`` and element-wise
        arithmetic.

    Returns
    -------
    Same kind of object as ``series`` with ``(x - min) / (max - min)`` applied
    to every element. If all values are equal (max == min) the result is all
    zeros rather than the NaN produced by a 0/0 division.
    """
    lowest = series.min()
    value_range = series.max() - lowest
    # Only the scalar case is guarded; a frame-like input yields a per-column
    # range and keeps the plain element-wise division.
    if np.ndim(value_range) == 0 and value_range == 0:
        value_range = 1
    return (series - lowest) / value_range


# Candidate ICR table (no header). Column 3 is parsed as "...(<CpG count>/<hap diff>)":
# the text between '(' and '/' is the CpG count, the text after '/' (minus ')') the
# haplotype methylation difference.
df_AK1_candidateICR = pd.read_table("Candidate_ICR_metcutoff65_from_AK1_Hap_sorted.bed", header=None)
df_AK1_candidateICR['CpG_Count'] = df_AK1_candidateICR.iloc[:, 3].str.split('(').str[1].str.split('/').str[0].astype(int)
df_AK1_candidateICR['ICR_region'] = df_AK1_candidateICR.iloc[:, 0] + ':' + df_AK1_candidateICR.iloc[:, 1].astype(str) + '-' + df_AK1_candidateICR.iloc[:, 2].astype(str)
df_AK1_candidateICR['Length'] = (df_AK1_candidateICR.iloc[:, 2] - df_AK1_candidateICR.iloc[:, 1])
# CpG density (count / length), rescaled to [0, 1] across all regions.
df_AK1_candidateICR['Norm_CpG_Density'] = minMax_normalization((df_AK1_candidateICR['CpG_Count'] / df_AK1_candidateICR['Length']))
df_AK1_candidateICR['mCG_HapDiff'] = df_AK1_candidateICR.iloc[:, 3].str.split('/').str[1].str.rstrip(')').astype(float)

# Draw on a dedicated figure. Without an explicit ax the scatter lands on whatever
# axes is current, i.e. the heatmap left open by the previous plotting cell.
fig, ax = plt.subplots(figsize=(6, 4), constrained_layout=True)
sns.scatterplot(data=df_AK1_candidateICR, x='Length', y='Norm_CpG_Density', alpha=0.3, ax=ax)


# Reduce to the region-indexed name table; the helper columns above are discarded.
df_AK1_candidateICR = df_AK1_candidateICR.set_index('ICR_region')
df_AK1_candidateICR = df_AK1_candidateICR[[3]]
df_AK1_candidateICR.columns = ['Name']




### Variants inside Candidate ICR in AK1

### mCG inside AK1 candidate ICRs (without HG002)

In [None]:
# Candidate ICR list: the region string "<col0>:<col1>-<col2>" becomes the index,
# and column 3 is kept as the name.
df_AK1_candidateICR = pd.read_table("Candidate_ICR_metcutoff65_from_AK1_Hap_sorted.bed", header=None)
df_AK1_candidateICR['ICR_region'] = df_AK1_candidateICR.iloc[:, 0] + ':' + df_AK1_candidateICR.iloc[:, 1].astype(str) + '-' + df_AK1_candidateICR.iloc[:, 2].astype(str)
df_AK1_candidateICR = df_AK1_candidateICR.set_index('ICR_region')
df_AK1_candidateICR = df_AK1_candidateICR[[3]]
df_AK1_candidateICR.columns = ['Name']

# Per-sample tables: first column is the region (used as index), second the value
# plotted below as "DNA methylation (%)". Haplotype pairs are joined side by side.
df_ak1ICR_hap1 = pd.read_table("AK1_hap1_AK1_candidate_ICR.tab", index_col=0, header=None)
df_ak1ICR_hap2 = pd.read_table("AK1_hap2_AK1_candidate_ICR.tab", index_col=0, header=None)
df_ak1ICR = pd.concat([df_ak1ICR_hap1, df_ak1ICR_hap2], axis=1)
df_ak1ICR.index.name = 'ICR_region'
df_ak1ICR.columns = ['AK1_Hap1', 'AK1_Hap2']

# HG002 is loaded but not part of the heatmap in this cell.
df_hg002ICR_hap1 = pd.read_table("HG002_hap1_AK1_candidate_ICR.tab", index_col=0, header=None)
df_hg002ICR_hap2 = pd.read_table("HG002_hap2_AK1_candidate_ICR.tab", index_col=0, header=None)
df_hg002ICR = pd.concat([df_hg002ICR_hap1, df_hg002ICR_hap2], axis=1)
df_hg002ICR.index.name = 'ICR_region'
df_hg002ICR.columns = ['HG002_Hap1', 'HG002_Hap2']

df_ipscICR_hap1 = pd.read_table("iPSC_hap1_AK1_candidate_ICR.tab", index_col=0, header=None)
df_ipscICR_hap2 = pd.read_table("iPSC_hap2_AK1_candidate_ICR.tab", index_col=0, header=None)
df_ipscICR = pd.concat([df_ipscICR_hap1, df_ipscICR_hap2], axis=1)
df_ipscICR.index.name = 'ICR_region'
df_ipscICR.columns = ['iPSC_Hap1', 'iPSC_Hap2']

df_npcICR_hap1 = pd.read_table("NPC_hap1_AK1_candidate_ICR.tab", index_col=0, header=None)
df_npcICR_hap2 = pd.read_table("NPC_hap2_AK1_candidate_ICR.tab", index_col=0, header=None)
df_npcICR = pd.concat([df_npcICR_hap1, df_npcICR_hap2], axis=1)
df_npcICR.index.name = 'ICR_region'
df_npcICR.columns = ['NPC_Hap1', 'NPC_Hap2']

df_h1ICR = pd.read_table("H1_MAPQ10_HapMerged_AK1_candidate_ICR.tab", index_col=0, header=None)
df_h1ICR.index.name = 'ICR_region'
df_h1ICR.columns = ['H1']

df_hues64ICR = pd.read_table("HUES64_MAPQ10_HapMerged_AK1_candidate_ICR.tab", index_col=0, header=None)
df_hues64ICR.index.name = 'ICR_region'
df_hues64ICR.columns = ['HUES64']

# AK1 ICR length distribution
# Length is recovered from the "<chrom>:<start>-<end>" index as end - start.
df_AK1_candidateICR_length = df_AK1_candidateICR.copy()
df_AK1_candidateICR_length['ICR_length'] = df_AK1_candidateICR_length.index.to_series().str.split(':').str[-1].str.split('-').apply(lambda x: int(x[1]) - int(x[0]))
# Computed once; used for the marker line, its label position and its label text.
mean_ICR_length = df_AK1_candidateICR_length['ICR_length'].mean()

fig, ax = plt.subplots(figsize=(6,4), constrained_layout=True)
hist = sns.histplot(data=df_AK1_candidateICR_length, x='ICR_length', kde=True, stat='count', ax=ax)
ax.set_xlabel("Length of AK1 Candidate ICR")
ax.set_ylabel("The Number of AK1 Candidate ICR")
# Thousands separators on the x tick labels.
ax.get_xaxis().set_major_formatter(matplotlib.ticker.FuncFormatter(lambda x, p: format(int(x), ',')))
ax.axvline(mean_ICR_length, color='r', linestyle='--')
ax.text(mean_ICR_length + 100, ax.get_ylim()[1] * 0.9, f'Average length: {int(mean_ICR_length)} bp', color='r')

plt.savefig('AK1_CandidateICR_Length_Distribution.pdf')

# Heatmaps
df_AK1_candidateICR_met = pd.concat([df_AK1_candidateICR, df_ak1ICR, df_ipscICR, df_npcICR, df_h1ICR, df_hues64ICR], axis=1)
# Drop the Name column, sort by AK1 haplotype 1 (highest first), and remove
# regions with no AK1_Hap1 value.
df_AK1_candidateICR_met_hm = df_AK1_candidateICR_met.iloc[:, 1:]
df_AK1_candidateICR_met_hm = df_AK1_candidateICR_met_hm.sort_values(by='AK1_Hap1', ascending=False)
df_AK1_candidateICR_met_hm = df_AK1_candidateICR_met_hm.dropna(axis=0, subset=['AK1_Hap1'])

# No per-cell annotation table is built here: both heatmaps below pass annot=None,
# so formatting every cell of this frame was wasted work.

fig, ax = plt.subplots(figsize=(11,8), squeeze=False, constrained_layout=True)
# For NA values
# First layer: only the missing cells carry a value (0), so only they are drawn, in grey.
heatmap1 = sns.heatmap(
    np.where(df_AK1_candidateICR_met_hm.isna(), 0, np.nan),
    ax = ax[0, 0],
    cbar = False,
    xticklabels = True,
    yticklabels = False,
    annot = None,
    fmt = "",
    cmap = ListedColormap(['grey']),
    linewidth = 0,
    rasterized = True)
# For non-NA values
# Second layer on the same axes: the actual values on a fixed 0-100 colour scale.
heatmap2 = sns.heatmap(
    df_AK1_candidateICR_met_hm,
    ax = ax[0, 0],
    xticklabels = True,
    yticklabels = False,
    annot = None, 
    fmt = "",
    cmap = cmap,
    linewidth = 0.001, 
    linecolor = "black",
    vmin = 0,
    vmax = 100,
    rasterized = True)
ax[0, 0].xaxis.tick_top()
ax[0, 0].set_xticklabels(['AK1\nHap1', 'AK1\nHap2', 'iPSC\nHap1', 'iPSC\nHap2', 'NPC\nHap1', 'NPC\nHap2', 'H1', 'HUES64'], rotation=0)
ax[0, 0].set_ylabel('Candidate ICRs in AK1 (N = 842)')
# collections[1] is the mesh of the second heatmap, the one that owns the colorbar.
heatmap2.collections[1].colorbar.set_label(label='DNA methylation (%)', rotation=270, labelpad=10)
plt.savefig('Heatmap_mCG_AK1candidateICRs_woHG002.pdf')

### PCA plot of mCG inside AK1 candidate ICRs (without HG002)

In [None]:
# PCA needs a complete matrix: keep only regions with a value in every sample.
df_AK1_candidateICR_met_pca = df_AK1_candidateICR_met_hm.dropna(axis=0, how='any')

# Z-score each sample column, then transpose so samples are rows (the PCA
# observations) and regions are features.
df_AK1_candidateICR_met_pca_normalized = (df_AK1_candidateICR_met_pca - df_AK1_candidateICR_met_pca.mean()) / df_AK1_candidateICR_met_pca.std()
pca = PCA(n_components=2)
pca.fit(df_AK1_candidateICR_met_pca_normalized.T)
transformed_data = pca.transform(df_AK1_candidateICR_met_pca_normalized.T)

# Smaller font for this figure only; restored after saving.
sns.set_theme(font="Arial", font_scale=0.9, style='ticks')
plt.rc("axes.spines", top=False, right=False)
# Point labels are derived from the column names ("AK1_Hap1" -> "AK1\nHap1"), so
# they cannot drift out of step with the column order.
labels = [sample.replace('_', '\n') for sample in df_AK1_candidateICR_met_pca.columns]
fig, ax = plt.subplots(figsize=(7, 7), constrained_layout=True)
for point, sample, label in zip(transformed_data, df_AK1_candidateICR_met_pca.columns, labels):
    ax.scatter(point[0], point[1],
               color = hap_sample_palette[sample],
               s = 200)
    ax.annotate(label,
                (point[0], point[1]),
                xytext = (5,5),
                textcoords = 'offset points')

ax.set_xlabel(f'PC1 ({pca.explained_variance_ratio_[0]*100:.2f}%)')
ax.set_ylabel(f'PC2 ({pca.explained_variance_ratio_[1]*100:.2f}%)')
# Save while this figure's theme is still active, then restore the notebook-wide
# theme. set_theme re-enables the top/right spines, so plt.rc must follow it.
plt.savefig('PCA_mCG_AK1candidateICRs_woHG002.pdf')
sns.set_theme(font="Arial", font_scale=1.15, style='ticks') # Back to as it was
plt.rc("axes.spines", top=False, right=False)

### PCA plot of mCG inside AK1 candidate ICRs - HiFi haps, HiFi combined and WGBS

In [None]:
# Single-column tables: first file column is the region (index), second the value.
# The four *_combinedmet tables become the "<sample>_Aggregate" columns and the
# AK1 WGBS table becomes "AK1_WGBS"; the assignment order matches the original cell.
df_ak1combICR = pd.read_table("AK1_combinedmet.tab", header=None, index_col=0).rename_axis('ICR_region')
df_ak1combICR.columns = ['AK1_Aggregate']

df_ipsccombICR = pd.read_table("iPSC_combinedmet.tab", header=None, index_col=0).rename_axis('ICR_region')
df_ipsccombICR.columns = ['iPSC_Aggregate']

df_npccombICR = pd.read_table("NPC_combinedmet.tab", header=None, index_col=0).rename_axis('ICR_region')
df_npccombICR.columns = ['NPC_Aggregate']

df_ak1wgbsICR = pd.read_table("AK1_WGBS_MAPQ10_HapMerged_AK1_candidate_ICR.tab", header=None, index_col=0).rename_axis('ICR_region')
df_ak1wgbsICR.columns = ['AK1_WGBS']

df_hg002combICR = pd.read_table("HG002_combinedmet.tab", header=None, index_col=0).rename_axis('ICR_region')
df_hg002combICR.columns = ['HG002_Aggregate']


# DISABLED: the block below is one triple-quoted (''' ... ''') string literal, so none
# of it executes. The """ pairs inside it are just characters within that string.
# It holds a 3-component PCA variant that also includes the aggregate, AK1 WGBS and
# HG002 columns.
'''
df_AK1_w_combined_candidateICR_met = pd.concat([df_AK1_candidateICR, df_ak1ICR, df_ipscICR, df_npcICR, df_h1ICR, df_hues64ICR, df_ak1combICR, df_ipsccombICR, df_npccombICR, df_ak1wgbsICR, df_hg002ICR, df_hg002combICR], axis=1)
df_AK1_w_combined_candidateICR_met_hm = df_AK1_w_combined_candidateICR_met.iloc[:, 1:]

df_AK1_w_combined_candidateICR_met_pca = df_AK1_w_combined_candidateICR_met_hm.dropna(axis=0, how='any')
df_AK1_w_combined_candidateICR_met_pca_normalized = (df_AK1_w_combined_candidateICR_met_pca - df_AK1_w_combined_candidateICR_met_pca.mean()) / df_AK1_w_combined_candidateICR_met_pca.std()
pca = PCA(n_components=3)
pca.fit(df_AK1_w_combined_candidateICR_met_pca_normalized.T)
transformed_data = pca.transform(df_AK1_w_combined_candidateICR_met_pca_normalized.T)

sns.set_theme(font="Arial", font_scale=1.2, style='ticks')
plt.rc("axes.spines", top=False, right=False)
labels = ['AK1\nHap1', 'AK1\nHap2', 'iPSC\nHap1', 'iPSC\nHap2', 'NPC\nHap1', 'NPC\nHap2', 'H1', 'HUES64', 'AK1\nAggregate', 'iPSC\nAggregate', 'NPC\nAggregate', 'AK1\nWGBS', 'HG002\nHap1', 'HG002\nHap2', 'HG002\nAggregate']
fig, ax = plt.subplots(figsize=(7, 7), constrained_layout=True)
for i in range(len(transformed_data)):
    ax.scatter(transformed_data[i][0], transformed_data[i][1],
               color = hap_sample_palette[df_AK1_w_combined_candidateICR_met_pca.columns[i]],
               s = 200)
    ax.annotate(labels[i],
                (transformed_data[i][0], transformed_data[i][1]), 
                xytext = (5,5), 
                textcoords = 'offset points')
"""
    ax.annotate(df_ak1novel_met_pca.columns[i], 
                (transformed_data[i][0], transformed_data[i][1]), 
                xytext = (5,5), 
                textcoords = 'offset points')
"""
ax.set_xlabel(f'PC1 ({pca.explained_variance_ratio_[0]*100:.2f}%)')
ax.set_ylabel(f'PC2 ({pca.explained_variance_ratio_[1]*100:.2f}%)')
sns.set_theme(font="Arial", font_scale=1.15, style='ticks') # Back to as it was
plt.savefig('PCA_mCG_AK1candidateICRs_woHG002_wAK1WGBS.pdf')

'''
# Same samples as the previous PCA plus the AK1 WGBS column.
df_AK1_w_combined_candidateICR_met = pd.concat([df_AK1_candidateICR, df_ak1ICR, df_ipscICR, df_npcICR, df_h1ICR, df_hues64ICR, df_ak1wgbsICR], axis=1)
# Drop the leading Name column.
df_AK1_w_combined_candidateICR_met_hm = df_AK1_w_combined_candidateICR_met.iloc[:, 1:]

# PCA needs a complete matrix: keep only regions with a value in every sample,
# z-score each sample column, and transpose so samples are the observations.
df_AK1_w_combined_candidateICR_met_pca = df_AK1_w_combined_candidateICR_met_hm.dropna(axis=0, how='any')
df_AK1_w_combined_candidateICR_met_pca_normalized = (df_AK1_w_combined_candidateICR_met_pca - df_AK1_w_combined_candidateICR_met_pca.mean()) / df_AK1_w_combined_candidateICR_met_pca.std()
pca = PCA(n_components=2)
pca.fit(df_AK1_w_combined_candidateICR_met_pca_normalized.T)
transformed_data = pca.transform(df_AK1_w_combined_candidateICR_met_pca_normalized.T)

# Larger font for this figure only; restored after saving.
sns.set_theme(font="Arial", font_scale=1.2, style='ticks')
plt.rc("axes.spines", top=False, right=False)
# Point labels are derived from the column names ("AK1_WGBS" -> "AK1\nWGBS"), so
# they cannot drift out of step with the columns passed to pd.concat above.
labels = [sample.replace('_', '\n') for sample in df_AK1_w_combined_candidateICR_met_pca.columns]
fig, ax = plt.subplots(figsize=(7, 7), constrained_layout=True)
for point, sample, label in zip(transformed_data, df_AK1_w_combined_candidateICR_met_pca.columns, labels):
    ax.scatter(point[0], point[1],
               color = hap_sample_palette[sample],
               s = 200)
    ax.annotate(label,
                (point[0], point[1]),
                xytext = (5,5),
                textcoords = 'offset points')

ax.set_xlabel(f'PC1 ({pca.explained_variance_ratio_[0]*100:.2f}%)')
ax.set_ylabel(f'PC2 ({pca.explained_variance_ratio_[1]*100:.2f}%)')
# Save while this figure's theme is still active, then restore the notebook-wide
# theme. set_theme re-enables the top/right spines, so plt.rc must follow it.
plt.savefig('PCA_mCG_AK1candidateICRs_woHG002_wAK1WGBS.pdf')
sns.set_theme(font="Arial", font_scale=1.15, style='ticks') # Back to as it was
plt.rc("axes.spines", top=False, right=False)

### mCG inside novel AK1 candidate ICRs (w/ HG002)

In [None]:
# /mnt/mone/Project/AK1_PacBio/01.DNA/Analysis_Samples_Merged/DNA_Methylation_Analysis/DMR/Haplotype_DSS/Imprinting/Gene_Association/mCG_Candidate_novelAK1_ICR
# (presumably the directory the relative paths below are resolved against -- confirm)

# Reader options shared by every *.tab file: no header row, first column used as the index.
mcg_tab_opts = dict(index_col=0, header=None)
region_axis = 'ICR_region'

# Candidate novel ICRs: build a "<col0>:<col1>-<col2>" key from the first three BED
# columns, index by it, and keep only column 3 under the name 'Name'.
novel_icr_bed = pd.read_table("Candidate_ICR_AK1_Gene_Association_NOVEL.bed", header=None)
bed_col0, bed_col1, bed_col2 = (novel_icr_bed.iloc[:, k] for k in range(3))
novel_icr_bed[region_axis] = bed_col0 + ':' + bed_col1.astype(str) + '-' + bed_col2.astype(str)
df_candidate_novelICR = novel_icr_bed.set_index(region_axis)[[3]]
df_candidate_novelICR.columns = ['Name']

# Per-haplotype tables, one file per sample and haplotype.
df_ak1_hap1 = pd.read_table("AK1_hap1_AK1ICR.tab", **mcg_tab_opts)
df_ak1_hap2 = pd.read_table("AK1_hap2_AK1ICR.tab", **mcg_tab_opts)
df_hg002_hap1 = pd.read_table("HG002_hap1_AK1ICR.tab", **mcg_tab_opts)
df_hg002_hap2 = pd.read_table("HG002_hap2_AK1ICR.tab", **mcg_tab_opts)
df_ipsc_hap1 = pd.read_table("iPSC_hap1_AK1ICR.tab", **mcg_tab_opts)
df_ipsc_hap2 = pd.read_table("iPSC_hap2_AK1ICR.tab", **mcg_tab_opts)
df_npc_hap1 = pd.read_table("NPC_hap1_AK1ICR.tab", **mcg_tab_opts)
df_npc_hap2 = pd.read_table("NPC_hap2_AK1ICR.tab", **mcg_tab_opts)

# Haplotype-merged tables for the two ESC lines.
df_h1 = pd.read_table("H1_HapMerged_AK1ICR.tab", **mcg_tab_opts)
df_hues64 = pd.read_table("HUES64_HapMerged_AK1ICR.tab", **mcg_tab_opts)

# Put the two haplotypes of each sample side by side.
df_ak1 = pd.concat([df_ak1_hap1, df_ak1_hap2], axis=1)
df_hg002 = pd.concat([df_hg002_hap1, df_hg002_hap2], axis=1)
df_ipsc = pd.concat([df_ipsc_hap1, df_ipsc_hap2], axis=1)
df_npc = pd.concat([df_npc_hap1, df_npc_hap2], axis=1)

# Name the columns after the palette keys used for plotting.
df_ak1.columns = ['AK1_Hap1', 'AK1_Hap2']
df_hg002.columns = ['HG002_Hap1', 'HG002_Hap2']
df_ipsc.columns = ['iPSC_Hap1', 'iPSC_Hap2']
df_npc.columns = ['NPC_Hap1', 'NPC_Hap2']
df_h1.columns = ['H1']
df_hues64.columns = ['HUES64']

# Give every table the same index name as the candidate-ICR table.
for mcg_table in (df_ak1, df_hg002, df_ipsc, df_npc, df_h1, df_hues64):
    mcg_table.index.name = region_axis

# Disabled earlier variant of the heatmap drawn below, parked in a string literal so that it
# is never executed: rows are ordered by the mean of H1 and HUES64 instead of AK1_Hap1, the
# "coolwarm" colormap is used, and the figure is written to 'Heatmap_mCG_AK1novelICRs_1.png'.
'''
df_ak1novel_met = pd.concat([df_candidate_novelICR, df_ak1, df_hg002, df_ipsc, df_npc, df_h1, df_hues64], axis=1)
df_ak1novel_met_hm = df_ak1novel_met.iloc[:, 1:]
df_ak1novel_met_hm['H1_HUES64_mean'] = df_ak1novel_met_hm[['H1', 'HUES64']].mean(axis=1)
df_ak1novel_met_hm = df_ak1novel_met_hm.sort_values(by='H1_HUES64_mean', ascending=False)
del df_ak1novel_met_hm['H1_HUES64_mean']

annot = df_ak1novel_met_hm.applymap(lambda f: f'{f: .1f}')

fig, ax = plt.subplots(squeeze=False, constrained_layout=True)
# For NA values
heatmap1 = sns.heatmap(
    np.where(df_ak1novel_met_hm.isna(), 0, np.nan),
    ax = ax[0, 0],
    cbar = False,
    xticklabels = True,
    yticklabels = False,
    annot = None,
    fmt = "",
    cmap = ListedColormap(['grey']),
    linewidth = 0)
# For non-NA values
heatmap2 = sns.heatmap(
    df_ak1novel_met_hm,
    ax = ax[0, 0],
    xticklabels = True,
    yticklabels = False,
    annot = None, 
    fmt = "",
    cmap = "coolwarm",
    linewidth = 0.001, 
    linecolor = "black",
    vmin = 0,
    vmax = 100)
ax[0, 0].xaxis.tick_top()
ax[0, 0].set_xticklabels(ax[0, 0].get_xticklabels(), rotation=45)
ax[0, 0].set_ylabel('Novel ICRs in AK1 (N = 609)')
heatmap2.collections[1].colorbar.set_label(label='DNA methylation (%)', rotation=270, labelpad=10)
plt.savefig('Heatmap_mCG_AK1novelICRs_1.png')
'''

# Heatmap of mCG inside the novel AK1 candidate ICRs for every sample, HG002 included.
# pd.concat(axis=1) aligns the per-sample tables on their shared region index.
df_ak1novel_met = pd.concat([df_candidate_novelICR, df_ak1, df_hg002, df_ipsc, df_npc, df_h1, df_hues64], axis=1)
# Column 0 is 'Name'; every remaining column holds one sample's methylation values.
df_ak1novel_met_hm = df_ak1novel_met.iloc[:, 1:]
# Order rows by AK1_Hap1 (highest first) and discard regions without an AK1_Hap1 value.
df_ak1novel_met_hm = df_ak1novel_met_hm.sort_values(by='AK1_Hap1', ascending=False)
df_ak1novel_met_hm = df_ak1novel_met_hm.dropna(axis=0, subset=['AK1_Hap1'])

fig, ax = plt.subplots(figsize=(7.5, 5), squeeze=False, constrained_layout=True)
hm_ax = ax[0, 0]
# Layer 1: cells with a missing value are painted grey (all other cells are NaN in this
# mask and therefore left blank).
heatmap1 = sns.heatmap(
    np.where(df_ak1novel_met_hm.isna(), 0, np.nan),
    ax = hm_ax,
    cbar = False,
    xticklabels = True,
    yticklabels = False,
    annot = None,
    fmt = "",
    cmap = ListedColormap(['grey']),
    linewidth = 0)
# Layer 2: the measured values on a fixed 0-100 colour scale.
heatmap2 = sns.heatmap(
    df_ak1novel_met_hm,
    ax = hm_ax,
    xticklabels = True,
    yticklabels = False,
    annot = None,
    fmt = "",
    cmap = cmap,
    linewidth = 0.001,
    linecolor = "black",
    vmin = 0,
    vmax = 100)
hm_ax.xaxis.tick_top()
# Tick labels come from the plotted columns ('AK1_Hap1' -> 'AK1\nHap1'), so they always
# match the column order of the frame.
hm_ax.set_xticklabels([col.replace('_', '\n') for col in df_ak1novel_met_hm.columns], rotation=0)
# N is the number of rows actually drawn rather than a hand-typed constant.
hm_ax.set_ylabel(f'Novel ICRs in AK1 (N = {len(df_ak1novel_met_hm)})')
# collections[0] is the grey NA layer; collections[1] is the value layer that owns the colorbar.
heatmap2.collections[1].colorbar.set_label(label='DNA methylation (%)', rotation=270, labelpad=10)
plt.savefig('Heatmap_mCG_AK1novelICRs.png')



### mCG inside novel AK1 candidate ICRs (w/o HG002)

In [None]:
# Same heatmap as in the previous cell, with the two HG002 haplotype columns left out.
df_ak1novel_met = pd.concat([df_candidate_novelICR, df_ak1, df_hg002, df_ipsc, df_npc, df_h1, df_hues64], axis=1)
# Column 0 is 'Name'; every remaining column holds one sample's methylation values.
df_ak1novel_met_hm = df_ak1novel_met.iloc[:, 1:]
# Order rows by AK1_Hap1 (highest first) and discard regions without an AK1_Hap1 value.
df_ak1novel_met_hm = df_ak1novel_met_hm.sort_values(by='AK1_Hap1', ascending=False)
df_ak1novel_met_hm = df_ak1novel_met_hm.dropna(axis=0, subset=['AK1_Hap1'])
# Remove HG002 by column name instead of by position, so the selection keeps working
# if the set or order of concatenated samples changes.
df_ak1novel_met_hm = df_ak1novel_met_hm.drop(columns=['HG002_Hap1', 'HG002_Hap2'])

fig, ax = plt.subplots(squeeze=False, constrained_layout=True)
hm_ax = ax[0, 0]
# Layer 1: cells with a missing value are painted grey (all other cells are NaN in this
# mask and therefore left blank).
heatmap1 = sns.heatmap(
    np.where(df_ak1novel_met_hm.isna(), 0, np.nan),
    ax = hm_ax,
    cbar = False,
    xticklabels = True,
    yticklabels = False,
    annot = None,
    fmt = "",
    cmap = ListedColormap(['grey']),
    linewidth = 0)
# Layer 2: the measured values on a fixed 0-100 colour scale.
heatmap2 = sns.heatmap(
    df_ak1novel_met_hm,
    ax = hm_ax,
    xticklabels = True,
    yticklabels = False,
    annot = None,
    fmt = "",
    cmap = cmap,
    linewidth = 0.001,
    linecolor = "black",
    vmin = 0,
    vmax = 100)
hm_ax.xaxis.tick_top()
hm_ax.set_xticklabels(hm_ax.get_xticklabels(), rotation=45)
# N is the number of rows actually drawn rather than a hand-typed constant.
hm_ax.set_ylabel(f'Novel ICRs in AK1 (N = {len(df_ak1novel_met_hm)})')
# collections[0] is the grey NA layer; collections[1] is the value layer that owns the colorbar.
heatmap2.collections[1].colorbar.set_label(label='DNA methylation (%)', rotation=270, labelpad=10)
plt.savefig('Heatmap_mCG_AK1novelICRs_2_woHG002.png')

### PCA of mCG inside novel AK1 candidate ICRs (w/o HG002)

In [None]:
# Remove HG002 and keep only the regions that have a value in every remaining sample.
# The HG002 columns are dropped by name: the heatmap cell above has already cut the frame
# down to 8 columns, and re-applying the positional selection [0, 1, 4, ..., 9] to it
# raises IndexError. errors='ignore' makes this step a no-op when HG002 is already gone.
df_ak1novel_met_hm = df_ak1novel_met_hm.drop(columns=['HG002_Hap1', 'HG002_Hap2'], errors='ignore')
df_ak1novel_met_pca = df_ak1novel_met_hm.dropna(axis=0, how='any')

# Standardise each sample column (subtract its mean, divide by its standard deviation),
# then run PCA on the transpose so that every sample is one observation.
df_ak1novel_met_pca_normalized = (df_ak1novel_met_pca - df_ak1novel_met_pca.mean()) / df_ak1novel_met_pca.std()
pca = PCA(n_components=2)
pca.fit(df_ak1novel_met_pca_normalized.T)
transformed_data = pca.transform(df_ak1novel_met_pca_normalized.T)

# Smaller fonts for this figure only; sns.set_theme is the current name of sns.set.
sns.set_theme(font="Arial", font_scale=0.9, style='ticks')
plt.rc("axes.spines", top=False, right=False)
fig, ax = plt.subplots(figsize=(7, 7), constrained_layout=True)
# One point per sample: row i of `transformed_data` is paired with column i of the PCA
# input. Colour and label are both derived from that column name so they stay in sync.
pca_samples = df_ak1novel_met_pca.columns
for i, point in enumerate(transformed_data):
    sample = pca_samples[i]
    ax.scatter(point[0], point[1],
               color = hap_sample_palette[sample],
               s = 200)
    # 'AK1_Hap1' -> 'AK1\nHap1'; names without an underscore (e.g. 'H1') stay as they are.
    ax.annotate(sample.replace('_', '\n'),
                (point[0], point[1]),
                xytext = (5,5),
                textcoords = 'offset points')

ax.set_xlabel(f'PC1 ({pca.explained_variance_ratio_[0]*100:.2f}%)')
ax.set_ylabel(f'PC2 ({pca.explained_variance_ratio_[1]*100:.2f}%)')
# Save while the small-font theme is still active, then restore the notebook defaults.
plt.savefig('PCA_mCG_AK1novelICRs_woHG002.png')
# sns.set_theme() re-applies the whole seaborn style (spine visibility included), so the
# spine override from the setup cell is re-applied as well.
sns.set_theme(font="Arial", font_scale=1.15, style='ticks')
plt.rc("axes.spines", top=False, right=False)

### AK1-ICR retained in iPSC

In [None]:
# Reader options shared by every *.tab file: no header row, first column used as the index.
mcg_tab_opts = dict(index_col=0, header=None)
region_axis = 'ICR_retained'

# Intersect BED: build a "<col0>:<col1>-<col2>" key from the first three columns,
# index by it, and keep only column 3 under the name 'Name'.
retained_bed = pd.read_table("iPSC-HapDMR_AK1-ICR_intersect.bed", header=None)
bed_col0, bed_col1, bed_col2 = (retained_bed.iloc[:, k] for k in range(3))
retained_bed[region_axis] = bed_col0 + ':' + bed_col1.astype(str) + '-' + bed_col2.astype(str)
df_ICR_retained = retained_bed.set_index(region_axis)[[3]]
df_ICR_retained.columns = ['Name']

# Per-haplotype tables, one file per sample and haplotype. The HG002 tables are read as
# well, although the heatmaps later in this cell leave them out.
df_ak1_hap1 = pd.read_table("AK1_hap1_iPSC-HapDMR_AK1-ICR.tab", **mcg_tab_opts)
df_ak1_hap2 = pd.read_table("AK1_hap2_iPSC-HapDMR_AK1-ICR.tab", **mcg_tab_opts)
df_hg002_hap1 = pd.read_table("HG002_hap1_iPSC-HapDMR_AK1-ICR.tab", **mcg_tab_opts)
df_hg002_hap2 = pd.read_table("HG002_hap2_iPSC-HapDMR_AK1-ICR.tab", **mcg_tab_opts)
df_ipsc_hap1 = pd.read_table("iPSC_hap1_iPSC-HapDMR_AK1-ICR.tab", **mcg_tab_opts)
df_ipsc_hap2 = pd.read_table("iPSC_hap2_iPSC-HapDMR_AK1-ICR.tab", **mcg_tab_opts)
df_npc_hap1 = pd.read_table("NPC_hap1_iPSC-HapDMR_AK1-ICR.tab", **mcg_tab_opts)
df_npc_hap2 = pd.read_table("NPC_hap2_iPSC-HapDMR_AK1-ICR.tab", **mcg_tab_opts)

# Haplotype-merged tables for the two ESC lines.
df_h1 = pd.read_table("H1_MAPQ10_HapMerged_iPSC-HapDMR_AK1-ICR.tab", **mcg_tab_opts)
df_hues64 = pd.read_table("HUES64_MAPQ10_HapMerged_iPSC-HapDMR_AK1-ICR.tab", **mcg_tab_opts)

# Put the two haplotypes of each sample side by side.
df_ak1 = pd.concat([df_ak1_hap1, df_ak1_hap2], axis=1)
df_hg002 = pd.concat([df_hg002_hap1, df_hg002_hap2], axis=1)
df_ipsc = pd.concat([df_ipsc_hap1, df_ipsc_hap2], axis=1)
df_npc = pd.concat([df_npc_hap1, df_npc_hap2], axis=1)

# Name the columns after the palette keys used for plotting.
df_ak1.columns = ['AK1_Hap1', 'AK1_Hap2']
df_hg002.columns = ['HG002_Hap1', 'HG002_Hap2']
df_ipsc.columns = ['iPSC_Hap1', 'iPSC_Hap2']
df_npc.columns = ['NPC_Hap1', 'NPC_Hap2']
df_h1.columns = ['H1']
df_hues64.columns = ['HUES64']

# Give every table the same index name as the intersect table.
for mcg_table in (df_ak1, df_hg002, df_ipsc, df_npc, df_h1, df_hues64):
    mcg_table.index.name = region_axis


# Combine the ICR names with the per-sample tables (HG002 is not included here);
# pd.concat(axis=1) aligns them on their shared region index.
df_ICR_retained_met = pd.concat([df_ICR_retained, df_ak1, df_ipsc, df_npc, df_h1, df_hues64], axis=1)
# Column 0 is 'Name'; every remaining column holds one sample's methylation values.
df_ICR_retained_met_hm = df_ICR_retained_met.iloc[:, 1:]
# Order rows by AK1_Hap1 (highest first) and discard regions without an AK1_Hap1 value.
df_ICR_retained_met_hm = df_ICR_retained_met_hm.sort_values(by='AK1_Hap1', ascending=False)
df_ICR_retained_met_hm = df_ICR_retained_met_hm.dropna(axis=0, subset=['AK1_Hap1'])

# Subset 1: regions whose value lies in the 35-65 band in at least one of the two ESC lines.
df_ICR_retained_met_hm_1 = df_ICR_retained_met_hm.query('(35 <= H1 <= 65) or (35 <= HUES64 <= 65)')

fig, ax = plt.subplots(figsize=(11, 8), squeeze=False, constrained_layout=True)
hm_ax = ax[0, 0]
# Layer 1: cells with a missing value are painted grey (all other cells are NaN in this
# mask and therefore left blank).
heatmap1 = sns.heatmap(
    np.where(df_ICR_retained_met_hm_1.isna(), 0, np.nan),
    ax = hm_ax,
    cbar = False,
    xticklabels = True,
    yticklabels = False,
    annot = None,
    fmt = "",
    cmap = ListedColormap(['grey']),
    linewidth = 0,
    rasterized = True)
# Layer 2: the measured values on a fixed 0-100 colour scale.
heatmap2 = sns.heatmap(
    df_ICR_retained_met_hm_1,
    ax = hm_ax,
    xticklabels = True,
    yticklabels = False,
    annot = None,
    fmt = "",
    cmap = cmap,
    linewidth = 0.001,
    linecolor = "black",
    vmin = 0,
    vmax = 100,
    rasterized = True)
hm_ax.xaxis.tick_top()
# Tick labels come from the plotted columns ('AK1_Hap1' -> 'AK1\nHap1'), so they always
# match the column order of the frame.
hm_ax.set_xticklabels([col.replace('_', '\n') for col in df_ICR_retained_met_hm_1.columns], rotation=0)
# N is the number of rows actually drawn rather than a hand-typed constant.
hm_ax.set_ylabel(f'Candidate AK1 ICR retained in iPSC\nthat shows intermediate level of mCG in either one of ESCs (N = {len(df_ICR_retained_met_hm_1)})')
# collections[0] is the grey NA layer; collections[1] is the value layer that owns the colorbar.
heatmap2.collections[1].colorbar.set_label(label='DNA methylation (%)', rotation=270, labelpad=10)
plt.savefig('Candidate-AK1-ICR_retained_in_iPSC_but_intermediate_in_ESC_woHG002.pdf')


# Subset 2: every retained region that is not in subset 1, minus the regions for which
# neither H1 nor HUES64 has a value.
df_ICR_retained_met_hm_2 = df_ICR_retained_met_hm[~df_ICR_retained_met_hm.index.isin(df_ICR_retained_met_hm_1.index)]
df_ICR_retained_met_hm_2 = df_ICR_retained_met_hm_2.dropna(subset=['H1', 'HUES64'], how='all')

fig, ax = plt.subplots(figsize=(11, 8), squeeze=False, constrained_layout=True)
hm_ax = ax[0, 0]
# Layer 1: cells with a missing value are painted grey (all other cells are NaN in this
# mask and therefore left blank).
heatmap1 = sns.heatmap(
    np.where(df_ICR_retained_met_hm_2.isna(), 0, np.nan),
    ax = hm_ax,
    cbar = False,
    xticklabels = True,
    yticklabels = False,
    annot = None,
    fmt = "",
    cmap = ListedColormap(['grey']),
    linewidth = 0,
    rasterized = True)
# Layer 2: the measured values on a fixed 0-100 colour scale.
heatmap2 = sns.heatmap(
    df_ICR_retained_met_hm_2,
    ax = hm_ax,
    xticklabels = True,
    yticklabels = False,
    annot = None,
    fmt = "",
    cmap = cmap,
    linewidth = 0.001,
    linecolor = "black",
    vmin = 0,
    vmax = 100,
    rasterized = True)
hm_ax.xaxis.tick_top()
# Tick labels come from the plotted columns ('AK1_Hap1' -> 'AK1\nHap1'), so they always
# match the column order of the frame.
hm_ax.set_xticklabels([col.replace('_', '\n') for col in df_ICR_retained_met_hm_2.columns], rotation=0)
# N is the number of rows actually drawn rather than a hand-typed constant.
hm_ax.set_ylabel(f'Candidate AK1 ICR retained in iPSC\n (Somatic Memory, N = {len(df_ICR_retained_met_hm_2)})')
# collections[0] is the grey NA layer; collections[1] is the value layer that owns the colorbar.
heatmap2.collections[1].colorbar.set_label(label='DNA methylation (%)', rotation=270, labelpad=10)
plt.savefig('Candidate-AK1-ICR_retained_in_iPSC_Somatic_Memory.pdf')


### AK1-ICR restored in iPSC (but could be aberrant)

In [None]:
# Reader options shared by every *.tab file: no header row, first column used as the index.
mcg_tab_opts = dict(index_col=0, header=None)
region_axis = 'ICR_restored'

# Intersect BED: build a "<col0>:<col1>-<col2>" key from the first three columns,
# index by it, and keep only column 3 under the name 'Name'.
restored_bed = pd.read_table("AK1-ICR_notiPSC-DMR_intersect.bed", header=None)
bed_col0, bed_col1, bed_col2 = (restored_bed.iloc[:, k] for k in range(3))
restored_bed[region_axis] = bed_col0 + ':' + bed_col1.astype(str) + '-' + bed_col2.astype(str)
df_ICR_restored = restored_bed.set_index(region_axis)[[3]]
df_ICR_restored.columns = ['Name']

# Per-haplotype tables, one file per sample and haplotype. The HG002 tables are read as
# well, although the heatmaps later in this cell leave them out.
df_ak1_hap1 = pd.read_table("AK1_hap1_AK1-ICR_notiPSC-DMR.tab", **mcg_tab_opts)
df_ak1_hap2 = pd.read_table("AK1_hap2_AK1-ICR_notiPSC-DMR.tab", **mcg_tab_opts)
df_hg002_hap1 = pd.read_table("HG002_hap1_AK1-ICR_notiPSC-DMR.tab", **mcg_tab_opts)
df_hg002_hap2 = pd.read_table("HG002_hap2_AK1-ICR_notiPSC-DMR.tab", **mcg_tab_opts)
df_ipsc_hap1 = pd.read_table("iPSC_hap1_AK1-ICR_notiPSC-DMR.tab", **mcg_tab_opts)
df_ipsc_hap2 = pd.read_table("iPSC_hap2_AK1-ICR_notiPSC-DMR.tab", **mcg_tab_opts)
df_npc_hap1 = pd.read_table("NPC_hap1_AK1-ICR_notiPSC-DMR.tab", **mcg_tab_opts)
df_npc_hap2 = pd.read_table("NPC_hap2_AK1-ICR_notiPSC-DMR.tab", **mcg_tab_opts)

# Haplotype-merged tables for the two ESC lines.
df_h1 = pd.read_table("H1_MAPQ10_HapMerged_AK1-ICR_notiPSC-DMR.tab", **mcg_tab_opts)
df_hues64 = pd.read_table("HUES64_MAPQ10_HapMerged_AK1-ICR_notiPSC-DMR.tab", **mcg_tab_opts)

# Put the two haplotypes of each sample side by side.
df_ak1 = pd.concat([df_ak1_hap1, df_ak1_hap2], axis=1)
df_hg002 = pd.concat([df_hg002_hap1, df_hg002_hap2], axis=1)
df_ipsc = pd.concat([df_ipsc_hap1, df_ipsc_hap2], axis=1)
df_npc = pd.concat([df_npc_hap1, df_npc_hap2], axis=1)

# Name the columns after the palette keys used for plotting.
df_ak1.columns = ['AK1_Hap1', 'AK1_Hap2']
df_hg002.columns = ['HG002_Hap1', 'HG002_Hap2']
df_ipsc.columns = ['iPSC_Hap1', 'iPSC_Hap2']
df_npc.columns = ['NPC_Hap1', 'NPC_Hap2']
df_h1.columns = ['H1']
df_hues64.columns = ['HUES64']

# Give every table the same index name as the intersect table.
for mcg_table in (df_ak1, df_hg002, df_ipsc, df_npc, df_h1, df_hues64):
    mcg_table.index.name = region_axis


# Combine the ICR names with the per-sample tables (HG002 is not included here);
# pd.concat(axis=1) aligns them on their shared region index.
df_ICR_restored_met = pd.concat([df_ICR_restored, df_ak1, df_ipsc, df_npc, df_h1, df_hues64], axis=1)
# Column 0 is 'Name'; every remaining column holds one sample's methylation values.
df_ICR_restored_met_hm = df_ICR_restored_met.iloc[:, 1:]
# Order rows by AK1_Hap1 (highest first) and discard regions without an AK1_Hap1 value.
df_ICR_restored_met_hm = df_ICR_restored_met_hm.sort_values(by='AK1_Hap1', ascending=False)
df_ICR_restored_met_hm = df_ICR_restored_met_hm.dropna(axis=0, subset=['AK1_Hap1'])

# Subset 1: regions whose value is at most 35 in at least one of the two ESC lines.
df_ICR_restored_met_hm_1 = df_ICR_restored_met_hm.query('(H1 <= 35) or (HUES64 <= 35)')

fig, ax = plt.subplots(figsize=(11, 8), squeeze=False, constrained_layout=True)
hm_ax = ax[0, 0]
# Layer 1: cells with a missing value are painted grey (all other cells are NaN in this
# mask and therefore left blank).
heatmap1 = sns.heatmap(
    np.where(df_ICR_restored_met_hm_1.isna(), 0, np.nan),
    ax = hm_ax,
    cbar = False,
    xticklabels = True,
    yticklabels = False,
    annot = None,
    fmt = "",
    cmap = ListedColormap(['grey']),
    linewidth = 0,
    rasterized = True)
# Layer 2: the measured values on a fixed 0-100 colour scale.
heatmap2 = sns.heatmap(
    df_ICR_restored_met_hm_1,
    ax = hm_ax,
    xticklabels = True,
    yticklabels = False,
    annot = None,
    fmt = "",
    cmap = cmap,
    linewidth = 0.001,
    linecolor = "black",
    vmin = 0,
    vmax = 100,
    rasterized = True)
hm_ax.xaxis.tick_top()
# Tick labels come from the plotted columns ('AK1_Hap1' -> 'AK1\nHap1'), so they always
# match the column order of the frame.
hm_ax.set_xticklabels([col.replace('_', '\n') for col in df_ICR_restored_met_hm_1.columns], rotation=0)
# The axis label and output path used to be copies from the "retained" cell: the label
# described the wrong subset with a stale N, and saving overwrote that cell's PDF.
# NOTE(review): the wording and file name below are proposals -- adjust to the intended naming.
hm_ax.set_ylabel(f'Candidate AK1 ICR restored in iPSC\nthat shows low level of mCG (<= 35%) in either one of ESCs (N = {len(df_ICR_restored_met_hm_1)})')
# collections[0] is the grey NA layer; collections[1] is the value layer that owns the colorbar.
heatmap2.collections[1].colorbar.set_label(label='DNA methylation (%)', rotation=270, labelpad=10)
plt.savefig('Candidate-AK1-ICR_restored_in_iPSC_low_in_ESC_woHG002.pdf')


# Subset 2: every restored region that is not in subset 1, minus the regions for which
# neither H1 nor HUES64 has a value. This used to be computed from the df_ICR_retained_*
# frames of the previous cell, which redrew that cell's figure and overwrote its PDF.
df_ICR_restored_met_hm_2 = df_ICR_restored_met_hm[~df_ICR_restored_met_hm.index.isin(df_ICR_restored_met_hm_1.index)]
df_ICR_restored_met_hm_2 = df_ICR_restored_met_hm_2.dropna(subset=['H1', 'HUES64'], how='all')

fig, ax = plt.subplots(figsize=(11, 8), squeeze=False, constrained_layout=True)
hm_ax = ax[0, 0]
# Layer 1: cells with a missing value are painted grey (all other cells are NaN in this
# mask and therefore left blank).
heatmap1 = sns.heatmap(
    np.where(df_ICR_restored_met_hm_2.isna(), 0, np.nan),
    ax = hm_ax,
    cbar = False,
    xticklabels = True,
    yticklabels = False,
    annot = None,
    fmt = "",
    cmap = ListedColormap(['grey']),
    linewidth = 0,
    rasterized = True)
# Layer 2: the measured values on a fixed 0-100 colour scale.
heatmap2 = sns.heatmap(
    df_ICR_restored_met_hm_2,
    ax = hm_ax,
    xticklabels = True,
    yticklabels = False,
    annot = None,
    fmt = "",
    cmap = cmap,
    linewidth = 0.001,
    linecolor = "black",
    vmin = 0,
    vmax = 100,
    rasterized = True)
hm_ax.xaxis.tick_top()
# Tick labels come from the plotted columns ('AK1_Hap1' -> 'AK1\nHap1'), so they always
# match the column order of the frame.
hm_ax.set_xticklabels([col.replace('_', '\n') for col in df_ICR_restored_met_hm_2.columns], rotation=0)
# NOTE(review): the wording and file name below are proposals -- adjust to the intended naming.
hm_ax.set_ylabel(f'Candidate AK1 ICR restored in iPSC\n(mCG > 35% in ESCs, N = {len(df_ICR_restored_met_hm_2)})')
# collections[0] is the grey NA layer; collections[1] is the value layer that owns the colorbar.
heatmap2.collections[1].colorbar.set_label(label='DNA methylation (%)', rotation=270, labelpad=10)
plt.savefig('Candidate-AK1-ICR_restored_in_iPSC_not_low_in_ESC_woHG002.pdf')
