### KOLF genome methylation comparison: ONT (ont) vs. PacBio (pb)

Read in data:

In [18]:
#!pip3 install pandas aplanat
# !pip3 install seaborn
import pandas as pd
import numpy as np
import aplanat
from aplanat import spatial
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.patches as mplpatches
from matplotlib import gridspec

In [140]:
# Load the "KOLF ont Hs" table. The file is tab-separated and has no header
# row, so the column names are supplied here; dtypes are grouped by kind.
KOLF_ont_Hs = pd.read_csv(
    "KOLF_ont_Hs.bed",
    sep="\t",
    header=None,
    engine="c",
    names=[
        'chrom', 'start', 'end', 'name',
        'score', 'strand', 'tstart', 'tend',
        'avg_mod', 'SD_avg_mod', 'coverage',
        'avg_mod_h1', 'SD_avg_mod_h1', 'coverage_h1',
        'avg_mod_h2', 'SD_avg_mod_h2', 'coverage_h2',
    ],
    dtype={
        **dict.fromkeys(['chrom', 'name', 'strand'], str),
        **dict.fromkeys(['start', 'end', 'score', 'tstart', 'tend'], int),
        **dict.fromkeys(
            [
                'avg_mod', 'SD_avg_mod', 'coverage',
                'avg_mod_h1', 'SD_avg_mod_h1', 'coverage_h1',
                'avg_mod_h2', 'SD_avg_mod_h2', 'coverage_h2',
            ],
            float,
        ),
    },
)

print("KOLF ont Hs data")

# Keep the coordinates, name and the avg_mod / SD / coverage columns;
# the four columns at positions 4-7 are removed.
KOLF_ont_Hs_mod = KOLF_ont_Hs.drop(columns=['score', 'strand', 'tstart', 'tend'])

# Trailing bare expression so the notebook renders the trimmed table.
KOLF_ont_Hs_mod

KOLF ont Hs data


Unnamed: 0,chrom,start,end,name,avg_mod,SD_avg_mod,coverage,avg_mod_h1,SD_avg_mod_h1,coverage_h1,avg_mod_h2,SD_avg_mod_h2,coverage_h2
0,chr1,959245,959305,NOC2L_1,6.655329,8.625277,28.0,7.916667,10.603873,12.0,5.231481,6.359467,12.0
1,chr1,960583,960643,KLHL17_1,8.825691,11.626220,27.0,8.796296,12.884185,12.0,6.805556,8.967749,10.0
2,chr1,966432,966492,PLEKHN1_1,15.119048,20.009564,20.0,20.292208,22.706391,11.0,8.796296,13.671132,9.0
3,chr1,976670,976730,PERM1_1,13.458995,21.027496,18.0,12.314815,17.487993,9.0,8.571429,14.568627,7.0
4,chr1,1000086,1000146,HES4_1,5.445758,8.257538,39.0,7.493924,8.618725,19.0,3.500000,7.390873,20.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
29593,chrX,155612941,155613001,TMLHE_1,22.813853,24.039113,11.0,-1.000000,-1.000000,-1.0,-1.000000,-1.000000,-1.0
29594,chrX,155767702,155767762,SPRY3_2,-1.000000,-1.000000,-1.0,-1.000000,-1.000000,-1.0,-1.000000,-1.000000,-1.0
29595,chrX,155881295,155881355,VAMP7_1,56.585775,45.691987,26.0,98.076923,4.510015,13.0,8.373016,11.672941,10.0
29596,chrY,18992808,18992868,CD24_1,9.524658,9.449059,17.0,20.000000,0.000000,1.0,-1.000000,-1.000000,-1.0


In [141]:
# Load the "KOLF pb Hs" table. The file is tab-separated and has no header
# row, so the column names are supplied here; dtypes are grouped by kind.
KOLF_pb_Hs = pd.read_csv(
    "KOLF_pb_Hs.bed",
    sep="\t",
    header=None,
    engine="c",
    names=[
        'chrom', 'start', 'end', 'name',
        'score', 'strand', 'tstart', 'tend',
        'avg_mod', 'SD_avg_mod', 'coverage',
        'avg_mod_h1', 'SD_avg_mod_h1', 'coverage_h1',
        'avg_mod_h2', 'SD_avg_mod_h2', 'coverage_h2',
    ],
    dtype={
        **dict.fromkeys(['chrom', 'name', 'strand'], str),
        **dict.fromkeys(['start', 'end', 'score', 'tstart', 'tend'], int),
        **dict.fromkeys(
            [
                'avg_mod', 'SD_avg_mod', 'coverage',
                'avg_mod_h1', 'SD_avg_mod_h1', 'coverage_h1',
                'avg_mod_h2', 'SD_avg_mod_h2', 'coverage_h2',
            ],
            float,
        ),
    },
)

print("KOLF pb Hs data")

# Keep the coordinates, name and the avg_mod / SD / coverage columns;
# the four columns at positions 4-7 are removed.
KOLF_pb_Hs_mod = KOLF_pb_Hs.drop(columns=['score', 'strand', 'tstart', 'tend'])

# Trailing bare expression so the notebook renders the trimmed table.
KOLF_pb_Hs_mod

KOLF pb Hs data


Unnamed: 0,chrom,start,end,name,avg_mod,SD_avg_mod,coverage,avg_mod_h1,SD_avg_mod_h1,coverage_h1,avg_mod_h2,SD_avg_mod_h2,coverage_h2
0,chr1,959245,959305,NOC2L_1,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0
1,chr1,960583,960643,KLHL17_1,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0
2,chr1,966432,966492,PLEKHN1_1,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0
3,chr1,976670,976730,PERM1_1,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0
4,chr1,1000086,1000146,HES4_1,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
29593,chrX,155612941,155613001,TMLHE_1,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0
29594,chrX,155767702,155767762,SPRY3_2,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0
29595,chrX,155881295,155881355,VAMP7_1,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0
29596,chrY,18992808,18992868,CD24_1,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0


In [144]:
# Load the "KOLF ont CpG" table. The file is tab-separated and has no header
# row, so the column names are supplied here; dtypes are grouped by kind.
# 'idk1'..'idk6' are placeholder names for six columns that are parsed as
# floats and discarded below (their meaning is not documented in this cell).
KOLF_ont_cpg = pd.read_csv(
    "KOLF_ont_cpg.bed",
    sep="\t",
    header=None,
    engine="c",
    names=[
        'chrom', 'start', 'end', 'name',
        'idk1', 'idk2', 'idk3', 'idk4', 'idk5', 'idk6',
        'avg_mod', 'SD_avg_mod', 'coverage',
        'avg_mod_h1', 'SD_avg_mod_h1', 'coverage_h1',
        'avg_mod_h2', 'SD_avg_mod_h2', 'coverage_h2',
    ],
    dtype={
        **dict.fromkeys(['chrom', 'name'], str),
        **dict.fromkeys(['start', 'end'], int),
        **dict.fromkeys(
            [
                'idk1', 'idk2', 'idk3', 'idk4', 'idk5', 'idk6',
                'avg_mod', 'SD_avg_mod', 'coverage',
                'avg_mod_h1', 'SD_avg_mod_h1', 'coverage_h1',
                'avg_mod_h2', 'SD_avg_mod_h2', 'coverage_h2',
            ],
            float,
        ),
    },
)

print("KOLF ont CpG data")

# Keep the coordinates, name and the avg_mod / SD / coverage columns;
# the six placeholder columns at positions 4-9 are removed.
KOLF_ont_cpg_mod = KOLF_ont_cpg.drop(
    columns=['idk1', 'idk2', 'idk3', 'idk4', 'idk5', 'idk6']
)

# Trailing bare expression so the notebook renders the trimmed table.
KOLF_ont_cpg_mod

KOLF ont CpG data


Unnamed: 0,chrom,start,end,name,avg_mod,SD_avg_mod,coverage,avg_mod_h1,SD_avg_mod_h1,coverage_h1,avg_mod_h2,SD_avg_mod_h2,coverage_h2
0,chr1,28735,29737,CpG:_111,14.125110,12.538598,77.0,13.937391,9.766899,51.0,14.493329,16.681789,26.0
1,chr1,135124,135563,CpG:_30,92.249360,6.570105,23.0,94.123010,4.799198,12.0,90.205378,7.557979,11.0
2,chr1,199251,200121,CpG:_104,9.837930,4.311553,18.0,10.115214,5.329828,7.0,9.661476,3.501698,11.0
3,chr1,368792,370063,CpG:_99,94.033198,2.606688,18.0,-1.000000,-1.000000,-1.0,-1.000000,-1.000000,-1.0
4,chr1,381172,382185,CpG:_84,93.545341,3.346222,25.0,-1.000000,-1.000000,-1.0,-1.000000,-1.000000,-1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
27944,chrY,25464370,25464941,CpG:_51,92.236698,6.112161,8.0,-1.000000,-1.000000,-1.0,-1.000000,-1.000000,-1.0
27945,chrY,26409388,26409785,CpG:_32,91.665397,6.033898,21.0,-1.000000,-1.000000,-1.0,-1.000000,-1.000000,-1.0
27946,chrY,26627168,26627397,CpG:_25,89.678151,10.924243,21.0,82.261905,14.346596,4.0,72.727273,0.000000,1.0
27947,chrY,57067645,57068034,CpG:_36,-1.000000,-1.000000,-1.0,-1.000000,-1.000000,-1.0,-1.000000,-1.000000,-1.0


In [145]:
# Load the "KOLF pb CpG" table. The file is tab-separated and has no header
# row, so the column names are supplied here; dtypes are grouped by kind.
# 'idk1'..'idk6' are placeholder names for six columns that are parsed as
# floats and discarded below (their meaning is not documented in this cell).
KOLF_pb_cpg = pd.read_csv(
    "KOLF_pb_cpg.bed",
    sep="\t",
    header=None,
    engine="c",
    names=[
        'chrom', 'start', 'end', 'name',
        'idk1', 'idk2', 'idk3', 'idk4', 'idk5', 'idk6',
        'avg_mod', 'SD_avg_mod', 'coverage',
        'avg_mod_h1', 'SD_avg_mod_h1', 'coverage_h1',
        'avg_mod_h2', 'SD_avg_mod_h2', 'coverage_h2',
    ],
    dtype={
        **dict.fromkeys(['chrom', 'name'], str),
        **dict.fromkeys(['start', 'end'], int),
        **dict.fromkeys(
            [
                'idk1', 'idk2', 'idk3', 'idk4', 'idk5', 'idk6',
                'avg_mod', 'SD_avg_mod', 'coverage',
                'avg_mod_h1', 'SD_avg_mod_h1', 'coverage_h1',
                'avg_mod_h2', 'SD_avg_mod_h2', 'coverage_h2',
            ],
            float,
        ),
    },
)

print("KOLF pb CpG data")

# Keep the coordinates, name and the avg_mod / SD / coverage columns;
# the six placeholder columns at positions 4-9 are removed.
KOLF_pb_cpg_mod = KOLF_pb_cpg.drop(
    columns=['idk1', 'idk2', 'idk3', 'idk4', 'idk5', 'idk6']
)

# Trailing bare expression so the notebook renders the trimmed table.
KOLF_pb_cpg_mod

KOLF pb CpG data


Unnamed: 0,chrom,start,end,name,avg_mod,SD_avg_mod,coverage,avg_mod_h1,SD_avg_mod_h1,coverage_h1,avg_mod_h2,SD_avg_mod_h2,coverage_h2
0,chr1,28735,29737,CpG:_111,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0
1,chr1,135124,135563,CpG:_30,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0
2,chr1,199251,200121,CpG:_104,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0
3,chr1,368792,370063,CpG:_99,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0
4,chr1,381172,382185,CpG:_84,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
27944,chrY,25464370,25464941,CpG:_51,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0
27945,chrY,26409388,26409785,CpG:_32,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0
27946,chrY,26627168,26627397,CpG:_25,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0
27947,chrY,57067645,57068034,CpG:_36,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0
