In [1]:
from pathlib import Path
import tqdm
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pyfaidx import Fasta
from meth5 import MetH5File

from benchmark_pycometh.simulation.nanopolish_simulator import OmicsSimlaLoader
from nanoepitools.plotting.general_plotting import PlotArchiver, plot_2d_density
from benchmark_pycometh.config import module_config

In [40]:
ground_truth_file = "/home/r933r/data/projects/nanopore/pycometh_benchmark/simulated/wgbs/merged/changepoints.txt"

gt = pd.read_csv(ground_truth_file, sep="\t", names=["chrom", "start", "end", "segment_type", "diffmet"])
gt_dmrs = gt.loc[gt["diffmet"]!=0]
gt_negative = gt.loc[gt["diffmet"]==0]

In [41]:
pa = PlotArchiver("simulation", headless=False, config={"plot_archive_dir": "/home/r933r/snajder/nanoepitools_plots/benchmark"})

In [19]:
methcp_dmrs_file = "/home/r933r/data/projects/nanopore/pycometh_benchmark/simulated/methcp/methcp_dmr.bed"
methcp_dmrs = pd.read_csv(methcp_dmrs_file, sep="\t", names=["chrom", "start", "end", "nC.valid", "nC", "diffmet", "cov", "pval"])

In [46]:
class ResultComparer:
    def __init__(self, segments):
        self.segments = segments

    def intersect_regions(self, left, right):
        for a in left.itertuples():
            for b in right.itertuples():
                if a.chrom != b.chrom:
                    continue
                if a.start <= b.end and b.start <= a.end:
                    yield a
                    break
    def get(self, key, diffmet_threshold=0):
        ret = self.segments[key]
        ret = ret.loc[ret["diffmet"].map(lambda x: abs(x)>=diffmet_threshold)]
        return ret

    def compute_tpr(self, gt, pred, diffmet_threshold=0):
        pred = self.get(pred, diffmet_threshold=diffmet_threshold)
        gt = self.get(gt, diffmet_threshold=diffmet_threshold)
        true_positives = list(self.intersect_regions(gt, pred))
        return len(true_positives) / len(gt)

    def compute_fpr(self, gt_pos, gt_neg, pred, diffmet_threshold=0):
        pred = self.get(pred, diffmet_threshold=diffmet_threshold)
        gt_neg = len(self.get(gt_neg)) + len(self.get(gt_pos))
        gt_pos = self.get(gt_pos, diffmet_threshold=diffmet_threshold)
        gt_neg = gt_neg - len(gt_pos)

        true_positives = list(self.intersect_regions(gt_pos, pred))
        false_positives = len(pred) - len(true_positives)
        return false_positives / gt_neg

In [47]:
rc = ResultComparer({"GT":gt_dmrs, "MethCP":methcp_dmrs, "GT_neg": gt_negative})
print("MethCP TPR", rc.compute_tpr("GT", "MethCP"))
print("MethCP FPR", rc.compute_fpr("GT", "GT_neg", "MethCP"))

MethCP TPR 0.7624521072796935
MethCP FPR 0.2076727964489537


In [37]:
for diff_thres in np.arange(0, 0.7, 0.05):
    print(f"MethCP TPR ({diff_thres})", rc.compute_tpr("GT", "MethCP"))
    print(f"MethCP FPR ({diff_thres})", rc.compute_fpr("GT", "GT_neg", "MethCP"))

Unnamed: 0,chrom,start,end,segment_type,difference
0,1,10468,28432,2,0.0
1,1,28511,30094,1,0.0
2,1,30154,521533,2,0.0
3,1,521570,521793,3,0.6
4,1,521945,563574,2,0.0
...,...,...,...,...,...
6825,1,249159108,249167003,2,0.0
6826,1,249167011,249168865,1,0.0
6827,1,249168876,249200038,2,0.0
6828,1,249200111,249201676,1,0.0
