In [44]:
import pandas as pd
from pathlib import Path
from IPython.display import display, HTML
from Bio import SeqIO

def fasta2df(fn):
    """Load the records of a FASTA file into a two-column DataFrame.

    Args:
        fn: FASTA input, handed unchanged to ``SeqIO.parse(fn, "fasta")``.

    Returns:
        A DataFrame with one row per record and the columns ``"ID"`` and
        ``"Sequence"``, both holding ``str`` values. An input without any
        record yields an empty frame that still carries both columns.
    """
    records = [(str(rec.id), str(rec.seq)) for rec in SeqIO.parse(fn, "fasta")]
    # Name the columns in the constructor: assigning ``df.columns`` afterwards
    # raises ValueError for an input with no records, because a frame built
    # from an empty iterable has zero columns.
    return pd.DataFrame(records, columns=["ID", "Sequence"])

def get_table(refname, experiment, tools):
    """Build a per-tool summary table of alignment accuracy and resource use.

    For each tool this reads ``out/<experiment>/<tool>.eval`` (tab-separated,
    no header row) and the first line of ``out/<experiment>/<tool>.time``
    (two whitespace-separated fields: time, then memory). Tools whose
    ``.eval`` file is missing are reported on stdout and skipped.

    Args:
        refname: Reference name. Currently unused; kept so existing callers
            keep working. The reference FASTA is no longer parsed here
            because no value in the table depends on it.
        experiment: Experiment name; selects ``reads/reads-<experiment>.fa``
            and the ``out/<experiment>`` directory.
        tools: Iterable of tool names; one output row per tool found.

    Returns:
        A DataFrame indexed by tool name (index name cleared) with the
        columns ``unaligned``, ``aligned``, ``misaligned``, ``%``,
        ``time (sec)`` and ``memory (GB)``. ``%`` is ``100 * misaligned /
        aligned`` rounded to two decimals (NaN when nothing was aligned).
        ``time (sec)`` holds the raw text from the ``.time`` file.
        ``memory (GB)`` is the second ``.time`` field divided by 1024 * 1024
        and rounded to two decimals (presumably the field is in KB -- TODO
        confirm against whatever writes the ``.time`` files).

    Raises:
        ValueError: If the first line of a ``.time`` file does not consist
            of exactly two fields.
    """
    columns = ['tool', 'unaligned', 'aligned', 'misaligned', '%', 'time (sec)', 'memory (GB)']
    eval_columns = ['Q', 'quality', 'aligned', 'wrong', 'wrong_frac', 'aligned_cum']

    # Only the number of reads is needed, to derive the unaligned count.
    reads = fasta2df(Path("reads") / f"reads-{experiment}.fa")
    total_reads = len(reads)
    out_dir = Path("out") / experiment

    # Collect rows first and build the frame once instead of growing it
    # row by row through ``.loc``.
    rows = []
    for tool in tools:
        eval_file = out_dir / f"{tool}.eval"
        if not eval_file.exists():
            print(f"File {eval_file} does not exist.")
            continue
        evaldf = pd.read_csv(eval_file, sep='\t', names=eval_columns)
        aligned = evaldf['aligned'].sum()
        wrong = evaldf['wrong'].sum()
        unaligned = total_reads - aligned
        # Guard the division explicitly so a tool that aligned nothing gives
        # NaN without depending on NumPy scalar semantics.
        if aligned:
            wrong_perc = round(100.0 * float(wrong) / float(aligned), 2)
        else:
            wrong_perc = float('nan')

        time_file = out_dir / f"{tool}.time"
        with open(time_file) as f:
            fields = f.readline().split()
        # Validate with a real exception: ``assert`` is stripped under -O.
        if len(fields) != 2:
            raise ValueError(
                f"Expected 2 fields (time, memory) in {time_file}, got {len(fields)}"
            )
        time, mem = fields
        rows.append([tool, unaligned, aligned, wrong, wrong_perc, time, mem])

    alldf = pd.DataFrame(rows, columns=columns).set_index('tool')
    alldf.index.name = None
    alldf['memory (GB)'] = (alldf['memory (GB)'].astype(float) / (1024 * 1024)).round(2)
    return alldf

# Tools to compare, and (reference name, experiment name) pairs to report on.
tools = ['sweepmap', 'mapquik', 'blend', 'minimap', 'winnowmap']
experiments = [('chm13-1B', 'chm13-1B-a0.99-d1-l10000'), ('t2tChrY', 't2tChrY-a0.99-d1-l10000')]


def _strip_trailing_zeros(cell):
    """Drop trailing zeros (and a then-dangling '.') from a decimal string.

    Strings without a decimal point, or in exponent notation, are returned
    unchanged so that e.g. "100" or "1.5e+20" are not truncated.
    """
    if '.' not in cell or 'e' in cell.lower():
        return cell
    return cell.rstrip('0').rstrip('.')


# The style sheet is identical for every table, so emit it once up front.
css = """ <style> table { font-family: "Courier New", Courier, monospace; } </style> """
display(HTML(css))

for refname, experiment in experiments:
    df = get_table(refname, experiment, tools)
    # Shown as the header of the index column in the rendered and LaTeX tables.
    df.columns.name = experiment
    display(df)
    text_df = df.round(2).astype(str)
    # ``DataFrame.applymap`` is deprecated in favour of ``DataFrame.map``;
    # fall back to the old name only on pandas versions that lack ``map``.
    elementwise = text_df.map if hasattr(text_df, "map") else text_df.applymap
    df = elementwise(_strip_trailing_zeros)
    print(df.to_latex(escape=True))

chm13-1B-a0.99-d1-l10000,unaligned,aligned,misaligned,%,time (sec),memory (GB)
sweepmap,0,40037,101,0.25,40.9,7.69
mapquik,229,39808,495,1.24,12.0,5.5
blend,0,40037,169,0.42,28.8,2.37
minimap,0,40037,73,0.18,81.06,3.98
winnowmap,0,40037,70,0.17,931.68,3.22


  df = df.round(2).astype(str).applymap(lambda x: x.rstrip('0').rstrip('.') if '.' in x else x)


\begin{tabular}{lllllll}
\toprule
chm13-1B-a0.99-d1-l10000 & unaligned & aligned & misaligned & \% & time (sec) & memory (GB) \\
\midrule
sweepmap & 0 & 40037 & 101 & 0.25 & 40.9 & 7.69 \\
mapquik & 229 & 39808 & 495 & 1.24 & 12 & 5.5 \\
blend & 0 & 40037 & 169 & 0.42 & 28.8 & 2.37 \\
minimap & 0 & 40037 & 73 & 0.18 & 81.06 & 3.98 \\
winnowmap & 0 & 40037 & 70 & 0.17 & 931.68 & 3.22 \\
\bottomrule
\end{tabular}



t2tChrY-a0.99-d1-l10000,unaligned,aligned,misaligned,%,time (sec),memory (GB)
sweepmap,0,3131,189,6.04,2.21,0.4
mapquik,402,2729,760,27.85,0.65,1.48
blend,0,3131,318,10.16,19.89,0.16
minimap,0,3131,125,3.99,63.43,0.29
winnowmap,0,3131,148,4.73,2944.98,1.07


\begin{tabular}{lllllll}
\toprule
t2tChrY-a0.99-d1-l10000 & unaligned & aligned & misaligned & \% & time (sec) & memory (GB) \\
\midrule
sweepmap & 0 & 3131 & 189 & 6.04 & 2.21 & 0.4 \\
mapquik & 402 & 2729 & 760 & 27.85 & 0.65 & 1.48 \\
blend & 0 & 3131 & 318 & 10.16 & 19.89 & 0.16 \\
minimap & 0 & 3131 & 125 & 3.99 & 63.43 & 0.29 \\
winnowmap & 0 & 3131 & 148 & 4.73 & 2944.98 & 1.07 \\
\bottomrule
\end{tabular}



  df = df.round(2).astype(str).applymap(lambda x: x.rstrip('0').rstrip('.') if '.' in x else x)
