In [40]:
import pandas as pd
from pathlib import Path
from IPython.display import display, HTML
from Bio import SeqIO

def fasta2df(fn):
    """Load the records of a FASTA file into a two-column DataFrame.

    Args:
        fn: FASTA file to read; handed unchanged to ``SeqIO.parse``.

    Returns:
        A DataFrame with the columns "ID" and "Sequence" (both converted
        with ``str``), one row per record in file order. A file without
        any records yields an empty frame that still has both columns.
    """
    records = [(str(rec.id), str(rec.seq)) for rec in SeqIO.parse(fn, "fasta")]
    # Name the columns in the constructor: assigning ``df.columns`` afterwards
    # raises a length-mismatch ValueError when there are no records, because
    # the frame is then created with zero columns.
    return pd.DataFrame(records, columns=["ID", "Sequence"])

def get_table(refname, experiment, tools):
    """Build a per-tool summary table for one experiment.

    For every tool, reads ``out/<experiment>/<tool>.eval`` (tab-separated,
    no header) and ``out/<experiment>/<tool>.time`` (first line holds two
    whitespace-separated fields: time and memory) and combines them with the
    number of reads in ``reads/reads-<experiment>.fa``.

    Args:
        refname: Name of the reference. Kept for backward compatibility; the
            reference FASTA is no longer parsed because its contents were
            never used and loading it is expensive for large genomes.
        experiment: Experiment name used to locate the reads file and the
            output directory.
        tools: Iterable of tool names; each needs an ``.eval`` and a
            ``.time`` file in the output directory.

    Returns:
        A DataFrame indexed by tool name (index name cleared) with the
        columns 'unaligned', 'aligned', 'misaligned', '%', 'time (sec)' and
        'memory (GB)'.

    Raises:
        FileNotFoundError: If a reads, ``.eval`` or ``.time`` file is missing.
        ValueError: If the first line of a ``.time`` file does not contain
            exactly two fields.
    """
    columns = ['tool', 'unaligned', 'aligned', 'misaligned', '%', 'time (sec)', 'memory (GB)']
    alldf = pd.DataFrame(columns=columns)
    reads = fasta2df(Path("reads") / f"reads-{experiment}.fa")
    num_reads = len(reads)
    d = Path("out") / experiment
    for tool in tools:
        evaldf = pd.read_csv(
            d / f"{tool}.eval",
            sep='\t',
            names=['Q', 'quality', 'aligned', 'wrong', 'wrong_frac', 'aligned_cum'],
        )
        # Totals over all rows of the eval file.
        aligned = evaldf['aligned'].sum()
        wrong = evaldf['wrong'].sum()
        unaligned = num_reads - aligned
        # Share of aligned reads that were placed wrongly; undefined (NaN)
        # when the tool aligned nothing, instead of dividing by zero.
        wrong_perc = round(100.0 * wrong / aligned, 2) if aligned else float('nan')

        time_path = d / f'{tool}.time'
        with open(time_path) as f:
            fields = f.readline().split()
        # Explicit check instead of ``assert`` so it survives ``python -O``.
        if len(fields) != 2:
            raise ValueError(
                f"expected 2 fields (time, memory) in {time_path}, got {len(fields)}"
            )
        elapsed, mem = fields
        alldf.loc[len(alldf.index)] = [tool, unaligned, aligned, wrong, wrong_perc, elapsed, mem]
    alldf = alldf.set_index('tool')
    alldf.index.name = None
    # Divide by 1024**2 to report GB; presumably the .time file stores memory
    # in KB -- TODO confirm against whatever writes these files.
    alldf['memory (GB)'] = (alldf['memory (GB)'].astype(float) / (1024 * 1024)).round(2)
    return alldf

# Tools to report on; get_table reads <tool>.eval and <tool>.time for each
# of them from out/<experiment>/.
tools = [
    'sweepmap',
    'mapquik',
    'blend',
    'minimap',
    'winnowmap',
]

# (reference name, experiment name) pairs, one table per pair.
experiments = [
    ('chm13-1B', 'chm13-1B-a0.99-d1-l10000'),
    ('t2tChrY', 't2tChrY-a0.99-d1-l10000'),
]

# Monospace font for the rendered HTML tables.
css = """ <style> table { font-family: "Courier New", Courier, monospace; } </style> """

for refname, experiment in experiments:
    df = get_table(refname, experiment, tools)
    # Label the header row with the experiment the table belongs to.
    df.columns.name = experiment
    display(HTML(css))
    display(df)
    # Also emit the table as LaTeX source, with special characters escaped.
    print(df.to_latex(escape=True))

FileNotFoundError: [Errno 2] No such file or directory: 'out/chm13-1B-a0.99-d1-l10000/minimap.eval'