# A*PA-next evals

This notebook contains the latest evals for the A*PA project.

In [None]:
import numpy as np
import math
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
warnings.simplefilter(action='once', category=UserWarning)
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.ticker as mtick
import json
from pathlib import Path

In [None]:
# Widen pandas' display limits so that large result tables are shown in full.
for _option, _value in {
    "display.max_rows": 200,
    "display.max_columns": 100,
    "display.width": 1000,
}.items():
    pd.set_option(_option, _value)

# Data reading and preparation

In [None]:
# Shared drawing constants read by the plotting code below.
labelsize = 10    # font size of the text labels drawn next to lines
markersize = 4    # size of the data-point markers
linewidth = 0.75  # width of plotted lines and boxplot outlines

def column_display_name(col):
    """Return the human-readable axis label for the dataframe column `col`.

    Columns without a dedicated label are returned unchanged.
    """
    display_names = {
        "divergence": "Divergence",
        "runtime": "Runtime per alignment [s]",
        "runtime_capped": "Runtime per alignment [s]",
        "s_per_pair": "Avg. runtime per alignment [s]",
        "s_per_pair_capped": "Avg. runtime per alignment [s]",
        "length": "Sequence length [bp]",
        "band": "Equivalent band",
        "algo_key": "algorithm",
        "algo_pretty": " ",
    }
    return display_names.get(col, col)

# Display names of the real-data datasets, keyed by dataset name.
# The insertion order doubles as the order in which datasets are sorted.
dataset_pretty = {
    'ont-ul-500k': 'ONT reads',
    'ont-minion-ul-500k': 'ONT reads + genetic variation',
}
dataset_order = [*dataset_pretty]


# Line style:
# - slow (no pruning): dotted
# - normal: solid
# - diagonal-transition: dashed
# Colours:
# edlib/wfa ('extern'): blue/purple
# sh/csh/gcsh: orange -> brown -> green gradient
# noprune/normal/dt: 60% -> 70% -> 85% saturation
colors = {'dijkstra': '#786061', 'sh': "#e87146", 'csh': "#8c662a", 'gcsh': "#257d26"}
# Matplotlib dash patterns: (offset, (on, off, ...)).
dashed = (0, (5, 5))
dotted = (0, (1, 4))
# Maps algorithm key -> (colour, line style, display label).
# Every line style must be a valid matplotlib line style, because `plot`
# passes it straight to `ax.plot(linestyle=...)` when lines are drawn.
# ('.' is a marker, not a line style, so those entries use `dotted`.)
algorithm_styles = {
    "edlib": ("#DE4AFF", dashed, 'Edlib'),
    "biwfa": ("#625AFF", '-', 'BiWFA'),
    'blockaligner': ('#0000ff', dotted, 'Block\nAligner\n(128,1024)'),
    "dijkstra": (colors['dijkstra'], dashed, 'Dijkstra'),
    "sh-noprune": (colors['sh'], dotted, 'SH (no prune)'),
    "sh": (colors['sh'], dashed, 'SH'),
    "csh": (colors['csh'], dashed, 'CSH'),
    "gcsh": (colors['gcsh'], dashed, 'GCSH'),
    "dijkstra-dt": (colors['dijkstra'], '-', 'Dijkstra+DT'),
    "sh-dt": ('#e35522', '-', 'SH+DT'),
    "csh-dt": ('#875a12', '-', 'CSH+DT'),
    "gcsh-dt": ('#0f7a10', '-', 'GCSH+DT'),
    'astarpa': ('#0f7a10', '-', 'GCSH+DT'),
    # The leading newlines in the labels below stagger neighbouring
    # tick labels vertically so they do not overlap on a categorical axis.
    'astarnw': ('#000000', dotted, 'A*NW'),
    'astarnw-gapgap': ('#bb0000', dotted, 'A*NW\n+gapgap'),
    'astarnw-gapdist': ('#ee0000', dotted, '\n\n+gapdist'),
    'astarnw-blocks': ('#ff0000', dotted, '\n\n\n+blocks'),
    'astarnw-sparse_mem': ('#ff6600', dotted, '\n+sparse mem'),
    'astarnw-new-profile': ('#ffcc00', dotted, '\n\n+bit-profile'),
    'astarnw-trace': ('#aacc00', dotted, '\n\n\n+new trace'),
    'astarnw-local-pruning-5': ('#77cc00', dotted, '+local\npruning'),
    'astarnw-prune': ('#00cc77', dotted, '\n\n+pruning'),
    'astarnw-matches': ('#00aaaa', dotted, '\n\n\n+match filter'),
    'astarnw-matches2': ('#0099ee', dotted, '\n+faster LP'),
    'astarnw-dt-trace': ('#0066ff', dotted, '\n\n+DT trace'),
    'astarnw-k12': ('#0033ff', dotted, 'k: 12\n(from 15)'),
    'astarnw-local-doubling': ('#3300ff', dotted, '\n\n+local\ndoubling'),
    'astarnw-k12-new': ('#0033ff', dotted, 'k: 12\n(from 15)\nNEW'),
    'astarnw-local-doubling-new': ('#3300ff', dotted, '\n\n+local\ndoubling\nNEW'),
}
# Display order of the algorithms: the insertion order of the table above.
algorithm_order = list(algorithm_styles.keys())
# Colour per algorithm key, in the form seaborn expects for `palette=`.
palette = {k: v[0] for k, v in algorithm_styles.items()}

def get_algorithm_key(row):
    """Map one result row to the short algorithm key used for styling and ordering.

    `row` is one row of the flattened results frame (a pandas Series; any
    mapping with `.get` works as well). The key is derived from `algo_name`
    and the algorithm-specific parameter columns:

    - 'Edlib' -> 'edlib'
    - 'Wfa' -> 'biwfa' when the memory model is 'MemoryUltraLow', else 'wfa'
    - 'BlockAligner' -> 'blockaligner'
    - 'AstarPa' -> the lower-cased heuristic type ('dijkstra' for 'None'),
      plus '-noprune' when a heuristic is used without pruning, plus '-dt'
      when diagonal transition is enabled
    - 'AstarNW' -> 'astarnw-<name>' when the run has a name, otherwise
      'astarnw' plus '-sparse' / '-simd' / '-h' for each enabled option
    - anything else -> 'unknown'

    Note that some returned keys ('wfa', 'unknown', and the option-derived
    'astarnw...' keys) have no entry in `algorithm_styles`.
    """

    def is_set(column):
        # A column that is absent from the row, or that holds None/NaN because
        # it belongs to a different algorithm's parameters, counts as unset.
        # (NaN is truthy in Python, so a plain `if value:` is not enough.)
        value = row.get(column)
        if value is None or (pd.api.types.is_scalar(value) and pd.isna(value)):
            return False
        return bool(value)

    name = row['algo_name']
    if name == 'Edlib':
        return 'edlib'
    if name == 'Wfa':
        if row['job_algo_Wfa_memorymodel'] == 'MemoryUltraLow':
            return 'biwfa'
        return 'wfa'
    if name == 'BlockAligner':
        return 'blockaligner'
    if name == 'AstarPa':
        heuristic = row['job_algo_AstarPa_heuristic_type']
        dt = row['job_algo_AstarPa_diagonaltransition']
        # Rows without a prune setting are treated as pruning enabled.
        prune = row['job_algo_AstarPa_heuristic_prune'] if 'job_algo_AstarPa_heuristic_prune' in row else 'Both'
        key = 'dijkstra' if heuristic == 'None' else heuristic.lower()
        if heuristic != 'None' and prune == 'None':
            key += '-noprune'
        if dt:
            key += '-dt'
        return key
    if name == 'AstarNW':
        key = 'astarnw'
        # An explicit run name takes precedence over the option flags.
        if is_set('job_algo_AstarNW_name'):
            return f"{key}-{row.get('job_algo_AstarNW_name')}"
        if is_set('job_algo_AstarNW_front_Bit_sparse'):
            key += '-sparse'
        if is_set('job_algo_AstarNW_front_Bit_simd'):
            key += '-simd'
        if is_set('job_algo_AstarNW_sparsehcalls'):
            key += '-h'
        return key
    return 'unknown'

def algorithm_display(row, split):
    """Return the `(color, linestyle, label)` triple used to draw `row`'s algorithm.

    The triple is looked up in `algorithm_styles` by `row['algo_key']`. When
    'r' is one of the grouping columns in `split` and the row has a non-zero
    `r`, the label gets a ` (r=<r>)` suffix.
    """
    color, linestyle, label = algorithm_styles[row['algo_key']]
    if 'r' in split and row.r:
        label = f'{label} (r={row.r})'
    return (color, linestyle, label)

In [None]:
def read_results(path):
    """Load a benchmark-results JSON file into a flat, sorted dataframe.

    Steps:
    - Read the JSON file at `path`; it holds a list of result records.
    - Strip underscores from all JSON keys (`a_b` becomes `ab`), so that `_`
      can be used as the separator between nesting levels when flattening.
    - Per record, store the algorithm type as `algo_name` and the JSON dump
      of its parameters as `algo_full`, and drop the `costs` entry of
      successful runs.
    - Flatten the records into a dataframe and add:
      - `algo_key` / `algo_pretty`: style key and display label,
      - renamed convenience columns such as `length`, `errorrate`,
        `timelimit`, `pcorrect`, `runtime`, `memory`, `divergence`, `dt`,
        `prune` and `r`,
      - `dataset`: the parent directory name of the dataset file (only when
        every record comes from a file),
      - computed columns `costmodel`, `s_per_pair`, `timelimit_per_pair`,
        `band`, `runtime_capped`, `s_per_pair_capped` and `editdistance`.
    - Sort the rows by dataset, then error rate, then length, then algorithm
      order (each only if the column exists).
    - Drop rows whose `output_Err` is 'Unsupported'.

    Raises `KeyError` / `ValueError` when a record maps to an algorithm key
    that has no entry in `algorithm_styles`.
    """

    json_path = Path(path)
    data = json.loads(json_path.read_text())

    # Remove underscores from all keys
    def remove_underscores(o):
        if isinstance(o, list):
            return [remove_underscores(v) for v in o]
        if isinstance(o, dict):
            return {k.replace('_', ''): remove_underscores(v) for k, v in o.items()}
        return o

    data = remove_underscores(data)

    # Clean up algo columns
    for x in data:
        # The algorithm object has a single key: the algorithm type.
        name = list(x['job']['algo'].keys())[0]
        obj = x['job']['algo']
        obj['name'] = name
        x['algo_name'] = name
        x['algo_full'] = json.dumps(obj)
        #del x['job']['algo']
        if 'Ok' in x['output']:
            # Not needed for the evals; tolerate records that have no costs.
            x['output']['Ok'].pop('costs', None)

    # Flatten the js
    df = pd.json_normalize(data, sep='_')
    df['algo_key'] = df.apply(get_algorithm_key, axis=1)
    df['algo_pretty'] = df['algo_key'].map(lambda key: algorithm_styles[key][2])

    # Convenience renaming
    df = df.rename({'job_dataset_Generated_length': 'length',
                    'job_dataset_Generated_errorrate': 'errorrate',
                    'job_timelimit': 'timelimit',
                    'output_Ok_pcorrect': 'pcorrect',
                    'output_Ok_measured_runtime': 'runtime',
                    'output_Ok_measured_memory': 'memory',
                    'stats_divergence_mean': 'divergence',
                    'job_algo_AstarPa_diagonaltransition': 'dt',
                    'job_algo_AstarPa_heuristic_prune': 'prune',
                    'job_algo_AstarPa_heuristic_r': 'r',
                    #'job_algo_AstarNW_heuristic_r': 'r',
                   }, axis='columns')

    # Order rows. The sorts are stable and applied from the least to the most
    # significant key, so each later sort keeps the earlier order within ties.
    df['algo_ord'] = df['algo_key'].map(lambda key: algorithm_order.index(key))
    df.sort_values(by='algo_ord', inplace=True, kind = 'stable')
    if 'length' in df.columns:
        df.sort_values(by='length', inplace=True, kind = 'stable')
    if 'errorrate' in df.columns:
        df.sort_values(by='errorrate', inplace=True, kind = 'stable')
    # Order by dataset
    if 'job_dataset_File' in df.columns and df.job_dataset_File.notna().all():
        df['dataset'] = df['job_dataset_File'].map(lambda f: Path(f).parent.name)
        # Known datasets are ranked by their position in `dataset_order`;
        # unknown ones share a rank past the end. Keeping the column purely
        # numeric avoids comparing ints with strings while sorting.
        unknown_rank = len(dataset_order)
        df['dataset_ord'] = df['dataset'].map(
            lambda key: dataset_order.index(key) if key in dataset_order else unknown_rank)
        # Sorting by name first keeps unknown datasets in alphabetical order.
        df.sort_values(by='dataset', inplace=True, kind = 'stable')
        df.sort_values(by='dataset_ord', inplace=True, kind = 'stable')

    # Computed columns
    df['costmodel'] = df.apply(lambda row: (row['job_costs_sub'], row['job_costs_open'], row['job_costs_extend']), axis=1)
    df['s_per_pair'] = df['runtime'] / df['stats_seqpairs']
    df['timelimit_per_pair'] = df['timelimit'] / df['stats_seqpairs']
    if 'length' in df.columns and 'output_Ok_stats_expanded' in df.columns:
        df['band'] = df['output_Ok_stats_expanded'] / (df['stats_seqpairs']* df['length'])

    def runtime_capped(row):
        # Successful runs keep their runtime; timeouts are placed at the time
        # limit and all other failures slightly above it.
        if not math.isnan(row['runtime']):
            return row['runtime']
        if row['output_Err'] == 'Timeout':
            return row['timelimit']
        return row['timelimit']*1.1
    df['runtime_capped'] = df.apply(runtime_capped, axis = 1)
    df['s_per_pair_capped'] = df['runtime_capped'] / df['stats_seqpairs']

    df['editdistance'] = df['stats_insertions'] + df['stats_deletions'] + df['stats_substitutions']

    # Some specific fixes
    # Rows without an `r` value get r=0.
    # NOTE(review): the `downcast` argument of `fillna` is deprecated in recent
    # pandas releases -- confirm against the pandas version used here.
    df = df.fillna({'r': 0}, downcast='infer')

    # Remove unsupported algos
    if 'output_Err' in df.columns:
        df = df[df.output_Err != 'Unsupported']

    return df

## The one plotting function

In [None]:
def plot(df,
         name='',
         file=None,
         x='length',
         y='s_per_pair',
         # Column to use for hue and style.
         # Always change both at the same time!
         hue='algo_key',
         style='r',
         # column to use for marker size
         size=None,
         # Logarithmic axes by default
         xlog=True,
         ylog=True,
         ylim=None,
         # Opacity of the plotted points and lines
         alpha=1.0,
         # Use line instead of scatter plot?
         connect=False,
         # Draw a cone from the given filter and x
         cone=None,
         cone_x=3*10**4,
         fit=False,
         line_labels=False,
         categorical=False,
         ax=None,
         width=None,
         height=None,
         png=False
        ):
    """Draw column `y` against column `x` of `df`, one series per algorithm.

    Rows with a null `y` are dropped first; when every `y` is null a message
    is printed and nothing is drawn.

    Parameters:
    - df: results dataframe; needs the columns named by `x`, `y`, `hue` and
      `style`, plus `algo_key` for the style lookup.
    - name: plot title (no title when empty).
    - file: when given and the figure was created here (no `ax` passed), the
      figure is saved as `plots/<file>.pdf`, and as `.png` too when `png`.
    - x, y: names of the columns to plot.
    - hue, style: columns that together define the groups; each group is drawn
      as its own series. 'r' is dropped from the grouping unless both r=1 and
      r=2 occur in the data.
    - size: accepted for compatibility; currently unused.
    - xlog, ylog: use logarithmic axes.
    - ylim: optional `(low, high)` pair overriding the y limits.
    - alpha: opacity of points and lines.
    - connect: connect the points of a series with a line.
    - cone, cone_x: when `cone` is given it is called with `df` and must return
      a row mask; the region between linear and quadratic growth is shaded,
      starting at `x == cone_x` from the largest selected `y`.
    - fit: draw a power-law fit per series and label it with its exponent
      (requires x='length' and log-log axes).
    - line_labels: write each series' label at its right-most point.
    - categorical: draw a swarm plot with overlaid box plot instead of
      points/lines.
    - ax: axes to draw into; a new figure is created when omitted.
    - width, height: size in inches of a newly created figure (default 3 x 2).

    Returns None.
    """

    if df[y].isna().all():
        print(f"All values of {y} are nan.")
        return

    df = df[df[y].notnull()]
    assert not df.empty

    # We group data by this set of keys.
    split = [hue, style]

    # Remove 'r' from the split if not both r=1 and r=2 are present,
    # to prevent redundant (r=1) in plots.
    if 'r' in split and 'r' in df.columns:
        if not (1 in df.r.values and 2 in df.r.values):
            split.remove('r')

    # Group the data into datapoints per line
    groups = df.groupby(split, sort=False)

    # Not sure if needed actually.
    sns.reset_defaults()
    sns.set_context(None) # 'paper', 'notebook'

    # Set up the figure if not provided.
    if ax is None:
        fig, ax = plt.subplots()
        fig.set_size_inches(width if width else 3, height if height else 2, forward=True)
        hasax = False
    else:
        hasax = True

    # Set log scales
    ax.set(xscale='log' if xlog else 'linear', yscale='log' if ylog else 'linear')

    # limit number of ticks
    if ylog:
        ax.locator_params(axis='y', numticks=6)
    else:
        ax.locator_params(axis='y', nbins=6)


    # PLOTTING

    if not categorical:
        # Show a scatterplot of points.
        # Each group is plotted separately for more control over its style.
        for _, group in groups:
            color, linestyle, grouplabel = algorithm_display(group.iloc[0], split)

            ax.plot(x,
                    y,
                    data=group.sort_values(by=x),
                    color=color,
                    linestyle=linestyle if connect else 'None',
                    marker='o',
                    alpha=alpha,
                    dash_capstyle = 'round',
                    label=grouplabel,
                    zorder=2,
                    markersize=markersize,
                    linewidth=linewidth
                   )
    else:
        # Overlay a boxplot and swarmplot on top of each other
        sns.swarmplot(data=df,
                        x=x,
                        y=y,
                        hue=hue,
                        palette=palette,
                        ax=ax,
                        size=3,
                        linewidth=0,
                        edgecolor='gray',
                        zorder=9,
                        dodge=False,
        )
        sns.boxplot(data=df,
                    x=x,
                    y=y,
                    zorder=10,
                    ax=ax,
                    boxprops={'facecolor':'None'},
                    showfliers=False,
                    linewidth=linewidth,
                   )

    # TEXT

    # Title
    if name:
        ax.set_title(name, y=1.05)

    # Remove legend
    ax.legend().remove()

    # BACKGROUND
    ax.set_facecolor("#F8F8F8")
    ax.set_axisbelow(True)
    ax.grid(False)
    ax.grid(True, axis="y", which="major", color="white", alpha=1, zorder=0)


    # AXES

    # Labels
    ax.set_xlabel(column_display_name(x))  # weight='bold',
    ax.set_ylabel(column_display_name(y), rotation=0, ha="left")
    ax.yaxis.set_label_coords(-0.10, 1.00)

    # Limits: on log axes, leave a multiplicative margin around the data.
    x_margin = 1.5
    y_margin = 3
    if xlog:
        ax.set_xlim(df[x].min() / x_margin, df[x].max() * x_margin)

    if ylog:
        ax.set_ylim(df[y].min() / y_margin, df[y].max() * y_margin)

    # Start linear scales at 0.
    if not xlog and not categorical and x != 'job_costs_open':
        ax.set(xlim=(0,None))
    if not ylog:
        ax.set(ylim=(0,None))
    if ylim is not None:
        ax.set_ylim(ylim[0], ylim[1])


    # Show bottom spine, and left spine when xlog=false
    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)
    ax.spines['bottom'].set_visible(True)
    ax.spines['left'].set_visible(not xlog and not categorical)

    # Format % scales.
    if x in ['errorrate', 'divergence']:
        ax.xaxis.set_major_formatter(mtick.PercentFormatter(1.0, decimals=0))

    # Show major ticks
    ax.tick_params(
        axis="both",
        which="major",
        bottom=True,
        top=False,
        left=True,
        right=False,
    )
    # No minor ticks
    ax.tick_params(
        axis="both",
        which="minor",
        bottom=False,
        top=False,
        left=False,
        right=False,
        labelbottom=False,  # labels along the bottom edge are off
    )
    # Do show minor ticks for small log ranges
    if ylog:
         ax.tick_params(axis="y", which="minor", left=True)


    # CONE
    # Fills the region between x**1 and x**2
    if cone:
        x0 = cone_x
        x_max = x_margin * df[x].max()
        x_range = (x0, x_max)

        y0 = df[cone(df) & (df[x] == cone_x)][y].max()
        y_lin = (y0, y0 * (x_max / x0) ** 1)
        y_quad = (y0, y0 * (x_max / x0) ** 2)
        ax.fill_between(x_range, y_lin, y_quad, color="grey", alpha=0.15, zorder=0.4)

    # TIME LIMIT
    if y=='runtime_capped' or (y=='s_per_pair_capped' and x != 'length'):
        # The time limit of the first row is taken to hold for all rows.
        # NOTE(review): for y == 's_per_pair_capped' the plotted values are
        # divided by the number of sequence pairs, while this line is drawn at
        # the undivided time limit -- confirm this is intended.
        timelimit = df.timelimit.iloc[0]
        #assert df[df.runtime.isna()].timelimit.eq(timelimit).all()
        # Draw a red line at the timelimit.
        ax.axhline(y=timelimit, color="red", linestyle="-", alpha=1, linewidth=0.5)

    # POLY FIT

    def angle(slope):
        # Convert a slope given in axis-scale units (log10 units on log axes)
        # into the on-screen rotation in degrees, using the current axis
        # limits and the pixel size of the axes.
        x_min, x_max = ax.get_xlim()
        y_min, y_max = ax.get_ylim()
        bbox = ax.get_window_extent()
        x_sz = bbox.width
        y_sz = bbox.height
        x_factor = x_sz / (np.log10(x_max) - np.log10(x_min) if xlog else x_max - x_min)
        y_factor = y_sz / (np.log10(y_max) - np.log10(y_min) if ylog else y_max - y_min)
        slope = slope * y_factor / x_factor
        return math.atan(slope)*180/math.pi

    if fit:
        assert x=='length' and xlog and ylog, "Polynomial fits only work in log-log plots with x=length"
        for _, group in groups:
            color, linestyle, grouplabel = algorithm_display(group.iloc[0], split)

            filtered = group[group.runtime.notnull()]
            ps = filtered[[x,y]].dropna()
            if ps.empty:
                # Nothing to fit and nowhere to put a label.
                continue
            xmin, xmax = filtered[x].min(), filtered[x].max()
            if len(ps) > 1:
                # Linear fit in log-log space: y ~ exp(offset) * x**exponent.
                exponent, offset = np.polyfit(np.log(ps[x]), np.log(ps[y]), 1)
                # Extra {{ and }} are for the math-mode superscript
                fit_label = rf"{grouplabel} $\sim n^{{{exponent:0.2f}}}$"

                ymin = xmin**exponent * np.exp(offset)
                ymax = xmax**exponent * np.exp(offset)
                # line from xmin to xmax (use plt.axline for infinite line)
                ax.plot([xmin, xmax], [ymin, ymax], color=color, linestyle=linestyle, alpha=1, dash_capstyle = 'round', label=grouplabel, zorder=2, linewidth=linewidth)
                rotation = angle(exponent)
            else:
                # A single point cannot be fitted: label the point itself,
                # without an exponent and without rotation.
                fit_label = grouplabel
                ymax = ps[y].iloc[0]
                rotation = 0

            ax.text(
                xmax,
                min(ymax, ax.get_ylim()[1]),
                fit_label,
                color=color,
                ha="right",
                va="bottom",
                size=labelsize,
                alpha=1,
                rotation=rotation,
                rotation_mode='anchor',
            )
    if line_labels:
        # If no legend and no fits are shown, show manual labels instead

        def axis_scale(value, log):
            # Position of `value` in the units `angle` expects.
            return np.log10(value) if log else value

        for _, group in groups:
            color, linestyle, grouplabel = algorithm_display(group.iloc[0], split)
            # The label is anchored at the right-most point of the series.
            max_idx = group[x].idxmax()
            label_x = group[x][max_idx]
            label_y = min(group[y][max_idx], ax.get_ylim()[1])

            # Rotate the label along the end of the series: the slope between
            # the last point and the one two before it (or the first point
            # when the series is shorter than that).
            by_x = group[x].argsort()
            last = group.iloc[by_x.iloc[-1]]
            before = group.iloc[by_x.iloc[-min(3, len(by_x))]]
            dx = axis_scale(last[x], xlog) - axis_scale(before[x], xlog)
            dy = axis_scale(last[y], ylog) - axis_scale(before[y], ylog)
            slope = dy / dx if dx != 0 else 0
            if not np.isfinite(slope):
                slope = 0
            ax.text(
                label_x,
                label_y,
                grouplabel,
                color=color,
                ha="right",
                va="bottom",
                size=labelsize,
                alpha=1,
                rotation=angle(slope),
                rotation_mode='anchor',
            )

    # Only figures created here are saved; a caller that passed its own axes
    # is responsible for saving the figure.
    if not hasax and file:
        pdf_path = f"plots/{file}.pdf"
        # Make sure the output directory exists before writing into it.
        Path(pdf_path).parent.mkdir(parents=True, exist_ok=True)
        plt.savefig(pdf_path, dpi=300, bbox_inches='tight')
        if png:
            plt.savefig(f"plots/{file}.png", dpi=300, bbox_inches='tight')

# Boxplots on testdata

In [None]:
from pathlib import Path
warnings.simplefilter('ignore', category=UserWarning)
df = read_results("results/real.json")

def boxplot(df, w, **kwargs):
    """Draw one runtime box/swarm plot per dataset, stacked in a grid.

    `w` is the width in inches of one grid column; each grid row is 3.7
    inches high. Extra keyword arguments are forwarded to `plot`.
    """
    # Flip to print the runtime quantiles per algorithm for each dataset.
    show_quantiles = False

    n_cols = 1
    n_datasets = len(df.dataset.unique())
    n_rows = (n_datasets + n_cols - 1) // n_cols
    # squeeze=False always yields a 2D array of axes; flatten it column by column.
    fig, grid = plt.subplots(n_rows, n_cols, figsize=(w * n_cols, 3.7 * n_rows), squeeze=False)
    axes = grid.flatten(order='F')

    # NOTE(review): groupby sorts the datasets by name here, which is not
    # necessarily the order of `dataset_order` -- confirm this is intended.
    for (dataset, rows), ax in zip(df.groupby('dataset'), axes):
        plot(rows, x='algo_pretty', y='runtime_capped', xlog=False, ylog=True, categorical=True, ylim=(0.01, 15), ax=ax, **kwargs)
        # Ticks on both sides, and darker grid lines than `plot` draws.
        ax.tick_params(axis="y", which="both", right=True)
        for which, grid_alpha in (("major", .5), ("minor", .1)):
            ax.grid(True, axis="y", which=which, color="black", alpha=grid_alpha, zorder=0, lw=0.5)

        ax.set_xlabel(dataset_pretty.get(dataset, dataset))

        if show_quantiles:
            print(dataset)
            for (algo, algo_rows) in rows.groupby('algo_pretty', sort=False):
                qs = [algo_rows.runtime_capped.quantile(q) for q in [0, 0.25, 0.5, 0.75, 1]]
                name = algo.strip().replace("\n", " ")
                print(f'{name:20}: {qs[0]:0.3f} {qs[1]:0.3f} {qs[2]:0.3f} {qs[3]:0.3f} {qs[4]:0.3f}')

    fig.subplots_adjust(wspace=.05, hspace=0.6)
    plt.show()

boxplot(df, 11)

# Memory usage

In [None]:
# Table of memory usage per algorithm (rows) and dataset (columns).
df = read_results("results/real.json")
# Scale the measurements down by 10^6.
# NOTE(review): presumably this converts bytes to MB -- confirm the unit.
df['memory'] = df['memory'] / 1000000
# Runs without a memory measurement count as a very large value for the
# median, so that failed runs push the median up rather than being ignored.
df['capped_memory'] = df['memory'].fillna(1000000)
#df = df[df.algo_key.isin(['edlib', 'biwfa', 'gcsh-dt', 'astarnw', 'astarnw-sparse'])]
table = df.pivot_table(
    index='algo_pretty',
    columns=['dataset'],
    values=['capped_memory', 'memory'],
    # String names select the pandas reductions directly.
    aggfunc={'capped_memory': 'median', 'memory': 'max'},
    sort=False,
)
# The nullable integer dtype keeps cells without any successful run (NaN max)
# as missing values instead of failing the integer conversion.
table = table.round(0).astype('Int64')
table = table.rename({'capped_memory': 'Median', 'memory': 'Max'}, axis='columns')
# Put the dataset on the outer column level, with the two statistics below it.
table = table.swaplevel(axis=1)
table = table.sort_index(axis=1, level=0, kind='stable', ascending=False)
display(table)
#print(table.to_latex())

# Sanity check: CPU frequency
Make sure that the CPU frequency is consistent over all experiments.

In [None]:
# Every measured CPU frequency must lie strictly between these bounds.
# NOTE(review): presumably MHz -- confirm the unit of the measurements.
freq_lower_bound = 2550
freq_upper_bound = 2650

df = read_results("results/real.json")
df = df.rename(columns={
    'output_Ok_measured_cpufreqstart': 'freqstart',
    'output_Ok_measured_cpufreqend': 'freqend',
})
for c in ['freqstart', 'freqend']:
    lowest, highest = df[c].min(), df[c].max()
    print(lowest, highest)
    assert lowest > freq_lower_bound
    assert highest < freq_upper_bound