# Block Aligner Benchmark Analysis and Visualizations

This notebook contains code for collecting, cleaning, and analyzing data produced by block aligner's experiments.

To run this, you will need to install all the libraries imported below, including [altair-saver](https://github.com/altair-viz/altair_saver) (which has some extra dependencies for PDF saving) and [altair-data-server](https://github.com/altair-viz/altair_data_server).

Run each cell one by one to reproduce the experiments. This may take a while. For accurate benchmarking, it is recommended to run the entire notebook from the command line with `nbconvert`.

In [1]:
import altair as alt
from altair_saver import save
from altair import datum
import pandas as pd
from io import StringIO

alt.data_transformers.enable("data_server")

DataTransformerRegistry.enable('data_server')

In [2]:
def csv_to_pandas(csv, d = "\\s*,\\s*", t = None):
    """Parse a sequence of text lines as delimited data and return a DataFrame.

    csv: iterable of strings, one per row (header row first); the lines are
        joined with newlines before being handed to pandas.
    d: separator passed to `pd.read_csv` as `sep`. The default is a regex that
        matches a comma with optional whitespace on either side.
    t: thousands separator passed to `pd.read_csv` (None disables it).

    "#" is passed as the comment character, and the Python parsing engine is
    used (required for regex separators).
    """
    buffer = StringIO("\n".join(csv))
    return pd.read_csv(buffer, sep = d, thousands = t, comment = "#", engine = "python")

## Prefix Scan Benchmark

In [3]:
# Run the prefix scan benchmarks (AVX2 feature) from the parent directory and keep
# fields 2 and 5 of every 'bench:' line, tab-separated. In the recorded output these
# are the benchmark name and its time.
output = !cd .. && cargo bench --features simd_avx2 --quiet -- prefix_scan | grep 'bench:' | awk '{print $2"\t"$5}'
# Prepend a header row so the captured lines can be parsed as a table.
output.insert(0, "algorithm\ttime")
output

['algorithm\ttime', 'bench_naive_prefix_scan\t26', 'bench_opt_prefix_scan\t18']

In [4]:
# Parse the tab-separated lines into a DataFrame, treating "," as the thousands separator.
data = csv_to_pandas(output, d = "\t", t = ",")
data

Unnamed: 0,algorithm,time
0,bench_naive_prefix_scan,26
1,bench_opt_prefix_scan,18


In [5]:
# Shorten the raw benchmark names to the labels used in the chart.
# `replace` is used instead of `map`: `map` with a dict turns every value that is not a
# key into NaN, so re-running this cell (when the names are already shortened) would
# wipe the column. `replace` leaves non-matching values untouched.
data["algorithm"] = data["algorithm"].replace({
    "bench_naive_prefix_scan": "naive",
    "bench_opt_prefix_scan": "ours"
})
data

Unnamed: 0,algorithm,time
0,naive,26
1,ours,18


Prefix Scan Benchmark (AVX2)

In [6]:
# Horizontal bar chart of the prefix scan timings: one bar per algorithm, colored by
# algorithm with the legend suppressed.
time_axis = alt.X("time", axis = alt.Axis(title = "time (ns)"))
bar_color = alt.Color("algorithm", legend = None)
c = (
    alt.Chart(data)
    .mark_bar()
    .encode(x = time_axis, y = "algorithm", color = bar_color)
    .properties(width = 150)
)
save(c, "prefix_scan_bench.pdf")
c

## Random Data Benchmark

In [7]:
# Run the benchmarks matching "bench_" (AVX2 feature) from the parent directory, drop the
# prefix scan ones, and keep fields 2 and 5 of every 'bench:' line, tab-separated.
# In the recorded output these are the benchmark name and its time.
output = !cd .. && cargo bench --features simd_avx2 --quiet -- bench_ | grep 'bench:' | grep -v 'prefix_scan' | awk '{print $2"\t"$5}'
output

['bench_parasailors_aa_1000_10000\t51,780,001',
 'bench_parasailors_aa_100_1000\t602,637',
 'bench_parasailors_aa_10_100\t20,467',
 'bench_rustbio_aa_100_1000\t15,366,641',
 'bench_rustbio_aa_10_100\t166,568',
 'bench_scan_aa_1000_10000\t239,823',
 'bench_scan_aa_1000_10000_insert\t8,664,998',
 'bench_scan_aa_1000_10000_small\t223,824',
 'bench_scan_aa_1000_10000_trace\t1,879,637',
 'bench_scan_aa_100_1000\t24,559',
 'bench_scan_aa_100_1000_insert\t49,989',
 'bench_scan_aa_100_1000_small\t22,348',
 'bench_scan_aa_100_1000_trace\t520,984',
 'bench_scan_aa_10_100\t3,977',
 'bench_scan_aa_10_100_insert\t4,033',
 'bench_scan_aa_10_100_small\t3,422',
 'bench_scan_aa_10_100_trace\t364,466',
 'bench_scan_nuc_1000_10000\t221,418',
 'bench_scan_nuc_100_1000\t24,314',
 'bench_triple_accel_1000_10000\t8,165,956',
 'bench_triple_accel_100_1000\t24,592']

In [8]:
# Header row; every cleaned line must end up with this many tab-separated fields.
header = "algorithm\talphabet\tk\tlength\tproperty\ttime"
n_fields = len(header.split("\t"))

# Benchmark name prefixes and the "algorithm\talphabet" labels that replace them
# (parallel lists, matched in order).
names = ["parasailors_aa", "rustbio_aa", "scan_aa", "scan_nuc", "triple_accel"]
new_names = ["parasailors\tprotein", "rust bio\tprotein", "ours\tprotein", "ours\tnucleotide", "triple accel\tnucleotide"]

cleaned = [header]
for line in output:
    # Strip the leading "bench_".
    row = line[len("bench_"):]
    # Swap the first matching prefix for its label and turn the remaining underscores
    # into column separators.
    match = next(((n, nn) for n, nn in zip(names, new_names) if row.startswith(n)), None)
    if match is not None:
        n, nn = match
        row = nn + row[len(n):].replace("_", "\t")
    # Rows that are short a field get "default" inserted just before the last column (time).
    if len(row.split("\t")) < n_fields:
        head, last = row.rsplit("\t", 1)
        row = head + "\tdefault\t" + last
    cleaned.append(row)

cleaned

['algorithm\talphabet\tk\tlength\tproperty\ttime',
 'parasailors\tprotein\t1000\t10000\tdefault\t51,780,001',
 'parasailors\tprotein\t100\t1000\tdefault\t602,637',
 'parasailors\tprotein\t10\t100\tdefault\t20,467',
 'rust bio\tprotein\t100\t1000\tdefault\t15,366,641',
 'rust bio\tprotein\t10\t100\tdefault\t166,568',
 'ours\tprotein\t1000\t10000\tdefault\t239,823',
 'ours\tprotein\t1000\t10000\tinsert\t8,664,998',
 'ours\tprotein\t1000\t10000\tsmall\t223,824',
 'ours\tprotein\t1000\t10000\ttrace\t1,879,637',
 'ours\tprotein\t100\t1000\tdefault\t24,559',
 'ours\tprotein\t100\t1000\tinsert\t49,989',
 'ours\tprotein\t100\t1000\tsmall\t22,348',
 'ours\tprotein\t100\t1000\ttrace\t520,984',
 'ours\tprotein\t10\t100\tdefault\t3,977',
 'ours\tprotein\t10\t100\tinsert\t4,033',
 'ours\tprotein\t10\t100\tsmall\t3,422',
 'ours\tprotein\t10\t100\ttrace\t364,466',
 'ours\tnucleotide\t1000\t10000\tdefault\t221,418',
 'ours\tnucleotide\t100\t1000\tdefault\t24,314',
 'triple accel\tnucleotide\t1000\t100

In [9]:
# Parse the cleaned tab-separated rows into a DataFrame, treating "," as the thousands separator.
data = csv_to_pandas(cleaned, d = "\t", t = ",")
data

Unnamed: 0,algorithm,alphabet,k,length,property,time
0,parasailors,protein,1000,10000,default,51780001
1,parasailors,protein,100,1000,default,602637
2,parasailors,protein,10,100,default,20467
3,rust bio,protein,100,1000,default,15366641
4,rust bio,protein,10,100,default,166568
5,ours,protein,1000,10000,default,239823
6,ours,protein,1000,10000,insert,8664998
7,ours,protein,1000,10000,small,223824
8,ours,protein,1000,10000,trace,1879637
9,ours,protein,100,1000,default,24559


In [10]:
# Combined "algorithm property" label; the charts below use it as the y axis category.
data["algorithm property"] = data["algorithm"] + " " + data["property"]
# Rescale times by 1/1000 (the chart axes below label the result as "us").
# NOTE: this mutates `data` in place and is not idempotent; re-running the cell divides again.
data["time"] /= 1000

Random Protein Sequences Benchmark (AVX2)

In [11]:
# Point chart of benchmark times for the protein rows: one row per "algorithm property"
# label (sorted by time), log-scaled time axis, sequence length shown by color and shape.
time_x = alt.X(
    "time",
    axis = alt.Axis(title = "time (us)"),
    scale = alt.Scale(type = "log", domain = [1, 50000])
)
label_y = alt.Y(
    "algorithm property",
    axis = alt.Axis(title = "algorithm", grid = True),
    sort = alt.EncodingSortField(field = "time")
)
points = alt.Chart(data).mark_point(opacity = 1, filled = True)
c = points.encode(
    x = time_x,
    y = label_y,
    color = "length:N",
    shape = "length:N"
).transform_filter(
    datum.alphabet == "protein"
).properties(
    width = 200,
    height = 150
)
save(c, "random_protein_bench.pdf")
c

Random DNA Sequences Benchmark (AVX2)

In [12]:
# Point chart of benchmark times for the nucleotide rows. The color/shape scale domain is
# pinned to all three lengths so a given length is drawn with the same color and shape
# position regardless of which lengths are present in the filtered data.
length_domain = [100, 1000, 10000]
c = alt.Chart(data).mark_point(opacity = 1, filled = True).encode(
    x = alt.X("time", axis = alt.Axis(title = "time (us)"), scale = alt.Scale(type = "log", domain = [1, 50000])),
    y = alt.Y("algorithm property", axis = alt.Axis(title = "algorithm", grid = True), sort = alt.EncodingSortField(field = "time")),
    color = alt.Color("length:N", scale = alt.Scale(domain = length_domain)),
    # The shape channel must be built with alt.Shape; the original passed an alt.Color
    # object here, which is the wrong channel class for this encoding.
    shape = alt.Shape("length:N", scale = alt.Scale(domain = length_domain))
).transform_filter(
    datum.alphabet == "nucleotide"
).properties(
    width = 200,
    height = 50
)
save(c, "random_dna_bench.pdf")
c

## Uniclust 30 Data Benchmark

In [13]:
# Run the `uc_bench` example in release mode (AVX2 feature) from the parent directory and
# capture its output lines.
output = !cd .. && cargo run --example uc_bench --release --features simd_avx2 --quiet
output

['# time (s)',
 'algorithm, dataset, size, time',
 'ours (no trace), uc30, 32-32, 0.056466626',
 'ours (no trace), uc30 0.95, 32-32, 0.06183324',
 'ours (no trace), uc30, 32-256, 0.091996793',
 'ours (no trace), uc30 0.95, 32-256, 0.080232473',
 'ours (no trace), uc30, 256-256, 0.199835617',
 'ours (no trace), uc30 0.95, 256-256, 0.223796559',
 'ours (trace), uc30, 32-256, 0.172806891',
 'ours (trace), uc30 0.95, 32-256, 0.155278593',
 'parasail, uc30, full, 0.884179105',
 'parasail, uc30 0.95, full, 1.02361492']

In [14]:
# Parse the captured lines with the default comma separator; csv_to_pandas passes "#" as
# the comment character, so the "# ..." lines are not treated as data.
data = csv_to_pandas(output)
data

Unnamed: 0,algorithm,dataset,size,time
0,ours (no trace),uc30,32-32,0.056467
1,ours (no trace),uc30 0.95,32-32,0.061833
2,ours (no trace),uc30,32-256,0.091997
3,ours (no trace),uc30 0.95,32-256,0.080232
4,ours (no trace),uc30,256-256,0.199836
5,ours (no trace),uc30 0.95,256-256,0.223797
6,ours (trace),uc30,32-256,0.172807
7,ours (trace),uc30 0.95,32-256,0.155279
8,parasail,uc30,full,0.884179
9,parasail,uc30 0.95,full,1.023615


Uniclust30 Benchmark (AVX2)

In [15]:
# Bars: time per algorithm, restricted to the 32-256 block size rows plus parasail.
bars = alt.Chart(data).mark_bar().encode(
    x = alt.X("algorithm", axis = None),
    y = alt.Y(
        "time",
        axis = alt.Axis(title = "time (s)"),
        scale = alt.Scale(domain = [0.0, 1.0])
    ),
    color = "algorithm"
).transform_filter(
    (datum.size == "32-256") | (datum.algorithm == "parasail")
)
# Text labels showing each bar's value, drawn slightly above the bar.
t = bars.mark_text(dy = -4, size = 7).encode(text = alt.Text("time", format = ".2"))
# Layer bars and labels, then facet into one column per dataset.
layered = (bars + t).properties(width = 50, height = 100)
c = layered.facet(
    column = alt.Column("dataset", header = alt.Header(orient = "bottom")),
).configure_range(
    category = {"scheme": "dark2"}
)
save(c, "uniclust30_bench.pdf")
c

Uniclust30 Block Size Benchmark (AVX2)

In [16]:
# Display order for the block sizes, shared by the x axis and the color legend.
size_order = ["32-32", "32-256", "256-256"]

# Bars: time per block size for the "ours (no trace)" rows only.
bars = alt.Chart(data).mark_bar().encode(
    x = alt.X("size", axis = None, sort = size_order),
    y = alt.Y(
        "time",
        axis = alt.Axis(title = "time (s)"),
        scale = alt.Scale(domain = [0.0, 1.0])
    ),
    color = alt.Color("size", sort = size_order)
).transform_filter(
    datum.algorithm == "ours (no trace)"
)
# Text labels showing each bar's value, drawn slightly above the bar.
t = bars.mark_text(dy = -4, size = 7).encode(text = alt.Text("time", format = ".2"))
# Layer bars and labels, then facet into one column per dataset.
layered = (bars + t).properties(width = 50, height = 100)
c = layered.facet(
    column = alt.Column("dataset", header = alt.Header(orient = "bottom")),
)
save(c, "uniclust30_size_bench.pdf")
c

## DNA Global Alignment Benchmark

In [17]:
# Run the `nanopore_bench_global` example in release mode (AVX2 feature) from the parent
# directory and capture its output lines.
output = !cd .. && cargo run --example nanopore_bench_global --release --features simd_avx2 --quiet
output

['# time (s)',
 'dataset, algorithm, time',
 'illumina, ours (32-32), 0.19313953199996806',
 'illumina, ours (32-32), 0.20005018599994973',
 'illumina, edlib, 0.37557256500000386',
 'illumina, wfa2, 0.027744283999997555',
 'illumina, wfa2 adaptive, 0.027990128999999708',
 'illumina, parasail, 2.1748713370000026',
 'nanopore 1kbp, ours (32-32), 0.20252610700000026',
 'nanopore 1kbp, ours (32-128), 0.24006039099999973',
 'nanopore 1kbp, edlib, 0.3721879099999994',
 'nanopore 1kbp, wfa2, 0.7648904769999965',
 'nanopore 1kbp, wfa2 adaptive, 0.7505688150000004',
 'nanopore 1kbp, parasail, 3.9451183440000004',
 'nanopore <10kbp, ours (128-128), 1.1177290729999971',
 'nanopore <10kbp, ours (128-1024), 1.304336278999998',
 'nanopore <10kbp, edlib, 1.3052689130000017',
 'nanopore <10kbp, wfa2, 7.684882509999996',
 'nanopore <10kbp, wfa2 adaptive, 3.43180103799999',
 'nanopore <10kbp, parasail, 30.325974206999955',
 'nanopore <50kbp, ours (512-512), 24.771258347999904',
 'nanopore <50kbp, ours (

In [18]:
# Parse the captured lines with the default comma separator; csv_to_pandas passes "#" as
# the comment character, so the "# ..." lines are not treated as data.
data = csv_to_pandas(output)
data

Unnamed: 0,dataset,algorithm,time
0,illumina,ours (32-32),0.19314
1,illumina,ours (32-32),0.20005
2,illumina,edlib,0.375573
3,illumina,wfa2,0.027744
4,illumina,wfa2 adaptive,0.02799
5,illumina,parasail,2.174871
6,nanopore 1kbp,ours (32-32),0.202526
7,nanopore 1kbp,ours (32-128),0.24006
8,nanopore 1kbp,edlib,0.372188
9,nanopore 1kbp,wfa2,0.76489


In [19]:
# Keep one row per (dataset, algorithm) pair; the captured output above repeats
# "illumina, ours (32-32)". drop_duplicates keeps the first occurrence by default.
data = data.drop_duplicates(["dataset", "algorithm"])

DNA Global Alignment Benchmark (AVX2)

In [20]:
# Fixed left-to-right order of the algorithms on every panel's x axis.
algos = ["ours (32-32)", "ours (32-128)", "ours (128-128)", "ours (128-1024)", "ours (512-512)", "ours (512-4096)", "edlib", "wfa2", "wfa2 adaptive", "parasail"]
datasets = ["illumina", "nanopore 1kbp", "nanopore <10kbp", "nanopore <50kbp"]

# Build one bar chart (with value labels) per dataset; the panels are separate charts
# rather than facets of one chart.
panels = []
for d in datasets:
    subset = data.loc[data["dataset"] == d]
    bars = alt.Chart(subset).mark_bar().encode(
        x = alt.X("algorithm", sort = algos, title = None),
        y = alt.Y("time", axis = alt.Axis(title = "time (s)")),
        color = alt.Color("dataset", legend = None)
    )
    labels = bars.mark_text(dy = -4, size = 7).encode(text = alt.Text("time", format = ".2"))
    c = (bars + labels).properties(
        width = 100,
        height = 150
    ).facet(
        column = alt.Column("dataset", title = None, header = alt.Header(orient = "bottom"))
    )
    panels.append(c)

# Concatenate the panels horizontally, left to right.
c2 = panels[0]
for panel in panels[1:]:
    c2 = c2 | panel
save(c2, "dna_global_bench.pdf")
c2

## Nanopore Data Benchmark Setup

To run the benchmarks below, you need to clone the following repos, place them in the same directory where this repo (block aligner) is located, and follow their setup instructions:
* [diff-bench-paper](https://github.com/Daniel-Liu-c0deb0t/diff-bench-paper)
* [adaptivebandbench](https://github.com/Daniel-Liu-c0deb0t/adaptivebandbench)

## Nanopore Data Benchmark

In [21]:
# Run the `nanopore_bench` example in release mode (AVX2 feature) from the parent
# directory and capture its output lines.
output = !cd .. && cargo run --example nanopore_bench --release --features simd_avx2 --quiet
output

['# time (s)',
 'algorithm, dataset, time',
 'ours (no trace 32-32), nanopore 25kbp, 1.037147543',
 'ours (no trace 32-32), random, 2.588213908',
 'ours (trace 32-32), nanopore 25kbp, 1.435873778',
 'ours (trace 32-32), random, 3.512781687',
 'ours (trace 32-64), nanopore 25kbp, 1.7466684369999999',
 'ours (trace 32-64), random, 3.770565065']

In [22]:
# Parse the captured lines with the default comma separator; csv_to_pandas passes "#" as
# the comment character, so the "# ..." lines are not treated as data.
data = csv_to_pandas(output)
data

Unnamed: 0,algorithm,dataset,time
0,ours (no trace 32-32),nanopore 25kbp,1.037148
1,ours (no trace 32-32),random,2.588214
2,ours (trace 32-32),nanopore 25kbp,1.435874
3,ours (trace 32-32),random,3.512782
4,ours (trace 32-64),nanopore 25kbp,1.746668
5,ours (trace 32-64),random,3.770565


In [23]:
output2 = !cd ../../diff-bench-paper/supplementary_data/benchmark_codes && ./custom_bench.sh

for i, o in enumerate(output2):
    if o.startswith("cells("):
        break
output2 = output2[i + 1:]

output2.insert(0, "algorithm\tfill time\ttrace time\tconvert time\ttotal time\tscore\tfail")
output2

['algorithm\tfill time\ttrace time\tconvert time\ttotal time\tscore\tfail',
 'editdist\t501718000\t179593000\t70828000\t752139000\t6880489\t0',
 'non-diff\t714455000\t294347000\t64626000\t1073428000\t27124786\t52',
 'diff-raw\t652971000\t229122000\t66711000\t948804000\t27291141\t32',
 'libgaba\t469139000\t162786000\t33930000\t665855000\t27121546\t53',
 'edlib\t29745290000\t20522605000\t113502000\t50381397000\t37\t0',
 'seqan\t93122235000\t0\t0\t93122235000\t0\t0']

In [24]:
# Parse the tab-separated result rows into a DataFrame.
data2 = csv_to_pandas(output2, d = "\t")
data2

Unnamed: 0,algorithm,fill time,trace time,convert time,total time,score,fail
0,editdist,501718000,179593000,70828000,752139000,6880489,0
1,non-diff,714455000,294347000,64626000,1073428000,27124786,52
2,diff-raw,652971000,229122000,66711000,948804000,27291141,32
3,libgaba,469139000,162786000,33930000,665855000,27121546,53
4,edlib,29745290000,20522605000,113502000,50381397000,37,0
5,seqan,93122235000,0,0,93122235000,0,0


In [25]:
# Keep only the algorithm name and its fill time, rename that column to "time" so it
# lines up with the block aligner results, and rescale it by 1e-9
# (presumably ns -> s; the chart below labels this axis "time (s)").
cleaned2 = (
    data2
    .drop(columns = ["trace time", "convert time", "total time", "score", "fail"])
    .rename(columns = {"fill time": "time"})
    .assign(time = lambda frame: frame["time"] / 1e9)
)
cleaned2

Unnamed: 0,algorithm,time
0,editdist,0.501718
1,non-diff,0.714455
2,diff-raw,0.652971
3,libgaba,0.469139
4,edlib,29.74529
5,seqan,93.122235


In [26]:
# Keep only the "nanopore 25kbp" rows of the block aligner results. Selecting by dataset
# name replaces the hard-coded row indices [1, 3, 5] (the "random" rows), which break
# as soon as the benchmark prints its rows in a different order or count.
cleaned = data.loc[data["dataset"] == "nanopore 25kbp"].drop(columns = ["dataset"])
# Stack the other tools' results underneath. DataFrame.append was removed in pandas 2.0;
# pd.concat with ignore_index = True produces the same renumbered frame.
cleaned = pd.concat([cleaned, cleaned2], ignore_index = True)
cleaned

Unnamed: 0,algorithm,time
0,ours (no trace 32-32),1.037148
1,ours (trace 32-32),1.435874
2,ours (trace 32-64),1.746668
3,editdist,0.501718
4,non-diff,0.714455
5,diff-raw,0.652971
6,libgaba,0.469139
7,edlib,29.74529
8,seqan,93.122235


25kbp Nanopore Reads Benchmark (AVX2)

In [27]:
# Encodings shared by both layers: log-scaled time on x, algorithms sorted by time on y.
time_x = alt.X("time", axis = alt.Axis(title = "time (s)", grid = True), scale = alt.Scale(type = "log"))
algo_y = alt.Y("algorithm", axis = alt.Axis(grid = True), sort = alt.EncodingSortField(field = "time"))

# Row filters: the three block aligner ("ours") configurations versus everything else.
is_other = (datum.algorithm != "ours (trace 32-32)") & (datum.algorithm != "ours (no trace 32-32)") & (datum.algorithm != "ours (trace 32-64)")
is_ours = (datum.algorithm == "ours (trace 32-32)") | (datum.algorithm == "ours (no trace 32-32)") | (datum.algorithm == "ours (trace 32-64)")

# Other tools are drawn with the default point color.
chart1 = (
    alt.Chart(cleaned)
    .mark_point(opacity = 1, filled = True)
    .encode(x = time_x, y = algo_y)
    .transform_filter(is_other)
)

# Block aligner results are highlighted in red.
chart2 = (
    alt.Chart(cleaned)
    .mark_point(color = "red", filled = True)
    .encode(x = time_x, y = algo_y)
    .transform_filter(is_ours)
)

c = (chart1 + chart2).properties(
    width = 150,
    height = 150
)
save(c, "nanopore_bench.pdf")
c

## Sequence-to-Profile Alignment Benchmark

In [28]:
# Run the `pssm_bench` example in release mode (AVX2 feature) from the parent directory
# and capture its output lines.
output = !cd .. && cargo run --example pssm_bench --release --features simd_avx2 --quiet
output

['size, time',
 '32-32, 0.129455766',
 '32-64, 0.165505947',
 '32-128, 0.205791073',
 '128-128, 0.221064389',
 'parasail, 0.604619803',
 '# Done!']

In [29]:
# Parse the captured lines with the default comma separator; csv_to_pandas passes "#" as
# the comment character, so the "# ..." lines are not treated as data.
data = csv_to_pandas(output)
data

Unnamed: 0,size,time
0,32-32,0.129456
1,32-64,0.165506
2,32-128,0.205791
3,128-128,0.221064
4,parasail,0.60462


SCOP Sequence-to-Profile Alignment Benchmark (AVX2)

In [30]:
# Display order for the x axis categories and the color assignment.
size_order = ["32-32", "32-64", "32-128", "128-128", "parasail"]

# Bars: time per block size (and parasail), excluding any "2048-2048" row.
bars = alt.Chart(data).mark_bar().encode(
    x = alt.X("size", sort = size_order),
    y = alt.Y("time", axis = alt.Axis(title = "time (s)")),
    color = alt.Color("size", sort = size_order, legend = None)
).transform_filter(
    datum.size != "2048-2048"
).properties(
    width = 75,
    height = 100
)
# Text labels showing each bar's value, drawn slightly above the bar.
t = bars.mark_text(dy = -4, size = 7).encode(text = alt.Text("time", format = ".2"))
c = bars + t
save(c, "pssm_size_bench.pdf")
c

## WASM SIMD

[Wasmtime](https://wasmtime.dev/) is needed to run the webassembly code.

In [31]:
# Run the benchmarks built for the wasm32-wasi target with the simd_wasm feature, using
# wasmtime (SIMD enabled) as the runner via CARGO_TARGET_WASM32_WASI_RUNNER. Keep fields
# 2 and 5 of every 'bench:' line, tab-separated; in the recorded output these are the
# benchmark name and its time.
output = !CARGO_TARGET_WASM32_WASI_RUNNER="wasmtime --wasm-features simd --" cargo bench --target=wasm32-wasi --features simd_wasm --quiet -- --nocapture | grep 'bench:' | awk '{print $2"\t"$5}'
output

['bench_rustbio_aa_100_1000\t25,135,980',
 'bench_rustbio_aa_10_100\t261,746',
 'bench_scan_aa_1000_10000\t781,174',
 'bench_scan_aa_1000_10000_insert\t22,903,757',
 'bench_scan_aa_1000_10000_small\t579,988',
 'bench_scan_aa_1000_10000_trace\t2,039,430',
 'bench_scan_aa_100_1000\t74,346',
 'bench_scan_aa_100_1000_insert\t156,228',
 'bench_scan_aa_100_1000_small\t56,907',
 'bench_scan_aa_100_1000_trace\t234,039',
 'bench_scan_aa_10_100\t6,843',
 'bench_scan_aa_10_100_insert\t7,115',
 'bench_scan_aa_10_100_small\t5,674',
 'bench_scan_aa_10_100_trace\t90,384',
 'bench_scan_nuc_1000_10000\t585,283',
 'bench_scan_nuc_100_1000\t59,274']

In [32]:
# Header row; every cleaned line must end up with this many tab-separated fields.
header = "algorithm\talphabet\tk\tlength\tproperty\ttime"
n_fields = len(header.split("\t"))

# Benchmark name prefixes and the "algorithm\talphabet" labels that replace them
# (parallel lists, matched in order).
names = ["rustbio_aa", "scan_aa", "scan_nuc"]
new_names = ["rust bio\tprotein", "ours\tprotein", "ours\tnucleotide"]

cleaned = [header]
for line in output:
    # Strip the leading "bench_".
    row = line[len("bench_"):]
    # Swap the first matching prefix for its label and turn the remaining underscores
    # into column separators.
    match = next(((n, nn) for n, nn in zip(names, new_names) if row.startswith(n)), None)
    if match is not None:
        n, nn = match
        row = nn + row[len(n):].replace("_", "\t")
    # Rows that are short a field get "default" inserted just before the last column (time).
    if len(row.split("\t")) < n_fields:
        head, last = row.rsplit("\t", 1)
        row = head + "\tdefault\t" + last
    cleaned.append(row)

cleaned

['algorithm\talphabet\tk\tlength\tproperty\ttime',
 'rust bio\tprotein\t100\t1000\tdefault\t25,135,980',
 'rust bio\tprotein\t10\t100\tdefault\t261,746',
 'ours\tprotein\t1000\t10000\tdefault\t781,174',
 'ours\tprotein\t1000\t10000\tinsert\t22,903,757',
 'ours\tprotein\t1000\t10000\tsmall\t579,988',
 'ours\tprotein\t1000\t10000\ttrace\t2,039,430',
 'ours\tprotein\t100\t1000\tdefault\t74,346',
 'ours\tprotein\t100\t1000\tinsert\t156,228',
 'ours\tprotein\t100\t1000\tsmall\t56,907',
 'ours\tprotein\t100\t1000\ttrace\t234,039',
 'ours\tprotein\t10\t100\tdefault\t6,843',
 'ours\tprotein\t10\t100\tinsert\t7,115',
 'ours\tprotein\t10\t100\tsmall\t5,674',
 'ours\tprotein\t10\t100\ttrace\t90,384',
 'ours\tnucleotide\t1000\t10000\tdefault\t585,283',
 'ours\tnucleotide\t100\t1000\tdefault\t59,274']

In [33]:
# Parse the cleaned tab-separated rows into a DataFrame, treating "," as the thousands separator.
data = csv_to_pandas(cleaned, d = "\t", t = ",")
data

Unnamed: 0,algorithm,alphabet,k,length,property,time
0,rust bio,protein,100,1000,default,25135980
1,rust bio,protein,10,100,default,261746
2,ours,protein,1000,10000,default,781174
3,ours,protein,1000,10000,insert,22903757
4,ours,protein,1000,10000,small,579988
5,ours,protein,1000,10000,trace,2039430
6,ours,protein,100,1000,default,74346
7,ours,protein,100,1000,insert,156228
8,ours,protein,100,1000,small,56907
9,ours,protein,100,1000,trace,234039


In [34]:
# Combined "algorithm property" label; the chart below uses it as the y axis category.
data["algorithm property"] = data["algorithm"] + " " + data["property"]
# Rescale times by 1/1000 (the chart axis below labels the result as "us").
# NOTE: this mutates `data` in place and is not idempotent; re-running the cell divides again.
data["time"] /= 1000

Random Protein Sequences Benchmark (WASM SIMD)

In [35]:
# Point chart of the WASM benchmark times for the protein rows: one row per
# "algorithm property" label (sorted by time), log-scaled time axis, sequence length
# shown by color and shape.
time_x = alt.X(
    "time",
    axis = alt.Axis(title = "time (us)"),
    scale = alt.Scale(type = "log")
)
label_y = alt.Y(
    "algorithm property",
    axis = alt.Axis(title = "algorithm", grid = True),
    sort = alt.EncodingSortField(field = "time")
)
points = alt.Chart(data).mark_point(opacity = 1, filled = True)
c = points.encode(
    x = time_x,
    y = label_y,
    color = "length:N",
    shape = "length:N"
).transform_filter(
    datum.alphabet == "protein"
).properties(
    width = 200,
    height = 150
)
save(c, "random_protein_bench_wasm.pdf")
c