# All-in-one runner

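This notebook executes `sequence_generation.ipynb` and then `embedding_and_visualization.ipynb`, one after the other, each in its own kernel process, and writes the executed results back into the notebook files.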

In [2]:
import time
import nbformat
from nbclient import NotebookClient

def run_notebook(path):
    """Execute every code cell of the notebook at ``path`` and save it in place.

    The notebook is read as nbformat v4, its code cells are executed one by
    one (with no per-cell timeout) inside a kernel set up for this call, and
    the notebook object is then written back to ``path``.

    The notebook is saved even when a cell fails, so the outputs of the cells
    that already ran are not lost after a long run.

    Args:
        path: Location of the ``.ipynb`` file. The file is overwritten.

    Raises:
        Whatever the cell execution raises (e.g. nbclient's
        ``CellExecutionError``). The exception is propagated after the
        notebook has been written, so callers still stop at the first failure.
    """
    print(f'==> START: {path}')
    start = time.time()
    nb = nbformat.read(path, as_version=4)
    client = NotebookClient(nb, timeout=None)
    total = len(nb.cells)
    succeeded = False
    try:
        with client.setup_kernel():
            for idx, cell in enumerate(nb.cells):
                if cell.cell_type != 'code':
                    continue
                # The counter is the cell's position among ALL cells (markdown
                # included), which is why the printed numbers skip values.
                print(f'  -> Cell {idx + 1}/{total}')
                client.execute_cell(cell, idx)
        succeeded = True
    finally:
        # Persist whatever was produced, success or not, before reporting.
        nbformat.write(nb, path)
        elapsed = time.time() - start
        status = 'DONE' if succeeded else 'FAILED'
        print(f'<== {status}: {path} ({elapsed:.1f}s)')

# Run the notebooks in this fixed order; an exception in one prevents the
# following ones from starting.
for notebook_path in ('sequence_generation.ipynb', 'embedding_and_visualization.ipynb'):
    run_notebook(notebook_path)


==> START: sequence_generation.ipynb
  -> Cell 2/14
  -> Cell 4/14
  -> Cell 6/14
  -> Cell 7/14
  -> Cell 9/14
  -> Cell 10/14
  -> Cell 11/14
  -> Cell 12/14
  -> Cell 14/14
<== DONE: sequence_generation.ipynb (1431.4s)
==> START: embedding_and_visualization.ipynb
  -> Cell 2/26
  -> Cell 4/26
  -> Cell 7/26
  -> Cell 9/26
  -> Cell 10/26
  -> Cell 14/26
  -> Cell 16/26
  -> Cell 19/26
  -> Cell 20/26
  -> Cell 21/26


CellExecutionError: An error occurred while executing the following cell:
------------------
from visualization import _plot_final_similarity_raincloud_on_ax

model_labels = list(all_embeddings.keys())

# ===== 1) Coding vs Non-coding (side-by-side) =====
n_models = len(model_labels)
fig = plt.figure(figsize=(6.8 * n_models, 4.6))
grid = fig.add_gridspec(1, n_models, wspace=0.25)

for idx, model_label in enumerate(model_labels):
    ax = fig.add_subplot(grid[0, idx])
    embeddings_dict = all_embeddings.get(model_label, {})
    palette = {"Coding": "#4C78A8", "Non-coding": "#F58518"}
    _plot_final_similarity_raincloud_on_ax(
        ax,
        embeddings_dict,
        category_map=gene_type_map,
        category_label="Gene Type",
        order=["Coding", "Non-coding"],
        strategies="sampling_t1.0",
        model_label=model_label,
        palette=palette,
        show_ylabel=(idx == 0),
    )
    y_labels = ["Coding", "Non-coding"]
    ax.set_yticks(range(len(y_labels)))
    ax.set_yticklabels(y_labels)
fig.suptitle("Final Similarity by Gene Type", y=1.02)
plt.show()

# ===== 2) Real vs Pseudogene (side-by-side) =====
n_models = len(model_labels)
fig = plt.figure(figsize=(6.8 * n_models, 4.6))
grid = fig.add_gridspec(1, n_models, wspace=0.25)

for idx, model_label in enumerate(model_labels):
    ax = fig.add_subplot(grid[0, idx])
    embeddings_dict = all_embeddings.get(model_label, {})
    palette = {"Real": "#54A24B", "Pseudogene": "#E45756"}
    _plot_final_similarity_raincloud_on_ax(
        ax,
        embeddings_dict,
        category_map=gene_status_map,
        category_label="Gene Status",
        order=["Real", "Pseudogene"],
        strategies="sampling_t1.0",
        model_label=model_label,
        palette=palette,
        show_ylabel=(idx == 0),
        gene_type_map=gene_type_map,
        type_filter=None,
    )
    y_labels = ["Real", "Pseudogene"]
    ax.set_yticks(range(len(y_labels)))
    ax.set_yticklabels(y_labels)
fig.suptitle("Final Similarity by Gene Status", y=1.02)
plt.show()

# ===== 2) Real vs Pseudogene (side-by-side) =====
n_models = len(model_labels)
fig = plt.figure(figsize=(6.8 * n_models, 4.6))
grid = fig.add_gridspec(1, n_models, wspace=0.25)

for idx, model_label in enumerate(model_labels):
    ax = fig.add_subplot(grid[0, idx])
    embeddings_dict = all_embeddings.get(model_label, {})
    palette = {"Real": "#54A24B", "Pseudogene": "#E45756"}
    _plot_final_similarity_raincloud_on_ax(
        ax,
        embeddings_dict,
        category_map=gene_status_map,
        category_label="Gene Status",
        order=["Real", "Pseudogene"],
        strategies="sampling_t1.0",
        model_label=model_label,
        palette=palette,
        show_ylabel=(idx == 0),
        gene_type_map=gene_type_map,
        type_filter="Non-coding",
    )
    y_labels = ["Real", "Pseudogene"]
    ax.set_yticks(range(len(y_labels)))
    ax.set_yticklabels(y_labels)
fig.suptitle("Final Similarity by Gene Status", y=1.02)
plt.show()
------------------


[0;31m---------------------------------------------------------------------------[0m
[0;31mValueError[0m                                Traceback (most recent call last)
Cell [0;32mIn[10], line 40[0m
[1;32m     38[0m embeddings_dict [38;5;241m=[39m all_embeddings[38;5;241m.[39mget(model_label, {})
[1;32m     39[0m palette [38;5;241m=[39m {[38;5;124m"[39m[38;5;124mReal[39m[38;5;124m"[39m: [38;5;124m"[39m[38;5;124m#54A24B[39m[38;5;124m"[39m, [38;5;124m"[39m[38;5;124mPseudogene[39m[38;5;124m"[39m: [38;5;124m"[39m[38;5;124m#E45756[39m[38;5;124m"[39m}
[0;32m---> 40[0m [43m_plot_final_similarity_raincloud_on_ax[49m[43m([49m
[1;32m     41[0m [43m    [49m[43max[49m[43m,[49m
[1;32m     42[0m [43m    [49m[43membeddings_dict[49m[43m,[49m
[1;32m     43[0m [43m    [49m[43mcategory_map[49m[38;5;241;43m=[39;49m[43mgene_status_map[49m[43m,[49m
[1;32m     44[0m [43m    [49m[43mcategory_label[49m[38;5;241;43m=[39;49m[38;5;124;43m"[39;49m[38;5;124;43mGene Status[39;49m[38;5;124;43m"[39;49m[43m,[49m
[1;32m     45[0m [43m    [49m[43morder[49m[38;5;241;43m=[39;49m[43m[[49m[38;5;124;43m"[39;49m[38;5;124;43mReal[39;49m[38;5;124;43m"[39;49m[43m,[49m[43m [49m[38;5;124;43m"[39;49m[38;5;124;43mPseudogene[39;49m[38;5;124;43m"[39;49m[43m][49m[43m,[49m
[1;32m     46[0m [43m    [49m[43mstrategies[49m[38;5;241;43m=[39;49m[38;5;124;43m"[39;49m[38;5;124;43msampling_t1.0[39;49m[38;5;124;43m"[39;49m[43m,[49m
[1;32m     47[0m [43m    [49m[43mmodel_label[49m[38;5;241;43m=[39;49m[43mmodel_label[49m[43m,[49m
[1;32m     48[0m [43m    [49m[43mpalette[49m[38;5;241;43m=[39;49m[43mpalette[49m[43m,[49m
[1;32m     49[0m [43m    [49m[43mshow_ylabel[49m[38;5;241;43m=[39;49m[43m([49m[43midx[49m[43m [49m[38;5;241;43m==[39;49m[43m [49m[38;5;241;43m0[39;49m[43m)[49m[43m,[49m
[1;32m     50[0m [43m    [49m[43mgene_type_map[49m[38;5;241;43m=[39;49m[43mgene_type_map[49m[43m,[49m
[1;32m     51[0m [43m    [49m[43mtype_filter[49m[38;5;241;43m=[39;49m[38;5;28;43;01mNone[39;49;00m[43m,[49m
[1;32m     52[0m [43m[49m[43m)[49m
[1;32m     53[0m y_labels [38;5;241m=[39m [[38;5;124m"[39m[38;5;124mReal[39m[38;5;124m"[39m, [38;5;124m"[39m[38;5;124mPseudogene[39m[38;5;124m"[39m]
[1;32m     54[0m ax[38;5;241m.[39mset_yticks([38;5;28mrange[39m([38;5;28mlen[39m(y_labels)))

File [0;32m~/Desktop/01_Projects/iterative reconstruction of DNA sequence/iterative-dna-reconstruction/visualization.py:875[0m, in [0;36m_plot_final_similarity_raincloud_on_ax[0;34m(ax, embeddings_dict, category_map, category_label, order, strategies, model_label, palette, show_ylabel, gene_type_map, type_filter)[0m
[1;32m    872[0m     palette [38;5;241m=[39m {}
[1;32m    874[0m before [38;5;241m=[39m [38;5;28mlen[39m(ax[38;5;241m.[39mcollections)
[0;32m--> 875[0m [43msns[49m[38;5;241;43m.[39;49m[43mviolinplot[49m[43m([49m
[1;32m    876[0m [43m    [49m[43mdata[49m[38;5;241;43m=[39;49m[43mdf[49m[43m,[49m
[1;32m    877[0m [43m    [49m[43mx[49m[38;5;241;43m=[39;49m[38;5;124;43m"[39;49m[38;5;124;43mFinalSimilarity[39;49m[38;5;124;43m"[39;49m[43m,[49m
[1;32m    878[0m [43m    [49m[43my[49m[38;5;241;43m=[39;49m[38;5;124;43m"[39;49m[38;5;124;43mCategory[39;49m[38;5;124;43m"[39;49m[43m,[49m
[1;32m    879[0m [43m    [49m[43mhue[49m[38;5;241;43m=[39;49m[38;5;124;43m"[39;49m[38;5;124;43mCategory[39;49m[38;5;124;43m"[39;49m[43m,[49m
[1;32m    880[0m [43m    [49m[43morder[49m[38;5;241;43m=[39;49m[43morder[49m[43m,[49m
[1;32m    881[0m [43m    [49m[43mpalette[49m[38;5;241;43m=[39;49m[43mpalette[49m[43m,[49m
[1;32m    882[0m [43m    [49m[43minner[49m[38;5;241;43m=[39;49m[38;5;28;43;01mNone[39;49;00m[43m,[49m
[1;32m    883[0m [43m    [49m[43mcut[49m[38;5;241;43m=[39;49m[38;5;241;43m0[39;49m[43m,[49m
[1;32m    884[0m [43m    [49m[43mlinewidth[49m[38;5;241;43m=[39;49m[38;5;241;43m1.0[39;49m[43m,[49m
[1;32m    885[0m [43m    [49m[43max[49m[38;5;241;43m=[39;49m[43max[49m[43m,[49m
[1;32m    886[0m [43m    [49m[43mlegend[49m[38;5;241;43m=[39;49m[38;5;28;43;01mFalse[39;49;00m[43m,[49m
[1;32m    887[0m [43m[49m[43m)[49m
[1;32m    888[0m [38;5;28;01mfor[39;00m artist [38;5;129;01min[39;00m ax[38;5;241m.[39mcollections[before:]:
[1;32m    889[0m     artist[38;5;241m.[39mset_alpha([38;5;241m0.4[39m)

File [0;32m/opt/homebrew/Cellar/micromamba/envs/dna-fm/lib/python3.10/site-packages/seaborn/categorical.py:1754[0m, in [0;36mviolinplot[0;34m(data, x, y, hue, order, hue_order, orient, color, palette, saturation, fill, inner, split, width, dodge, gap, linewidth, linecolor, cut, gridsize, bw_method, bw_adjust, density_norm, common_norm, hue_norm, formatter, log_scale, native_scale, legend, scale, scale_hue, bw, inner_kws, ax, **kwargs)[0m
[1;32m   1751[0m palette, hue_order [38;5;241m=[39m p[38;5;241m.[39m_hue_backcompat(color, palette, hue_order)
[1;32m   1753[0m saturation [38;5;241m=[39m saturation [38;5;28;01mif[39;00m fill [38;5;28;01melse[39;00m [38;5;241m1[39m
[0;32m-> 1754[0m [43mp[49m[38;5;241;43m.[39;49m[43mmap_hue[49m[43m([49m[43mpalette[49m[38;5;241;43m=[39;49m[43mpalette[49m[43m,[49m[43m [49m[43morder[49m[38;5;241;43m=[39;49m[43mhue_order[49m[43m,[49m[43m [49m[43mnorm[49m[38;5;241;43m=[39;49m[43mhue_norm[49m[43m,[49m[43m [49m[43msaturation[49m[38;5;241;43m=[39;49m[43msaturation[49m[43m)[49m
[1;32m   1755[0m color [38;5;241m=[39m _default_color(
[1;32m   1756[0m     ax[38;5;241m.[39mfill_between, hue, color,
[1;32m   1757[0m     {k: v [38;5;28;01mfor[39;00m k, v [38;5;129;01min[39;00m kwargs[38;5;241m.[39mitems() [38;5;28;01mif[39;00m k [38;5;129;01min[39;00m [[38;5;124m"[39m[38;5;124mc[39m[38;5;124m"[39m, [38;5;124m"[39m[38;5;124mcolor[39m[38;5;124m"[39m, [38;5;124m"[39m[38;5;124mfc[39m[38;5;124m"[39m, [38;5;124m"[39m[38;5;124mfacecolor[39m[38;5;124m"[39m]},
[1;32m   1758[0m     saturation[38;5;241m=[39msaturation,
[1;32m   1759[0m )
[1;32m   1760[0m linecolor [38;5;241m=[39m p[38;5;241m.[39m_complement_color(linecolor, color, p[38;5;241m.[39m_hue_map)

File [0;32m/opt/homebrew/Cellar/micromamba/envs/dna-fm/lib/python3.10/site-packages/seaborn/_base.py:838[0m, in [0;36mVectorPlotter.map_hue[0;34m(self, palette, order, norm, saturation)[0m
[1;32m    837[0m [38;5;28;01mdef[39;00m[38;5;250m [39m[38;5;21mmap_hue[39m([38;5;28mself[39m, palette[38;5;241m=[39m[38;5;28;01mNone[39;00m, order[38;5;241m=[39m[38;5;28;01mNone[39;00m, norm[38;5;241m=[39m[38;5;28;01mNone[39;00m, saturation[38;5;241m=[39m[38;5;241m1[39m):
[0;32m--> 838[0m     mapping [38;5;241m=[39m [43mHueMapping[49m[43m([49m[38;5;28;43mself[39;49m[43m,[49m[43m [49m[43mpalette[49m[43m,[49m[43m [49m[43morder[49m[43m,[49m[43m [49m[43mnorm[49m[43m,[49m[43m [49m[43msaturation[49m[43m)[49m
[1;32m    839[0m     [38;5;28mself[39m[38;5;241m.[39m_hue_map [38;5;241m=[39m mapping

File [0;32m/opt/homebrew/Cellar/micromamba/envs/dna-fm/lib/python3.10/site-packages/seaborn/_base.py:150[0m, in [0;36mHueMapping.__init__[0;34m(self, plotter, palette, order, norm, saturation)[0m
[1;32m    147[0m [38;5;28;01melif[39;00m map_type [38;5;241m==[39m [38;5;124m"[39m[38;5;124mcategorical[39m[38;5;124m"[39m:
[1;32m    149[0m     cmap [38;5;241m=[39m norm [38;5;241m=[39m [38;5;28;01mNone[39;00m
[0;32m--> 150[0m     levels, lookup_table [38;5;241m=[39m [38;5;28;43mself[39;49m[38;5;241;43m.[39;49m[43mcategorical_mapping[49m[43m([49m
[1;32m    151[0m [43m        [49m[43mdata[49m[43m,[49m[43m [49m[43mpalette[49m[43m,[49m[43m [49m[43morder[49m[43m,[49m
[1;32m    152[0m [43m    [49m[43m)[49m
[1;32m    154[0m [38;5;66;03m# --- Option 3: datetime mapping[39;00m
[1;32m    155[0m 
[1;32m    156[0m [38;5;28;01melse[39;00m:
[1;32m    157[0m     [38;5;66;03m# TODO this needs actual implementation[39;00m
[1;32m    158[0m     cmap [38;5;241m=[39m norm [38;5;241m=[39m [38;5;28;01mNone[39;00m

File [0;32m/opt/homebrew/Cellar/micromamba/envs/dna-fm/lib/python3.10/site-packages/seaborn/_base.py:234[0m, in [0;36mHueMapping.categorical_mapping[0;34m(self, data, palette, order)[0m
[1;32m    232[0m     [38;5;28;01mif[39;00m [38;5;28many[39m(missing):
[1;32m    233[0m         err [38;5;241m=[39m [38;5;124m"[39m[38;5;124mThe palette dictionary is missing keys: [39m[38;5;132;01m{}[39;00m[38;5;124m"[39m
[0;32m--> 234[0m         [38;5;28;01mraise[39;00m [38;5;167;01mValueError[39;00m(err[38;5;241m.[39mformat(missing))
[1;32m    236[0m     lookup_table [38;5;241m=[39m palette
[1;32m    238[0m [38;5;28;01melse[39;00m:

[0;31mValueError[0m: The palette dictionary is missing keys: {'Pseudo'}
