In [None]:
# Standard library first, then third-party, then project-local imports.
# `sys` is imported directly rather than via `from os import sys`, which only
# works because the `os` module happens to import `sys` internally.
import sys

import pandas as pd

# Make the project's source directory importable so the `utils` package
# below resolves. Guarded so that re-running this cell does not keep
# appending the same entry to sys.path.
if '../src/' not in sys.path:
    sys.path.append('../src/')

from utils.utils import populate_training_metrics
from utils.make_graphs import plot_batch_means, plot_topology_3d

# Paths to the input JSONL files (relative to the notebook's working directory).
# `train_json` is passed as `train_jsonl` to populate_training_metrics below.
train_json = "../data/interim/cd98_10k/cd98_biophi_1024.jsonl"
# NOTE(review): the "low" run is read from cd98_10k_v3 while the "high" run is
# read from cd98_10k_v1 — confirm this version mismatch is intentional.
sim_json_low = "../data/processed/cd98_10k/cd98_10k_v3/cd98_final_1024_low.jsonl"
sim_json_high = "../data/processed/cd98_10k/cd98_10k_v1/cd98_final_1024_high.jsonl"

In [None]:
# Step 1 (low run): populate missing training metrics for the "low"
# simulation file. The helper receives the training JSONL, the simulation
# JSONL and an output path; presumably it also writes the result to
# `output_jsonl` (later cells read that file) — confirm in utils.utils.
complete_df_low = populate_training_metrics(
    train_jsonl=train_json,
    sim_jsonl=sim_json_low,
    output_jsonl="../data/processed/cd98_10k/cd98_low.jsonl",
)

# Report the record count, then show the first rows as a quick sanity check.
print(f"Loaded cd98_low.jsonl with {len(complete_df_low)} records.")
display(complete_df_low.head())

In [None]:
# Step 1 (high run): same as the low-run cell, but for the "high" simulation
# file. Presumably the result is also written to `output_jsonl` (later cells
# read that file) — confirm in utils.utils.
complete_df_high = populate_training_metrics(
    train_jsonl=train_json,
    sim_jsonl=sim_json_high,
    output_jsonl="../data/processed/cd98_10k/cd98_high.jsonl",
)

# Report the record count, then show the first rows as a quick sanity check.
print(f"Loaded cd98_high.jsonl with {len(complete_df_high)} records.")
display(complete_df_high.head())

In [None]:
# Step 2: line plot of batch means for the "y_high" metric, one line per run.
# Paths and labels are parallel lists (high run first, then low run);
# presumably they are matched by position — confirm in utils.make_graphs.
plot_batch_means(
    jsonl_paths=[
        "../data/processed/cd98_10k/cd98_high.jsonl",
        "../data/processed/cd98_10k/cd98_low.jsonl",
    ],
    labels=[
        "20 Iterations, 10 Sequences Batches",
        "4 Iterations, 50 Sequences Batches",
    ],
    metric="y_high",
    title="Mean In-Silico Fitness Across Rounds Bayesian Optimization",
    xlabel="BO Iteration",
    ylabel="Mean Fitness",
    save_path=None,  # or "batch_means.png"
)

In [None]:
# 3D topology plot for the low run, viewed with elev=90 / azim=180.
# NOTE(review): `metric` is not passed here, so plot_topology_3d's default
# applies — confirm it is the intended metric.
# NOTE(review): this cell is identical to the next cell; possibly one of the
# two was meant to use a different view angle.
plot_topology_3d(
    jsonl_path="../data/processed/cd98_10k/cd98_low.jsonl",
    reduce_method="umap",  # or "tsne"
    grid_size=60,
    cmap_name="Reds",
    # View angles (presumably camera elevation/azimuth in degrees — confirm).
    elev=90,
    azim=180,
    # Styling of the connecting line.
    connect_line_color="red",
    connect_line_width=2.5,
    save_path=None,
)

In [None]:
# 3D topology plot for the low run, viewed with elev=90 / azim=180.
# NOTE(review): this cell is identical to the previous cell; possibly a
# different view angle was intended here.
# NOTE(review): `metric` is not passed, so plot_topology_3d's default applies
# — confirm it is the intended metric.
plot_topology_3d(
    jsonl_path="../data/processed/cd98_10k/cd98_low.jsonl",
    reduce_method="umap",  # or "tsne"
    grid_size=60,
    cmap_name="Reds",
    # View angles (presumably camera elevation/azimuth in degrees — confirm).
    elev=90,
    azim=180,
    # Styling of the connecting line.
    connect_line_color="red",
    connect_line_width=2.5,
    save_path=None,
)

In [None]:
# 3D topology plot for the high run, viewed with elev=5 / azim=135.
# NOTE(review): `metric` is not passed here, so plot_topology_3d's default
# applies — confirm it is the intended metric.
plot_topology_3d(
    jsonl_path="../data/processed/cd98_10k/cd98_high.jsonl",
    reduce_method="umap",  # or "tsne"
    grid_size=60,
    cmap_name="Reds",
    # View angles (presumably camera elevation/azimuth in degrees — confirm).
    elev=5,
    azim=135,
    # Styling of the connecting line.
    connect_line_color="red",
    connect_line_width=2.5,
    save_path=None,
)

In [None]:
# 3D topology plot of the "y_high" metric for the high run, viewed with
# elev=90 / azim=180.
plot_topology_3d(
    jsonl_path="../data/processed/cd98_10k/cd98_high.jsonl",
    metric="y_high",
    reduce_method="umap",  # or "tsne"
    grid_size=60,
    cmap_name="Reds",
    # View angles (presumably camera elevation/azimuth in degrees — confirm).
    elev=90,
    azim=180,
    # Styling of the connecting line.
    connect_line_color="red",
    connect_line_width=2.5,
    save_path=None,
)


In [None]:
# Line plot of batch means for the "dev_score" metric, one line per run.
# Paths and labels are parallel lists (high run first, then low run);
# presumably they are matched by position — confirm in utils.make_graphs.
plot_batch_means(
    jsonl_paths=[
        "../data/processed/cd98_10k/cd98_high.jsonl",
        "../data/processed/cd98_10k/cd98_low.jsonl",
    ],
    labels=[
        "20 Iterations, 10 Sequences Batches",
        "4 Iterations, 50 Sequences Batches",
    ],
    metric="dev_score",
    title="Mean Dev_Score Across Rounds Bayesian Optimization",
    xlabel="BO Iteration",
    ylabel="Developability Score",
    save_path=None,  # or "batch_means.png"
)

In [None]:
# 3D topology plot of the "dev_score" metric for the low run, viewed with
# elev=30 / azim=135.
plot_topology_3d(
    jsonl_path="../data/processed/cd98_10k/cd98_low.jsonl",
    metric="dev_score",
    reduce_method="umap",  # or "tsne"
    grid_size=60,
    cmap_name="Reds",
    # View angles (presumably camera elevation/azimuth in degrees — confirm).
    elev=30,
    azim=135,
    # Styling of the connecting line.
    connect_line_color="red",
    connect_line_width=2.5,
    save_path=None,
)

In [None]:
# 3D topology plot of the "dev_score" metric for the low run, viewed with
# elev=0 / azim=135 (same data and settings as the elev=30 cell, only the
# elevation differs).
plot_topology_3d(
    jsonl_path="../data/processed/cd98_10k/cd98_low.jsonl",
    metric="dev_score",
    reduce_method="umap",  # or "tsne"
    grid_size=60,
    cmap_name="Reds",
    # View angles (presumably camera elevation/azimuth in degrees — confirm).
    elev=0,
    azim=135,
    # Styling of the connecting line.
    connect_line_color="red",
    connect_line_width=2.5,
    save_path=None,
)

In [None]:
# 3D topology plot of the "dev_score" metric for the high run, viewed with
# elev=40 / azim=135.
plot_topology_3d(
    jsonl_path="../data/processed/cd98_10k/cd98_high.jsonl",
    metric="dev_score",
    reduce_method="umap",  # or "tsne"
    grid_size=60,
    cmap_name="Reds",
    # View angles (presumably camera elevation/azimuth in degrees — confirm).
    elev=40,
    azim=135,
    # Styling of the connecting line.
    connect_line_color="red",
    connect_line_width=2.5,
    save_path=None,
)

In [None]:
# 3D topology plot of the "dev_score" metric for the high run, viewed with
# elev=5 / azim=135 (same data and settings as the elev=40 cell, only the
# elevation differs).
plot_topology_3d(
    jsonl_path="../data/processed/cd98_10k/cd98_high.jsonl",
    metric="dev_score",
    reduce_method="umap",  # or "tsne"
    grid_size=60,
    cmap_name="Reds",
    # View angles (presumably camera elevation/azimuth in degrees — confirm).
    elev=5,
    azim=135,
    # Styling of the connecting line.
    connect_line_color="red",
    connect_line_width=2.5,
    save_path=None,
)