In [1]:
import pickle

# Path to your checkpoint file
file_path = "checkpoint.pkl"

# Safely load the checkpoint
with open(file_path, "rb") as f:
    checkpoint = pickle.load(f)

# Display top-level keys or structure
print("Keys or attributes in checkpoint:")
if isinstance(checkpoint, dict):
    for key in checkpoint.keys():
        print(" -", key)
else:
    print("Object type:", type(checkpoint))
    print(checkpoint)

Detected IPython. Loading juliacall extension. See https://juliapy.github.io/PythonCall.jl/stable/compat/#IPython
Keys or attributes in checkpoint:
Object type: <class 'pysr.sr.PySRRegressor'>
PySRRegressor.equations_ = [
	    pick     score                                           equation  \
	0         0.000000                                        0.102472365   
	1         0.145637                                   0.015495526 / x2   
	2         0.038410                    0.009915686 * (x2 ^ -1.2391542)   
	3         0.079136                 (0.03346586 / sin(x2)) - 0.1305686   
	4         1.398780            sin(x1 * 7.086507) * (x0 + -0.42030776)   
	5         0.076918         sin(x1 * 7.086507) * sin(x0 + -0.42030776)   
	6         1.243171  ((x0 + -0.35920104) * sin(x1 * 6.833488)) - -0...   
	7         0.032931  ((x0 + -0.4816545) * sin(x1 * 7.3092513)) / (c...   
	8   >>>>  0.785648  ((sin(x1 * 6.842869) * (x0 + -0.36660212)) / (...   
	9         0.004631  (sin(6.8392644 * 

In [7]:
import pickle
from pathlib import Path

# Optional, only needed for the DOCX export part at the end
try:
    from docx import Document
    HAVE_DOCX = True
except Exception:
    HAVE_DOCX = False

pkl_path = "checkpoint.pkl"

with open(pkl_path, "rb") as f:
    model = pickle.load(f)

print("Loaded object type,", type(model))

# PySRRegressor specific helpers
def safe_col(df, name, default=None):
    return df[name] if name in df.columns else default

if hasattr(model, "equations_"):
    eq_df = model.equations_.copy()
    # Normalize likely column names across PySR versions
    # Common possibilities, equation or expression or sympy_format
    if "equation" in eq_df.columns:
        eq_df.rename(columns={"equation": "expression"}, inplace=True)
    if "sympy_format" in eq_df.columns and "expression" not in eq_df.columns:
        eq_df.rename(columns={"sympy_format": "expression"}, inplace=True)
    if "score" in eq_df.columns and "r2" not in eq_df.columns:
        eq_df.rename(columns={"score": "r2"}, inplace=True)

    # Keep a compact view
    keep_cols = [c for c in ["rank", "expression", "loss", "r2", "complexity", "size"] if c in eq_df.columns]
    view = eq_df[keep_cols].sort_values(by=keep_cols[0] if "rank" in keep_cols else "loss").reset_index(drop=True)

    print("\nTop equations, compact view,")
    print(view.head(10).to_string(index=False))

    # Best equation
    best_row = view.iloc[0]
    print("\nBest equation,")
    print("expression,", best_row.get("expression", "n,a"))
    print("loss,", best_row.get("loss", "n,a"))
    print("r2,", best_row.get("r2", "n,a"))

    # Save to CSV for publication
    out_csv = Path("symbolic_equations_hall_of_fame.csv")
    view.to_csv(out_csv, index=False)
    print(f"\nSaved, {out_csv.resolve()}")

    # Optional, also save a small TXT summary
    with open("best_equation.txt", "w", encoding="utf8") as w:
        w.write("Best symbolic equation\n")
        w.write(f"expression, {best_row.get('expression','n,a')}\n")
        w.write(f"loss, {best_row.get('loss','n,a')}\n")
        w.write(f"r2, {best_row.get('r2','n,a')}\n")
    print("Saved, best_equation.txt")

    # Optional, export a DOCX table
    if HAVE_DOCX:
        doc = Document()
        doc.add_heading("Top ranked symbolic regression equations", level=1)
        table = doc.add_table(rows=1 + min(15, len(view)), cols=3)
        hdr = table.rows[0].cells
        hdr[0].text = "Equation"
        hdr[1].text = "R²"
        hdr[2].text = "Loss"

        for i, row in view.head(15).iterrows():
            cells = table.rows[i + 1].cells
            cells[0].text = str(row.get("expression", ""))
            cells[1].text = str(row.get("r2", ""))
            cells[2].text = str(row.get("loss", ""))

        doc.save("symbolic_equations_table.docx")
        print("Saved, symbolic_equations_table.docx")
    else:
        print("python docx not installed, skip DOCX export")
else:
    # Not a PySRRegressor, fall back to general introspection
    print("No equations_ attribute found, printing attributes,")
    attrs = [a for a in dir(model) if not a.startswith("_")]
    print(attrs)

Loaded object type, <class 'pysr.sr.PySRRegressor'>

Top equations, compact view,
                                                                                                                                       expression     loss       r2  complexity
      ((x0 + -0.36708188) * (sin(x1 * 6.8481627) / ((x2 ^ -0.06798316) ^ cos((x0 ^ exp((x2 ^ x2) ^ cos(x2 / x2))) / 0.54394233)))) - -0.054766685 0.000020 0.052934          29
(((x0 + -0.36692575) * sin(x1 * 6.838255)) / ((x2 ^ -0.07833913) ^ cos((x0 ^ (exp(cos(x2) ^ 1.406679) * 0.4822747)) / 0.54974186))) - -0.05544609 0.000023 0.011967          27
                       (((x0 + -0.36611685) * sin(x1 * 6.845902)) / ((x0 ^ -0.093031414) ^ ((x1 + x0) ^ ((x0 - x2) * 2.0796912)))) - -0.056334484 0.000024 0.044522          24
                           (sin(x1 * 6.842869) * ((x0 + -0.36660212) / ((x0 ^ -0.09424276) ^ ((x0 / sin(x0)) ^ (x1 * 6.842869))))) - -0.056475516 0.000025 0.003770          23
                                  (sin

In [12]:
import pickle
from pathlib import Path

pkl_path = "checkpoint.pkl"

with open(pkl_path, "rb") as f:
    model = pickle.load(f)

print("Loaded type,", type(model))

# Helper to show what is available on the object
attrs = [a for a in dir(model) if not a.startswith("_")]
print("Top level attributes on model,")
print(attrs)

# Try to access the equations table
eq_df = getattr(model, "equations_", None)

if eq_df is None:
    print("This PySRRegressor has no equations_ attribute yet.")
    print("If the model was not fitted, call model.fit before exporting equations.")
else:
    # Some PySR versions use MultiIndex by target, handle both single and multi target
    try:
        is_multi_target = hasattr(eq_df.index, "names") and ("target" in (eq_df.index.names or []))
    except Exception:
        is_multi_target = False

    # Normalize column names to a common set
    df = eq_df.copy()
    if "equation" in df.columns:
        df = df.rename(columns={"equation": "expression"})
    if "sympy_format" in df.columns and "expression" not in df.columns:
        df = df.rename(columns={"sympy_format": "expression"})
    if "score" in df.columns and "r2" not in df.columns:
        df = df.rename(columns={"score": "r2"})

    keep = [c for c in ["rank", "target", "expression", "loss", "r2", "complexity", "size"] if c in df.columns]
    df_view = df[keep].reset_index(drop=True)

    # Sort by rank if present, else by loss
    sort_key = "rank" if "rank" in df_view.columns else "loss"
    df_view = df_view.sort_values(by=sort_key, ascending=True).reset_index(drop=True)

    print("\nTop equations, compact view,")
    print(df_view.head(10).to_string(index=False))

    # Select the best per target if multi target, else the single best
    if is_multi_target and "target" in df_view.columns:
        best_rows = []
        for tgt in sorted(df_view["target"].unique()):
            sub = df_view[df_view["target"] == tgt]
            best_rows.append(sub.iloc[0])
        print("\nBest equation per target,")
        for row in best_rows:
            print(f"target {row['target']}, expression, {row.get('expression','n,a')}, loss, {row.get('loss','n,a')}, r2, {row.get('r2','n,a')}")
    else:
        best = df_view.iloc[0]
        print("\nBest equation,")
        print("expression,", best.get("expression", "n,a"))
        print("loss,", best.get("loss", "n,a"))
        print("r2,", best.get("r2", "n,a"))

    # Save clean outputs
    out_csv = Path("symbolic_equations_hall_of_fame.csv")
    df_view.to_csv(out_csv, index=False, encoding="utf8")
    print(f"\nSaved, {out_csv.resolve()}")

    with open("best_equation.txt", "w", encoding="utf8") as w:
        if is_multi_target and "target" in df_view.columns:
            w.write("Best symbolic equation per target\n")
            for tgt in sorted(df_view["target"].unique()):
                sub = df_view[df_view["target"] == tgt].iloc[0]
                w.write(f"target {tgt}, expression, {sub.get('expression','n,a')}, loss, {sub.get('loss','n,a')}, r2, {sub.get('r2','n,a')}\n")
        else:
            best = df_view.iloc[0]
            w.write("Best symbolic equation\n")
            w.write(f"expression, {best.get('expression','n,a')}\n")
            w.write(f"loss, {best.get('loss','n,a')}\n")
            w.write(f"r2, {best.get('r2','n,a')}\n")
    print("Saved, best_equation.txt")

Loaded type, <class 'pysr.sr.PySRRegressor'>
Top level attributes on model,

Top equations, compact view,
                                                                                                                                       expression     loss       r2  complexity
      ((x0 + -0.36708188) * (sin(x1 * 6.8481627) / ((x2 ^ -0.06798316) ^ cos((x0 ^ exp((x2 ^ x2) ^ cos(x2 / x2))) / 0.54394233)))) - -0.054766685 0.000020 0.052934          29
(((x0 + -0.36692575) * sin(x1 * 6.838255)) / ((x2 ^ -0.07833913) ^ cos((x0 ^ (exp(cos(x2) ^ 1.406679) * 0.4822747)) / 0.54974186))) - -0.05544609 0.000023 0.011967          27
                       (((x0 + -0.36611685) * sin(x1 * 6.845902)) / ((x0 ^ -0.093031414) ^ ((x1 + x0) ^ ((x0 - x2) * 2.0796912)))) - -0.056334484 0.000024 0.044522          24
                           (sin(x1 * 6.842869) * ((x0 + -0.36660212) / ((x0 ^ -0.09424276) ^ ((x0 / sin(x0)) ^ (x1 * 6.842869))))) - -0.056475516 0.000025 0.003770          23
              

In [15]:
import pandas as pd

# Load your symbolic regression results
df = pd.read_csv("symbolic_equations_hall_of_fame.csv")

# Show the column names so you can see what’s available
print("Columns found in file:", df.columns.tolist())

# Sort by rank or R² depending on what’s available
if "rank" in df.columns:
    df = df.sort_values("rank")
elif "r2" in df.columns:
    df = df.sort_values("r2", ascending=False)

# Display top 5 equations
top = df.head(5)
print("\nTop 5 Symbolic Equations:\n")
for i, row in top.iterrows():
    expression = row.get("expression", "n/a")
    r2 = row.get("r2", "n/a")
    loss = row.get("loss", "n/a")
    print(f"Rank {row.get('rank', i+1)} | R²={r2} | Loss={loss}")
    print(f"Equation: {expression}\n")

Columns found in file: ['expression', 'loss', 'r2', 'complexity']

Top 5 Symbolic Equations:

Rank 14 | R²=1.39878041313053 | Loss=0.00023649499
Equation: sin(x1 * 7.086507) * (x0 + -0.42030776)

Rank 12 | R²=1.2431712545413254 | Loss=6.317054e-05
Equation: ((x0 + -0.35920104) * sin(x1 * 6.833488)) - -0.05599398

Rank 10 | R²=0.785647606267612 | Loss=2.6085965e-05
Equation: ((sin(x1 * 6.842869) * (x0 + -0.36660212)) / (x0 ^ -0.09486909)) - -0.0564455

Rank 17 | R²=0.1456372705814201 | Loss=0.0045343763
Equation: 0.015495526 / x2

Rank 15 | R²=0.0791358118304355 | Loss=0.003879602
Equation: (0.03346586 / sin(x2)) - 0.1305686

