In [1]:
import sys
import os
sys.path.append(os.path.abspath("../src"))
from compute_flops import estimate_max_training_steps

# Settings for the FLOP-budget estimate: 700 training / 300 evaluation series,
# LoRA rank 4, batches of 4, 512-token context, and a 1e17 FLOP allowance.
budget_settings = {
    "data_path": "../lotka_volterra_data.h5",
    "lora_rank": 4,
    "input_fraction": 0.7,
    "batch_size": 4,
    "flop_budget": 1e17,
    "train_series_count": 700,
    "eval_series_count": 300,
    "context_length": 512,
}

# NOTE(review): presumably returns the maximum number of training steps that
# fit in the budget (the helper prints "Max training steps allowed") - confirm.
max_steps = estimate_max_training_steps(**budget_settings)


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.



🔢 Avg token count: 512
⚙️  FLOPs per training step (batch): 6.76e+12
🧪 Evaluation FLOPs: 1.69e+14
🎯 Remaining budget for training: 9.98e+16
🚀 Max training steps allowed: 14776


- Start with 1000 time series
- Split immediately → 700 for training, 300 for evaluation
- Tokenise the 700 training series into sequences
- Train the model on these tokenised training sequences
- Evaluate the model on the validation/test split (the 300)

In [None]:
import pandas as pd

# Budget constants, copied from the figures printed by the estimate cell.
FLOPS_PER_STEP = 6.76e12  # FLOPs for one training step (one batch)
EVAL_FLOPS = 1.69e14      # FLOPs for evaluation
FLOP_BUDGET = 1e17        # total FLOP allowance


def _experiment(part, name, rank, lr, context_length, max_steps, val_loss):
    """Return one planned run as a record (dict) for the experiment table."""
    return {
        "Part": part,
        "Experiment": name,
        "LoRA Rank": rank,
        "Learning Rate": lr,
        "Context Length": context_length,
        "Max Steps": max_steps,
        "Average Validation Loss": val_loss,
    }


# Planned runs, in the order they consume the budget. None marks a setting
# that is not fixed yet ("best config" rows).
experiments = [
    _experiment("a i", "Default LoRA (4, 1e-5)", 4, 1e-5, 512, 1000, 1.1926),
    _experiment("b i", "LoRA (2, 1e-5)", 2, 1e-5, 512, 1, 0.8880),
    _experiment("b ii", "LoRA (4, 1e-5)", 4, 1e-5, 512, 1000, 0),
    _experiment("b iii", "LoRA (8, 1e-5)", 8, 1e-5, 512, 1, 0),
    _experiment("b iv", "LoRA (2, 5e-5)", 2, 5e-5, 512, 1000, 0),
    _experiment("b v", "LoRA (4, 5e-5)", 4, 5e-5, 512, 1000, 0),
    _experiment("b vi", "LoRA (8, 5e-5)", 8, 5e-5, 512, 1000, 0),
    _experiment("b vii", "LoRA (2, 1e-4)", 2, 1e-4, 512, 1, 0),
    _experiment("b viii", "LoRA (4, 1e-4)", 4, 1e-4, 512, 1000, 0),
    _experiment("b ix", "LoRA (8, 1e-4)", 8, 1e-4, 512, 1, 0),
    _experiment("b x", "Best config (CL=128)", None, None, 128, 1000, 0),
    _experiment("b xi", "Best config (CL=768)", None, None, 768, 1000, 0),
    _experiment("c i", "Final model (best config)", None, None, None, 3000, 0),
]

plan = pd.DataFrame(experiments)

# NOTE(review): every run is costed at the same FLOPS_PER_STEP even though
# LoRA rank and context length differ between rows - confirm this
# approximation is acceptable for the CL=128 / CL=768 runs.
train_cost = plan["Max Steps"].mul(FLOPS_PER_STEP)

# Running total across runs; the evaluation cost is added once, as a constant
# offset on every cumulative value.
running_cost = train_cost.cumsum().add(EVAL_FLOPS)

# Two-decimal scientific notation for the displayed FLOP columns.
sci = "{:.2e}".format

# The budget flag is derived from the numeric totals; the two FLOP columns are
# stored as formatted strings for display.
df = plan.assign(
    **{
        "Training FLOPs": train_cost.map(sci),
        "Cumulative FLOPs": running_cost.map(sci),
        "Exceeds Budget": running_cost.gt(FLOP_BUDGET),
    }
)

df


Unnamed: 0,Part,Experiment,LoRA Rank,Learning Rate,Context Length,Max Steps,Average Validation Loss,Training FLOPs,Cumulative FLOPs,Exceeds Budget
0,a i,"Default LoRA (4, 1e-5)",4.0,1e-05,512.0,1000,1.1926,6760000000000000.0,6930000000000000.0,False
1,b i,"LoRA (2, 1e-5)",2.0,1e-05,512.0,1,0.0,6760000000000.0,6940000000000000.0,False
2,b ii,"LoRA (4, 1e-5)",4.0,1e-05,512.0,1000,0.0,6760000000000000.0,1.37e+16,False
3,b iii,"LoRA (8, 1e-5)",8.0,1e-05,512.0,1,0.0,6760000000000.0,1.37e+16,False
4,b iv,"LoRA (2, 5e-5)",2.0,5e-05,512.0,1000,0.0,6760000000000000.0,2.05e+16,False
5,b v,"LoRA (4, 5e-5)",4.0,5e-05,512.0,1000,0.0,6760000000000000.0,2.72e+16,False
6,b vi,"LoRA (8, 5e-5)",8.0,5e-05,512.0,1000,0.0,6760000000000000.0,3.4e+16,False
7,b vii,"LoRA (2, 1e-4)",2.0,0.0001,512.0,1,0.0,6760000000000.0,3.4e+16,False
8,b viii,"LoRA (4, 1e-4)",4.0,0.0001,512.0,1000,0.0,6760000000000000.0,4.07e+16,False
9,b ix,"LoRA (8, 1e-4)",8.0,0.0001,512.0,1,0.0,6760000000000.0,4.08e+16,False


In [3]:
# Step ceiling implied by the FLOP budget (the value reported by the estimate
# cell as "Max training steps allowed").
FLOP_LIMIT_STEPS = 14776

# Steps already committed across all planned experiments, and what is left.
allocated_steps = df["Max Steps"].sum()
remaining_steps = FLOP_LIMIT_STEPS - allocated_steps

# Summary report, one line per figure.
summary_lines = [
    "FLOP-Constrained Training Budget Planning",
    f"FLOP budget allows for:     {FLOP_LIMIT_STEPS:,} training steps",
    f"Steps currently allocated:  {allocated_steps:,}",
    f"Remaining steps available:  {remaining_steps:,}",
]
print("\n".join(summary_lines))

# A negative remainder means the plan asks for more steps than the ceiling.
verdict = (
    "\nYou're within the FLOP training budget."
    if remaining_steps >= 0
    else "\nYou have exceeded the FLOP training step budget!"
)
print(verdict)


FLOP-Constrained Training Budget Planning
FLOP budget allows for:     14,776 training steps
Steps currently allocated:  11,004
Remaining steps available:  3,772

You're within the FLOP training budget.
