In [None]:
import sys
import os
import h5py
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import torch
import warnings
warnings.filterwarnings("ignore")

sys.path.append(os.path.abspath("../src"))

from preprocessor import LLMTIMEPreprocessor
from load_qwen import load_qwen_model
from final_model import LoRALinear, process_sequences, evaluate 
from compute_flops import compute_flops
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
from accelerate import Accelerator
from tqdm import tqdm
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from final_model import train_lora_model




In [None]:
# Run the training entry point imported from final_model with max_steps=6000.
# The call returns five values: the model, its tokenizer, a validation loader
# and two scalar summaries that are kept for later inspection.
# NOTE(review): val_loss / perplexity are presumably measured on val_loader,
# and verbose=True presumably enables progress logging — confirm in final_model.
model, tokenizer, val_loader, val_loss, perplexity = train_lora_model(
    max_steps=6000,
    verbose=True
)

In [None]:
# FLOP accounting for the training run. training_steps=6000 mirrors the
# max_steps passed to train_lora_model above, and lora_rank=8 mirrors the rank
# used when the adapters are rebuilt for inference further down.
# NOTE(review): batch_size, context_length, input_fraction and the series
# counts are not visible in this notebook's training call — confirm they match
# train_lora_model's defaults, otherwise this describes a different run.
compute_flops(
    data_path="../lotka_volterra_data.h5",
    input_fraction=1,
    lora_rank=8,
    batch_size=4,
    training_steps=6000,
    flop_budget=1e17,
    train_series_count=700,
    eval_series_count=200,
    context_length=768
)

In [None]:
# Persist everything `model.state_dict()` exposes (not just adapter tensors)
# to the notebook's working directory; the next cell reloads this file.
torch.save(model.state_dict(), "trained_lora_model.pt")
print(" Model weights saved to trained_lora_model.pt")


In [None]:

# Pick the compute device: CUDA first, then Apple MPS, otherwise CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
elif torch.backends.mps.is_available():
    device = torch.device("mps")
else:
    device = torch.device("cpu")

# === Load base model and tokenizer ===
model, tokenizer = load_qwen_model()

# Wrap the query and value projections of every layer with LoRALinear *before*
# loading the checkpoint, so the module tree has somewhere to put the adapter
# tensors saved in the previous cell.
# NOTE(review): rank 8 matches the value given to compute_flops; confirm it is
# also the rank train_lora_model used, otherwise the shapes will not line up.
lora_rank = 8
for layer in model.model.layers:
    layer.self_attn.q_proj = LoRALinear(layer.self_attn.q_proj, r=lora_rank)
    layer.self_attn.v_proj = LoRALinear(layer.self_attn.v_proj, r=lora_rank)

state_dict = torch.load("trained_lora_model.pt", map_location="cpu")

# strict=False never raises on key mismatches, so inspect the result instead of
# discarding it: if the checkpoint keys do not line up with this module tree the
# fine-tuned weights are silently skipped and the base model would be evaluated.
load_result = model.load_state_dict(state_dict, strict=False)
if load_result.missing_keys or load_result.unexpected_keys:
    print(
        "WARNING: checkpoint and model keys differ - "
        f"{len(load_result.missing_keys)} missing, "
        f"{len(load_result.unexpected_keys)} unexpected. "
        "Parameters without a matching checkpoint entry keep their current values."
    )
    print("  missing (first 5):", load_result.missing_keys[:5])
    print("  unexpected (first 5):", load_result.unexpected_keys[:5])

model.to(device)
model.eval()
print("Trained LoRA model loaded.")


In [None]:

# Shared preprocessor instance; test_trained_model below reads it as a global.
preprocessor = LLMTIMEPreprocessor()
# Read the whole "trajectories" dataset into memory; the file handle is closed
# when the `with` block exits.
with h5py.File("../lotka_volterra_data.h5", "r") as f:
    data = f["trajectories"][:]  # indexed below as data[sample, timestep, channel]; channel 0 = prey, 1 = predator

def test_trained_model(data, sample_ids=[972], input_timesteps=70):
    predictions = {}
    for sample_id in sample_ids:
        prey = data[sample_id, :, 0]
        predator = data[sample_id, :, 1]

        input_text, tokenized_input, scale_factor = preprocessor.preprocess_sample(prey, predator, input_timesteps)
        tokens = tokenized_input.to(device)

        with torch.no_grad():
            generated = model.generate(tokens, max_new_tokens=300)

        semicolons = (generated[0] == tokenizer.convert_tokens_to_ids(';')).nonzero(as_tuple=True)[0]
        while len(semicolons) < 100 and len(generated[0]) < 2000:
            generated = model.generate(generated, max_new_tokens=20)
            semicolons = (generated[0] == tokenizer.convert_tokens_to_ids(';')).nonzero(as_tuple=True)[0]

        if len(semicolons) >= 100:
            tokens_1d = generated[0][:semicolons[99] + 1]
        else:
            tokens_1d = generated[0]

        decoded = tokenizer.decode(tokens_1d, skip_special_tokens=True)
        decoded_pairs = [list(map(float, pair.split(','))) for pair in decoded.split(';') if ',' in pair]
        decoded_prey, decoded_predator = zip(*decoded_pairs) if decoded_pairs else ([], [])

        predictions[sample_id] = {
            "prey": np.array(decoded_prey) * scale_factor,
            "predator": np.array(decoded_predator) * scale_factor
        }

        print(f" Sample {sample_id} | Input tokens: {tokenized_input.shape[1]}, Generated tokens: {generated.shape[1] - tokens.shape[1]}")

    return predictions

# === Predict ===
# Single-sample run for id 972. input_timesteps=50 overrides the function's
# default of 70 (presumably the number of leading timesteps given as context —
# confirm against LLMTIMEPreprocessor.preprocess_sample).
predictions = test_trained_model(data, sample_ids=[972], input_timesteps=50)

# === Plotting function ===
def plot_predictions(predictions, original_series, sample_id):
    """Plot true vs. predicted prey and predator for one sample and save the figure.

    Args:
        predictions: dict mapping sample id -> {"prey": array, "predator": array}.
        original_series: 2-D array; column 0 is prey, column 1 is predator.
        sample_id: key into ``predictions``; also used in titles and the file name.

    Side effects:
        Writes ``sample{sample_id}_trained.png`` (dpi=300) to the working
        directory and shows the figure.
    """
    panels = (
        ("Prey", original_series[:, 0], predictions[sample_id]["prey"], "blue"),
        ("Predator", original_series[:, 1], predictions[sample_id]["predator"], "red"),
    )

    fig, axes = plt.subplots(1, 2, figsize=(12, 5))
    for ax, (label, truth, forecast, colour) in zip(axes, panels):
        ax.plot(np.arange(len(truth)), truth, label=f"True {label}", color=colour)
        # Give each curve its own x-axis: slicing the ground-truth axis would
        # raise a shape mismatch whenever the forecast is the longer series.
        ax.plot(np.arange(len(forecast)), forecast, '--', label=f"Predicted {label}", color=colour, alpha=0.6)
        ax.set_title(f"{label} - Sample {sample_id}")
        ax.set_xlabel("Timestep")
        ax.legend()

    fig.tight_layout()
    fig.savefig(f"sample{sample_id}_trained.png", dpi=300)
    plt.show()

# === Run plotting ===
# Ground truth for the sample forecast above. Only the plot is produced here;
# the metrics for this sample are computed in the next cell.
original_series = data[972]
plot_predictions(predictions, original_series, sample_id=972)


In [None]:
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

def _score_series(y_true, y_pred):
    """Return MSE / MAE / R² over the timesteps both series have in common.

    The longer series is truncated to the shorter one. When there is no overlap
    at all (for example an empty prediction) every score is NaN, because the
    sklearn metric functions raise on zero-length input.
    """
    overlap = min(len(y_pred), len(y_true))
    if overlap == 0:
        nan = float("nan")
        return {"MSE": nan, "MAE": nan, "R²": nan}

    y_true, y_pred = y_true[:overlap], y_pred[:overlap]
    return {
        "MSE": mean_squared_error(y_true, y_pred),
        "MAE": mean_absolute_error(y_true, y_pred),
        "R²":  r2_score(y_true, y_pred),
    }


def evaluate_metrics(predictions, original_series, sample_id):
    """Score one sample's predicted prey and predator series and print the result.

    Args:
        predictions: dict mapping sample id -> {"prey": array, "predator": array}.
        original_series: 2-D array; column 0 is prey, column 1 is predator.
        sample_id: key into ``predictions``; also shown in the printed header.

    Returns:
        {"prey": {...}, "predator": {...}}, each inner dict keyed by
        "MSE", "MAE" and "R²".

    NOTE(review): scoring starts at timestep 0. The arrays produced by
    test_trained_model are decoded from the full generated sequence, prompt
    included, so the context window presumably counts toward these numbers —
    confirm whether only the forecast horizon should be scored.
    """
    forecast = predictions[sample_id]
    scores = {
        "prey": _score_series(original_series[:, 0], forecast["prey"]),
        "predator": _score_series(original_series[:, 1], forecast["predator"]),
    }

    print(f"\nEvaluation Metrics for Sample ID {sample_id}")
    for heading, target in (("Prey:", "prey"), ("Predator:", "predator")):
        print(heading)
        for k, v in scores[target].items():
            print(f"  {k}: {v:.4f}")

    return scores

# === Run the metrics ===
# Reuses `predictions` and `original_series` bound by the previous cell
# (sample 972), so that cell must have been executed first.
metrics = evaluate_metrics(predictions, original_series, sample_id=972)


In [None]:
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# === Preprocessor and data ===
# Rebinds `preprocessor` and `data` exactly as the earlier single-sample cell
# did; `model`, `tokenizer` and `device` still come from the model-loading cell.
preprocessor = LLMTIMEPreprocessor()
with h5py.File("../lotka_volterra_data.h5", "r") as f:
    data = f["trajectories"][:]  # (N, 100, 2)

# === Prediction function for trained model ===
def test_trained_model(data, sample_ids=[972], input_timesteps=70):
    predictions = {}
    for sample_id in sample_ids:
        prey = data[sample_id, :, 0]
        predator = data[sample_id, :, 1]

        input_text, tokenized_input, scale_factor = preprocessor.preprocess_sample(prey, predator, input_timesteps)
        tokens = tokenized_input.to(device)

        with torch.no_grad():
            generated = model.generate(tokens, max_new_tokens=300)

        semicolons = (generated[0] == tokenizer.convert_tokens_to_ids(';')).nonzero(as_tuple=True)[0]
        while len(semicolons) < 100 and len(generated[0]) < 2000:
            generated = model.generate(generated, max_new_tokens=20)
            semicolons = (generated[0] == tokenizer.convert_tokens_to_ids(';')).nonzero(as_tuple=True)[0]

        if len(semicolons) >= 100:
            tokens_1d = generated[0][:semicolons[99] + 1]
        else:
            tokens_1d = generated[0]

        decoded = tokenizer.decode(tokens_1d, skip_special_tokens=True)
        decoded_pairs = [list(map(float, pair.split(','))) for pair in decoded.split(';') if ',' in pair]
        decoded_prey, decoded_predator = zip(*decoded_pairs) if decoded_pairs else ([], [])

        predictions[sample_id] = {
            "prey": np.array(decoded_prey) * scale_factor,
            "predator": np.array(decoded_predator) * scale_factor
        }

        print(f" Sample {sample_id} | Input tokens: {tokenized_input.shape[1]}, Generated tokens: {generated.shape[1] - tokens.shape[1]}")

    return predictions

def plot_predictions(predictions, original_series, sample_id):
    """Plot true vs. predicted prey and predator for one sample (display only).

    Args:
        predictions: dict mapping sample id -> {"prey": array, "predator": array}.
        original_series: 2-D array; column 0 is prey, column 1 is predator.
        sample_id: key into ``predictions``; also used in the panel titles.

    Unlike the single-sample variant defined earlier in the notebook, this
    version does not write the figure to disk.
    """
    panels = (
        ("Prey", original_series[:, 0], predictions[sample_id]["prey"], "blue"),
        ("Predator", original_series[:, 1], predictions[sample_id]["predator"], "red"),
    )

    fig, axes = plt.subplots(1, 2, figsize=(12, 5))
    for ax, (label, truth, forecast, colour) in zip(axes, panels):
        ax.plot(np.arange(len(truth)), truth, label=f"True {label}", color=colour)
        # Give each curve its own x-axis: slicing the ground-truth axis would
        # raise a shape mismatch whenever the forecast is the longer series.
        ax.plot(np.arange(len(forecast)), forecast, '--', label=f"Predicted {label}", color=colour, alpha=0.6)
        ax.set_title(f"{label} - Sample {sample_id}")
        ax.set_xlabel("Timestep")
        ax.legend()

    fig.tight_layout()
    plt.show()

def _score_series(y_true, y_pred):
    """Return MSE / MAE / R² over the timesteps both series have in common.

    The longer series is truncated to the shorter one. When there is no overlap
    at all (for example an empty prediction) every score is NaN, because the
    sklearn metric functions raise on zero-length input.
    """
    overlap = min(len(y_pred), len(y_true))
    if overlap == 0:
        nan = float("nan")
        return {"MSE": nan, "MAE": nan, "R²": nan}

    y_true, y_pred = y_true[:overlap], y_pred[:overlap]
    return {
        "MSE": mean_squared_error(y_true, y_pred),
        "MAE": mean_absolute_error(y_true, y_pred),
        "R²":  r2_score(y_true, y_pred),
    }


def evaluate_metrics(predictions, original_series, sample_id):
    """Score one sample's predicted prey and predator series and print the result.

    Args:
        predictions: dict mapping sample id -> {"prey": array, "predator": array}.
        original_series: 2-D array; column 0 is prey, column 1 is predator.
        sample_id: key into ``predictions``; also shown in the printed header.

    Returns:
        {"prey": {...}, "predator": {...}}, each inner dict keyed by
        "MSE", "MAE" and "R²".

    NOTE(review): scoring starts at timestep 0. The arrays produced by
    test_trained_model are decoded from the full generated sequence, prompt
    included, so the context window presumably counts toward these numbers —
    confirm whether only the forecast horizon should be scored.
    """
    forecast = predictions[sample_id]
    scores = {
        "prey": _score_series(original_series[:, 0], forecast["prey"]),
        "predator": _score_series(original_series[:, 1], forecast["predator"]),
    }

    print(f"\n Evaluation Metrics for Sample ID {sample_id}")
    for heading, target in (("Prey:", "prey"), ("Predator:", "predator")):
        print(heading)
        for k, v in scores[target].items():
            print(f"  {k}: {v:.4f}")

    return scores

# === Forecast samples 990–999 in one call ===
sample_ids = list(range(990, 1000))
predictions = test_trained_model(data, sample_ids=sample_ids, input_timesteps=50)

# === One score list per (target, metric) pair ===
all_metrics = {
    target_name: {metric_name: [] for metric_name in ("MSE", "MAE", "R²")}
    for target_name in ("prey", "predator")
}

# === Plot and score every sample, collecting the scores as we go ===
for sample_id in sample_ids:
    original_series = data[sample_id]
    plot_predictions(predictions, original_series, sample_id)
    metrics = evaluate_metrics(predictions, original_series, sample_id)

    for target, per_metric in all_metrics.items():
        for metric, values in per_metric.items():
            values.append(metrics[target][metric])

# === Report the mean of each score list ===
print("\n Average Metrics Across Samples 990–999")
for target, per_metric in all_metrics.items():
    print(f"\n {target.capitalize()}:")
    for metric, values in per_metric.items():
        avg = np.mean(values)
        print(f"  Avg {metric}: {avg:.4f}")
