In [None]:
import io

import matplotlib.pyplot as plt
import pandas as pd
import scipy.signal
from google.colab import files

In [None]:
# Ask for a file upload through Colab's `files` helper and keep the result in
# `uploaded` so the following cell can load it into a DataFrame.
uploaded = files.upload()

In [None]:
# `files.upload()` returns a dict mapping each uploaded filename to its raw
# bytes, which `pd.read_csv` cannot consume directly. Load the first uploaded
# file by wrapping its bytes in an in-memory buffer.
if not uploaded:
    raise ValueError("No file was uploaded; re-run the upload cell and select a CSV file.")
csv_bytes = next(iter(uploaded.values()))
df = pd.read_csv(io.BytesIO(csv_bytes))
df.head()

In [None]:
# Optional: coerce the metric columns from strings to numbers (disabled; uncomment if the CSV loads them as strings)
# for col in ["round", "step", "eval_loss", "train_loss", "kl_loss", "perplexity", "ensemble_size"]:
#     df[col] = pd.to_numeric(df[col], errors="coerce")

In [None]:
# Student next-token train loss over rounds
# Keep only the student's training rows logged by `compute_loss`, order them by
# (round, step), and number them consecutively to get one continuous x-axis.
student_train_df = df[(df['role'] == 'student') & (df['phase'] == 'train') & (df['function'] == 'compute_loss')]
student_train_df = student_train_df.sort_values(by=["round", "step"]).reset_index(drop=True)
student_train_df["global_step"] = range(len(student_train_df))

# True on the first row of every round after the first one: diff() yields NaN
# for row 0, which fillna(0) turns into "no change".
round_changes = student_train_df['round'].diff().fillna(0) != 0

# Open the figure first. pyplot draws on the *current* figure, so round markers
# drawn before plt.figure() would land on a separate, otherwise empty figure.
plt.figure(figsize=(50, 6))
for boundary_step in student_train_df.loc[round_changes, 'global_step']:
    plt.axvline(x=boundary_step, color='gray', linestyle='--', alpha=0.3)

# Smooth the raw loss with an 11-sample median filter before plotting.
plt.plot(student_train_df["global_step"], scipy.signal.medfilt(student_train_df["train_next_token_loss"], 11), label="Next Token Loss", linestyle='-')
plt.title("Training Next Token Prediction Loss Across Rounds")
plt.xlabel("Global Step")
plt.ylabel("Train Loss")
plt.grid(True)
plt.show()

In [None]:
# Student KL train loss over rounds, smoothed with an 11-sample median filter.
# Relies on `student_train_df` (with its `global_step` column) from the cell above.
plt.figure(figsize=(50, 6))
kl_steps = student_train_df["global_step"]
kl_smoothed = scipy.signal.medfilt(student_train_df["train_kl_loss"], 11)
plt.plot(kl_steps, kl_smoothed, label="KL Loss", linestyle="-")
plt.xlabel("Global Step")
plt.ylabel("Train Loss")
plt.title("Training KL Loss Across Rounds")
plt.grid(True)
plt.show()

In [None]:
# Student hybrid train loss over rounds, smoothed with an 11-sample median filter.
# Relies on `student_train_df` (with its `global_step` column) built earlier.
plt.figure(figsize=(50, 6))
# The series plotted here is `train_loss` (the hybrid loss), so label it as such
# rather than reusing the next-token label from the earlier cell.
plt.plot(student_train_df["global_step"], scipy.signal.medfilt(student_train_df["train_loss"], 11), label="Hybrid Loss", linestyle='-')
plt.title("Training Loss Across Rounds")
plt.xlabel("Global Step")
plt.ylabel("Train Loss")
plt.grid(True)
plt.show()

In [None]:
# TODO: add per-round logging of training loss (not implemented yet)

In [None]:
# Student KL eval loss.
# Select the student's eval rows logged by `prediction_step`, order them by
# (round, step), and number them consecutively for the x-axis.
is_eval_phase = df["phase"] == "eval"
is_student_role = df["role"] == "student"
is_prediction_step = df["function"] == "prediction_step"
student_eval_df = df[is_eval_phase & is_student_role & is_prediction_step]

student_eval_df = student_eval_df.sort_values(by=["round", "step"])
student_eval_df = student_eval_df.reset_index(drop=True)
student_eval_df["global_step"] = range(len(student_eval_df))

# Plot the loss after an 11-sample median filter.
plt.figure(figsize=(10, 6))
eval_kl_smoothed = scipy.signal.medfilt(student_eval_df["eval_kl_loss"], 11)
plt.plot(student_eval_df["global_step"], eval_kl_smoothed, label="Student KL Eval Loss", linestyle="-")
plt.xlabel("Global Step")
plt.ylabel("Eval KL Loss")
plt.title("Student KL Eval Loss")
plt.grid(True)
plt.show()

In [None]:
# Student LM eval loss, smoothed with an 11-sample median filter.
# Relies on `student_eval_df` (with its `global_step` column) from the cell above.
plt.figure(figsize=(10, 6))
plt.plot(student_eval_df["global_step"], scipy.signal.medfilt(student_eval_df["eval_loss"], 11), label="Student LM Eval Loss", linestyle='-')
plt.title("Student LM Eval Loss")
# The x values are global steps, and the plotted series is an eval loss.
plt.xlabel("Global Step")
plt.ylabel("Eval LM Loss")
# Everything is applied before the single plt.show(): decorating after show()
# would target a new, empty figure and render a second blank plot.
plt.legend()
plt.grid(True)
plt.show()

In [None]:
# Next-token-prediction eval loss per round for teacher, ensemble and student,
# using only the rows logged under the `custom_eval` phase.
df_eval = df[df["phase"] == "custom_eval"]
df_teacher = df_eval[df_eval["role"] == "teacher"].sort_values("round")
df_ensemble = df_eval[df_eval["role"] == "ensemble"].sort_values("round")
df_student = df_eval[df_eval["role"] == "student"].sort_values("round")

plt.figure(figsize=(10, 6))
# One line per role: (rows, line style, colour, legend label).
for role_rows, role_linestyle, role_color, role_label in (
    (df_teacher, "-", "C1", "teacher"),
    (df_ensemble, ":", "C2", "ensemble"),
    (df_student, "-", "C3", "student"),
):
    plt.plot(
        role_rows["round"],
        role_rows["eval_loss"],
        marker="o", linestyle=role_linestyle, color=role_color, label=role_label
    )

plt.title("Next Token Prediction loss across rounds")
plt.xlabel("Round")
plt.ylabel("Eval Loss")
plt.legend()
plt.grid(True)
plt.show()


In [None]:
# Perplexity per round for teacher, ensemble and student (custom evaluation).
# Relies on `df_teacher`, `df_ensemble` and `df_student` from the cell above.
plt.figure(figsize=(10, 6))
# One line per role: (rows, line style, colour, legend label).
for role_rows, role_linestyle, role_color, role_label in (
    (df_teacher, "-", "C1", "teacher"),
    (df_ensemble, ":", "C2", "ensemble"),
    (df_student, "-", "C3", "student"),
):
    plt.plot(
        role_rows["round"],
        role_rows["perplexity"],
        marker="o", linestyle=role_linestyle, color=role_color, label=role_label
    )

plt.title("Perplexity across rounds")
plt.xlabel("Round")
plt.ylabel("Perplexity")
plt.legend()
plt.grid(True)
plt.show()

In [None]:
# Eval loss vs ensemble size, using only the rows logged under `custom_eval`.
# NOTE: these assignments rebind `df_ensemble` / `df_teacher`, replacing the
# round-sorted frames created in the earlier comparison cell.
df_ensemble = df[(df["role"] == "ensemble") & (df["phase"] == "custom_eval")]
df_teacher = df[(df["role"] == "teacher") & (df["phase"] == "custom_eval")]

# A line plot connects points in row order, so sort by the x column first;
# otherwise the line can zig-zag whenever the CSV is not ordered by size.
ensemble_by_size = df_ensemble.sort_values("ensemble_size")
teacher_by_size = df_teacher.sort_values("ensemble_size")

plt.figure(figsize=(10, 6))
plt.plot(ensemble_by_size["ensemble_size"], ensemble_by_size["eval_loss"], label="Ensemble", marker="o")
plt.plot(teacher_by_size["ensemble_size"], teacher_by_size["eval_loss"], label="Teacher", marker="o")
plt.xlabel("Ensemble Size")
plt.ylabel("Eval Loss")
plt.title("Eval Loss vs Ensemble Size")
plt.grid(True)
plt.legend()
plt.show()
