In [None]:
import matplotlib.pyplot as plt
import numpy as np
import torch
import pickle
import os

In [20]:
# Learning rates of the training runs whose results are loaded below.
lrs = [1e-3, 1e-4, 5e-4]

# Result containers, one for the plain VGG runs and one for the VGG_BN runs.
vgg_data, vgg_bn_data = {}, {}

def process_results(results):
    """Reduce a loaded results dict to the loss and gradient records used for plotting.

    Args:
        results: Mapping with a ``"train_losses"`` entry and an iterable
            ``"grads"`` entry.

    Returns:
        dict: ``{"losses": ..., "grads": ...}``. ``"losses"`` is
        ``results["train_losses"]`` unchanged. ``"grads"`` is a new list in
        which every ``torch.Tensor`` has been detached, moved to the CPU and
        converted to a NumPy array; non-tensor entries are kept as they are.
    """
    grads_cpu = []
    for grad in results["grads"]:
        # Make sure tensors are detached and on the CPU before the NumPy
        # conversion; anything that is not a tensor passes through untouched.
        if isinstance(grad, torch.Tensor):
            grad = grad.detach().cpu().numpy()
        grads_cpu.append(grad)
    return {"losses": results["train_losses"], "grads": grads_cpu}

for lr in lrs:
    # Store each run under a plain-float key.
    lr_key = float(lr)

    # The run directory name embeds the learning rate in ``.0e`` notation.
    # NOTE(review): pickle.load can execute arbitrary code - only point this
    # at result files you produced yourself.
    with open(f"results/VGG-{lr:.0e}/training_results.pkl", "rb") as f:
        results_VGG = pickle.load(f)
    # The file is already closed here; post-processing works on the loaded object.
    vgg_data[lr_key] = process_results(results_VGG)

    with open(f"results/VGG_BN-{lr:.0e}/training_results.pkl", "rb") as f:
        results_VGG_BN = pickle.load(f)
    vgg_bn_data[lr_key] = process_results(results_VGG_BN)


In [23]:
def plot_train_loss(vgg_data, vgg_bn_data, save_path="figures/train_loss_curve.png", lr=1e-3):
    """Plot the VGG and VGG_BN training-loss curves for one learning rate and save the figure.

    Args:
        vgg_data: Mapping from learning rate to a dict holding a ``"losses"``
            sequence for the plain VGG run.
        vgg_bn_data: Same structure for the VGG_BN run.
        save_path: Destination handed to ``savefig``. When it is a filesystem
            path, its parent directory is created if it does not exist yet.
        lr: Key of the run to plot in both mappings (defaults to ``1e-3``,
            the value that used to be hard-coded).

    Raises:
        KeyError: If ``lr`` is not a key of both mappings.
    """
    vgg_loss = vgg_data[lr]["losses"]
    vgg_bn_loss = vgg_bn_data[lr]["losses"]

    # savefig does not create missing directories, so make sure the target
    # folder exists. Non-path targets (e.g. file objects) are left alone.
    if isinstance(save_path, (str, os.PathLike)):
        out_dir = os.path.dirname(save_path)
        if out_dir:
            os.makedirs(out_dir, exist_ok=True)

    fig, ax = plt.subplots(figsize=(8, 5))
    try:
        # Each curve gets its own x-axis so runs with a different number of
        # recorded steps can still be drawn together.
        ax.plot(range(len(vgg_loss)), vgg_loss, label="VGG", color="red")
        ax.plot(range(len(vgg_bn_loss)), vgg_bn_loss, label="VGG_BN", color="green")
        ax.set_title("Train Loss Curve (VGG vs VGG_BN)")
        ax.set_xlabel("Training Step")
        ax.set_ylabel("Loss")
        ax.legend()
        ax.grid(True)
        fig.tight_layout()
        fig.savefig(save_path)
    finally:
        # Release the figure even if drawing or saving fails.
        plt.close(fig)

def plot_combined_landscape(data1, data2, label1, label2, save_path="figures/loss_landscape.png"):
    """Plot, per training step, the min-max loss band across all runs of two result sets.

    Args:
        data1: Mapping whose values are dicts holding a ``"losses"`` sequence
            (one entry per run) for the first model.
        data2: Same structure for the second model.
        label1: Legend name for the ``data1`` band.
        label2: Legend name for the ``data2`` band.
        save_path: Destination handed to ``savefig``. When it is a filesystem
            path, its parent directory is created if it does not exist yet.

    Raises:
        ValueError: If ``data1`` or ``data2`` contains no runs.
    """
    runs1 = [run["losses"] for run in data1.values()]
    runs2 = [run["losses"] for run in data2.values()]
    if not runs1 or not runs2:
        raise ValueError(
            "plot_combined_landscape needs at least one run in both data1 and data2"
        )

    # Only steps recorded by every run can be compared, so truncate to the
    # shortest loss history instead of trusting the first run's length.
    steps = range(min(len(losses) for losses in runs1 + runs2))
    min1 = [min(losses[i] for losses in runs1) for i in steps]
    max1 = [max(losses[i] for losses in runs1) for i in steps]
    min2 = [min(losses[i] for losses in runs2) for i in steps]
    max2 = [max(losses[i] for losses in runs2) for i in steps]

    # savefig does not create missing directories, so make sure the target
    # folder exists. Non-path targets (e.g. file objects) are left alone.
    if isinstance(save_path, (str, os.PathLike)):
        out_dir = os.path.dirname(save_path)
        if out_dir:
            os.makedirs(out_dir, exist_ok=True)

    fig, ax = plt.subplots(figsize=(8, 5))
    try:
        ax.fill_between(steps, min1, max1, alpha=0.3, color='orange', label=f'{label1} Loss Range')
        ax.fill_between(steps, min2, max2, alpha=0.3, color='green', label=f'{label2} Loss Range')
        # Dashed line marks the lower edge of each band, solid line the upper edge.
        ax.plot(steps, min1, '--', color='orange')
        ax.plot(steps, max1, color='orange')
        ax.plot(steps, min2, '--', color='green')
        ax.plot(steps, max2, color='green')
        ax.set_title("Loss Landscape Comparison")
        ax.set_xlabel("Training Step")
        ax.set_ylabel("Loss")
        ax.legend()
        ax.grid(True)
        fig.tight_layout()
        fig.savefig(save_path)
    finally:
        # Release the figure even if drawing or saving fails.
        plt.close(fig)

def compute_grad_diffs(grads):
    """Return the L2 norm of the change between each pair of consecutive gradient records.

    Args:
        grads: Sequence of gradient records. Each record is converted with
            ``np.asarray`` and flattened, so NumPy arrays, nested lists and
            plain scalars are all accepted. Consecutive records must hold the
            same number of elements.

    Returns:
        list: ``len(grads) - 1`` values, where entry ``i`` is
        ``||grads[i + 1] - grads[i]||`` (L2 norm). Empty when fewer than two
        records are given.
    """
    # asarray + ravel accepts non-array records, which would fail on .flatten().
    flat = [np.asarray(grad).ravel() for grad in grads]
    return [np.linalg.norm(curr - prev) for prev, curr in zip(flat, flat[1:])]

def plot_grad_variation(vgg_data, vgg_bn_data, save_path="figures/grad_variation.png", lr=1e-3):
    """Plot the step-to-step gradient change of VGG and VGG_BN for one learning rate.

    The plotted quantity is whatever ``compute_grad_diffs`` returns for the
    ``"grads"`` record of each run.

    Args:
        vgg_data: Mapping from learning rate to a dict holding a ``"grads"``
            sequence for the plain VGG run.
        vgg_bn_data: Same structure for the VGG_BN run.
        save_path: Destination handed to ``savefig``. When it is a filesystem
            path, its parent directory is created if it does not exist yet.
        lr: Key of the run to plot in both mappings (defaults to ``1e-3``);
            pass another learning rate to compare a different run.

    Raises:
        KeyError: If ``lr`` is not a key of both mappings.
    """
    diffs_vgg = compute_grad_diffs(vgg_data[lr]["grads"])
    diffs_bn = compute_grad_diffs(vgg_bn_data[lr]["grads"])

    # savefig does not create missing directories, so make sure the target
    # folder exists. Non-path targets (e.g. file objects) are left alone.
    if isinstance(save_path, (str, os.PathLike)):
        out_dir = os.path.dirname(save_path)
        if out_dir:
            os.makedirs(out_dir, exist_ok=True)

    fig, ax = plt.subplots(figsize=(8, 5))
    try:
        # The first difference belongs to step 1. Each curve gets its own
        # x-axis so runs with a different number of records still plot.
        ax.plot(range(1, len(diffs_vgg) + 1), diffs_vgg, label="VGG", color="red")
        ax.plot(range(1, len(diffs_bn) + 1), diffs_bn, label="VGG_BN", color="green")
        ax.set_title("Gradient Variation (∥∇Lₜ₊₁ − ∇Lₜ∥)")
        ax.set_xlabel("Training Step")
        ax.set_ylabel("Gradient Difference (L2 Norm)")
        ax.legend()
        ax.grid(True)
        fig.tight_layout()
        fig.savefig(save_path)
    finally:
        # Release the figure even if drawing or saving fails.
        plt.close(fig)


In [24]:
# Loss curves of both models for a single learning rate.
plot_train_loss(vgg_data=vgg_data, vgg_bn_data=vgg_bn_data)

# Min/max loss band across all loaded learning rates, one band per model.
plot_combined_landscape(data1=vgg_data, data2=vgg_bn_data, label1="VGG", label2="VGG_BN")

# Norm of the gradient change between consecutive recorded steps.
plot_grad_variation(vgg_data=vgg_data, vgg_bn_data=vgg_bn_data)
