In [1]:
import os
import pandas as pd
import numpy as np
from tqdm import tqdm
import matplotlib.pyplot as plt
import matplotlib as mpl
from matplotlib import cm
import seaborn as sns
sns.set(style='whitegrid')

# NOTE(review): the CSV path is an empty string (possibly redacted) and `df`
# is not read anywhere else in this cell; the aggregation loop below looks
# runs up in `df_dict` instead. TODO confirm the intended path and how
# `df_dict` is built from this table.
df = pd.read_csv('')

class MplColorHelper:
    """Translate scalar values into colors of a named Matplotlib colormap.

    A ``Normalize`` instance spanning ``[start_val, stop_val]`` and the
    colormap are wrapped in a ``ScalarMappable``, which performs the lookup.
    """

    def __init__(self, cmap_name, start_val, stop_val):
        """Build the colormap, the normalizer and the scalar-to-color mapper.

        Args:
            cmap_name: Name passed to ``plt.get_cmap``.
            start_val: Value used as ``vmin`` of the normalizer.
            stop_val: Value used as ``vmax`` of the normalizer.
        """
        colormap = plt.get_cmap(cmap_name)
        value_range = mpl.colors.Normalize(vmin=start_val, vmax=stop_val)

        self.cmap_name = cmap_name
        self.cmap = colormap
        self.norm = value_range
        self.scalarMap = cm.ScalarMappable(norm=value_range, cmap=colormap)

    def get_rgb(self, val):
        """Return the result of ``ScalarMappable.to_rgba(val)`` for ``val``."""
        rgba = self.scalarMap.to_rgba(val)
        return rgba

# One subplot row per entry: (suffix appended to the job name, y-axis label).
variants = [
    # ('qk_sqrt_parameterization_sp', 'SP 1/sqrt(d)'),
    ('qk_sqrt_parameterization_sp', 'SP'),
    # ('qk_sqrt_parameterization_mup', r'$\mu$P 1/sqrt(d)'),
    # ('qk_linear_parameterization_mup', r'$\mu$P 1/d'),
    ('qk_linear_parameterization_mup', r'$\mu$P'),
    # ('qk_sqrt_parameterization_sparse_mup', r'Sparse $\mu$P 1/sqrt(d)'),
    # ('qk_linear_parameterization_sparse_mup', r'Sparse $\mu$P 1/d'),
    ('qk_linear_parameterization_sparse_mup', r'S$\mu$Par'),
    # ('qk_linear_parameterization_sparse_mup_stdrenorm_neuronwise', r'Sparse $\mu$P 1/d stdrenorm_neuronwise'),
]

# One subplot column per entry: (column read from each run's table, panel title).
layer_types = [
    ('word_embedding_act_abs_mean', 'Word Embedding'),
    ('attn_output_act_abs_mean', 'Attention Output'),
    ('ffn_output_act_abs_mean', 'FFN Output'),
    ('output_logits_act_abs_mean vs samples', 'Output Logits'),
]

# Number of logged values taken per run and drawn as separate curves.
# t_max = 1
t_max = 10
# t_max = 5

# Curve colors are indexed by t over the range [0, t_max].
# color_helper = MplColorHelper('inferno', 0, t_max)
# color_helper = MplColorHelper('magma_r', 1, t_max+1)
color_helper = MplColorHelper('coolwarm', 0, t_max)
# color_helper = MplColorHelper('viridis', 0, t_max)

# Grid layout: variants down the rows, layer types across the columns.
n_rows, n_cols = len(variants), len(layer_types)
# squeeze=False keeps `axes` two-dimensional even for a single row or column,
# so it can always be indexed as axes[row, col].
fig, axes = plt.subplots(
    n_rows,
    n_cols,
    figsize=(4.5 * n_cols, 3 * n_rows),
    squeeze=False,
)

# Aggregate per-run activation statistics and draw one panel per
# (variant, layer type): mean +/- standard error over seeds as a function of
# weight density, with one curve per logged step t.
#
# NOTE(review): `df_dict` is not defined in this cell; it is assumed to be an
# existing mapping from job name to that run's table — TODO confirm.

# The sweep grid is the same for every variant, so it is defined once.
# sparsities = np.array([0, 0.5, 0.75, 0.875, 0.9375, 0.96875, 0.984375])
sparsities = np.array([0, 0.5, 0.75, 0.875, 0.9375])
densities = 1 - sparsities
seeds = np.arange(1, 11)
# seeds = np.arange(1,2)

for variant_idx, (variant, variant_str) in enumerate(variants):
    # Indexed as (layer_type, t, sparsity, seed). Entries left at 0 mean
    # "no data" and are filtered out before statistics are computed.
    results_matrix = np.zeros((len(layer_types), t_max, len(sparsities), len(seeds)))

    for sparsity_idx, sparsity in enumerate(sparsities):
        # The job name uses "0" for the dense run and a float otherwise.
        sparsity_str = float(sparsity) if sparsity != 0 else int(sparsity)
        for seed_idx, seed in enumerate(seeds):
            job_name = f'gpt2_h2048_d2_kv32_s{sparsity_str}_gbs4_mbs1_lr0.01676_mopt_static_seed{seed}_{variant}'
            try:
                ckpt_df = df_dict[job_name]
            except KeyError:
                # Missing run: report it and skip. Falling through would
                # reuse the previous job's table (or hit an undefined name).
                print(job_name)
                continue
            if len(ckpt_df) == 0 or ckpt_df['step'].max() == 0:
                print(job_name)
                continue
            for layer_type_idx, (layer_type, _) in enumerate(layer_types):
                values = ckpt_df[layer_type].dropna().values[:t_max].flatten()
                if len(values) < t_max:
                    # Short run: keep the steps that exist. The remaining
                    # slots stay 0 and are ignored by the statistics below.
                    print(f'{job_name}: only {len(values)}/{t_max} values for {layer_type}')
                results_matrix[layer_type_idx, :len(values), sparsity_idx, seed_idx] = values

    # The row label only needs to be set once per variant.
    axes[variant_idx, 0].set_ylabel(variant_str)

    for layer_type_idx, (layer_type, layer_type_str) in enumerate(layer_types):
        ax = axes[variant_idx, layer_type_idx]
        for t in range(t_max):
            color = color_helper.get_rgb(t)
            means = []
            stderrs = []
            for sparsity_idx in range(len(sparsities)):
                samples = results_matrix[layer_type_idx, t, sparsity_idx]
                nnz_results = samples[samples != 0]
                n_obs = len(nnz_results)
                # NaN (a gap in the curve) when there is nothing to average;
                # the sample standard error needs at least two observations.
                means.append(nnz_results.mean() if n_obs else np.nan)
                stderrs.append(
                    np.std(nnz_results, ddof=1) / np.sqrt(n_obs) if n_obs > 1 else np.nan
                )
            means = np.array(means)
            stderrs = np.array(stderrs)
            ax.plot(densities, means, label=f't={t}', color=color, marker='.')
            ax.fill_between(densities, means - stderrs, means + stderrs, color=color, alpha=0.5)

        # Column titles go on the top row, x labels on the bottom row.
        if variant_idx == 0:
            ax.set_title(layer_type_str)
        if variant_idx == n_rows - 1:
            ax.set_xlabel('Density')
        ax.set_xscale('log', base=2)
        # ax.set_yscale('log')
        # ax.set_ylim(1e-3, 1e3)
        if variant_idx < n_rows - 1:
            # Only the bottom row keeps its x tick labels.
            ax.xaxis.set_ticklabels([])

# A single legend in the top-left panel, added once all curves are drawn.
axes[0, 0].legend(loc='upper left')

# plt.suptitle(f'240321_mega_sparse_coord_check_h4096, h4096_d2_kv32_ffnmult1, 10 steps, {len(seeds)} seeds, SlimPJ')
plt.tight_layout()
plt.show()
plt.close()

ModuleNotFoundError: No module named 'matplotlib'