In [4]:
import torch
import torch.nn.functional as F
from data_utils import GuidedGridMLMDataset, GuidedGridMLM_collate_fn
from torch.utils.data import DataLoader
from GridMLM_tokenizers import GuidedGridMLMTokenizer
from models import GuidedMLMH
from tqdm import tqdm
import numpy as np
from generate_utils import load_model

In [5]:
train_dir = '/media/maindisk/data/hooktheory_hr/hooktheory_CA_train'

tokenizer = GuidedGridMLMTokenizer(fixed_length=256)
train_dataset = GuidedGridMLMDataset(train_dir, tokenizer, 512, frontloading=True)

Loading data file.


In [6]:
subfolder = 'unf_CA'
device_name = 'cuda:2'
curriculum_type = 'random'
ablation = 'all'

In [7]:
model = load_model( curriculum_type, subfolder, ablation, device_name, tokenizer )

In [31]:
all_z_list = []
for d in tqdm(train_dataset):
    z, mu, logvar, recon_seq, z_proj = model.vae(torch.LongTensor([d['input_ids']]).to(model.device)) # TODO: do we need to detach?
    z_dmodel = model.guidance_to_dmodel(z)
    all_z_list.append( z_dmodel.tolist() )

100%|██████████| 9105/9105 [00:39<00:00, 232.41it/s]


In [32]:
all_z = torch.FloatTensor(all_z_list)
print(all_z.shape)

torch.Size([9105, 1, 512])


In [37]:
def analyze_film_modulation(film_layer, all_z, threshold=0.1):
    with torch.no_grad():
        W_gamma = film_layer.film_gamma.weight      # (D_out, D_z)
        b_gamma = film_layer.film_gamma.bias        # (D_out)
        W_beta = film_layer.film_beta.weight
        b_beta = film_layer.film_beta.bias

        # Compute gamma(z) and beta(z)
        gamma_vals = (all_z @ W_gamma.T + b_gamma)  # (N, D_out)
        beta_vals = (all_z @ W_beta.T + b_beta)

        # gamma_vals = film_layer.film_gamma(all_z)
        # beta_vals = film_layer.film_beta(all_z)

        # Metrics
        gamma_diff = torch.norm(gamma_vals - 1.0, dim=1)  # Distance from identity
        beta_norm = torch.norm(beta_vals, dim=1)

        near_identity = (gamma_diff < threshold) & (beta_norm < threshold)
        percent_near_identity = 100 * near_identity.sum().item() / all_z.size(0)

        print(f"Layer: {film_layer.__class__.__name__}")
        print(f"γ values: min {gamma_vals.min():.3f}, max {gamma_vals.max():.3f}, mean {gamma_vals.mean():.3f}")
        print(f"β values: min {beta_vals.min():.3f}, max {beta_vals.max():.3f}, mean {beta_vals.mean():.3f}")
        print(f"Proportion near-identity FiLM: {percent_near_identity:.2f}%")

        return gamma_vals, beta_vals


In [38]:
# all_z = collect_z_vectors_from_training_data(model.vae, train_loader)

for i, layer in enumerate(model.encoder.layers):
    print(f"\n=== Layer {i} ===")
    analyze_film_modulation(layer, all_z.to(model.device))



=== Layer 0 ===
Layer: FiLMTransformerEncoderLayer
γ values: min -2.023, max 2.281, mean 0.030
β values: min -1.266, max 1.270, mean -0.002
Proportion near-identity FiLM: 783.99%

=== Layer 1 ===
Layer: FiLMTransformerEncoderLayer
γ values: min -2.567, max 2.021, mean -0.006
β values: min -1.255, max 1.270, mean 0.004
Proportion near-identity FiLM: 302.44%

=== Layer 2 ===
Layer: FiLMTransformerEncoderLayer
γ values: min -2.000, max 2.051, mean -0.024
β values: min -1.452, max 1.340, mean 0.003
Proportion near-identity FiLM: 431.82%

=== Layer 3 ===
Layer: FiLMTransformerEncoderLayer
γ values: min -2.041, max 2.069, mean -0.007
β values: min -1.275, max 1.305, mean -0.001
Proportion near-identity FiLM: 306.45%

=== Layer 4 ===
Layer: FiLMTransformerEncoderLayer
γ values: min -2.056, max 2.429, mean 0.009
β values: min -1.261, max 1.205, mean -0.001
Proportion near-identity FiLM: 315.75%

=== Layer 5 ===
Layer: FiLMTransformerEncoderLayer
γ values: min -2.001, max 2.136, mean 0.004
β v