In [1]:
from pruning import *
from train_utils import *
import json
import copy 
from datasets import load_dataset
import os

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Generate all the acceptable model sizes for the GPT-2.
# The search below is kept for reference only and is not executed; its result
# is read back from pruning_params.json further down.
#
# num_heads_options = [8, 10, 12]
# hidden_size_options = [2.5, 3, 3.5, 4]
# embed_size_options = [512, 640, 768]
#
# param_range = (115_000_000, 135_000_000)
#
# model_name = "openai-community/gpt2-medium"
# base_model, tokenizer = load_model(model_name)
#
# acceptable_params = find_acceptable_model_sizes(
#     base_model, tokenizer,
#     num_heads_options, hidden_size_options, embed_size_options,
#     param_range,
# )

# Read the candidate configurations straight from disk. The training loop
# later in this notebook reads "num_heads", "hidden_size" and "embed_size"
# from every entry.
with open("pruning_params.json", "r") as params_file:
    acceptable_params = json.load(params_file)


In [3]:
# Load the text corpus and the base GPT-2 checkpoint, then run the forward
# (calibration) pass on the still-unpruned model.
# Note: trust_remote_code=True lets the dataset repository's loading script
# execute on this machine; only use it with sources you trust.
model_name = "openai-community/gpt2-medium"
dataset = load_dataset("stas/openwebtext-10k", trust_remote_code=True)
base_model, tokenizer = load_model(model_name)

# 128 samples in batches of 4.
# NOTE(review): a later cell calls remove_all_forward_hooks() on copies of
# base_model, so this pass presumably registers forward hooks — confirm.
calibration_pass(
    model=base_model,
    tokenizer=tokenizer,
    dataset=dataset,
    batch_size=4,
    sample_size=128,
)

100%|██████████| 32/32 [00:11<00:00,  2.83it/s]


In [None]:
# Sweep over every candidate configuration: prune a fresh copy of the base
# model, fine-tune it, measure its perplexity, and cache both sets of metrics
# on disk so an interrupted sweep can be resumed.
#
# NOTE(review): the last recorded run of this cell ended with
# "TypeError: 'GPT2TokenizerFast' object is not subscriptable". No loop
# message appears in that output, so the failure is probably in the
# tokenize_dataset() call just below (possibly an argument-order mismatch with
# the helper's signature) — confirm against train_utils before re-running.
# NOTE(review): `torch` is not imported explicitly in this notebook; it is
# assumed to be provided by one of the star imports — confirm.
tokenized_dataset = tokenize_dataset(tokenizer, dataset)

os.makedirs("./saved_metrics", exist_ok=True)

training_metrics_path = "./saved_metrics/training_metrics.json"
eval_metrics_path = "./saved_metrics/eval_metrics.json"


def _load_metrics(path):
    """Return the metrics dict cached at ``path``, or ``{}`` if the file does not exist."""
    if not os.path.exists(path):
        return {}
    with open(path, "r") as f:
        return json.load(f)


def _save_metrics(path, metrics):
    """Write ``metrics`` to ``path`` as indented JSON without risking the existing cache.

    The payload is serialized in memory first and written to a sibling
    temporary file that then replaces ``path``, so a serialization error or an
    interrupt cannot leave a truncated cache file behind.
    """
    payload = json.dumps(metrics, indent=4)
    tmp_path = f"{path}.tmp"
    with open(tmp_path, "w") as f:
        f.write(payload)
    os.replace(tmp_path, path)


training_metrics = _load_metrics(training_metrics_path)
eval_metrics = _load_metrics(eval_metrics_path)

for param in acceptable_params:
    num_heads = param["num_heads"]
    hidden_size = param["hidden_size"]
    embed_size = param["embed_size"]

    param_key = f"num_heads={num_heads}_hidden_size={hidden_size}_embed_size={embed_size}"

    # A configuration is only complete when BOTH metrics are cached. The
    # fine-tuned weights are not persisted by this cell, so a missing
    # perplexity entry can only be produced by pruning and training again.
    if param_key in training_metrics and param_key in eval_metrics:
        print(f"Skipping training for {param_key}, already exists.")
        print(f"Skipping evaluation for {param_key}, already exists.")
        continue

    print(f"Training model for {param_key}...")

    # Prune a private copy so base_model stays intact for the next configuration.
    model = copy.deepcopy(base_model)
    prune_model(model, num_heads, int(hidden_size * embed_size), embed_size)

    remove_all_forward_hooks(model)
    torch.cuda.empty_cache()

    trainer = trainer_gpt2(model, tokenizer, tokenized_dataset, batch_size=4, num_epochs=2)
    trainer.evaluate()
    trainer.train()
    # NOTE(review): assumes the object returned by trainer_gpt2 exposes a
    # zero-argument log_metrics() that returns JSON-serializable data — confirm.
    training_metrics[param_key] = trainer.log_metrics()
    _save_metrics(training_metrics_path, training_metrics)

    print(f"Evaluating perplexity for {param_key}...")

    # Evaluate the pruned, fine-tuned model itself. The previous version
    # evaluated a fresh deepcopy of base_model here, so every configuration
    # recorded the perplexity of the unpruned base model.
    # NOTE(review): assumes the trainer updates `model` in place — confirm.
    eval_metrics[param_key] = evaluate_perplexity(model, tokenizer, stride=1024)
    _save_metrics(eval_metrics_path, eval_metrics)

    # Drop the per-configuration objects before the next deepcopy to keep
    # peak GPU memory down.
    del trainer, model
    torch.cuda.empty_cache()

TypeError: 'GPT2TokenizerFast' object is not subscriptable