In [3]:
import torch
import os
import matplotlib.pyplot as plt
import pandas as pd
from model_config import ModelConfig
from pruning_methods.wanda import wanda_pruning
from pruning_methods.magnitude import magnitude_pruning
from evaluation_pruning import global_evaluation, generate_text, count_parameters, calculate_ecological_impact
from data_loading import get_wikitext2
from dotenv import load_dotenv
from plot_functions import plot_metrics, compare_prompt, compare_ecological_impact, plot_metrics_vertical


In [None]:
# Run python-dotenv's loader first so that the lookup below can also see
# values supplied through a local .env file (presumably where the token lives).
load_dotenv()

# Hugging Face access token; None when the variable is not set.
token = os.environ.get("HUGGINGFACE_TOKEN")

# Hub identifier of the Llama checkpoint.
llama_model = "meta-llama/Llama-3.2-1B"

### Settings

In [None]:
# Run on the first CUDA device when one is available, otherwise on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

# Pruning ratios to sweep: 0.0 through 0.6 in steps of 0.1.
ratios = [step / 10 for step in range(7)]

### Magnitude Pruning - Facebook/OPT-350M

In [10]:
# Build the OPT-350M configuration and load the model once; every ratio in
# the sweep below starts from a config returned by copy_model().
facebook_model_config = ModelConfig(model="facebook/opt-350m")
facebook_opt_model = facebook_model_config.load_llm()

# Data loaders are created once, from the sampling settings stored on the
# config, and shared by all evaluations in the sweep.
trainloader, testloader = get_wikitext2(
    facebook_model_config.nsamples,
    facebook_model_config.seed,
    facebook_model_config.seqlen,
    facebook_model_config.tokenizer,
)

results = []
for ratio in ratios:
    print(f"Pruning ratio: {ratio}")
    tmp_model_config = facebook_model_config.copy_model()

    # A ratio of 0 is evaluated without calling the pruning routine.
    if ratio:
        pruning_result = magnitude_pruning(tmp_model_config.model, ratio)

    result_eval = global_evaluation(
        tmp_model_config,
        ratio,
        trainloader=trainloader,
        testloader=testloader,
        is_structured=False,
        device=device,
    )
    results.append(result_eval)

    print(count_parameters(tmp_model_config.model))

Loading model 'facebook/opt-350m' from cache directory '.my_cache/llm_weights/'...




In [None]:
# Ecological-impact comparison across the swept ratios.
ecological_impact = compare_ecological_impact(results, ratios)
display(ecological_impact)

# Prompt comparison across the swept ratios.
prompt = compare_prompt(results, ratios)
display(prompt)

# Pull one series per metric out of the per-ratio result dicts, then hand
# all three to the combined plot.
perplexity = [entry["perplexity"] for entry in results]
flops = [entry["computational_complexity"]["total_flops"] for entry in results]
model_size = [entry["model_size"] for entry in results]
plot_metrics_vertical(ratios, perplexity, flops, model_size)

## Magnitude Pruning

In [11]:

# Magnitude-pruning sweep for the checkpoint named by `llama_model`.
#
# `modelConfig` was not defined in any cell of this notebook, so this cell
# raised NameError on a fresh kernel (Restart & Run All). It is built here,
# mirroring the ModelConfig(...) / load_llm() sequence used for OPT-350M.
# NOTE(review): this assumes ModelConfig obtains the Hugging Face credentials
# on its own (e.g. from the environment populated by load_dotenv()) — confirm,
# or pass `token` explicitly if the constructor accepts it.
modelConfig = ModelConfig(model=llama_model)
modelConfig.load_llm()

# NOTE: `results` and `ratios` intentionally reuse the names from the
# OPT-350M section because the reporting cell that follows reads them; this
# replaces the values set in the Settings cell for the rest of the notebook.
results = []
ratios = [0.1, 0.2, 0.3]

trainloader, testloader = get_wikitext2(
    modelConfig.nsamples,
    modelConfig.seed,
    modelConfig.seqlen,
    modelConfig.tokenizer,
)

for ratio in ratios:
    print(f"Pruning ratio: {ratio}")
    tmpModelConfig = modelConfig.copy_model()

    # Guard kept so a 0.0 baseline can be added to `ratios` without pruning.
    if ratio != 0:
        pruning_result = magnitude_pruning(tmpModelConfig.model, ratio)

    results.append(
        global_evaluation(
            tmpModelConfig,
            ratio,
            trainloader=trainloader,
            testloader=testloader,
            is_structured=False,
            device=device,
        )
    )

    # Print the count for the copy handed to the pruning routine next to the
    # count for the model held by `modelConfig`, so the two can be compared.
    print(count_parameters(tmpModelConfig.model))
    print(count_parameters(modelConfig.model))


ImportError: 
 requires the protobuf library but it was not found in your environment. Checkout the instructions on the
installation page of its repo: https://github.com/protocolbuffers/protobuf/tree/master/python#installation and follow the ones
that match your environment. Please note that you may need to restart your runtime after installation.


In [None]:
# Ecological-impact comparison across the swept ratios.
ecological_impact = compare_ecological_impact(results, ratios)
display(ecological_impact)

# Prompt comparison across the swept ratios.
prompt = compare_prompt(results, ratios)
display(prompt)

# One plot per metric; each series is read from the per-ratio result dicts
# immediately before it is plotted.
perplexity = [entry["perplexity"] for entry in results]
plot_metrics(perplexity, ratios, "perplexity")

flops = [entry["computational_complexity"]["total_flops"] for entry in results]
plot_metrics(flops, ratios, "Flop")

model_size = [entry["model_size"] for entry in results]
plot_metrics(model_size, ratios, "Model Size")

## Wanda Pruning

In [None]:
# wanda_pruning(modelConfig)