<a href="https://colab.research.google.com/github/peremartra/LLMOptCost/blob/main/PRUNING/pruning_structured_gpt2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

#Import Libraries & Support functions.

In [7]:
import torch
import torch.nn.utils.prune as prune
from transformers import AutoModelForCausalLM, AutoTokenizer
import matplotlib.pyplot as plt

In [8]:
# Load  Hugging Face model
base_model_name = "gpt2"
pruned_model_name = "gpt2-uncased-pruned"

In [9]:
model = AutoModelForCausalLM.from_pretrained(base_model_name)

In [10]:
print(model.config)

GPT2Config {
  "_name_or_path": "gpt2",
  "activation_function": "gelu_new",
  "architectures": [
    "GPT2LMHeadModel"
  ],
  "attn_pdrop": 0.1,
  "bos_token_id": 50256,
  "embd_pdrop": 0.1,
  "eos_token_id": 50256,
  "initializer_range": 0.02,
  "layer_norm_epsilon": 1e-05,
  "model_type": "gpt2",
  "n_ctx": 1024,
  "n_embd": 768,
  "n_head": 12,
  "n_inner": null,
  "n_layer": 12,
  "n_positions": 1024,
  "reorder_and_upcast_attn": false,
  "resid_pdrop": 0.1,
  "scale_attn_by_inverse_layer_idx": false,
  "scale_attn_weights": true,
  "summary_activation": null,
  "summary_first_dropout": 0.1,
  "summary_proj_to_labels": true,
  "summary_type": "cls_index",
  "summary_use_proj": true,
  "task_specific_params": {
    "text-generation": {
      "do_sample": true,
      "max_length": 50
    }
  },
  "transformers_version": "4.44.2",
  "use_cache": true,
  "vocab_size": 50257
}



In [11]:
# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(base_model_name)

In [12]:
def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

In [13]:
def compute_neuron_importance(mlp_layer):
    # Access the first linear layer in the MLP
    fc_in = mlp_layer.c_fc
    weight = fc_in.weight.data  # Shape: (intermediate_size, hidden_size)

    # Compute the L2 norm of each neuron (over input features)
    neuron_importance = torch.norm(weight, p=2, dim=1)  # Shape: (intermediate_size,)

    return neuron_importance

In [14]:
def select_neurons_to_prune(neuron_importance, pruning_percentage):
    num_neurons = neuron_importance.size(0)
    num_prune = int(num_neurons * pruning_percentage)

    # Get indices of neurons sorted by importance
    sorted_indices = torch.argsort(neuron_importance)

    # Select indices to prune (least important neurons)
    neurons_to_prune = sorted_indices[:num_prune]

    return neurons_to_prune

In [15]:
def prune_mlp_neurons(mlp_layer, neurons_to_prune):
    # Access the MLP's linear layers
    fc_in = mlp_layer.c_fc
    fc_out = mlp_layer.c_proj

    # Prune neurons in fc_in (input to MLP)
    prune_linear_layer(fc_in, neurons_to_prune, dim=0)

    # Prune corresponding neurons in fc_out (output of MLP)
    prune_linear_layer(fc_out, neurons_to_prune, dim=1)

In [16]:
def prune_linear_layer(layer, indices_to_prune, dim=0):
    """
    Prune specified indices from a linear layer along the given dimension.
    """
    # Convert indices to keep
    indices_to_keep = torch.tensor([
        idx for idx in range(layer.weight.size(dim))
        if idx not in set(indices_to_prune.tolist())
    ])

    # Prune weights
    weight = layer.weight.data.index_select(dim, indices_to_keep)
    layer.weight = torch.nn.Parameter(weight)

    # Prune biases if necessary
    if layer.bias is not None:
        if dim == 0:
            bias = layer.bias.data.index_select(0, indices_to_keep)
            layer.bias = torch.nn.Parameter(bias)

    # Update layer dimensions
    if dim == 0:
        layer.out_features = weight.size(0)
    else:
        layer.in_features = weight.size(1)

In [17]:
def prune_mlp_neurons_in_model(model, pruning_percentage):
    num_layers = model.config.n_layer

    for layer_idx in range(num_layers):
        # Access the MLP layer
        mlp_layer = model.transformer.h[layer_idx].mlp

        # Compute neuron importance
        neuron_importance = compute_neuron_importance(mlp_layer)

        # Select neurons to prune
        neurons_to_prune = select_neurons_to_prune(neuron_importance, pruning_percentage)

        # Prune neurons
        prune_mlp_neurons(mlp_layer, neurons_to_prune)

    # After pruning, update n_inner in the config
    new_intermediate_size = model.transformer.h[0].mlp.c_fc.out_features
    model.config.n_inner = new_intermediate_size


In [18]:
# Before pruning
total_params_before = count_parameters(model)
print(f"Total parameters before pruning: {total_params_before}")

Total parameters before pruning: 124439808


In [20]:
prompt="Paris is a"
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
base_output = model.generate(**inputs)
base_response = tokenizer.decode(base_output[0], skip_special_tokens=True)
print(base_response)

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Paris is a city of people, of people, of people. It's a place where people come


## Prune Model

In [22]:
# Define the pruning percentage (e.g., 20% pruning)
pruning_percentage = 0.3  # Adjust as needed

# Prune MLP neurons in the model
prune_mlp_neurons_in_model(model, pruning_percentage)

### Modify Configuration & test pruned model

In [23]:
# After pruning
total_params_after = count_parameters(model)
print(f"Total parameters after pruning: {total_params_after}")

# Calculate the number of parameters removed
params_removed = total_params_before - total_params_after
print(f"Number of parameters removed: {params_removed}")

# Calculate percentage reduction
percent_reduction = 100.0 * params_removed / total_params_before
print(f"Percentage reduction in parameters: {percent_reduction:.2f}%")


Total parameters after pruning: 107451960
Number of parameters removed: 16987848
Percentage reduction in parameters: 13.65%


In [24]:
prompt="Tell me a joke"
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
base_output = model.generate(**inputs)
base_response = tokenizer.decode(base_output[0], skip_special_tokens=True)
print(base_response)

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


RuntimeError: mat1 and mat2 shapes cannot be multiplied (4x768 and 538x3072)

## Save & Upload Model

In [None]:
# Save the quantized model
model.save_pretrained(
    "bloomz-560m-pruned-structured3",
)


In [None]:
tokenizer.save_pretrained("bloomz-560m-pruned-structured3")

('bloomz-560m-pruned-structured3/tokenizer_config.json',
 'bloomz-560m-pruned-structured3/special_tokens_map.json',
 'bloomz-560m-pruned-structured3/tokenizer.json')

In [None]:
model.push_to_hub("bloomz-560m-pruned-structured3",
                  private=True,
                  use_temp_dir=False)

model.safetensors:   0%|          | 0.00/2.00G [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/oopere/bloomz-560m-pruned-structured3/commit/a021ec286c6125e5b8750f4fc001891940ac8503', commit_message='Upload BloomForCausalLM', commit_description='', oid='a021ec286c6125e5b8750f4fc001891940ac8503', pr_url=None, pr_revision=None, pr_num=None)

In [None]:
tokenizer.push_to_hub("bloomz-560m-pruned-structured3",
                      private=False,
                      use_temp_dir=False)

README.md:   0%|          | 0.00/5.17k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/14.5M [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/oopere/bloomz-560m-pruned-structured3/commit/30657e1a9c83e8978d070bb686f794a0018e9dd4', commit_message='Upload tokenizer', commit_description='', oid='30657e1a9c83e8978d070bb686f794a0018e9dd4', pr_url=None, pr_revision=None, pr_num=None)

# Load the pruned model from Hugging Face & Test.

In [None]:
download_model_pruned_name = "oopere/bloomz-560m-pruned-structured3"
model = AutoModelForCausalLM.from_pretrained(download_model_pruned_name)

config.json:   0%|          | 0.00/836 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/2.00G [00:00<?, ?B/s]

RuntimeError: Error(s) in loading state_dict for BloomForCausalLM:
	size mismatch for transformer.h.0.mlp.dense_h_to_4h.weight: copying a param with shape torch.Size([2868, 1024]) from checkpoint, the shape in current model is torch.Size([4096, 1024]).
	size mismatch for transformer.h.0.mlp.dense_h_to_4h.bias: copying a param with shape torch.Size([2868]) from checkpoint, the shape in current model is torch.Size([4096]).
	size mismatch for transformer.h.0.mlp.dense_4h_to_h.weight: copying a param with shape torch.Size([1024, 2868]) from checkpoint, the shape in current model is torch.Size([1024, 4096]).
	size mismatch for transformer.h.1.mlp.dense_h_to_4h.weight: copying a param with shape torch.Size([2868, 1024]) from checkpoint, the shape in current model is torch.Size([4096, 1024]).
	size mismatch for transformer.h.1.mlp.dense_h_to_4h.bias: copying a param with shape torch.Size([2868]) from checkpoint, the shape in current model is torch.Size([4096]).
	size mismatch for transformer.h.1.mlp.dense_4h_to_h.weight: copying a param with shape torch.Size([1024, 2868]) from checkpoint, the shape in current model is torch.Size([1024, 4096]).
	size mismatch for transformer.h.2.mlp.dense_h_to_4h.weight: copying a param with shape torch.Size([2868, 1024]) from checkpoint, the shape in current model is torch.Size([4096, 1024]).
	size mismatch for transformer.h.2.mlp.dense_h_to_4h.bias: copying a param with shape torch.Size([2868]) from checkpoint, the shape in current model is torch.Size([4096]).
	size mismatch for transformer.h.2.mlp.dense_4h_to_h.weight: copying a param with shape torch.Size([1024, 2868]) from checkpoint, the shape in current model is torch.Size([1024, 4096]).
	size mismatch for transformer.h.3.mlp.dense_h_to_4h.weight: copying a param with shape torch.Size([2868, 1024]) from checkpoint, the shape in current model is torch.Size([4096, 1024]).
	size mismatch for transformer.h.3.mlp.dense_h_to_4h.bias: copying a param with shape torch.Size([2868]) from checkpoint, the shape in current model is torch.Size([4096]).
	size mismatch for transformer.h.3.mlp.dense_4h_to_h.weight: copying a param with shape torch.Size([1024, 2868]) from checkpoint, the shape in current model is torch.Size([1024, 4096]).
	size mismatch for transformer.h.4.mlp.dense_h_to_4h.weight: copying a param with shape torch.Size([2868, 1024]) from checkpoint, the shape in current model is torch.Size([4096, 1024]).
	size mismatch for transformer.h.4.mlp.dense_h_to_4h.bias: copying a param with shape torch.Size([2868]) from checkpoint, the shape in current model is torch.Size([4096]).
	size mismatch for transformer.h.4.mlp.dense_4h_to_h.weight: copying a param with shape torch.Size([1024, 2868]) from checkpoint, the shape in current model is torch.Size([1024, 4096]).
	size mismatch for transformer.h.5.mlp.dense_h_to_4h.weight: copying a param with shape torch.Size([2868, 1024]) from checkpoint, the shape in current model is torch.Size([4096, 1024]).
	size mismatch for transformer.h.5.mlp.dense_h_to_4h.bias: copying a param with shape torch.Size([2868]) from checkpoint, the shape in current model is torch.Size([4096]).
	size mismatch for transformer.h.5.mlp.dense_4h_to_h.weight: copying a param with shape torch.Size([1024, 2868]) from checkpoint, the shape in current model is torch.Size([1024, 4096]).
	size mismatch for transformer.h.6.mlp.dense_h_to_4h.weight: copying a param with shape torch.Size([2868, 1024]) from checkpoint, the shape in current model is torch.Size([4096, 1024]).
	size mismatch for transformer.h.6.mlp.dense_h_to_4h.bias: copying a param with shape torch.Size([2868]) from checkpoint, the shape in current model is torch.Size([4096]).
	size mismatch for transformer.h.6.mlp.dense_4h_to_h.weight: copying a param with shape torch.Size([1024, 2868]) from checkpoint, the shape in current model is torch.Size([1024, 4096]).
	size mismatch for transformer.h.7.mlp.dense_h_to_4h.weight: copying a param with shape torch.Size([2868, 1024]) from checkpoint, the shape in current model is torch.Size([4096, 1024]).
	size mismatch for transformer.h.7.mlp.dense_h_to_4h.bias: copying a param with shape torch.Size([2868]) from checkpoint, the shape in current model is torch.Size([4096]).
	size mismatch for transformer.h.7.mlp.dense_4h_to_h.weight: copying a param with shape torch.Size([1024, 2868]) from checkpoint, the shape in current model is torch.Size([1024, 4096]).
	size mismatch for transformer.h.8.mlp.dense_h_to_4h.weight: copying a param with shape torch.Size([2868, 1024]) from checkpoint, the shape in current model is torch.Size([4096, 1024]).
	size mismatch for transformer.h.8.mlp.dense_h_to_4h.bias: copying a param with shape torch.Size([2868]) from checkpoint, the shape in current model is torch.Size([4096]).
	size mismatch for transformer.h.8.mlp.dense_4h_to_h.weight: copying a param with shape torch.Size([1024, 2868]) from checkpoint, the shape in current model is torch.Size([1024, 4096]).
	size mismatch for transformer.h.9.mlp.dense_h_to_4h.weight: copying a param with shape torch.Size([2868, 1024]) from checkpoint, the shape in current model is torch.Size([4096, 1024]).
	size mismatch for transformer.h.9.mlp.dense_h_to_4h.bias: copying a param with shape torch.Size([2868]) from checkpoint, the shape in current model is torch.Size([4096]).
	size mismatch for transformer.h.9.mlp.dense_4h_to_h.weight: copying a param with shape torch.Size([1024, 2868]) from checkpoint, the shape in current model is torch.Size([1024, 4096]).
	size mismatch for transformer.h.10.mlp.dense_h_to_4h.weight: copying a param with shape torch.Size([2868, 1024]) from checkpoint, the shape in current model is torch.Size([4096, 1024]).
	size mismatch for transformer.h.10.mlp.dense_h_to_4h.bias: copying a param with shape torch.Size([2868]) from checkpoint, the shape in current model is torch.Size([4096]).
	size mismatch for transformer.h.10.mlp.dense_4h_to_h.weight: copying a param with shape torch.Size([1024, 2868]) from checkpoint, the shape in current model is torch.Size([1024, 4096]).
	size mismatch for transformer.h.11.mlp.dense_h_to_4h.weight: copying a param with shape torch.Size([2868, 1024]) from checkpoint, the shape in current model is torch.Size([4096, 1024]).
	size mismatch for transformer.h.11.mlp.dense_h_to_4h.bias: copying a param with shape torch.Size([2868]) from checkpoint, the shape in current model is torch.Size([4096]).
	size mismatch for transformer.h.11.mlp.dense_4h_to_h.weight: copying a param with shape torch.Size([1024, 2868]) from checkpoint, the shape in current model is torch.Size([1024, 4096]).
	size mismatch for transformer.h.12.mlp.dense_h_to_4h.weight: copying a param with shape torch.Size([2868, 1024]) from checkpoint, the shape in current model is torch.Size([4096, 1024]).
	size mismatch for transformer.h.12.mlp.dense_h_to_4h.bias: copying a param with shape torch.Size([2868]) from checkpoint, the shape in current model is torch.Size([4096]).
	size mismatch for transformer.h.12.mlp.dense_4h_to_h.weight: copying a param with shape torch.Size([1024, 2868]) from checkpoint, the shape in current model is torch.Size([1024, 4096]).
	size mismatch for transformer.h.13.mlp.dense_h_to_4h.weight: copying a param with shape torch.Size([2868, 1024]) from checkpoint, the shape in current model is torch.Size([4096, 1024]).
	size mismatch for transformer.h.13.mlp.dense_h_to_4h.bias: copying a param with shape torch.Size([2868]) from checkpoint, the shape in current model is torch.Size([4096]).
	size mismatch for transformer.h.13.mlp.dense_4h_to_h.weight: copying a param with shape torch.Size([1024, 2868]) from checkpoint, the shape in current model is torch.Size([1024, 4096]).
	size mismatch for transformer.h.14.mlp.dense_h_to_4h.weight: copying a param with shape torch.Size([2868, 1024]) from checkpoint, the shape in current model is torch.Size([4096, 1024]).
	size mismatch for transformer.h.14.mlp.dense_h_to_4h.bias: copying a param with shape torch.Size([2868]) from checkpoint, the shape in current model is torch.Size([4096]).
	size mismatch for transformer.h.14.mlp.dense_4h_to_h.weight: copying a param with shape torch.Size([1024, 2868]) from checkpoint, the shape in current model is torch.Size([1024, 4096]).
	size mismatch for transformer.h.15.mlp.dense_h_to_4h.weight: copying a param with shape torch.Size([2868, 1024]) from checkpoint, the shape in current model is torch.Size([4096, 1024]).
	size mismatch for transformer.h.15.mlp.dense_h_to_4h.bias: copying a param with shape torch.Size([2868]) from checkpoint, the shape in current model is torch.Size([4096]).
	size mismatch for transformer.h.15.mlp.dense_4h_to_h.weight: copying a param with shape torch.Size([1024, 2868]) from checkpoint, the shape in current model is torch.Size([1024, 4096]).
	size mismatch for transformer.h.16.mlp.dense_h_to_4h.weight: copying a param with shape torch.Size([2868, 1024]) from checkpoint, the shape in current model is torch.Size([4096, 1024]).
	size mismatch for transformer.h.16.mlp.dense_h_to_4h.bias: copying a param with shape torch.Size([2868]) from checkpoint, the shape in current model is torch.Size([4096]).
	size mismatch for transformer.h.16.mlp.dense_4h_to_h.weight: copying a param with shape torch.Size([1024, 2868]) from checkpoint, the shape in current model is torch.Size([1024, 4096]).
	size mismatch for transformer.h.17.mlp.dense_h_to_4h.weight: copying a param with shape torch.Size([2868, 1024]) from checkpoint, the shape in current model is torch.Size([4096, 1024]).
	size mismatch for transformer.h.17.mlp.dense_h_to_4h.bias: copying a param with shape torch.Size([2868]) from checkpoint, the shape in current model is torch.Size([4096]).
	size mismatch for transformer.h.17.mlp.dense_4h_to_h.weight: copying a param with shape torch.Size([1024, 2868]) from checkpoint, the shape in current model is torch.Size([1024, 4096]).
	size mismatch for transformer.h.18.mlp.dense_h_to_4h.weight: copying a param with shape torch.Size([2868, 1024]) from checkpoint, the shape in current model is torch.Size([4096, 1024]).
	size mismatch for transformer.h.18.mlp.dense_h_to_4h.bias: copying a param with shape torch.Size([2868]) from checkpoint, the shape in current model is torch.Size([4096]).
	size mismatch for transformer.h.18.mlp.dense_4h_to_h.weight: copying a param with shape torch.Size([1024, 2868]) from checkpoint, the shape in current model is torch.Size([1024, 4096]).
	size mismatch for transformer.h.19.mlp.dense_h_to_4h.weight: copying a param with shape torch.Size([2868, 1024]) from checkpoint, the shape in current model is torch.Size([4096, 1024]).
	size mismatch for transformer.h.19.mlp.dense_h_to_4h.bias: copying a param with shape torch.Size([2868]) from checkpoint, the shape in current model is torch.Size([4096]).
	size mismatch for transformer.h.19.mlp.dense_4h_to_h.weight: copying a param with shape torch.Size([1024, 2868]) from checkpoint, the shape in current model is torch.Size([1024, 4096]).
	size mismatch for transformer.h.20.mlp.dense_h_to_4h.weight: copying a param with shape torch.Size([2868, 1024]) from checkpoint, the shape in current model is torch.Size([4096, 1024]).
	size mismatch for transformer.h.20.mlp.dense_h_to_4h.bias: copying a param with shape torch.Size([2868]) from checkpoint, the shape in current model is torch.Size([4096]).
	size mismatch for transformer.h.20.mlp.dense_4h_to_h.weight: copying a param with shape torch.Size([1024, 2868]) from checkpoint, the shape in current model is torch.Size([1024, 4096]).
	size mismatch for transformer.h.21.mlp.dense_h_to_4h.weight: copying a param with shape torch.Size([2868, 1024]) from checkpoint, the shape in current model is torch.Size([4096, 1024]).
	size mismatch for transformer.h.21.mlp.dense_h_to_4h.bias: copying a param with shape torch.Size([2868]) from checkpoint, the shape in current model is torch.Size([4096]).
	size mismatch for transformer.h.21.mlp.dense_4h_to_h.weight: copying a param with shape torch.Size([1024, 2868]) from checkpoint, the shape in current model is torch.Size([1024, 4096]).
	size mismatch for transformer.h.22.mlp.dense_h_to_4h.weight: copying a param with shape torch.Size([2868, 1024]) from checkpoint, the shape in current model is torch.Size([4096, 1024]).
	size mismatch for transformer.h.22.mlp.dense_h_to_4h.bias: copying a param with shape torch.Size([2868]) from checkpoint, the shape in current model is torch.Size([4096]).
	size mismatch for transformer.h.22.mlp.dense_4h_to_h.weight: copying a param with shape torch.Size([1024, 2868]) from checkpoint, the shape in current model is torch.Size([1024, 4096]).
	size mismatch for transformer.h.23.mlp.dense_h_to_4h.weight: copying a param with shape torch.Size([2868, 1024]) from checkpoint, the shape in current model is torch.Size([4096, 1024]).
	size mismatch for transformer.h.23.mlp.dense_h_to_4h.bias: copying a param with shape torch.Size([2868]) from checkpoint, the shape in current model is torch.Size([4096]).
	size mismatch for transformer.h.23.mlp.dense_4h_to_h.weight: copying a param with shape torch.Size([1024, 2868]) from checkpoint, the shape in current model is torch.Size([1024, 4096]).
	You may consider adding `ignore_mismatched_sizes=True` in the model `from_pretrained` method.