<a href="https://colab.research.google.com/github/peremartra/LLMOptCost/blob/main/PRUNING/pruning_structured_distilgpt2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

#Import Libraries & Support functions.

In [1]:
import torch
import torch.nn.utils.prune as prune
from transformers import AutoModelForCausalLM, AutoTokenizer

In [2]:
# Check for GPU availability
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [3]:
def load_model_and_tokenizer(model_name):
    """ Load model and tokenizer from Hugging Face """
    model = AutoModelForCausalLM.from_pretrained(model_name).to(device)
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    return model, tokenizer

In [4]:
def get_answer(prompt, model, tokenizer):
    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    with torch.no_grad():
        outputs = model.generate(**inputs, max_length=50)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

In [5]:
def count_parameters(model):
    """ Count total parameters in the model """
    return sum(p.numel() for p in model.parameters() if p.requires_grad)


In [6]:
def compute_neuron_importance(linear_layer):
    weight = linear_layer.weight.data
    neuron_importance = torch.norm(weight, p=2, dim=1)
    return neuron_importance

In [7]:
def select_neurons_to_prune(neuron_importance, pruning_percentage):
    num_neurons = neuron_importance.size(0)
    num_prune = int(num_neurons * pruning_percentage)
    sorted_indices = torch.argsort(neuron_importance)
    neurons_to_prune = sorted_indices[:num_prune]
    return neurons_to_prune

In [8]:
def prune_linear_layer(layer, indices_to_prune, dim=0):
    device = layer.weight.device
    indices_to_keep = torch.tensor([idx for idx in range(layer.weight.size(dim))
                                    if idx not in set(indices_to_prune.tolist())], device=device)

    weight = layer.weight.data.index_select(dim, indices_to_keep)
    layer.weight = torch.nn.Parameter(weight)

    if layer.bias is not None and dim == 0:
        bias = layer.bias.data.index_select(0, indices_to_keep)
        layer.bias = torch.nn.Parameter(bias)

    if dim == 0:
        layer.out_features = weight.size(0)
    else:
        layer.in_features = weight.size(1)

In [16]:
def prune_mlp_neurons(mlp_layer, neurons_to_prune):
    prune_linear_layer(mlp_layer.c_fc, neurons_to_prune, dim=0)
    prune_linear_layer(mlp_layer.c_proj, neurons_to_prune, dim=1)

In [17]:
def prune_mlp_neurons_in_model(model, pruning_percentage):
    for layer in model.transformer.h:
        mlp_layer = layer.mlp
        neuron_importance = compute_neuron_importance(mlp_layer.c_fc)
        neurons_to_prune = select_neurons_to_prune(neuron_importance, pruning_percentage)
        prune_mlp_neurons(mlp_layer, neurons_to_prune)

    # Update the config to reflect the new sizes
    model.config.n_inner = model.transformer.h[0].mlp.c_fc.out_features


In [11]:
def prune_model(model, pruning_percentage=0.3):

    print(f"Total parameters before pruning: {count_parameters(model)}")

    prune_mlp_neurons_in_model(model, pruning_percentage)

    print(f"Total parameters after pruning: {count_parameters(model)}")


## Prune Model

In [12]:
# Load and prune the TinyLlama model
model_name = "distilgpt2"
""" Main function to prune model and test it """
model, tokenizer = load_model_and_tokenizer(model_name)

config.json:   0%|          | 0.00/762 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/353M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]



In [14]:
prompt="Paris is the capital of"
base_response = get_answer(prompt, model, tokenizer)
print(f"Pre-pruning response: {base_response}")

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Pre-pruning response: Paris is the capital of the United States.











































In [18]:
prune_model(model, 0.1)

Total parameters before pruning: 81912576
Total parameters after pruning: 79096632


In [19]:
base_response = get_answer(prompt, model, tokenizer)
print(f"Post-pruning response: {base_response}")

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


RuntimeError: mat1 and mat2 shapes cannot be multiplied (5x768 and 692x3072)

### Modify Configuration & test pruned model

## Save & Upload Model

In [27]:
# Save the quantized model
model.save_pretrained(
    "bloomz-560m-pruned-structured3",
)


In [28]:
tokenizer.save_pretrained("bloomz-560m-pruned-structured3")

('bloomz-560m-pruned-structured3/tokenizer_config.json',
 'bloomz-560m-pruned-structured3/special_tokens_map.json',
 'bloomz-560m-pruned-structured3/tokenizer.model',
 'bloomz-560m-pruned-structured3/added_tokens.json',
 'bloomz-560m-pruned-structured3/tokenizer.json')

In [29]:
model.push_to_hub("bloomz-560m-pruned-structured3",
                  private=True,
                  use_temp_dir=False)

README.md:   0%|          | 0.00/5.17k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/4.10G [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/oopere/bloomz-560m-pruned-structured3/commit/75144d4adb1fb84f426b814911a3b8ea648b398f', commit_message='Upload LlamaForCausalLM', commit_description='', oid='75144d4adb1fb84f426b814911a3b8ea648b398f', pr_url=None, pr_revision=None, pr_num=None)

In [30]:
tokenizer.push_to_hub("bloomz-560m-pruned-structured3",
                      private=False,
                      use_temp_dir=False)

tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

Upload 2 LFS files:   0%|          | 0/2 [00:00<?, ?it/s]

CommitInfo(commit_url='https://huggingface.co/oopere/bloomz-560m-pruned-structured3/commit/ccffba2020fc56d9db636ea26afb1ba7c647007d', commit_message='Upload tokenizer', commit_description='', oid='ccffba2020fc56d9db636ea26afb1ba7c647007d', pr_url=None, pr_revision=None, pr_num=None)

# Load the pruned model from Hugging Face & Test.

In [31]:
download_model_pruned_name = "oopere/bloomz-560m-pruned-structured3"
model = AutoModelForCausalLM.from_pretrained(download_model_pruned_name)

config.json:   0%|          | 0.00/722 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/4.10G [00:00<?, ?B/s]

RuntimeError: Error(s) in loading state_dict for LlamaForCausalLM:
	size mismatch for model.layers.0.mlp.gate_proj.weight: copying a param with shape torch.Size([5069, 2048]) from checkpoint, the shape in current model is torch.Size([5632, 2048]).
	size mismatch for model.layers.0.mlp.up_proj.weight: copying a param with shape torch.Size([5069, 2048]) from checkpoint, the shape in current model is torch.Size([5632, 2048]).
	size mismatch for model.layers.0.mlp.down_proj.weight: copying a param with shape torch.Size([2048, 5069]) from checkpoint, the shape in current model is torch.Size([2048, 5632]).
	size mismatch for model.layers.1.mlp.gate_proj.weight: copying a param with shape torch.Size([5069, 2048]) from checkpoint, the shape in current model is torch.Size([5632, 2048]).
	size mismatch for model.layers.1.mlp.up_proj.weight: copying a param with shape torch.Size([5069, 2048]) from checkpoint, the shape in current model is torch.Size([5632, 2048]).
	size mismatch for model.layers.1.mlp.down_proj.weight: copying a param with shape torch.Size([2048, 5069]) from checkpoint, the shape in current model is torch.Size([2048, 5632]).
	size mismatch for model.layers.2.mlp.gate_proj.weight: copying a param with shape torch.Size([5069, 2048]) from checkpoint, the shape in current model is torch.Size([5632, 2048]).
	size mismatch for model.layers.2.mlp.up_proj.weight: copying a param with shape torch.Size([5069, 2048]) from checkpoint, the shape in current model is torch.Size([5632, 2048]).
	size mismatch for model.layers.2.mlp.down_proj.weight: copying a param with shape torch.Size([2048, 5069]) from checkpoint, the shape in current model is torch.Size([2048, 5632]).
	size mismatch for model.layers.3.mlp.gate_proj.weight: copying a param with shape torch.Size([5069, 2048]) from checkpoint, the shape in current model is torch.Size([5632, 2048]).
	size mismatch for model.layers.3.mlp.up_proj.weight: copying a param with shape torch.Size([5069, 2048]) from checkpoint, the shape in current model is torch.Size([5632, 2048]).
	size mismatch for model.layers.3.mlp.down_proj.weight: copying a param with shape torch.Size([2048, 5069]) from checkpoint, the shape in current model is torch.Size([2048, 5632]).
	size mismatch for model.layers.4.mlp.gate_proj.weight: copying a param with shape torch.Size([5069, 2048]) from checkpoint, the shape in current model is torch.Size([5632, 2048]).
	size mismatch for model.layers.4.mlp.up_proj.weight: copying a param with shape torch.Size([5069, 2048]) from checkpoint, the shape in current model is torch.Size([5632, 2048]).
	size mismatch for model.layers.4.mlp.down_proj.weight: copying a param with shape torch.Size([2048, 5069]) from checkpoint, the shape in current model is torch.Size([2048, 5632]).
	size mismatch for model.layers.5.mlp.gate_proj.weight: copying a param with shape torch.Size([5069, 2048]) from checkpoint, the shape in current model is torch.Size([5632, 2048]).
	size mismatch for model.layers.5.mlp.up_proj.weight: copying a param with shape torch.Size([5069, 2048]) from checkpoint, the shape in current model is torch.Size([5632, 2048]).
	size mismatch for model.layers.5.mlp.down_proj.weight: copying a param with shape torch.Size([2048, 5069]) from checkpoint, the shape in current model is torch.Size([2048, 5632]).
	size mismatch for model.layers.6.mlp.gate_proj.weight: copying a param with shape torch.Size([5069, 2048]) from checkpoint, the shape in current model is torch.Size([5632, 2048]).
	size mismatch for model.layers.6.mlp.up_proj.weight: copying a param with shape torch.Size([5069, 2048]) from checkpoint, the shape in current model is torch.Size([5632, 2048]).
	size mismatch for model.layers.6.mlp.down_proj.weight: copying a param with shape torch.Size([2048, 5069]) from checkpoint, the shape in current model is torch.Size([2048, 5632]).
	size mismatch for model.layers.7.mlp.gate_proj.weight: copying a param with shape torch.Size([5069, 2048]) from checkpoint, the shape in current model is torch.Size([5632, 2048]).
	size mismatch for model.layers.7.mlp.up_proj.weight: copying a param with shape torch.Size([5069, 2048]) from checkpoint, the shape in current model is torch.Size([5632, 2048]).
	size mismatch for model.layers.7.mlp.down_proj.weight: copying a param with shape torch.Size([2048, 5069]) from checkpoint, the shape in current model is torch.Size([2048, 5632]).
	size mismatch for model.layers.8.mlp.gate_proj.weight: copying a param with shape torch.Size([5069, 2048]) from checkpoint, the shape in current model is torch.Size([5632, 2048]).
	size mismatch for model.layers.8.mlp.up_proj.weight: copying a param with shape torch.Size([5069, 2048]) from checkpoint, the shape in current model is torch.Size([5632, 2048]).
	size mismatch for model.layers.8.mlp.down_proj.weight: copying a param with shape torch.Size([2048, 5069]) from checkpoint, the shape in current model is torch.Size([2048, 5632]).
	size mismatch for model.layers.9.mlp.gate_proj.weight: copying a param with shape torch.Size([5069, 2048]) from checkpoint, the shape in current model is torch.Size([5632, 2048]).
	size mismatch for model.layers.9.mlp.up_proj.weight: copying a param with shape torch.Size([5069, 2048]) from checkpoint, the shape in current model is torch.Size([5632, 2048]).
	size mismatch for model.layers.9.mlp.down_proj.weight: copying a param with shape torch.Size([2048, 5069]) from checkpoint, the shape in current model is torch.Size([2048, 5632]).
	size mismatch for model.layers.10.mlp.gate_proj.weight: copying a param with shape torch.Size([5069, 2048]) from checkpoint, the shape in current model is torch.Size([5632, 2048]).
	size mismatch for model.layers.10.mlp.up_proj.weight: copying a param with shape torch.Size([5069, 2048]) from checkpoint, the shape in current model is torch.Size([5632, 2048]).
	size mismatch for model.layers.10.mlp.down_proj.weight: copying a param with shape torch.Size([2048, 5069]) from checkpoint, the shape in current model is torch.Size([2048, 5632]).
	size mismatch for model.layers.11.mlp.gate_proj.weight: copying a param with shape torch.Size([5069, 2048]) from checkpoint, the shape in current model is torch.Size([5632, 2048]).
	size mismatch for model.layers.11.mlp.up_proj.weight: copying a param with shape torch.Size([5069, 2048]) from checkpoint, the shape in current model is torch.Size([5632, 2048]).
	size mismatch for model.layers.11.mlp.down_proj.weight: copying a param with shape torch.Size([2048, 5069]) from checkpoint, the shape in current model is torch.Size([2048, 5632]).
	size mismatch for model.layers.12.mlp.gate_proj.weight: copying a param with shape torch.Size([5069, 2048]) from checkpoint, the shape in current model is torch.Size([5632, 2048]).
	size mismatch for model.layers.12.mlp.up_proj.weight: copying a param with shape torch.Size([5069, 2048]) from checkpoint, the shape in current model is torch.Size([5632, 2048]).
	size mismatch for model.layers.12.mlp.down_proj.weight: copying a param with shape torch.Size([2048, 5069]) from checkpoint, the shape in current model is torch.Size([2048, 5632]).
	size mismatch for model.layers.13.mlp.gate_proj.weight: copying a param with shape torch.Size([5069, 2048]) from checkpoint, the shape in current model is torch.Size([5632, 2048]).
	size mismatch for model.layers.13.mlp.up_proj.weight: copying a param with shape torch.Size([5069, 2048]) from checkpoint, the shape in current model is torch.Size([5632, 2048]).
	size mismatch for model.layers.13.mlp.down_proj.weight: copying a param with shape torch.Size([2048, 5069]) from checkpoint, the shape in current model is torch.Size([2048, 5632]).
	size mismatch for model.layers.14.mlp.gate_proj.weight: copying a param with shape torch.Size([5069, 2048]) from checkpoint, the shape in current model is torch.Size([5632, 2048]).
	size mismatch for model.layers.14.mlp.up_proj.weight: copying a param with shape torch.Size([5069, 2048]) from checkpoint, the shape in current model is torch.Size([5632, 2048]).
	size mismatch for model.layers.14.mlp.down_proj.weight: copying a param with shape torch.Size([2048, 5069]) from checkpoint, the shape in current model is torch.Size([2048, 5632]).
	size mismatch for model.layers.15.mlp.gate_proj.weight: copying a param with shape torch.Size([5069, 2048]) from checkpoint, the shape in current model is torch.Size([5632, 2048]).
	size mismatch for model.layers.15.mlp.up_proj.weight: copying a param with shape torch.Size([5069, 2048]) from checkpoint, the shape in current model is torch.Size([5632, 2048]).
	size mismatch for model.layers.15.mlp.down_proj.weight: copying a param with shape torch.Size([2048, 5069]) from checkpoint, the shape in current model is torch.Size([2048, 5632]).
	size mismatch for model.layers.16.mlp.gate_proj.weight: copying a param with shape torch.Size([5069, 2048]) from checkpoint, the shape in current model is torch.Size([5632, 2048]).
	size mismatch for model.layers.16.mlp.up_proj.weight: copying a param with shape torch.Size([5069, 2048]) from checkpoint, the shape in current model is torch.Size([5632, 2048]).
	size mismatch for model.layers.16.mlp.down_proj.weight: copying a param with shape torch.Size([2048, 5069]) from checkpoint, the shape in current model is torch.Size([2048, 5632]).
	size mismatch for model.layers.17.mlp.gate_proj.weight: copying a param with shape torch.Size([5069, 2048]) from checkpoint, the shape in current model is torch.Size([5632, 2048]).
	size mismatch for model.layers.17.mlp.up_proj.weight: copying a param with shape torch.Size([5069, 2048]) from checkpoint, the shape in current model is torch.Size([5632, 2048]).
	size mismatch for model.layers.17.mlp.down_proj.weight: copying a param with shape torch.Size([2048, 5069]) from checkpoint, the shape in current model is torch.Size([2048, 5632]).
	size mismatch for model.layers.18.mlp.gate_proj.weight: copying a param with shape torch.Size([5069, 2048]) from checkpoint, the shape in current model is torch.Size([5632, 2048]).
	size mismatch for model.layers.18.mlp.up_proj.weight: copying a param with shape torch.Size([5069, 2048]) from checkpoint, the shape in current model is torch.Size([5632, 2048]).
	size mismatch for model.layers.18.mlp.down_proj.weight: copying a param with shape torch.Size([2048, 5069]) from checkpoint, the shape in current model is torch.Size([2048, 5632]).
	size mismatch for model.layers.19.mlp.gate_proj.weight: copying a param with shape torch.Size([5069, 2048]) from checkpoint, the shape in current model is torch.Size([5632, 2048]).
	size mismatch for model.layers.19.mlp.up_proj.weight: copying a param with shape torch.Size([5069, 2048]) from checkpoint, the shape in current model is torch.Size([5632, 2048]).
	size mismatch for model.layers.19.mlp.down_proj.weight: copying a param with shape torch.Size([2048, 5069]) from checkpoint, the shape in current model is torch.Size([2048, 5632]).
	size mismatch for model.layers.20.mlp.gate_proj.weight: copying a param with shape torch.Size([5069, 2048]) from checkpoint, the shape in current model is torch.Size([5632, 2048]).
	size mismatch for model.layers.20.mlp.up_proj.weight: copying a param with shape torch.Size([5069, 2048]) from checkpoint, the shape in current model is torch.Size([5632, 2048]).
	size mismatch for model.layers.20.mlp.down_proj.weight: copying a param with shape torch.Size([2048, 5069]) from checkpoint, the shape in current model is torch.Size([2048, 5632]).
	size mismatch for model.layers.21.mlp.gate_proj.weight: copying a param with shape torch.Size([5069, 2048]) from checkpoint, the shape in current model is torch.Size([5632, 2048]).
	size mismatch for model.layers.21.mlp.up_proj.weight: copying a param with shape torch.Size([5069, 2048]) from checkpoint, the shape in current model is torch.Size([5632, 2048]).
	size mismatch for model.layers.21.mlp.down_proj.weight: copying a param with shape torch.Size([2048, 5069]) from checkpoint, the shape in current model is torch.Size([2048, 5632]).
	You may consider adding `ignore_mismatched_sizes=True` in the model `from_pretrained` method.