# Connectivity project

## Load packages

In [None]:
# Shallow-clone lm-evaluation-harness and install it in editable mode.
# Note that `%cd` changes the notebook's working directory, so every relative
# path used later in this notebook resolves inside the cloned checkout.
!git clone https://github.com/EleutherAI/lm-evaluation-harness --depth 1
%cd lm-evaluation-harness
%pip install -e .

In [None]:
!pip install accelerate

In [None]:
!pip install -i https://pypi.org/simple/ bitsandbytes

In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import sys

import torch
import torch.nn as nn

import lm_eval
import transformers

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer

## Download pre-trained model

In [None]:
import gc

# Run the garbage collector first so tensors that are only kept alive by
# unreachable Python objects are actually released; only then can
# empty_cache() hand their (now unused) cached blocks back to the driver.
gc.collect()
torch.cuda.empty_cache()

In [None]:
# Checkpoint to load; swap in one of the commented alternatives to try
# another model.
model_name = "EleutherAI/pythia-2.8b"
# model_name = "tiiuae/falcon-7b"
# model_name = "lmsys/vicuna-7b-v1.5"
# model_name = "EleutherAI/pythia-6.9b"

# Load the causal LM requesting float16, 8-bit loading and automatic device
# placement (presumably this is why bitsandbytes and accelerate are
# installed in the setup cells).
# NOTE(review): with load_in_8bit=True the linear layers are presumably
# replaced by bitsandbytes modules rather than plain nn.Linear. find_layers
# matches on the exact type nn.Linear, so confirm that prune_magnitude
# actually finds any weights to prune on a model loaded this way.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,
    low_cpu_mem_usage=True,
    device_map="auto",
    load_in_8bit=True,
)
# Tokenizer that belongs to the same checkpoint.
tokenizer = AutoTokenizer.from_pretrained(model_name)

In [None]:
# Save the loaded model to a local directory so the evaluation harness can
# load it by path; the tokenizer is written to its own `tokenizer/`
# subdirectory of that directory.
model.save_pretrained('./local-example-llm')
tokenizer.save_pretrained("./local-example-llm/tokenizer/")

In [None]:
!ls ./local-example-llm

### Evaluate

In [None]:
# Baseline run: evaluate the locally saved model (before any pruning) with
# lm-evaluation-harness on the `lambada_openai` and `hellaswag` tasks, using
# device cuda:0. The harness is started through the notebook's own Python
# interpreter (`sys.executable`).

!{sys.executable} -m lm_eval --model hf \
    --model_args pretrained=./local-example-llm,tokenizer=./local-example-llm/tokenizer,dtype="float16" \
    --tasks lambada_openai,hellaswag \
    --device cuda:0 \
    --batch_size auto:4

### Download and prepare "Vicuna" dataset
https://lmsys.org/blog/2023-03-30-vicuna/

In [None]:
!wget https://raw.githubusercontent.com/lm-sys/FastChat/main/fastchat/llm_judge/data/vicuna_bench/question.jsonl

In [None]:
import json

file_path = './question.jsonl'

# The file holds one JSON object per line. The encoding is pinned so that
# reading it does not depend on the platform's default locale.
with open(file_path, 'r', encoding='utf-8') as json_file:
    json_list = list(json_file)

# Maps each category name to the list of its prompts, in file order.
vicuna_dataset = {}

for json_str in json_list:
    if not json_str.strip():
        # Tolerate blank lines (e.g. a stray newline at the end of the file).
        continue
    result = json.loads(json_str)
    # Each record has `turns`, `category` and `question_id`. Only single-turn
    # questions are expected, so verify that before taking the first turn.
    assert len(result['turns']) == 1
    prompt = result['turns'][0]
    category = result['category']
    vicuna_dataset.setdefault(category, []).append(prompt)

In [None]:
print(vicuna_dataset.keys())

In [None]:
print(vicuna_dataset['math'][1])

### Example evaluation

In [None]:
input_text = vicuna_dataset['math'][1]

# Tokenize the prompt and move the tensors to the device the model lives on;
# the model was loaded with device_map="auto", so CPU inputs would not match.
input_ids = tokenizer(input_text, return_tensors="pt").to(model.device)
# Pass the attention mask explicitly: pad_token_id is set to the EOS id, so
# the mask cannot be reliably inferred from the token ids alone.
out = model.generate(
    input_ids['input_ids'],
    attention_mask=input_ids['attention_mask'],
    max_new_tokens=100,
    pad_token_id=tokenizer.eos_token_id,
)
# Decode the single returned sequence (prompt followed by the continuation).
result = tokenizer.decode(out[0])

print(result)

## Example code for pruning

In [None]:
# Simple magnitude-based pruning method. Code taken from: https://github.com/locuslab/wanda

def find_layers(module, layers=[nn.Linear], name=''):
    """
    Collect every sub-module whose type is one of `layers`.

    The module tree is walked depth-first in `named_children()` order.
    Matching uses the exact type of each module (subclasses of the listed
    types do not match), and the walk does not descend into a module that
    matched.

    Args:
        module (nn.Module): Root of the tree to search.
        layers (list): Layer types to look for.
        name (str): Dotted name of `module` within the outer model; the empty
            string denotes the root.

    Returns:
        dict: Maps the dotted name of each matching module to the module.
    """
    # A match ends the recursion: the module itself is the only result.
    if type(module) in layers:
        return {name: module}

    # Children of the root are named without a leading dot.
    prefix = name + '.' if name != '' else ''

    found = {}
    for child_name, child in module.named_children():
        matches = find_layers(child, layers=layers, name=prefix + child_name)
        found.update(matches)
    return found

def prune_magnitude(model, tokenizer, device=torch.device("cuda:0"), sparsity_ratio=0, prune_n=0, prune_m=0):
    """
    Zero out the smallest-magnitude weights of the model, in place.

    Every module returned by `find_layers` for each layer of the model is
    pruned independently, using one of two modes:

    * Unstructured (`prune_n == 0`): the absolute values of the weight matrix
      are sorted ascending and the value at position
      `int(numel * sparsity_ratio)` becomes the threshold; every weight whose
      magnitude is <= that threshold is set to zero. Because the comparison
      is inclusive, at least the smallest weight is zeroed even for
      `sparsity_ratio == 0`.
    * n:m (`prune_n != 0`): within every group of `prune_m` consecutive
      columns, the `prune_n` smallest-magnitude entries of each row are set
      to zero.

    Args:
        model: Model to prune. Its layers are taken from
            `model.base_model.layers`, else from the child modules of
            `model.base_model`, else from `model.model.layers`.
        tokenizer: Unused; kept so existing calls keep working.
        device: Unused; kept so existing calls keep working.
        sparsity_ratio (float): Fraction in [0, 1] used to pick the threshold
            in unstructured mode.
        prune_n (int): Number of weights to zero per group (n:m mode);
            0 selects unstructured mode.
        prune_m (int): Group size along the column dimension (n:m mode).

    Raises:
        ValueError: If the n:m parameters do not satisfy
            0 < prune_n <= prune_m, or if `sparsity_ratio` is outside [0, 1]
            in unstructured mode.
    """
    if prune_n != 0:
        if not 0 < prune_n <= prune_m:
            raise ValueError(
                f"n:m pruning requires 0 < prune_n <= prune_m, got prune_n={prune_n}, prune_m={prune_m}"
            )
    elif not 0 <= sparsity_ratio <= 1:
        raise ValueError(f"sparsity_ratio must be in [0, 1], got {sparsity_ratio}")

    if hasattr(model, 'base_model'):
        if hasattr(model.base_model, 'layers'):
            layers = model.base_model.layers
        else:
            layers = model.base_model._modules
    else:
        layers = model.model.layers

    # `_modules` is a name -> module mapping, so it cannot be indexed by
    # position the way a module list can; iterate its values instead.
    blocks = layers.values() if isinstance(layers, dict) else layers

    for layer in blocks:
        if layer is None:
            # A module mapping may contain empty slots.
            continue

        for target in find_layers(layer).values():
            W = target.weight.data
            if W.numel() == 0:
                continue
            W_metric = torch.abs(W)

            if prune_n != 0:
                # All-False mask; flip the positions selected for pruning.
                W_mask = torch.zeros_like(W, dtype=torch.bool)
                for start in range(0, W_metric.shape[1], prune_m):
                    tmp = W_metric[:, start:(start + prune_m)].float()
                    # topk yields indices relative to the group; shift them
                    # back to absolute column positions.
                    smallest = torch.topk(tmp, prune_n, dim=1, largest=False)[1]
                    W_mask.scatter_(1, start + smallest, True)
            else:
                flat = W_metric.flatten()
                if torch.cuda.is_available():
                    # Sort on the GPU when there is one; otherwise stay on the
                    # tensor's own device instead of failing.
                    flat = flat.cuda()
                # Clamp so sparsity_ratio == 1 selects the largest magnitude
                # rather than indexing one past the end.
                rank = min(int(W.numel() * sparsity_ratio), W.numel() - 1)
                thresh = torch.sort(flat)[0][rank].cpu()
                W_mask = (W_metric <= thresh)

            W[W_mask] = 0

In [None]:
model.base_model.layers

In [None]:
model.base_model._modules

### Apply pruning, save the pruned model, evaluate it

In [None]:
prune_magnitude(model, tokenizer, sparsity_ratio=0.4)

In [None]:
# Remove any output of a previous run so no stale files remain in the
# directory, then save the in-memory model (after the pruning call) and its
# tokenizer, mirroring the layout used for the unpruned checkpoint.
!rm -rf ./local-example-pruned-llm
model.save_pretrained('./local-example-pruned-llm')
tokenizer.save_pretrained("./local-example-pruned-llm/tokenizer/")

In [None]:
import sys
# Evaluate the saved pruned checkpoint on the same tasks and device as the
# baseline run, so the two sets of scores can be compared.
# NOTE(review): the baseline command passes dtype="float16" in --model_args
# while this one does not; confirm whether that difference is intended.
!{sys.executable} -m lm_eval --model hf \
    --model_args pretrained=./local-example-pruned-llm,tokenizer=./local-example-pruned-llm/tokenizer \
    --tasks lambada_openai,hellaswag \
    --device cuda:0 \
    --batch_size auto:4

In [None]:
input_text = vicuna_dataset['math'][1]

# Tokenize the prompt and move the tensors to the device the model lives on;
# the model was loaded with device_map="auto", so CPU inputs would not match.
input_ids = tokenizer(input_text, return_tensors="pt").to(model.device)
# Pass the attention mask explicitly: pad_token_id is set to the EOS id, so
# the mask cannot be reliably inferred from the token ids alone.
out = model.generate(
    input_ids['input_ids'],
    attention_mask=input_ids['attention_mask'],
    max_new_tokens=100,
    pad_token_id=tokenizer.eos_token_id,
)
# Decode the single returned sequence (prompt followed by the continuation).
result = tokenizer.decode(out[0])

print(result)