In [1]:
import os

# Restrict this kernel to GPU index 7. The assignment sits above the
# `import torch` line so it is in place before torch is loaded.
os.environ["CUDA_VISIBLE_DEVICES"] = "7"

import gc

import pandas as pd
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

from fusion_bench.method.pruning.wanda_utils.eval import eval_ppl
from fusion_bench.models.modeling_losparse_llama import LoSparseLlamaForCausalLM

In [2]:
from fusion_bench.models.modeling_losparse_llama.modeling_losparse_llama import (
    LoSparseLinear,
    LoSparseLlamaForCausalLM,
)


def model_eval_ppl(model_path):
    """Load a causal-LM checkpoint in fp16 and report its perplexity.

    The checkpoint is loaded with ``device_map="auto"``, scored with
    ``eval_ppl`` under ``torch.no_grad()`` and released again before the
    function returns, so back-to-back calls do not pile up GPU memory.

    Args:
        model_path: Location passed to ``from_pretrained`` for both the
            model and its tokenizer.

    Returns:
        Whatever ``eval_ppl(model, tokenizer)`` returns (the captured
        outputs in this notebook show a Python float).
    """
    # Clear leftovers from earlier cells before allocating a new model.
    gc.collect()
    torch.cuda.empty_cache()
    model = AutoModelForCausalLM.from_pretrained(
        model_path,
        torch_dtype=torch.float16,
        low_cpu_mem_usage=True,
        device_map="auto",
    )
    try:
        # Presumably read by eval_ppl as the evaluation context length -- TODO confirm.
        model.seqlen = model.config.max_position_embeddings
        tokenizer = AutoTokenizer.from_pretrained(model_path, use_fast=False)

        with torch.no_grad():
            result = eval_ppl(model, tokenizer)
    finally:
        # If evaluation raises, the traceback's frame can keep `model`
        # alive and the next call may run out of GPU memory. Dropping the
        # reference here releases the weights on success and failure alike.
        del model
        gc.collect()
        torch.cuda.empty_cache()

    print(f"PPL for {model_path}: {result}")
    return result

## Dense

In [4]:
# Baseline: perplexity of the dense LLaMA-7B checkpoint.
model_eval_ppl(
    "/data0/users/tanganke/data/huggingface_models/decapoda-research/llama-7b-hf"
)



Loading checkpoint shards:   0%|          | 0/33 [00:00<?, ?it/s]

We've detected an older driver with an RTX 4000 series GPU. These drivers have issues with P2P. This can affect the multi-gpu inference when using accelerate device_map.Please make sure to update your driver to the latest version which resolves this.
You are using the default legacy behaviour of the <class 'transformers.models.llama.tokenization_llama.LlamaTokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565 - if you loaded a llama tokenizer from a GGUF file you can ignore this message


evaluating on wikitext2


Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]

nsamples 166
sample 0


We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.43. Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/v4.41.3/en/internal/generation_utils#transformers.Cache)


sample 50
sample 100
sample 150
PPL for /data0/users/tanganke/data/huggingface_models/decapoda-research/llama-7b-hf: 5.677204132080078


5.677204132080078

## Magnitude

In [4]:
# Sweep the unstructured magnitude-pruned checkpoints and collect one
# perplexity value per sparsity level.
ppl_data = {"sparsity_level": [], "ppl": []}
for sparsity_level in [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8]:
    # Evaluate first, then append to both columns together: if an evaluation
    # fails, the two lists still have equal length and the partial results
    # can be turned into a DataFrame.
    ppl = model_eval_ppl(
        f"/data0/users/tanganke/projects/fusion_bench/outputs/llama/magnitude/unstructured/{sparsity_level}"
    )
    ppl_data["sparsity_level"].append(sparsity_level)
    ppl_data["ppl"].append(ppl)
print(ppl_data)



Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

evaluating on wikitext2


Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]

nsamples 166
sample 0


We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.43. Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/v4.41.3/en/internal/generation_utils#transformers.Cache)


sample 50
sample 100
sample 150
PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/magnitude/unstructured/0.1: 5.803542613983154




Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

evaluating on wikitext2


Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]

nsamples 166
sample 0
sample 50
sample 100
sample 150
PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/magnitude/unstructured/0.2: 6.017833709716797


Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

evaluating on wikitext2


Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]

nsamples 166
sample 0
sample 50
sample 100
sample 150
PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/magnitude/unstructured/0.3: 6.621606349945068


Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

evaluating on wikitext2


Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]

nsamples 166
sample 0
sample 50
sample 100
sample 150
PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/magnitude/unstructured/0.4: 8.041034698486328


Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

evaluating on wikitext2


Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]

nsamples 166
sample 0
sample 50
sample 100
sample 150
PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/magnitude/unstructured/0.5: 17.285242080688477


Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

evaluating on wikitext2


Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]

nsamples 166
sample 0
sample 50
sample 100
sample 150
PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/magnitude/unstructured/0.6: 152.3600616455078


Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

evaluating on wikitext2


Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]

nsamples 166
sample 0
sample 50
sample 100
sample 150
PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/magnitude/unstructured/0.7: 48427.84765625


Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

evaluating on wikitext2


Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]

nsamples 166
sample 0
sample 50
sample 100
sample 150
PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/magnitude/unstructured/0.8: 132127.296875
{'sparsity_level': [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8], 'ppl': [5.803542613983154, 6.017833709716797, 6.621606349945068, 8.041034698486328, 17.285242080688477, 152.3600616455078, 48427.84765625, 132127.296875]}


In [6]:
# Show the collected sweep results (`ppl_data`) as a table.
pd.DataFrame(ppl_data)

Unnamed: 0,sparsity_level,ppl
0,0.1,5.803543
1,0.2,6.017834
2,0.3,6.621606
3,0.4,8.041035
4,0.5,17.285242
5,0.6,152.360062
6,0.7,48427.847656
7,0.8,132127.296875


In [10]:
# Magnitude pruning, unstructured, sparsity level 0.45 (between the 0.4 and 0.5 sweep points).
model_eval_ppl(
    "/data0/users/tanganke/projects/fusion_bench/outputs/llama/magnitude/unstructured/0.45"
)

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

evaluating on wikitext2


Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]

nsamples 166
sample 0
sample 50
sample 100
sample 150
PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/magnitude/unstructured/0.45: 9.627829551696777


9.627829551696777

In [9]:
# Magnitude pruning, unstructured, sparsity level 0.5.
# No explicit gc/empty_cache here: model_eval_ppl already runs both before
# it loads the checkpoint.
model_eval_ppl(
    "/data0/users/tanganke/projects/fusion_bench/outputs/llama/magnitude/unstructured/0.5"
)



Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

We've detected an older driver with an RTX 4000 series GPU. These drivers have issues with P2P. This can affect the multi-gpu inference when using accelerate device_map.Please make sure to update your driver to the latest version which resolves this.


evaluating on wikitext2


Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]

nsamples 166
sample 0
sample 50
sample 100
sample 150
PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/magnitude/unstructured/0.5: 17.285242080688477


17.285242080688477

In [11]:
# Magnitude pruning, unstructured, sparsity level 0.55.
model_eval_ppl(
    "/data0/users/tanganke/projects/fusion_bench/outputs/llama/magnitude/unstructured/0.55"
)

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

evaluating on wikitext2


Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]

nsamples 166
sample 0
sample 50
sample 100
sample 150
PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/magnitude/unstructured/0.55: 24.706262588500977


24.706262588500977

In [12]:
# Magnitude pruning, unstructured, sparsity level 0.65.
model_eval_ppl(
    "/data0/users/tanganke/projects/fusion_bench/outputs/llama/magnitude/unstructured/0.65"
)

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

evaluating on wikitext2


Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]

nsamples 166
sample 0
sample 50
sample 100
sample 150
PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/magnitude/unstructured/0.65: 17000.6015625


17000.6015625

In [14]:
# Magnitude pruning, unstructured, sparsity level 0.7 (same checkpoint as in the sweep loop).
model_eval_ppl(
    "/data0/users/tanganke/projects/fusion_bench/outputs/llama/magnitude/unstructured/0.7"
)



Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

We've detected an older driver with an RTX 4000 series GPU. These drivers have issues with P2P. This can affect the multi-gpu inference when using accelerate device_map.Please make sure to update your driver to the latest version which resolves this.


evaluating on wikitext2


Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]

nsamples 166
sample 0
sample 50
sample 100
sample 150
PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/magnitude/unstructured/0.7: 48427.84765625


48427.84765625

In [3]:
# Magnitude pruning, unstructured, sparsity level 0.8 (same checkpoint as in the sweep loop).
model_eval_ppl(
    "/data0/users/tanganke/projects/fusion_bench/outputs/llama/magnitude/unstructured/0.8"
)



Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

We've detected an older driver with an RTX 4000 series GPU. These drivers have issues with P2P. This can affect the multi-gpu inference when using accelerate device_map.Please make sure to update your driver to the latest version which resolves this.


evaluating on wikitext2


Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]

nsamples 166
sample 0


We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.43. Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/v4.41.3/en/internal/generation_utils#transformers.Cache)


sample 50
sample 100
sample 150
PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/magnitude/unstructured/0.8: 132127.296875


132127.296875

In [10]:
# Magnitude pruning, semi-structured checkpoint `2_4`
# (presumably a 2:4 N:M sparsity pattern -- confirm against the pruning config).
model_eval_ppl(
    "/data0/users/tanganke/projects/fusion_bench/outputs/llama/magnitude/semistructured/2_4"
)



Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

We've detected an older driver with an RTX 4000 series GPU. These drivers have issues with P2P. This can affect the multi-gpu inference when using accelerate device_map.Please make sure to update your driver to the latest version which resolves this.


evaluating on wikitext2


Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]

nsamples 166
sample 0
sample 50
sample 100
sample 150
PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/magnitude/semistructured/2_4: 42.534244537353516


42.534244537353516

In [12]:
# Magnitude pruning, semi-structured checkpoint `4_8`
# (presumably a 4:8 N:M sparsity pattern -- confirm against the pruning config).
model_eval_ppl(
    "/data0/users/tanganke/projects/fusion_bench/outputs/llama/magnitude/semistructured/4_8"
)



Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

We've detected an older driver with an RTX 4000 series GPU. These drivers have issues with P2P. This can affect the multi-gpu inference when using accelerate device_map.Please make sure to update your driver to the latest version which resolves this.
Using the latest cached version of the dataset since wikitext couldn't be found on the Hugging Face Hub
Found the latest cached dataset configuration 'wikitext-2-raw-v1' at /data0/users/tanganke/.cache/huggingface/datasets/wikitext/wikitext-2-raw-v1/0.0.0/b08601e04326c79dfdd32d625aee71d232d685c3 (last modified on Wed Aug 28 13:14:33 2024).


evaluating on wikitext2


Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]

nsamples 166
sample 0
sample 50
sample 100
sample 150
PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/magnitude/semistructured/4_8: 16.829370498657227


16.829370498657227

## Wanda

In [8]:
# Wanda pruning, unstructured, sparsity level 0.1.
model_eval_ppl(
    "/data0/users/tanganke/projects/fusion_bench/outputs/llama/wanda_pruning/unstructured/0.1"
)



Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

evaluating on wikitext2


Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]

nsamples 166
sample 0
sample 50
sample 100
sample 150
PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/wanda_pruning/unstructured/0.1: 5.696322441101074


5.696322441101074

In [9]:
# Wanda pruning, unstructured, sparsity level 0.2.
model_eval_ppl(
    "/data0/users/tanganke/projects/fusion_bench/outputs/llama/wanda_pruning/unstructured/0.2"
)

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

evaluating on wikitext2


Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]

nsamples 166
sample 0
sample 50
sample 100
sample 150
PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/wanda_pruning/unstructured/0.2: 5.817018032073975


5.817018032073975

In [5]:
# Wanda pruning, unstructured, sparsity level 0.3.
model_eval_ppl(
    "/data0/users/tanganke/projects/fusion_bench/outputs/llama/wanda_pruning/unstructured/0.3"
)

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

evaluating on wikitext2


Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]

We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.43. Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/v4.41.3/en/internal/generation_utils#transformers.Cache)


nsamples 166
sample 0
sample 50
sample 100
sample 150
PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/wanda_pruning/unstructured/0.3: 5.998795032501221


5.998795032501221

In [6]:
# Wanda pruning, unstructured, sparsity level 0.4.
model_eval_ppl(
    "/data0/users/tanganke/projects/fusion_bench/outputs/llama/wanda_pruning/unstructured/0.4"
)



Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

evaluating on wikitext2


Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]

nsamples 166
sample 0
sample 50
sample 100
sample 150
PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/wanda_pruning/unstructured/0.4: 6.386667251586914


6.386667251586914

In [4]:
# Wanda pruning, unstructured, sparsity level 0.5.
# No explicit gc/empty_cache here: model_eval_ppl already runs both before
# it loads the checkpoint.
model_eval_ppl(
    "/data0/users/tanganke/projects/fusion_bench/outputs/llama/wanda_pruning/unstructured/0.5"
)



Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

We've detected an older driver with an RTX 4000 series GPU. These drivers have issues with P2P. This can affect the multi-gpu inference when using accelerate device_map.Please make sure to update your driver to the latest version which resolves this.


evaluating on wikitext2


Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]

nsamples 166
sample 0


We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.43. Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/v4.41.3/en/internal/generation_utils#transformers.Cache)


sample 50
sample 100
sample 150
PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/wanda_pruning/unstructured/0.5: 7.257486343383789


7.257486343383789

In [10]:
# Wanda pruning, unstructured, sparsity level 0.6.
model_eval_ppl(
    "/data0/users/tanganke/projects/fusion_bench/outputs/llama/wanda_pruning/unstructured/0.6"
)

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

evaluating on wikitext2


Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]

We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.43. Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/v4.41.3/en/internal/generation_utils#transformers.Cache)


nsamples 166
sample 0
sample 50
sample 100
sample 150
PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/wanda_pruning/unstructured/0.6: 10.691720962524414


10.691720962524414

In [5]:
# Wanda pruning, unstructured, sparsity level 0.7.
model_eval_ppl(
    "/data0/users/tanganke/projects/fusion_bench/outputs/llama/wanda_pruning/unstructured/0.7"
)



Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

We've detected an older driver with an RTX 4000 series GPU. These drivers have issues with P2P. This can affect the multi-gpu inference when using accelerate device_map.Please make sure to update your driver to the latest version which resolves this.


evaluating on wikitext2


Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]

nsamples 166
sample 0
sample 50
sample 100
sample 150
PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/wanda_pruning/unstructured/0.7: 84.69001770019531


84.69001770019531

In [None]:
# Wanda pruning, unstructured, sparsity level 0.8.
model_eval_ppl(
    "/data0/users/tanganke/projects/fusion_bench/outputs/llama/wanda_pruning/unstructured/0.8"
)



Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

We've detected an older driver with an RTX 4000 series GPU. These drivers have issues with P2P. This can affect the multi-gpu inference when using accelerate device_map.Please make sure to update your driver to the latest version which resolves this.


evaluating on wikitext2


Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]

nsamples 166
sample 0
sample 50
sample 100
sample 150
PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/wanda_pruning/unstructured/0.8: 6239.4091796875


6239.4091796875

In [None]:
# Wanda pruning, semi-structured checkpoint `2_4`
# (presumably a 2:4 N:M sparsity pattern -- confirm against the pruning config).
# No explicit gc/empty_cache here: model_eval_ppl already runs both before
# it loads the checkpoint.
model_eval_ppl(
    "/data0/users/tanganke/projects/fusion_bench/outputs/llama/wanda_pruning/semistructured/2_4"
)



Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

We've detected an older driver with an RTX 4000 series GPU. These drivers have issues with P2P. This can affect the multi-gpu inference when using accelerate device_map.Please make sure to update your driver to the latest version which resolves this.


evaluating on wikitext2


Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]

nsamples 166
sample 0


We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.43. Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/v4.41.3/en/internal/generation_utils#transformers.Cache)


sample 50
sample 100
sample 150
PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/wanda_pruning/semistructured/2_4: 11.527883529663086


11.527883529663086

In [None]:
# Wanda pruning, semi-structured checkpoint `4_8`
# (presumably a 4:8 N:M sparsity pattern -- confirm against the pruning config).
# No explicit gc/empty_cache here: model_eval_ppl already runs both before
# it loads the checkpoint.
model_eval_ppl(
    "/data0/users/tanganke/projects/fusion_bench/outputs/llama/wanda_pruning/semistructured/4_8"
)



Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

We've detected an older driver with an RTX 4000 series GPU. These drivers have issues with P2P. This can affect the multi-gpu inference when using accelerate device_map.Please make sure to update your driver to the latest version which resolves this.


evaluating on wikitext2


Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]

nsamples 166
sample 0


We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.43. Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/v4.41.3/en/internal/generation_utils#transformers.Cache)


sample 50
sample 100
sample 150
PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/wanda_pruning/semistructured/4_8: 8.567511558532715


8.567511558532715

## Wanda w/ SVD

In [3]:
# Wanda w/ SVD (sparselo/wanda output), unstructured, sparsity level 0.5.
model_eval_ppl(
    "/data0/users/tanganke/projects/fusion_bench/outputs/llama/sparselo/wanda/unstructured/0.5"
)



Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

We've detected an older driver with an RTX 4000 series GPU. These drivers have issues with P2P. This can affect the multi-gpu inference when using accelerate device_map.Please make sure to update your driver to the latest version which resolves this.


evaluating on wikitext2


Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]

nsamples 166
sample 0


We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.43. Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/v4.41.3/en/internal/generation_utils#transformers.Cache)


sample 50
sample 100
sample 150
PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/sparselo/wanda/unstructured/0.5: 7.086696624755859


7.086696624755859

In [11]:
# Wanda w/ SVD (sparselo/wanda output), unstructured, sparsity level 0.6.
model_eval_ppl(
    "/data0/users/tanganke/projects/fusion_bench/outputs/llama/sparselo/wanda/unstructured/0.6"
)

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

evaluating on wikitext2


Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]

nsamples 166
sample 0
sample 50
sample 100
sample 150
PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/sparselo/wanda/unstructured/0.6: 9.595812797546387


9.595812797546387

In [4]:
# Wanda w/ SVD (sparselo/wanda output), unstructured, sparsity level 0.7.
model_eval_ppl(
    "/data0/users/tanganke/projects/fusion_bench/outputs/llama/sparselo/wanda/unstructured/0.7"
)



Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

We've detected an older driver with an RTX 4000 series GPU. These drivers have issues with P2P. This can affect the multi-gpu inference when using accelerate device_map.Please make sure to update your driver to the latest version which resolves this.


evaluating on wikitext2


Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]

nsamples 166
sample 0
sample 50
sample 100
sample 150
PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/sparselo/wanda/unstructured/0.7: 35.647666931152344


35.647666931152344

In [5]:
# Wanda w/ SVD (sparselo/wanda output), unstructured, sparsity level 0.8.
model_eval_ppl(
    "/data0/users/tanganke/projects/fusion_bench/outputs/llama/sparselo/wanda/unstructured/0.8"
)



Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

We've detected an older driver with an RTX 4000 series GPU. These drivers have issues with P2P. This can affect the multi-gpu inference when using accelerate device_map.Please make sure to update your driver to the latest version which resolves this.


evaluating on wikitext2


Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]

nsamples 166
sample 0
sample 50
sample 100
sample 150
PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/sparselo/wanda/unstructured/0.8: 978.4933471679688


978.4933471679688

In [22]:
# Wanda w/ SVD (sparselo/wanda output), semi-structured checkpoint `2_4`
# (presumably a 2:4 N:M sparsity pattern -- confirm against the pruning config).
model_eval_ppl(
    "/data0/users/tanganke/projects/fusion_bench/outputs/llama/sparselo/wanda/semistructured/2_4"
)

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

evaluating on wikitext2


Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]

nsamples 166
sample 0
sample 50
sample 100
sample 150
PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/sparselo/wanda/semistructured/2_4: 10.475244522094727


10.475244522094727

In [23]:
# Wanda w/ SVD (sparselo/wanda output), semi-structured checkpoint `4_8`
# (presumably a 4:8 N:M sparsity pattern -- confirm against the pruning config).
model_eval_ppl(
    "/data0/users/tanganke/projects/fusion_bench/outputs/llama/sparselo/wanda/semistructured/4_8"
)

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

evaluating on wikitext2


Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]

nsamples 166
sample 0
sample 50
sample 100
sample 150
PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/sparselo/wanda/semistructured/4_8: 8.141329765319824


8.141329765319824

## Magnitude w/ SVD

In [7]:
# Sweep the unstructured sparselo/magnitude checkpoints and collect one
# perplexity value per sparsity level. This rebinds `ppl_data`.
ppl_data = {"sparsity_level": [], "ppl": []}
for sparsity_level in [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8]:
    # Evaluate first, then append to both columns together: if an evaluation
    # fails, the two lists still have equal length and the partial results
    # can be turned into a DataFrame.
    ppl = model_eval_ppl(
        f"/data0/users/tanganke/projects/fusion_bench/outputs/llama/sparselo/magnitude/unstructured/{sparsity_level}"
    )
    ppl_data["sparsity_level"].append(sparsity_level)
    ppl_data["ppl"].append(ppl)
pd.DataFrame(ppl_data)



Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

evaluating on wikitext2


Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]

We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.43. Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/v4.41.3/en/internal/generation_utils#transformers.Cache)


nsamples 166
sample 0
sample 50
sample 100
sample 150
PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/sparselo/magnitude/unstructured/0.1: 5.772339344024658


Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

evaluating on wikitext2


Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]

nsamples 166
sample 0
sample 50
sample 100
sample 150
PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/sparselo/magnitude/unstructured/0.2: 5.920957565307617


Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

evaluating on wikitext2


Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]

nsamples 166
sample 0
sample 50
sample 100
sample 150
PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/sparselo/magnitude/unstructured/0.3: 6.2547383308410645


Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

evaluating on wikitext2


Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]

nsamples 166
sample 0
sample 50
sample 100
sample 150
PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/sparselo/magnitude/unstructured/0.4: 6.850766658782959


Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

evaluating on wikitext2


Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]

nsamples 166
sample 0
sample 50
sample 100
sample 150
PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/sparselo/magnitude/unstructured/0.5: 8.063112258911133


Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

evaluating on wikitext2


Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]

nsamples 166
sample 0
sample 50
sample 100
sample 150
PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/sparselo/magnitude/unstructured/0.6: 13.5851411819458


Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

evaluating on wikitext2


Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]

nsamples 166
sample 0
sample 50
sample 100
sample 150
PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/sparselo/magnitude/unstructured/0.7: 286.7393493652344


Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

evaluating on wikitext2


Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]

nsamples 166
sample 0
sample 50
sample 100
sample 150
PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/sparselo/magnitude/unstructured/0.8: 12499.7509765625


Unnamed: 0,sparsity_level,ppl
0,0.1,5.772339
1,0.2,5.920958
2,0.3,6.254738
3,0.4,6.850767
4,0.5,8.063112
5,0.6,13.585141
6,0.7,286.739349
7,0.8,12499.750977


In [13]:
# Magnitude w/ SVD (sparselo/magnitude output), unstructured, sparsity level 0.55.
model_eval_ppl(
    "/data0/users/tanganke/projects/fusion_bench/outputs/llama/sparselo/magnitude/unstructured/0.55"
)

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

evaluating on wikitext2


Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]

nsamples 166
sample 0
sample 50
sample 100
sample 150
PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/sparselo/magnitude/unstructured/0.55: 9.581928253173828


9.581928253173828

In [14]:
# Magnitude w/ SVD (sparselo/magnitude output), unstructured, sparsity level 0.65.
model_eval_ppl(
    "/data0/users/tanganke/projects/fusion_bench/outputs/llama/sparselo/magnitude/unstructured/0.65"
)

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

evaluating on wikitext2


Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]

nsamples 166
sample 0
sample 50
sample 100
sample 150
PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/sparselo/magnitude/unstructured/0.65: 33.92226791381836


33.92226791381836

In [3]:
# Magnitude w/ SVD (sparselo/magnitude output), unstructured, sparsity level 0.5
# (same checkpoint as in the sweep loop).
model_eval_ppl(
    "/data0/users/tanganke/projects/fusion_bench/outputs/llama/sparselo/magnitude/unstructured/0.5"
)



Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

evaluating on wikitext2


Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]

nsamples 166
sample 0


We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.43. Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/v4.41.3/en/internal/generation_utils#transformers.Cache)


sample 50
sample 100
sample 150
PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/sparselo/magnitude/unstructured/0.5: 8.063112258911133


8.063112258911133

In [4]:
# Magnitude w/ SVD (sparselo/magnitude output), semi-structured checkpoint `2_4`
# (presumably a 2:4 N:M sparsity pattern -- confirm against the pruning config).
model_eval_ppl(
    "/data0/users/tanganke/projects/fusion_bench/outputs/llama/sparselo/magnitude/semistructured/2_4"
)



Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

evaluating on wikitext2


Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]

nsamples 166
sample 0
sample 50
sample 100
sample 150
PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/sparselo/magnitude/semistructured/2_4: 12.724442481994629


12.724442481994629

In [6]:
# Magnitude w/ SVD (sparselo/magnitude output), semi-structured checkpoint `4_8`
# (presumably a 4:8 N:M sparsity pattern -- confirm against the pruning config).
model_eval_ppl(
    "/data0/users/tanganke/projects/fusion_bench/outputs/llama/sparselo/magnitude/semistructured/4_8"
)



Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

We've detected an older driver with an RTX 4000 series GPU. These drivers have issues with P2P. This can affect the multi-gpu inference when using accelerate device_map.Please make sure to update your driver to the latest version which resolves this.


evaluating on wikitext2


Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]

nsamples 166
sample 0
sample 50
sample 100
sample 150
PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/sparselo/magnitude/semistructured/4_8: 9.289239883422852


9.289239883422852

# Iterative Sparselo

## Magnitude

In [3]:
# Iterative sparselo, magnitude pruning, unstructured, sparsity level 0.1.
model_eval_ppl(
    "/data0/users/tanganke/projects/fusion_bench/outputs/llama/iterative_sparselo/magnitude/unstructured/0.1"
)



Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

evaluating on wikitext2


Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]

nsamples 166
sample 0


We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.43. Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/v4.41.3/en/internal/generation_utils#transformers.Cache)


sample 50
sample 100
sample 150
PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/iterative_sparselo/magnitude/unstructured/0.1: 5.72618293762207


5.72618293762207

In [4]:
# Iterative sparselo, magnitude pruning, unstructured, sparsity level 0.2.
model_eval_ppl(
    "/data0/users/tanganke/projects/fusion_bench/outputs/llama/iterative_sparselo/magnitude/unstructured/0.2"
)



Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

evaluating on wikitext2


Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]

nsamples 166
sample 0
sample 50
sample 100
sample 150
PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/iterative_sparselo/magnitude/unstructured/0.2: 5.892498016357422


5.892498016357422

In [5]:
# Iterative sparselo, magnitude pruning, unstructured, sparsity level 0.3.
model_eval_ppl(
    "/data0/users/tanganke/projects/fusion_bench/outputs/llama/iterative_sparselo/magnitude/unstructured/0.3"
)

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

evaluating on wikitext2


Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]

nsamples 166
sample 0
sample 50
sample 100
sample 150
PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/iterative_sparselo/magnitude/unstructured/0.3: 6.216446399688721


6.216446399688721

In [9]:
# Iterative sparselo, magnitude pruning, unstructured, sparsity level 0.4.
model_eval_ppl(
    "/data0/users/tanganke/projects/fusion_bench/outputs/llama/iterative_sparselo/magnitude/unstructured/0.4"
)

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

evaluating on wikitext2


Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]

nsamples 166
sample 0
sample 50
sample 100
sample 150
PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/iterative_sparselo/magnitude/unstructured/0.4: 6.817346572875977


6.817346572875977

In [3]:
# Iterative sparselo, magnitude pruning, unstructured, sparsity level 0.5.
# The two cells that follow evaluate this same checkpoint path with the
# `skip_lowrank` / `skip_sparse` variants of the evaluation helper.
model_eval_ppl(
    "/data0/users/tanganke/projects/fusion_bench/outputs/llama/iterative_sparselo/magnitude/unstructured/0.5"
)



Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

We've detected an older driver with an RTX 4000 series GPU. These drivers have issues with P2P. This can affect the multi-gpu inference when using accelerate device_map.Please make sure to update your driver to the latest version which resolves this.


evaluating on wikitext2


Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]

nsamples 166
sample 0


We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.43. Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/v4.41.3/en/internal/generation_utils#transformers.Cache)


sample 50
sample 100
sample 150
PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/iterative_sparselo/magnitude/unstructured/0.5: 7.974487781524658


7.974487781524658

In [7]:
# Ablation on the iterative_sparselo/magnitude/unstructured/0.5 checkpoint (same path as the plain
# model_eval_ppl call for 0.5), evaluated on WikiText-2.
# NOTE(review): model_eval_ppl_skip_lowrank is defined outside this section; presumably it evaluates
# with the low-rank component disabled — confirm against its definition.
model_eval_ppl_skip_lowrank(
    "/data0/users/tanganke/projects/fusion_bench/outputs/llama/iterative_sparselo/magnitude/unstructured/0.5"
)



Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

evaluating on wikitext2


Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]

nsamples 166
sample 0


We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.43. Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/v4.41.3/en/internal/generation_utils#transformers.Cache)


sample 50
sample 100
sample 150
PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/iterative_sparselo/magnitude/unstructured/0.5: 702.6592407226562


702.6592407226562

In [3]:
# Ablation on the iterative_sparselo/magnitude/unstructured/0.5 checkpoint (same path as the plain
# model_eval_ppl call for 0.5), evaluated on WikiText-2.
# NOTE(review): model_eval_ppl_skip_sparse is defined outside this section; presumably it evaluates
# with the sparse component disabled — confirm against its definition.
model_eval_ppl_skip_sparse(
    "/data0/users/tanganke/projects/fusion_bench/outputs/llama/iterative_sparselo/magnitude/unstructured/0.5"
)



Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

evaluating on wikitext2


Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]

nsamples 166
sample 0


We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.43. Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/v4.41.3/en/internal/generation_utils#transformers.Cache)


sample 50
sample 100
sample 150
PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/iterative_sparselo/magnitude/unstructured/0.5: 436568.6875


436568.6875

In [6]:
# WikiText-2 perplexity of the checkpoint at iterative_sparselo/magnitude/unstructured/0.6.
# NOTE(review): path suggests magnitude pruning, unstructured, sparsity ratio 0.6 — confirm against the producing run.
model_eval_ppl(
    "/data0/users/tanganke/projects/fusion_bench/outputs/llama/iterative_sparselo/magnitude/unstructured/0.6"
)

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

evaluating on wikitext2


Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]

nsamples 166
sample 0
sample 50
sample 100
sample 150
PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/iterative_sparselo/magnitude/unstructured/0.6: 12.137877464294434


12.137877464294434

In [4]:
# WikiText-2 perplexity of the checkpoint at iterative_sparselo/magnitude/unstructured/0.7.
# NOTE(review): path suggests magnitude pruning, unstructured, sparsity ratio 0.7 — confirm against the producing run.
model_eval_ppl(
    "/data0/users/tanganke/projects/fusion_bench/outputs/llama/iterative_sparselo/magnitude/unstructured/0.7"
)



Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

evaluating on wikitext2


Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]

nsamples 166
sample 0
sample 50
sample 100
sample 150
PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/iterative_sparselo/magnitude/unstructured/0.7: 200.09254455566406


200.09254455566406

In [5]:
# WikiText-2 perplexity of the checkpoint at iterative_sparselo/magnitude/unstructured/0.8.
# NOTE(review): path suggests magnitude pruning, unstructured, sparsity ratio 0.8 — confirm against the producing run.
model_eval_ppl(
    "/data0/users/tanganke/projects/fusion_bench/outputs/llama/iterative_sparselo/magnitude/unstructured/0.8"
)

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

evaluating on wikitext2


Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]

nsamples 166
sample 0
sample 50
sample 100
sample 150
PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/iterative_sparselo/magnitude/unstructured/0.8: 14475.9248046875


14475.9248046875

In [3]:
# WikiText-2 perplexity of the checkpoint at iterative_sparselo/magnitude/semistructured/2_4.
# NOTE(review): "2_4" presumably denotes a 2:4 (N:M) semi-structured magnitude-pruning pattern — confirm.
model_eval_ppl(
    "/data0/users/tanganke/projects/fusion_bench/outputs/llama/iterative_sparselo/magnitude/semistructured/2_4"
)



Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

evaluating on wikitext2


Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]

nsamples 166
sample 0


We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.43. Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/v4.41.3/en/internal/generation_utils#transformers.Cache)


sample 50
sample 100
sample 150
PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/iterative_sparselo/magnitude/semistructured/2_4: 10.735949516296387


10.735949516296387

In [6]:
# WikiText-2 perplexity of the checkpoint at iterative_sparselo/magnitude/semistructured/4_8.
# NOTE(review): "4_8" presumably denotes a 4:8 (N:M) semi-structured magnitude-pruning pattern — confirm.
model_eval_ppl(
    "/data0/users/tanganke/projects/fusion_bench/outputs/llama/iterative_sparselo/magnitude/semistructured/4_8"
)



Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

We've detected an older driver with an RTX 4000 series GPU. These drivers have issues with P2P. This can affect the multi-gpu inference when using accelerate device_map.Please make sure to update your driver to the latest version which resolves this.


evaluating on wikitext2


Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]

nsamples 166
sample 0
sample 50
sample 100
sample 150
PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/iterative_sparselo/magnitude/semistructured/4_8: 8.859417915344238


8.859417915344238

## Wanda

In [3]:
# WikiText-2 perplexity of the checkpoint at iterative_sparselo/wanda/unstructured/0.1.
# NOTE(review): path suggests Wanda pruning, unstructured, sparsity ratio 0.1 — confirm against the producing run.
model_eval_ppl(
    "/data0/users/tanganke/projects/fusion_bench/outputs/llama/iterative_sparselo/wanda/unstructured/0.1"
)



Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

evaluating on wikitext2


Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]

nsamples 166
sample 0


We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.43. Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/v4.41.3/en/internal/generation_utils#transformers.Cache)


sample 50
sample 100
sample 150
PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/iterative_sparselo/wanda/unstructured/0.1: 5.680875778198242


5.680875778198242

In [4]:
# WikiText-2 perplexity of the checkpoint at iterative_sparselo/wanda/unstructured/0.2.
# NOTE(review): path suggests Wanda pruning, unstructured, sparsity ratio 0.2 — confirm against the producing run.
model_eval_ppl(
    "/data0/users/tanganke/projects/fusion_bench/outputs/llama/iterative_sparselo/wanda/unstructured/0.2"
)



Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

evaluating on wikitext2


Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]

nsamples 166
sample 0
sample 50
sample 100
sample 150
PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/iterative_sparselo/wanda/unstructured/0.2: 5.75728178024292


5.75728178024292

In [3]:
# WikiText-2 perplexity of the checkpoint at iterative_sparselo/wanda/unstructured/0.3.
# NOTE(review): path suggests Wanda pruning, unstructured, sparsity ratio 0.3 — confirm against the producing run.
model_eval_ppl(
    "/data0/users/tanganke/projects/fusion_bench/outputs/llama/iterative_sparselo/wanda/unstructured/0.3"
)



Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

evaluating on wikitext2


Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]

nsamples 166
sample 0


We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.43. Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/v4.41.3/en/internal/generation_utils#transformers.Cache)


sample 50
sample 100
sample 150
PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/iterative_sparselo/wanda/unstructured/0.3: 5.913062572479248


5.913062572479248

In [9]:
# WikiText-2 perplexity of the checkpoint at iterative_sparselo/wanda/unstructured/0.4.
# NOTE(review): path suggests Wanda pruning, unstructured, sparsity ratio 0.4 — confirm against the producing run.
model_eval_ppl(
    "/data0/users/tanganke/projects/fusion_bench/outputs/llama/iterative_sparselo/wanda/unstructured/0.4"
)

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

evaluating on wikitext2


Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]

nsamples 166
sample 0
sample 50
sample 100
sample 150
PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/iterative_sparselo/wanda/unstructured/0.4: 6.231904029846191


6.231904029846191

In [4]:
# WikiText-2 perplexity of the checkpoint at iterative_sparselo/wanda/unstructured/0.5.
# NOTE(review): path suggests Wanda pruning, unstructured, sparsity ratio 0.5 — confirm against the producing run.
model_eval_ppl(
    "/data0/users/tanganke/projects/fusion_bench/outputs/llama/iterative_sparselo/wanda/unstructured/0.5"
)



Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

evaluating on wikitext2


Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]

nsamples 166
sample 0
sample 50
sample 100
sample 150
PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/iterative_sparselo/wanda/unstructured/0.5: 6.920602321624756


6.920602321624756

In [5]:
# WikiText-2 perplexity of the checkpoint at iterative_sparselo/wanda/unstructured/0.6.
# NOTE(review): path suggests Wanda pruning, unstructured, sparsity ratio 0.6 — confirm against the producing run.
model_eval_ppl(
    "/data0/users/tanganke/projects/fusion_bench/outputs/llama/iterative_sparselo/wanda/unstructured/0.6"
)

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

evaluating on wikitext2


Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]

nsamples 166
sample 0
sample 50
sample 100
sample 150
PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/iterative_sparselo/wanda/unstructured/0.6: 8.972464561462402


8.972464561462402

In [6]:
# WikiText-2 perplexity of the checkpoint at iterative_sparselo/wanda/unstructured/0.7.
# NOTE(review): path suggests Wanda pruning, unstructured, sparsity ratio 0.7 — confirm against the producing run.
model_eval_ppl(
    "/data0/users/tanganke/projects/fusion_bench/outputs/llama/iterative_sparselo/wanda/unstructured/0.7"
)

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

evaluating on wikitext2


Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]

nsamples 166
sample 0
sample 50
sample 100
sample 150
PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/iterative_sparselo/wanda/unstructured/0.7: 32.90485382080078


32.90485382080078

In [7]:
# WikiText-2 perplexity of the checkpoint at iterative_sparselo/wanda/semistructured/2_4.
# NOTE(review): "2_4" presumably denotes a 2:4 (N:M) semi-structured Wanda-pruning pattern — confirm.
model_eval_ppl(
    "/data0/users/tanganke/projects/fusion_bench/outputs/llama/iterative_sparselo/wanda/semistructured/2_4"
)

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

evaluating on wikitext2


Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]

nsamples 166
sample 0
sample 50
sample 100
sample 150
PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/iterative_sparselo/wanda/semistructured/2_4: 9.1827392578125


9.1827392578125

In [8]:
# WikiText-2 perplexity of the checkpoint at iterative_sparselo/wanda/semistructured/4_8.
# NOTE(review): "4_8" presumably denotes a 4:8 (N:M) semi-structured Wanda-pruning pattern — confirm.
model_eval_ppl(
    "/data0/users/tanganke/projects/fusion_bench/outputs/llama/iterative_sparselo/wanda/semistructured/4_8"
)

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

evaluating on wikitext2


Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]

nsamples 166
sample 0
sample 50
sample 100
sample 150
PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/iterative_sparselo/wanda/semistructured/4_8: 7.743643760681152


7.743643760681152

# PCP with mask

## Magnitude

In [5]:
# WikiText-2 perplexity of the checkpoint at pcp_sparselo/magnitude/unstructured/0.1.
# NOTE(review): path suggests the PCP variant with magnitude pruning, unstructured, sparsity ratio 0.1 — confirm.
model_eval_ppl(
    "/data0/users/tanganke/projects/fusion_bench/outputs/llama/pcp_sparselo/magnitude/unstructured/0.1"
)



Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

evaluating on wikitext2


Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]

nsamples 166
sample 0


We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.43. Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/v4.41.3/en/internal/generation_utils#transformers.Cache)


sample 50
sample 100
sample 150
PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/pcp_sparselo/magnitude/unstructured/0.1: 5.800900459289551


5.800900459289551

In [6]:
# WikiText-2 perplexity of the checkpoint at pcp_sparselo/magnitude/unstructured/0.2.
# NOTE(review): path suggests the PCP variant with magnitude pruning, unstructured, sparsity ratio 0.2 — confirm.
model_eval_ppl(
    "/data0/users/tanganke/projects/fusion_bench/outputs/llama/pcp_sparselo/magnitude/unstructured/0.2"
)



Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

evaluating on wikitext2


Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]

nsamples 166
sample 0
sample 50
sample 100
sample 150
PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/pcp_sparselo/magnitude/unstructured/0.2: 6.002498149871826


6.002498149871826

In [7]:
# WikiText-2 perplexity of the checkpoint at pcp_sparselo/magnitude/unstructured/0.3.
# NOTE(review): path suggests the PCP variant with magnitude pruning, unstructured, sparsity ratio 0.3 — confirm.
model_eval_ppl(
    "/data0/users/tanganke/projects/fusion_bench/outputs/llama/pcp_sparselo/magnitude/unstructured/0.3"
)

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

evaluating on wikitext2


Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]

nsamples 166
sample 0
sample 50
sample 100
sample 150
PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/pcp_sparselo/magnitude/unstructured/0.3: 6.358213424682617


6.358213424682617

In [8]:
# WikiText-2 perplexity of the checkpoint at pcp_sparselo/magnitude/unstructured/0.4.
# NOTE(review): path suggests the PCP variant with magnitude pruning, unstructured, sparsity ratio 0.4 — confirm.
model_eval_ppl(
    "/data0/users/tanganke/projects/fusion_bench/outputs/llama/pcp_sparselo/magnitude/unstructured/0.4"
)

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

evaluating on wikitext2


Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]

nsamples 166
sample 0
sample 50
sample 100
sample 150
PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/pcp_sparselo/magnitude/unstructured/0.4: 7.0995635986328125


7.0995635986328125

In [12]:
# WikiText-2 perplexity of the checkpoint at pcp_sparselo/magnitude/unstructured/0.5.
# NOTE(review): path suggests the PCP variant with magnitude pruning, unstructured, sparsity ratio 0.5 — confirm.
model_eval_ppl(
    "/data0/users/tanganke/projects/fusion_bench/outputs/llama/pcp_sparselo/magnitude/unstructured/0.5"
)



Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

evaluating on wikitext2


Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]

nsamples 166
sample 0
sample 50
sample 100
sample 150
PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/pcp_sparselo/magnitude/unstructured/0.5: 8.69730281829834


8.69730281829834

In [13]:
# WikiText-2 perplexity of the checkpoint at pcp_sparselo/magnitude/unstructured/0.6.
# NOTE(review): path suggests the PCP variant with magnitude pruning, unstructured, sparsity ratio 0.6 — confirm.
model_eval_ppl(
    "/data0/users/tanganke/projects/fusion_bench/outputs/llama/pcp_sparselo/magnitude/unstructured/0.6"
)



Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

evaluating on wikitext2


Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]

nsamples 166
sample 0
sample 50
sample 100
sample 150
PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/pcp_sparselo/magnitude/unstructured/0.6: 16.671098709106445


16.671098709106445

In [14]:
# WikiText-2 perplexity of the checkpoint at pcp_sparselo/magnitude/unstructured/0.7.
# NOTE(review): path suggests the PCP variant with magnitude pruning, unstructured, sparsity ratio 0.7 — confirm.
model_eval_ppl(
    "/data0/users/tanganke/projects/fusion_bench/outputs/llama/pcp_sparselo/magnitude/unstructured/0.7"
)

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

evaluating on wikitext2


Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]

nsamples 166
sample 0
sample 50
sample 100
sample 150
PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/pcp_sparselo/magnitude/unstructured/0.7: 727.54296875


727.54296875

In [5]:
# WikiText-2 perplexity of the checkpoint at pcp_sparselo/magnitude/semistructured/2_4.
# NOTE(review): "2_4" presumably denotes a 2:4 (N:M) semi-structured magnitude-pruning pattern — confirm.
model_eval_ppl(
    "/data0/users/tanganke/projects/fusion_bench/outputs/llama/pcp_sparselo/magnitude/semistructured/2_4"
)

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

evaluating on wikitext2


Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]

nsamples 166
sample 0
sample 50
sample 100
sample 150
PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/pcp_sparselo/magnitude/semistructured/2_4: 16.622926712036133


16.622926712036133

In [4]:
# WikiText-2 perplexity of the checkpoint at pcp_sparselo/magnitude/semistructured/4_8.
# NOTE(review): "4_8" presumably denotes a 4:8 (N:M) semi-structured magnitude-pruning pattern — confirm.
model_eval_ppl(
    "/data0/users/tanganke/projects/fusion_bench/outputs/llama/pcp_sparselo/magnitude/semistructured/4_8"
)



Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

evaluating on wikitext2


Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]

nsamples 166
sample 0
sample 50
sample 100
sample 150
PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/pcp_sparselo/magnitude/semistructured/4_8: 10.601716995239258


10.601716995239258

## Wanda

In [21]:
# WikiText-2 perplexity of the checkpoint at pcp_sparselo/wanda/unstructured/0.1.
# NOTE(review): path suggests the PCP variant with Wanda pruning, unstructured, sparsity ratio 0.1 — confirm.
model_eval_ppl(
    "/data0/users/tanganke/projects/fusion_bench/outputs/llama/pcp_sparselo/wanda/unstructured/0.1"
)

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

evaluating on wikitext2


Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]

nsamples 166
sample 0
sample 50
sample 100
sample 150
PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/pcp_sparselo/wanda/unstructured/0.1: 5.691865921020508


5.691865921020508

In [20]:
# WikiText-2 perplexity of the checkpoint at pcp_sparselo/wanda/unstructured/0.2.
# NOTE(review): path suggests the PCP variant with Wanda pruning, unstructured, sparsity ratio 0.2 — confirm.
model_eval_ppl(
    "/data0/users/tanganke/projects/fusion_bench/outputs/llama/pcp_sparselo/wanda/unstructured/0.2"
)

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

evaluating on wikitext2


Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]

nsamples 166
sample 0
sample 50
sample 100
sample 150
PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/pcp_sparselo/wanda/unstructured/0.2: 5.833505630493164


5.833505630493164

In [19]:
# WikiText-2 perplexity of the checkpoint at pcp_sparselo/wanda/unstructured/0.3.
# NOTE(review): path suggests the PCP variant with Wanda pruning, unstructured, sparsity ratio 0.3 — confirm.
model_eval_ppl(
    "/data0/users/tanganke/projects/fusion_bench/outputs/llama/pcp_sparselo/wanda/unstructured/0.3"
)

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

evaluating on wikitext2


Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]

nsamples 166
sample 0
sample 50
sample 100
sample 150
PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/pcp_sparselo/wanda/unstructured/0.3: 6.017829418182373


6.017829418182373

In [18]:
# WikiText-2 perplexity of the checkpoint at pcp_sparselo/wanda/unstructured/0.4.
# NOTE(review): path suggests the PCP variant with Wanda pruning, unstructured, sparsity ratio 0.4 — confirm.
model_eval_ppl(
    "/data0/users/tanganke/projects/fusion_bench/outputs/llama/pcp_sparselo/wanda/unstructured/0.4"
)

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

evaluating on wikitext2


Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]

nsamples 166
sample 0
sample 50
sample 100
sample 150
PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/pcp_sparselo/wanda/unstructured/0.4: 6.446828842163086


6.446828842163086

In [15]:
# WikiText-2 perplexity of the checkpoint at pcp_sparselo/wanda/unstructured/0.5.
# NOTE(review): path suggests the PCP variant with Wanda pruning, unstructured, sparsity ratio 0.5 — confirm.
model_eval_ppl(
    "/data0/users/tanganke/projects/fusion_bench/outputs/llama/pcp_sparselo/wanda/unstructured/0.5"
)

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

evaluating on wikitext2


Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]

nsamples 166
sample 0
sample 50
sample 100
sample 150
PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/pcp_sparselo/wanda/unstructured/0.5: 7.278290271759033


7.278290271759033

In [16]:
# WikiText-2 perplexity of the checkpoint at pcp_sparselo/wanda/unstructured/0.6.
# NOTE(review): path suggests the PCP variant with Wanda pruning, unstructured, sparsity ratio 0.6 — confirm.
model_eval_ppl(
    "/data0/users/tanganke/projects/fusion_bench/outputs/llama/pcp_sparselo/wanda/unstructured/0.6"
)

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

evaluating on wikitext2


Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]

nsamples 166
sample 0
sample 50
sample 100
sample 150
PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/pcp_sparselo/wanda/unstructured/0.6: 10.189253807067871


10.189253807067871

In [17]:
# WikiText-2 perplexity of the checkpoint at pcp_sparselo/wanda/unstructured/0.7.
# NOTE(review): path suggests the PCP variant with Wanda pruning, unstructured, sparsity ratio 0.7 — confirm.
model_eval_ppl(
    "/data0/users/tanganke/projects/fusion_bench/outputs/llama/pcp_sparselo/wanda/unstructured/0.7"
)

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

evaluating on wikitext2


Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]

nsamples 166
sample 0
sample 50
sample 100
sample 150
PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/pcp_sparselo/wanda/unstructured/0.7: 47.11124038696289


47.11124038696289

In [10]:
# WikiText-2 perplexity of the checkpoint at pcp_sparselo/wanda/semistructured/2_4.
# NOTE(review): "2_4" presumably denotes a 2:4 (N:M) semi-structured Wanda-pruning pattern — confirm.
model_eval_ppl(
    "/data0/users/tanganke/projects/fusion_bench/outputs/llama/pcp_sparselo/wanda/semistructured/2_4"
)

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

evaluating on wikitext2


Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]

nsamples 166
sample 0
sample 50
sample 100
sample 150
PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/pcp_sparselo/wanda/semistructured/2_4: 11.22415542602539


11.22415542602539

In [6]:
# WikiText-2 perplexity of the checkpoint at pcp_sparselo/wanda/semistructured/4_8.
# NOTE(review): "4_8" presumably denotes a 4:8 (N:M) semi-structured Wanda-pruning pattern — confirm.
model_eval_ppl(
    "/data0/users/tanganke/projects/fusion_bench/outputs/llama/pcp_sparselo/wanda/semistructured/4_8"
)



Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

evaluating on wikitext2


Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]

nsamples 166
sample 0
sample 50
sample 100
sample 150
PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/pcp_sparselo/wanda/semistructured/4_8: 8.633923530578613


8.633923530578613