In [1]:
import os
# Pin CUDA device enumeration and visibility through environment variables.
# This cell runs before `torch` is imported further down the notebook.
os.environ.update(
    CUDA_DEVICE_ORDER="PCI_BUS_ID",
    CUDA_VISIBLE_DEVICES="0",
)

In [2]:

import torch
import transformers
from utils import *

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Short alias -> Hugging Face Hub checkpoint id.
# Keys follow "<size>" for the plain pre-trained models and
# "<size>-ft-<task>" for checkpoints fine-tuned on a GLUE task.
# NOTE(review): the "base" entries for stsb, qnli, qqp and wnli point at
# *cased* checkpoints while every other entry is uncased -- confirm this is
# intentional before comparing results across tasks.
huggingface_models = {
    # Pre-trained only
    "base": "bert-base-uncased",
    "large": "bert-large-uncased",
    # SST-2
    "base-ft-sst2": "yoshitomo-matsubara/bert-base-uncased-sst2",
    "large-ft-sst2": "yoshitomo-matsubara/bert-large-uncased-sst2",
    # STS-B
    "base-ft-stsb": "gchhablani/bert-base-cased-finetuned-stsb",
    "large-ft-stsb": "yoshitomo-matsubara/bert-large-uncased-stsb",
    # MRPC
    "base-ft-mrpc": "textattack/bert-base-uncased-MRPC",
    "large-ft-mrpc": "yoshitomo-matsubara/bert-large-uncased-mrpc",
    # CoLA
    "base-ft-cola": "yoshitomo-matsubara/bert-base-uncased-cola",
    "large-ft-cola": "yoshitomo-matsubara/bert-large-uncased-cola",
    # QNLI
    "base-ft-qnli": "gchhablani/bert-base-cased-finetuned-qnli",
    "large-ft-qnli": "yoshitomo-matsubara/bert-large-uncased-qnli",
    # MNLI
    "base-ft-mnli": "yoshitomo-matsubara/bert-base-uncased-mnli",
    "large-ft-mnli": "yoshitomo-matsubara/bert-large-uncased-mnli",
    # RTE
    "base-ft-rte": "anirudh21/bert-base-uncased-finetuned-rte",
    "large-ft-rte": "yoshitomo-matsubara/bert-large-uncased-rte",
    # QQP
    "base-ft-qqp": "A-bhimany-u08/bert-base-cased-qqp",
    "large-ft-qqp": "yoshitomo-matsubara/bert-large-uncased-qqp",
    # WNLI
    "base-ft-wnli": "gchhablani/bert-base-cased-finetuned-wnli",
    "large-ft-wnli": "yoshitomo-matsubara/bert-large-uncased-wnli",
}

In [4]:
# Build the BERT-base classifier and its tokenizer from the checkpoint
# registered under the "base-ft-mrpc" alias.
model_base, tokenizer = get_classification_bert_model(
    pre_trained_model_name=huggingface_models["base-ft-mrpc"],
)

In [5]:
# Load the GLUE 'mrpc' task with the tokenizer created above; the helper
# returns the train/validation dataloaders plus the dataset object itself.
train_dataloader, validation_dataloader, dataset = get_glue_task_dataset('mrpc', tokenizer)

In [6]:
# Overwrite the model weights with the locally archived checkpoint
# (presumably the original fine-tuned MRPC weights -- confirm provenance).
# map_location="cpu" deserializes the tensors on the CPU regardless of the
# device they were saved from, so the cell also works without that GPU;
# load_state_dict then copies the values into the model's own parameters.
# NOTE(review): hard-coded absolute path -- consider a configurable MODEL_DIR.
model_base.load_state_dict(
    torch.load(
        "/scr/models/LC/models_archive/Bert/base-ft-mrpc/orig_mrpc.pth",
        map_location="cpu",
    )
)

<All keys matched successfully>

In [17]:
# Baseline: evaluate the unquantized model on the "validation" split for the
# "mrpc" task; the displayed dict is the reference for the quantized run.
standard_evaluate(model_base, dataset["validation"], "mrpc")

{'eval_loss': 0.7286306023597717,
 'eval_accuracy': 0.8308823529411765,
 'eval_f1': 0.8848080133555927,
 'eval_combined_score': 0.8578451831483846,
 'eval_runtime': 3.2495,
 'eval_samples_per_second': 125.558,
 'eval_steps_per_second': 15.695}

In [13]:
# Dynamically quantize the model: only torch.nn.Linear modules are targeted
# and their weights are stored as qint8.
model_dynamic_quantized = torch.quantization.quantize_dynamic(
    model_base,
    qconfig_spec={torch.nn.Linear},
    dtype=torch.qint8,
)

In [None]:
# Score the quantized model on the validation dataloader, with both the
# model and the evaluation loop placed on the CPU.
basic_evaluate(
    model_dynamic_quantized.to("cpu"),
    validation_dataloader,
    metric=clf_metrics,
    device="cpu",
)

100%|██████████| 2/2 [01:07<00:00, 33.68s/it]


{'accuracy': 0.8308823529411765,
 'f1': 0.8855721393034827,
 'precision': 0.8240740740740741,
 'recall': 0.956989247311828}

In [9]:
def print_model_size(mdl):
    """Print the on-disk size of ``mdl``'s serialized state dict.

    The state dict is written with ``torch.save`` to a scratch file and the
    file size is printed as ``"<size> MB"`` with two decimals, where one MB
    is 1e6 bytes.

    Args:
        mdl: Any object exposing ``state_dict()`` whose result ``torch.save``
            can serialize (e.g. a ``torch.nn.Module``).

    Returns:
        None. The size is reported on stdout only.
    """
    # Local import so this cell does not depend on the notebook's import cell.
    import tempfile

    # Serialize inside a private temporary directory rather than the working
    # directory: an existing ./tmp.pt is never overwritten or deleted, and the
    # scratch file is cleaned up even if saving or measuring raises.
    with tempfile.TemporaryDirectory() as scratch_dir:
        checkpoint_path = os.path.join(scratch_dir, "tmp.pt")
        torch.save(mdl.state_dict(), checkpoint_path)
        size_mb = os.path.getsize(checkpoint_path) / 1e6
    print("%.2f MB" % size_mb)

In [10]:
# Serialized size of the original (unquantized) model's state dict.
print_model_size(model_base)

438.00 MB


In [11]:
# Serialized size of the dynamically quantized model's state dict,
# for comparison with the unquantized size printed above.
print_model_size(model_dynamic_quantized)

181.48 MB


In [17]:
from torch.nn.utils.prune import random_unstructured

In [None]:
def prune(model, conv_amount=0.2, linear_amount=0.4):
    """Apply L1 unstructured pruning to the conv and linear layers of ``model``.

    Every ``torch.nn.Conv2d`` and ``torch.nn.Linear`` module found via
    ``model.named_modules()`` has its ``weight`` pruned in place with
    ``torch.nn.utils.prune.l1_unstructured``.

    Args:
        model: The ``torch.nn.Module`` to prune; it is modified in place.
        conv_amount: ``amount`` passed to ``l1_unstructured`` for Conv2d
            layers (default 0.2, i.e. 20% of connections).
        linear_amount: ``amount`` passed to ``l1_unstructured`` for Linear
            layers (default 0.4, i.e. 40% of connections).

    Returns:
        None.
    """
    # Import the torch pruning module under an alias: inside this function the
    # bare name `prune` refers to this function itself, so the previous
    # `prune.l1_unstructured(...)` raised AttributeError on the first match.
    import torch.nn.utils.prune as torch_prune

    for name, module in model.named_modules():
        # prune `conv_amount` of connections in all 2D-conv layers
        if isinstance(module, torch.nn.Conv2d):
            torch_prune.l1_unstructured(module, name='weight', amount=conv_amount)
        # prune `linear_amount` of connections in all linear layers
        elif isinstance(module, torch.nn.Linear):
            torch_prune.l1_unstructured(module, name='weight', amount=linear_amount)