In [1]:
# Import the required libraries
import os

import torch
import numpy as np
import pandas as pd
import transformers
from transformers import AutoModelForSequenceClassification, GlueDataTrainingArguments, AutoTokenizer
import torchmetrics
import datasets
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
from matplotlib.ticker import MaxNLocator
from tqdm import tqdm

from apx import ApproxEmbed, ApxSVD, train_apx
from glue_score import GLUE_TASKS, make_model, Glue, get_dataloaders, validate, get_metrics

  from .autonotebook import tqdm as notebook_tqdm


In [13]:
# ---------------------------------------------------------------------------
# Runtime setup
# ---------------------------------------------------------------------------
# Only let the transformers logger report errors.
transformers.utils.logging.set_verbosity_error()

# First CUDA device when one is available, otherwise the CPU.
device = "cuda:0" if torch.cuda.is_available() else "cpu"

tokenizer = AutoTokenizer.from_pretrained("prajjwal1/bert-tiny")

# ---------------------------------------------------------------------------
# Task training settings (passed to Glue(...) / get_dataloaders(...))
# ---------------------------------------------------------------------------
epochs = 3
batch_size = 32

# ---------------------------------------------------------------------------
# Embedding-approximation settings
# ---------------------------------------------------------------------------
lr = 5e-5                  # not referenced by the cells visible in this notebook
levels = 8                 # ApproxEmbed(levels=...)
channels = 8               # ApproxEmbed(feature_dim=...)
bits = 8                   # ApproxEmbed(codebook_bitwidth=...)
neurons = 64               # ApproxEmbed(neurons=...)
nn_levels = 2              # ApproxEmbed(nn_levels=...)
apx_epochs = 5000          # train_apx: third positional argument
apx_batch_size = 1 << 14   # train_apx: fourth positional argument (16384)
checkpoint_every = 100     # train_apx: fifth positional argument

# Directory handed to train_apx as its last argument.
save_path = "results/compression_finetuned/"

# Values iterated by the evaluation loop; each one labels a "Norm <value>" entry.
norms = [1, 1.25, 1.5, 1.75, 2]

In [29]:
# For every GLUE task: train a bert-tiny model with Glue(...), record its
# validation metrics, then record the metrics again after replacing the word
# embedding layer with (a) an ApproxEmbed trained to reproduce the embedding
# table and (b) an ApxSVD built from it. The model itself is not retrained
# after the replacement.

def _clone_trained_model(source_model, task_args):
    """Return a fresh bert-tiny model loaded with a copy of `source_model`'s weights.

    Working on a clone keeps the trained original untouched when its
    embedding layer is swapped out.
    """
    clone = make_model('prajjwal1/bert-tiny', task_args)
    clone.load_state_dict(source_model.state_dict())
    return clone


RESULTS = {}
for task, args in GLUE_TASKS.items():

    print(task)
    model = make_model('prajjwal1/bert-tiny', args)

    train_dataloader, val_dataloader = get_dataloaders(args, task, batch_size)
    metrics = get_metrics(args)

    # The return value is intentionally discarded; `model` is what gets
    # validated below.
    Glue(model, tokenizer, task, args, epochs=epochs, steps_validate=0.2,
         train_dataloader=train_dataloader, val_dataloader=val_dataloader)

    # Target table that the approximations are fitted against.
    embeddings = model.bert.embeddings.word_embeddings.weight.to(device)

    results = {}

    val_metrics = validate(model, tokenizer, val_dataloader, metrics, args)
    results['Original'] = [m.item() for m in val_metrics]
    print('Original', val_metrics)

    for norm in norms:
        # NOTE(review): `norm` is only used to label the result below; it is
        # not passed to ApproxEmbed or train_apx, so every iteration trains
        # the same configuration. Wire it into the approximation if it is
        # meant to be a hyper-parameter.
        # NOTE(review): every iteration (and every task) hands the same
        # `save_path` to train_apx -- presumably checkpoints overwrite each
        # other; verify against train_apx.

        # Work on a clone so the original model is not modified.
        apx_model = _clone_trained_model(model, args)

        # Use the configured `device` rather than a hard-coded 'cuda' so the
        # cell also runs on CPU-only machines and stays on the same device
        # as `embeddings`.
        apx = ApproxEmbed(levels=levels,
                          feature_dim=channels,
                          num_words=embeddings.shape[0],
                          output_dims=embeddings.shape[1],
                          feature_std=0.1,
                          feature_bias=0.0,
                          codebook_bitwidth=bits,
                          neurons=neurons,
                          nn_levels=nn_levels).to(device)

        train_apx(apx, embeddings, apx_epochs, apx_batch_size, checkpoint_every, save_path)
        apx.fix_indices()
        apx_model.bert.embeddings.word_embeddings = apx

        val_metrics = validate(apx_model, tokenizer, val_dataloader, metrics, args)
        results["Norm " + str(norm)] = [m.item() for m in val_metrics]
        print(norm, val_metrics)

    # SVD baseline, again on a clone of the original model.
    apx_model = _clone_trained_model(model, args)

    # NOTE(review): the second argument (5) is presumably the SVD rank --
    # confirm against ApxSVD.
    apxSVD = ApxSVD(apx_model.bert.embeddings.word_embeddings.weight, 5)
    apx_model.bert.embeddings.word_embeddings = apxSVD.to(device)

    val_metrics = validate(apx_model, tokenizer, val_dataloader, metrics, args)
    results['SVD'] = [m.item() for m in val_metrics]
    print('SVD', val_metrics)

    RESULTS[task] = results

cola


Found cached dataset glue (C:/Users/Sam/.cache/huggingface/datasets/glue/cola/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad)
Found cached dataset glue (C:/Users/Sam/.cache/huggingface/datasets/glue/cola/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad)
Epoch: 1: 100%|██████████| 268/268 [00:07<00:00, 34.84it/s, loss: 0.204]
Epoch: 2: 100%|██████████| 268/268 [00:07<00:00, 36.47it/s, loss: 0.203]
Epoch: 3: 100%|██████████| 268/268 [00:07<00:00, 36.44it/s, loss: 0.202]


Original [tensor(0)]
1.25 [tensor(0)]
SVD [tensor(0)]


In [30]:
# Show the metrics collected by the evaluation loop, keyed by task and then by variant.
RESULTS

{'cola': {'Original': [0], 'Norm1.25': [0], 'SVD': [0]}}

In [31]:
# Persist the collected metrics to disk.
#
# A dedicated variable is used for the output directory: reusing `save_path`
# here would overwrite the checkpoint directory set in the configuration cell,
# so re-running the evaluation cell afterwards would send train_apx
# checkpoints to the wrong place. (`os` is imported in the first cell.)
results_dir = 'results/'

# exist_ok=True creates the directory only when it is missing.
os.makedirs(results_dir, exist_ok=True)

torch.save(RESULTS, os.path.join(results_dir, 'glue_results_no_retrain.pth'))

In [32]:
# Load the saved metrics and render them as a table: one column per task, one
# row per variant, with each metric shown as a percentage to one decimal place.
# (`torch` and `pandas` are imported in the first cell.)

# The file is produced by this notebook; torch.load unpickles its content, so
# do not point this at a file from an untrusted source.
run_dict = torch.load('results/glue_results_no_retrain.pth')

# Build a new mapping instead of overwriting `run_dict` while iterating it.
# Joining with "/" yields "x" for one metric and "x/y" for two, and also copes
# with any other number of metrics instead of dropping or failing on them.
formatted = {
    task_name: {
        variant: "/".join(f"{score * 100:,.1f}" for score in scores)
        for variant, scores in variants.items()
    }
    for task_name, variants in run_dict.items()
}

df = pd.DataFrame(formatted)
df

Unnamed: 0,cola
Norm1.25,0.0
Original,0.0
SVD,0.0
