In [1]:
# Load model directly
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from datasets import load_dataset
import time
import pandas as pd
from sklearn.metrics import accuracy_score
import torch

dataset = load_dataset("NicolaiSivesind/human-vs-machine", "research_abstracts_labeled")['test'][0:50]
x_test, y_test = dataset['text'], dataset['label']
tokenizer = AutoTokenizer.from_pretrained("andreas122001/bloomz-3b-wiki-detector")
model = AutoModelForSequenceClassification.from_pretrained("andreas122001/bloomz-3b-wiki-detector")

  from .autonotebook import tqdm as notebook_tqdm
Found cached dataset human-vs-machine (/home/codespace/.cache/huggingface/datasets/NicolaiSivesind___human-vs-machine/research_abstracts_labeled/0.0.0/9e9ff0b78fa974ae55166fbed3b9032d432c39f2e76909d68bdd53cb4ea313de)
100%|██████████| 3/3 [00:00<00:00, 568.92it/s]
Loading checkpoint shards: 100%|██████████| 2/2 [00:06<00:00,  3.14s/it]


### 1. Quantize the model

In [2]:
model = torch.quantization.quantize_dynamic(
    model=model,
    qconfig_spec={torch.nn.Linear},
    dtype=torch.qint8
)

### 2. Evaluate Performance

In [3]:
param_size = 0
for param in model.parameters():
    param_size += param.nelement() * param.element_size()
buffer_size = 0
for buffer in model.buffers():
    buffer_size += buffer.nelement() * buffer.element_size()

base_model_size = (param_size + buffer_size) / 1024**2
print('Model size: {:.3f}MB'.format(base_model_size))

Model size: 2451.211MB


In [4]:
y_pred = []
times = []
sigmoid = torch.nn.Sigmoid()
for text in x_test:
    input = tokenizer(text, return_tensors="pt", padding=True, truncation=True)
    st = time.time()
    output = model(**input).logits
    probs = sigmoid(torch.Tensor(output))[0][0].item()
    if probs> 0.5:
        output = 1
    else:
        output = 0
    y_pred.append(output)
    times.append(time.time() - st)

print(pd.Series(times).describe().T)
print("Accuracy:", accuracy_score(y_test, y_pred))


Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


count    50.000000
mean      4.368607
std       2.297457
min       0.890930
25%       2.430465
50%       4.472417
75%       5.759864
max      10.271150
dtype: float64
Accuracy: 0.46
