# DistilBERT Endian Classifier Evaluation

## Prerequisites

In [None]:
!pip install transformers[torch] -U
!pip install accelerate -U
!pip install datasets
!pip install evaluate

Collecting datasets
  Downloading datasets-3.0.0-py3-none-any.whl.metadata (19 kB)
Collecting pyarrow>=15.0.0 (from datasets)
  Downloading pyarrow-17.0.0-cp310-cp310-manylinux_2_28_x86_64.whl.metadata (3.3 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess (from datasets)
  Downloading multiprocess-0.70.16-py310-none-any.whl.metadata (7.2 kB)
Downloading datasets-3.0.0-py3-none-any.whl (474 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m474.3/474.3 kB[0m [31m9.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading dill-0.3.8-py3-none-any.whl (116 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m10.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pyarrow-17.0.0-cp310-cp310-manylinux_2_28_x86_64.whl (39.9 MB)
[2K 

## Load model and tokenizer

In [None]:
# Load model and tokenizer

from transformers import (
    AutoModelForSequenceClassification,
    AutoTokenizer,
)

# One identifier is passed to both loaders, so the model and the tokenizer
# come from the same checkpoint.
model_name = "ryfye181/distilbert_endian_classifier"

# Sequence-classification model; evaluated later via its `.logits` output.
model = AutoModelForSequenceClassification.from_pretrained(model_name)
# Tokenizer loaded from the same identifier as the model.
tokenizer = AutoTokenizer.from_pretrained(model_name)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/732 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/1.20k [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/712k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

## Load Dataset

In [None]:
# Load the test data and put it in a fixed, reproducible random order

from datasets import load_dataset

# NOTE(review): data_dir points at the "test" directory while the requested
# split is 'train' -- presumably the loader exposes that directory's files
# under the default split name; confirm against the dataset repository.
dataset = (
    load_dataset("ryfye181/endianness", data_dir="test", split='train')
    .shuffle(seed=42)  # fixed seed so every run sees the same ordering
)

## Evaluate

In [None]:
import evaluate
import torch
from tqdm import tqdm

def compute_accuracy(model, tokenizer, dataset, device, batch_size=32):
    """Run a sequence classifier over a labelled dataset and report its accuracy.

    The dataset is processed in mini-batches. For each batch the ``data``
    strings are tokenized (padded and truncated), passed through ``model``
    with gradient tracking disabled, and the arg-max class id is compared with
    the row's ``endianness`` label mapped through ``{"little": 0, "big": 1}``.
    A running accuracy is shown in the progress-bar description.

    Args:
        model: Model whose call returns an object with a ``.logits`` tensor.
            It is switched to evaluation mode and moved to ``device``.
        tokenizer: Callable that accepts a list of strings together with
            ``return_tensors``, ``padding`` and ``truncation`` and returns a
            mapping that supports ``.to(device)``.
        dataset: Object exposing ``to_pandas()``; each row must carry a
            ``data`` field (model input) and an ``endianness`` field
            (``"little"`` or ``"big"``).
        device: Torch device on which inference is performed.
        batch_size: Number of rows per forward pass. Defaults to 32.

    Returns:
        The value returned by the ``accuracy`` metric's ``compute`` call over
        all references and predictions; its ``'accuracy'`` entry is also
        printed.

    Raises:
        KeyError: If a row's ``endianness`` value is neither ``"little"`` nor
            ``"big"``.
    """
    # Initialize accuracy metric (only invoked once, after the loop)
    accuracy = evaluate.load("accuracy")
    label2id = {"little": 0, "big": 1}

    # Configure model for evaluation
    model.eval()
    model.to(device)

    # Full reference/prediction history, needed for the final metric call
    all_references = []
    all_predictions = []

    # Running tallies for the live progress display. Re-running the metric over
    # everything seen so far on every batch would make the loop quadratic in
    # the dataset size, so the running accuracy is maintained incrementally.
    num_correct = 0
    num_seen = 0

    # Convert dataset to a list of dictionaries for faster iteration
    dataset_list = dataset.to_pandas().to_dict('records')

    # Create a tqdm progress bar with one step per batch
    progress_bar = tqdm(range(0, len(dataset_list), batch_size), desc="Processing")

    # Process the dataset in batches
    for start in progress_bar:
        batch = dataset_list[start:start + batch_size]

        # Tokenize the batch
        inputs = tokenizer(
            [row['data'] for row in batch],
            return_tensors="pt",
            padding=True,
            truncation=True,
        ).to(device)

        # Perform inference without building the autograd graph
        with torch.no_grad():
            logits = model(**inputs).logits
        predicted_class_ids = logits.argmax(dim=-1).tolist()

        # Extend the references and predictions lists
        batch_references = [label2id[row['endianness']] for row in batch]
        all_references.extend(batch_references)
        all_predictions.extend(predicted_class_ids)

        # Update the running tallies with this batch only (constant work per batch)
        num_correct += sum(
            predicted == expected
            for predicted, expected in zip(predicted_class_ids, batch_references)
        )
        num_seen += len(batch_references)

        # Update the progress bar description with current accuracy
        progress_bar.set_description(
            f"Processing (Current Accuracy: {num_correct / num_seen:.4f})"
        )

    # Compute and return the final accuracy with a single metric evaluation
    final_accuracy = accuracy.compute(references=all_references, predictions=all_predictions)
    print(f"\nFinal Accuracy: {final_accuracy['accuracy']:.4f}")
    return final_accuracy

# Usage: evaluate on the GPU when CUDA is available, otherwise on the CPU
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(f"Using device: {device}")

# Keep the returned metric result so later cells can inspect it
result = compute_accuracy(model, tokenizer, dataset, device)

Using device: cuda


Processing (Current Accuracy: 0.7725):   4%|▍         | 1394/32789 [09:50<3:41:42,  2.36it/s]


KeyboardInterrupt: 