In [1]:
from transformers import AutoTokenizer               # <-- You forgot this import

from evaluation.evaluator import EarlyExitEvaluator
from strategies.confidence_exit import ConfidenceExit
from models.gpt2_wrapper import GPT2WithEarlyExit
from evaluation.dataset_loaders.sst2 import load_sst2

# Load the pretrained "gpt2" tokenizer once; the same object is handed to the
# model wrapper and to the evaluator in the cells below.
tokenizer = AutoTokenizer.from_pretrained("gpt2")

In [4]:
# Confidence-based exit strategy with threshold 1 and allowed_layers 3, 6 and 9.
# NOTE(review): every result recorded in this notebook reports
# avg_layers_used == 12.0, so it looks like no early exit is ever taken with
# threshold=1 — confirm whether this is an intentional no-exit baseline.
strategy = ConfidenceExit(threshold=1, allowed_layers=[3,6,9])
# Wrap the "gpt2" checkpoint together with the strategy and the shared tokenizer.
model = GPT2WithEarlyExit("gpt2", strategy, tokenizer)

In [5]:
# Single-dataset run on SST-2.
# NOTE(review): task="train" presumably selects the training split — confirm
# against load_sst2.
dataset = load_sst2(number=500, task="train")   # number of examples to use = 500 (recorded run reports num_samples == 500)

# The evaluator is built from the same tokenizer that was given to the model wrapper.
evaluator = EarlyExitEvaluator(tokenizer)

# Classification evaluation; dataset_name is passed explicitly for this task type.
result = evaluator.evaluate(
    model=model,
    strategy=strategy,
    dataset=dataset,
    task_type="classification",
    dataset_name="sst2",
)

# The recorded output is a dict with metric, score, avg_latency_sec,
# tokens_per_sec, avg_layers_used and num_samples.
print(result)

Evaluating: 100%|█████████████████████████████| 500/500 [00:19<00:00, 26.05it/s]

{'metric': 'accuracy', 'score': np.float64(0.418), 'avg_latency_sec': np.float64(0.038216031074523926), 'tokens_per_sec': 26.167029172912546, 'avg_layers_used': np.float64(12.0), 'num_samples': 500}





In [None]:
from evaluation.dataset_loaders.sst2 import load_sst2
from evaluation.dataset_loaders.agnews import load_agnews
from evaluation.dataset_loaders.cnn_dm import load_cnndm
from evaluation.dataset_loaders.squad import load_squad
from evaluation.dataset_loaders.wmt_en_fr import load_wmt_enfr

# Benchmark sweep: one (dataset name, loader function, task type) triple per run.
datasets = [
    ("sst2", load_sst2, "classification"),
    ("agnews", load_agnews, "classification"),
    ("cnn_dm", load_cnndm, "summarization"),
    ("wmt14_enfr", load_wmt_enfr, "translation"),
    ("squad", load_squad, "qa"),
]

# NOTE(review): the recorded run stops partway through cnn_dm right after a
# tokenizer warning that an input (1032 tokens) exceeds the model maximum
# (1024). Inputs may need truncating in the loader/evaluator — confirm.
for name, loader, task in datasets:
    print(f"Testing {name}...")

    # Each loader is asked for a 10% fraction of its dataset.
    dataset = loader(fraction=0.10)

    # IMPORTANT: classification runs must be told which dataset they are
    # scoring (e.g. "sst2" or "agnews"); every other task type is evaluated
    # without a dataset_name argument.
    result = evaluator.evaluate(
        model=model,
        strategy=strategy,
        dataset=dataset,
        task_type=task,
        **({"dataset_name": name} if task == "classification" else {}),
    )

    print(name, result)

Testing sst2...


Evaluating: 100%|███████████████████████████████| 87/87 [00:03<00:00, 23.93it/s]


sst2 {'metric': 'accuracy', 'score': np.float64(0.4367816091954023), 'avg_latency_sec': np.float64(0.04162762082856277), 'tokens_per_sec': 24.022511498275456, 'avg_layers_used': np.float64(12.0), 'num_samples': 87}
Testing agnews...


Evaluating: 100%|█████████████████████████████| 760/760 [00:36<00:00, 20.71it/s]


agnews {'metric': 'accuracy', 'score': np.float64(0.2723684210526316), 'avg_latency_sec': np.float64(0.04812421390884801), 'tokens_per_sec': 20.77956020007097, 'avg_layers_used': np.float64(12.0), 'num_samples': 760}
Testing cnn_dm...


Token indices sequence length is longer than the specified maximum sequence length for this model (1032 > 1024). Running this sequence through the model will result in indexing errors
Evaluating:  16%|████▌                       | 186/1149 [00:53<03:43,  4.30it/s]

In [None]:
### Strategy 2 - Confidence threshold must be met in consecutive layers

In [None]:
from strategies.continous_confidence_exit import ContinuousConfidenceExit

# Second strategy: threshold 0.75 with required_consecutive=2, over
# allowed_layers 3, 6, 9 and 11.
# NOTE(review): presumably the threshold has to be met at 2 consecutive allowed
# layers before exiting — confirm against ContinuousConfidenceExit.
strategy = ContinuousConfidenceExit(
    threshold=0.75,
    required_consecutive=2,
    allowed_layers=[3, 6, 9, 11]
)

# Rebinds `model` and `evaluator` (and `strategy` above) from the earlier
# cells, so every later cell runs against this strategy.
model = GPT2WithEarlyExit("gpt2", strategy, tokenizer)
evaluator = EarlyExitEvaluator(tokenizer)

In [None]:
# Benchmark sweep for the strategy/model/evaluator currently bound in the
# notebook: one (dataset name, loader function, task type) triple per run.
datasets = [
    ("sst2", load_sst2, "classification"),
    ("agnews", load_agnews, "classification"),
    ("cnn_dm", load_cnndm, "summarization"),
    ("wmt14_enfr", load_wmt_enfr, "translation"),
    ("squad", load_squad, "qa"),
]

for name, loader, task in datasets:
    print(f"\n========== Testing {name.upper()} ==========\n")

    # Use 2% of dataset
    dataset = loader(fraction=0.02)

    # Classification runs must be told which dataset they are scoring
    # (e.g. "sst2" or "agnews"); this argument was previously missing here.
    # Other task types are still evaluated without a dataset_name argument.
    result = evaluator.evaluate(
        model=model,
        strategy=strategy,
        dataset=dataset,
        task_type=task,
        **({"dataset_name": name} if task == "classification" else {}),
    )

    print(name, result)