In [None]:
!pip install --quiet stanza transformers datasets tokenizers torch

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/1.1 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.5/1.1 MB[0m [31m14.6 MB/s[0m eta [36m0:00:01[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m1.1/1.1 MB[0m [31m20.3 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m13.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m491.2/491.2 kB[0m [31m26.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m4.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m56.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.6/24.6 MB[0m [31m24.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━

In [1]:
!unzip -q lessons.zip -d ./lessons


In [None]:
# !pip install --quiet stanza transformers datasets tokenizers torch

import os, pickle, random
import torch
from torch import nn
from datasets import Dataset
from transformers import (
    GPT2Config, GPT2TokenizerFast,
    Trainer, TrainingArguments, GPT2LMHeadModel
)
from sklearn.model_selection import train_test_split

# STEP 1 – Setup
import stanza
stanza.download("en", verbose=False)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

MAX_COMBOS = 1024
combo2id = {"<UNK>": 0}
_next_id = 1
def dynamic_get_combo_id(pos, dep, ner, feats):
    global _next_id
    key = (pos, dep, ner, feats)
    if key not in combo2id and _next_id < MAX_COMBOS:
        combo2id[key] = _next_id
        _next_id += 1
    return combo2id.get(key, 0)

tokenizer = GPT2TokenizerFast.from_pretrained("gpt2")
tokenizer.pad_token = tokenizer.eos_token

import re

# STEP 2 – Load pre-parsed lessons
pkl_folder = "./lessons/lessons"
lessons = {}

def extract_vol_lesson(filename):
    match = re.match(r'v(\d+)_l(\d+)\.pkl', filename)
    return (int(match.group(1)), int(match.group(2))) if match else (999, 999)

all_files = os.listdir(pkl_folder)
pkl_files = sorted(
    [f for f in all_files if re.match(r'v\d+_l\d+\.pkl', f)],
    key=extract_vol_lesson
)

for fname in pkl_files:
    with open(os.path.join(pkl_folder, fname), "rb") as f:
        lessons[fname] = pickle.load(f)
    print(f"✅ Loaded {fname} ({len(lessons[fname])} docs)")


# STEP 3 – Combo extraction and encoding
def scan_doc(doc):
    ents = [(e.start_char, e.end_char, e.type) for e in doc.ents]
    for w in [w for s in doc.sentences for w in s.words]:
        ner = next((t for s0, e0, t in ents if s0 <= w.start_char < e0), "O")
        dynamic_get_combo_id(w.upos, w.deprel, ner, w.feats)

def encode_doc(doc):
    text = doc.text
    enc = tokenizer(text, return_offsets_mapping=True, add_special_tokens=False)
    ents = [(e.start_char, e.end_char, e.type) for e in doc.ents]
    words = [w for s in doc.sentences for w in s.words]

    combo_ids = []
    for start, end in enc["offset_mapping"]:
        if start == end or end <= start:
            combo_ids.append(0)
            continue

        match = None
        for w in words:
            # allow small overlap fuzziness (fixes edge mismatches)
            if not (w.end_char <= start or w.start_char >= end):
                match = w
                break

        if match:
            ner = next((t for s0, e0, t in ents if s0 <= match.start_char < e0), "O")
            combo_ids.append(dynamic_get_combo_id(match.upos, match.deprel, ner, match.feats))
        else:
            combo_ids.append(0)
    num_total, num_missed = 0, 0
    num_total += len(enc["offset_mapping"])
    num_missed += sum(1 for c in combo_ids if c == 0)

    # print(f"💡 Miss rate: {100 * num_missed / num_total:.2f}%")

    return enc["input_ids"], combo_ids

def chunkify_docs(docs, max_len=64):
    """Each doc becomes one padded (or truncated) sample."""
    samples = []
    for doc in docs:
        ids, cmb = encode_doc(doc)
        ids = ids[:max_len]
        cmb = cmb[:max_len]
        pad = max_len - len(ids)
        if pad:
            ids += [tokenizer.pad_token_id] * pad
            cmb += [0] * pad
        samples.append({"input_ids": ids, "combo_ids": cmb})
    return Dataset.from_list(samples)


def collate(batch, max_len=64):
    ids, cmb = [], []
    for b in batch:
        input_ids = b["input_ids"][:max_len]
        combo_ids = b["combo_ids"][:max_len]
        pad = max_len - len(input_ids)
        input_ids += [tokenizer.pad_token_id] * pad
        combo_ids += [0] * pad
        ids.append(input_ids)
        cmb.append(combo_ids)

    ids = torch.tensor(ids)
    cmb = torch.tensor(cmb)
    mask = (ids != tokenizer.pad_token_id).long()

    # Mask out loss for padding
    labels = ids.clone()
    labels[ids == tokenizer.pad_token_id] = -100

    return {
        "input_ids": ids,
        "combo_ids": cmb,
        "attention_mask": mask,
        "labels": labels,
    }

# STEP 4 – Model
class SyntaxGPT(GPT2LMHeadModel):
    def __init__(self, cfg):
        super().__init__(cfg)
        H = cfg.n_embd
        self.combo_embed = nn.Embedding(MAX_COMBOS, H)
        self.merge = nn.Linear(2 * H, H, bias=False)
        self.stored_combo_ids = None

    def prepare_inputs_for_generation(self, input_ids, past_key_values=None, attention_mask=None, **kwargs):
        if past_key_values is None:
            combo_ids = self.stored_combo_ids.to(input_ids.device)
        else:
            combo_ids = input_ids.new_zeros(input_ids.shape)
            attention_mask = None
        return {
            "input_ids": input_ids,
            "attention_mask": attention_mask,
            "past_key_values": past_key_values,
            "combo_ids": combo_ids,
        }

    def forward(self, input_ids=None, combo_ids=None, attention_mask=None,
                labels=None, inputs_embeds=None, past_key_values=None, **kwargs):
        if inputs_embeds is None:
            if combo_ids is None or combo_ids.size(1) != input_ids.size(1):
                raise ValueError("combo_ids must align with input_ids")
            tok = self.transformer.wte(input_ids)
            cmb = self.combo_embed(combo_ids.to(tok.device))
            inputs_embeds = self.merge(torch.cat([tok, cmb], dim=-1))
        return super().forward(
            inputs_embeds=inputs_embeds,
            attention_mask=attention_mask,
            labels=labels,
            past_key_values=past_key_values,
            **kwargs,
        )

cfg = GPT2Config.from_pretrained("gpt2", vocab_size=tokenizer.vocab_size)
model = SyntaxGPT(cfg).to(device)
model.config.pad_token_id = tokenizer.pad_token_id

from collections import defaultdict
from sklearn.model_selection import train_test_split

combined_val_docs = []
base_lr = 1e-3
n_epochs = 1
TOTAL = len(lessons)

for idx, (lesson_name, docs) in enumerate(lessons.items(), 1):
    train_docs, val_docs = train_test_split(docs, test_size=0.1, random_state=42)
    combined_val_docs += val_docs

    print(f"\n▶ Lesson {idx}/{TOTAL}: {lesson_name} | "
          f"{len(train_docs)} train | {len(val_docs)} val | epochs={n_epochs} | lr={base_lr:g}")

    train_set = chunkify_docs(train_docs, max_len=64)
    val_set   = chunkify_docs(val_docs, max_len=64)

    args = TrainingArguments(
        output_dir=f"ckpt_{lesson_name}",
        num_train_epochs=n_epochs,
        learning_rate=base_lr,
        per_device_train_batch_size=8,
        per_device_eval_batch_size=8,
        eval_strategy="epoch",
        logging_strategy="epoch",
        logging_steps=n_epochs // 5,
        save_strategy="no",
        report_to="none",
        fp16=torch.cuda.is_available(),
        remove_unused_columns=False
    )

    trainer = Trainer(
        model=model,
        args=args,
        train_dataset=train_set,
        eval_dataset=val_set,
        data_collator=collate,
    )
    trainer.train()

# STEP 6 – Final evaluation on combined validation set
final_val = chunkify_docs(combined_val_docs, max_len=64)
metrics = trainer.evaluate(eval_dataset=final_val)

print("\n📊 Combined Val Performance:")
for k, v in metrics.items():
    print(f"{k:20s}: {v:.4f}")


✅ Loaded v3_l1.pkl (61 docs)
✅ Loaded v3_l10.pkl (59 docs)
✅ Loaded v3_l11.pkl (62 docs)
✅ Loaded v3_l12.pkl (71 docs)
✅ Loaded v3_l13.pkl (62 docs)
✅ Loaded v3_l14.pkl (60 docs)
✅ Loaded v3_l15.pkl (61 docs)
✅ Loaded v3_l16.pkl (65 docs)
✅ Loaded v3_l17.pkl (59 docs)
✅ Loaded v3_l18.pkl (50 docs)
✅ Loaded v3_l19.pkl (52 docs)
✅ Loaded v3_l2.pkl (62 docs)
✅ Loaded v3_l20.pkl (66 docs)
✅ Loaded v3_l21.pkl (73 docs)
✅ Loaded v3_l22.pkl (60 docs)
✅ Loaded v3_l23.pkl (64 docs)
✅ Loaded v3_l24.pkl (57 docs)
✅ Loaded v3_l25.pkl (67 docs)
✅ Loaded v3_l26.pkl (49 docs)
✅ Loaded v3_l27.pkl (63 docs)
✅ Loaded v3_l28.pkl (64 docs)
✅ Loaded v3_l29.pkl (60 docs)
✅ Loaded v3_l3.pkl (48 docs)
✅ Loaded v3_l30.pkl (66 docs)
✅ Loaded v3_l31.pkl (64 docs)
✅ Loaded v3_l32.pkl (62 docs)
✅ Loaded v3_l33.pkl (70 docs)
✅ Loaded v3_l34.pkl (60 docs)
✅ Loaded v3_l35.pkl (59 docs)
✅ Loaded v3_l36.pkl (53 docs)
✅ Loaded v3_l37.pkl (74 docs)
✅ Loaded v3_l38.pkl (42 docs)
✅ Loaded v3_l39.pkl (69 docs)
✅ Loaded v3_l

Epoch,Training Loss,Validation Loss
1,8.264,7.371078



▶ Lesson 2/60: v3_l10.pkl | 53 train | 6 val | epochs=1 | lr=0.001


Epoch,Training Loss,Validation Loss
1,7.5974,7.361845



▶ Lesson 3/60: v3_l11.pkl | 55 train | 7 val | epochs=1 | lr=0.001


Epoch,Training Loss,Validation Loss
1,7.2632,7.018043



▶ Lesson 4/60: v3_l12.pkl | 63 train | 8 val | epochs=1 | lr=0.001


Epoch,Training Loss,Validation Loss
1,7.3166,6.932099



▶ Lesson 5/60: v3_l13.pkl | 55 train | 7 val | epochs=1 | lr=0.001


Epoch,Training Loss,Validation Loss
1,6.4617,6.60278



▶ Lesson 6/60: v3_l14.pkl | 54 train | 6 val | epochs=1 | lr=0.001


Epoch,Training Loss,Validation Loss
1,7.4573,6.906945



▶ Lesson 7/60: v3_l15.pkl | 54 train | 7 val | epochs=1 | lr=0.001


Epoch,Training Loss,Validation Loss
1,6.5877,6.211781



▶ Lesson 8/60: v3_l16.pkl | 58 train | 7 val | epochs=1 | lr=0.001


Epoch,Training Loss,Validation Loss
1,6.3565,6.138539



▶ Lesson 9/60: v3_l17.pkl | 53 train | 6 val | epochs=1 | lr=0.001


Epoch,Training Loss,Validation Loss
1,7.1046,5.929887



▶ Lesson 10/60: v3_l18.pkl | 45 train | 5 val | epochs=1 | lr=0.001


Epoch,Training Loss,Validation Loss
1,7.1248,7.651666



▶ Lesson 11/60: v3_l19.pkl | 46 train | 6 val | epochs=1 | lr=0.001


Epoch,Training Loss,Validation Loss
1,6.7697,6.971068



▶ Lesson 12/60: v3_l2.pkl | 55 train | 7 val | epochs=1 | lr=0.001


Epoch,Training Loss,Validation Loss
1,6.6317,6.717757



▶ Lesson 13/60: v3_l20.pkl | 59 train | 7 val | epochs=1 | lr=0.001


Epoch,Training Loss,Validation Loss
1,7.0754,6.811217



▶ Lesson 14/60: v3_l21.pkl | 65 train | 8 val | epochs=1 | lr=0.001


Epoch,Training Loss,Validation Loss
1,6.7259,7.075511



▶ Lesson 15/60: v3_l22.pkl | 54 train | 6 val | epochs=1 | lr=0.001


Epoch,Training Loss,Validation Loss
1,6.9419,6.712872



▶ Lesson 16/60: v3_l23.pkl | 57 train | 7 val | epochs=1 | lr=0.001


Epoch,Training Loss,Validation Loss
1,6.6272,7.086113



▶ Lesson 17/60: v3_l24.pkl | 51 train | 6 val | epochs=1 | lr=0.001


Epoch,Training Loss,Validation Loss
1,6.6985,7.061755



▶ Lesson 18/60: v3_l25.pkl | 60 train | 7 val | epochs=1 | lr=0.001


Epoch,Training Loss,Validation Loss
1,6.8835,7.239116



▶ Lesson 19/60: v3_l26.pkl | 44 train | 5 val | epochs=1 | lr=0.001


Epoch,Training Loss,Validation Loss
1,7.1153,6.5106



▶ Lesson 20/60: v3_l27.pkl | 56 train | 7 val | epochs=1 | lr=0.001


Epoch,Training Loss,Validation Loss
1,6.6857,6.833851



▶ Lesson 21/60: v3_l28.pkl | 57 train | 7 val | epochs=1 | lr=0.001


Epoch,Training Loss,Validation Loss
1,6.2784,6.750649



▶ Lesson 22/60: v3_l29.pkl | 54 train | 6 val | epochs=1 | lr=0.001


Epoch,Training Loss,Validation Loss
1,6.6541,6.731446



▶ Lesson 23/60: v3_l3.pkl | 43 train | 5 val | epochs=1 | lr=0.001


Epoch,Training Loss,Validation Loss
1,7.0967,7.225299



▶ Lesson 24/60: v3_l30.pkl | 59 train | 7 val | epochs=1 | lr=0.001


Epoch,Training Loss,Validation Loss
1,6.4713,6.524619



▶ Lesson 25/60: v3_l31.pkl | 57 train | 7 val | epochs=1 | lr=0.001


Epoch,Training Loss,Validation Loss
1,6.7644,7.00015



▶ Lesson 26/60: v3_l32.pkl | 55 train | 7 val | epochs=1 | lr=0.001


Epoch,Training Loss,Validation Loss
1,6.479,6.737301



▶ Lesson 27/60: v3_l33.pkl | 63 train | 7 val | epochs=1 | lr=0.001


Epoch,Training Loss,Validation Loss
1,6.78,7.059406



▶ Lesson 28/60: v3_l34.pkl | 54 train | 6 val | epochs=1 | lr=0.001


Epoch,Training Loss,Validation Loss
1,6.6478,6.64082



▶ Lesson 29/60: v3_l35.pkl | 53 train | 6 val | epochs=1 | lr=0.001


Epoch,Training Loss,Validation Loss
1,6.8005,7.20274



▶ Lesson 30/60: v3_l36.pkl | 47 train | 6 val | epochs=1 | lr=0.001


Epoch,Training Loss,Validation Loss
1,6.6238,6.599499



▶ Lesson 31/60: v3_l37.pkl | 66 train | 8 val | epochs=1 | lr=0.001


Epoch,Training Loss,Validation Loss
1,6.5151,6.597776



▶ Lesson 32/60: v3_l38.pkl | 37 train | 5 val | epochs=1 | lr=0.001


Epoch,Training Loss,Validation Loss
1,6.7503,7.157273



▶ Lesson 33/60: v3_l39.pkl | 62 train | 7 val | epochs=1 | lr=0.001


Epoch,Training Loss,Validation Loss
1,6.5616,6.780403



▶ Lesson 34/60: v3_l4.pkl | 53 train | 6 val | epochs=1 | lr=0.001


Epoch,Training Loss,Validation Loss
1,6.9112,6.920814



▶ Lesson 35/60: v3_l40.pkl | 53 train | 6 val | epochs=1 | lr=0.001


Epoch,Training Loss,Validation Loss
1,6.5564,6.961342



▶ Lesson 36/60: v3_l41.pkl | 49 train | 6 val | epochs=1 | lr=0.001


Epoch,Training Loss,Validation Loss
1,6.6347,6.605858



▶ Lesson 37/60: v3_l42.pkl | 50 train | 6 val | epochs=1 | lr=0.001


Epoch,Training Loss,Validation Loss
1,6.8215,7.422834



▶ Lesson 38/60: v3_l43.pkl | 54 train | 6 val | epochs=1 | lr=0.001


Epoch,Training Loss,Validation Loss
1,6.4037,6.75625



▶ Lesson 39/60: v3_l44.pkl | 59 train | 7 val | epochs=1 | lr=0.001


Epoch,Training Loss,Validation Loss
1,7.0331,7.337955



▶ Lesson 40/60: v3_l45.pkl | 52 train | 6 val | epochs=1 | lr=0.001


Epoch,Training Loss,Validation Loss
1,6.941,7.342658



▶ Lesson 41/60: v3_l46.pkl | 63 train | 7 val | epochs=1 | lr=0.001


Epoch,Training Loss,Validation Loss
1,6.8684,7.198781



▶ Lesson 42/60: v3_l47.pkl | 49 train | 6 val | epochs=1 | lr=0.001


Epoch,Training Loss,Validation Loss
1,7.3729,7.685129



▶ Lesson 43/60: v3_l48.pkl | 61 train | 7 val | epochs=1 | lr=0.001


Epoch,Training Loss,Validation Loss
1,6.6269,6.573182



▶ Lesson 44/60: v3_l49.pkl | 45 train | 5 val | epochs=1 | lr=0.001


Epoch,Training Loss,Validation Loss
1,7.3979,7.295111



▶ Lesson 45/60: v3_l5.pkl | 50 train | 6 val | epochs=1 | lr=0.001


Epoch,Training Loss,Validation Loss
1,6.6635,6.665859



▶ Lesson 46/60: v3_l50.pkl | 52 train | 6 val | epochs=1 | lr=0.001


Epoch,Training Loss,Validation Loss
1,6.9232,7.129655



▶ Lesson 47/60: v3_l51.pkl | 54 train | 7 val | epochs=1 | lr=0.001


Epoch,Training Loss,Validation Loss
1,7.0034,7.802368



▶ Lesson 48/60: v3_l52.pkl | 58 train | 7 val | epochs=1 | lr=0.001


Epoch,Training Loss,Validation Loss
1,6.9092,6.645852



▶ Lesson 49/60: v3_l53.pkl | 50 train | 6 val | epochs=1 | lr=0.001


Epoch,Training Loss,Validation Loss
1,6.8028,6.845119



▶ Lesson 50/60: v3_l54.pkl | 55 train | 7 val | epochs=1 | lr=0.001


Epoch,Training Loss,Validation Loss
1,6.9643,7.342903



▶ Lesson 51/60: v3_l55.pkl | 59 train | 7 val | epochs=1 | lr=0.001


Epoch,Training Loss,Validation Loss
1,6.921,7.173839



▶ Lesson 52/60: v3_l56.pkl | 51 train | 6 val | epochs=1 | lr=0.001


Epoch,Training Loss,Validation Loss
1,6.6484,7.233677



▶ Lesson 53/60: v3_l57.pkl | 54 train | 7 val | epochs=1 | lr=0.001


Epoch,Training Loss,Validation Loss
1,6.3344,6.664428



▶ Lesson 54/60: v3_l58.pkl | 44 train | 5 val | epochs=1 | lr=0.001


Epoch,Training Loss,Validation Loss
1,6.7667,6.982005



▶ Lesson 55/60: v3_l59.pkl | 48 train | 6 val | epochs=1 | lr=0.001


Epoch,Training Loss,Validation Loss
1,6.6225,7.434433



▶ Lesson 56/60: v3_l6.pkl | 46 train | 6 val | epochs=1 | lr=0.001


Epoch,Training Loss,Validation Loss
1,6.8507,6.666709



▶ Lesson 57/60: v3_l60.pkl | 58 train | 7 val | epochs=1 | lr=0.001


Epoch,Training Loss,Validation Loss
1,6.6253,6.477075



▶ Lesson 58/60: v3_l7.pkl | 57 train | 7 val | epochs=1 | lr=0.001


Epoch,Training Loss,Validation Loss
1,6.4849,6.8979



▶ Lesson 59/60: v3_l8.pkl | 56 train | 7 val | epochs=1 | lr=0.001


Epoch,Training Loss,Validation Loss
1,6.7043,7.352586



▶ Lesson 60/60: v3_l9.pkl | 60 train | 7 val | epochs=1 | lr=0.001


Epoch,Training Loss,Validation Loss
1,6.8792,7.179168



📊 Combined Val Performance:
eval_loss           : 7.3898
eval_runtime        : 1.4099
eval_samples_per_second: 275.1960
eval_steps_per_second: 34.7540
epoch               : 1.0000


In [None]:
# Assuming `model` and `tokenizer` are already loaded/fine-tuned
save_dir = "my_syntax_gpt_model"

model.save_pretrained(save_dir)
tokenizer.save_pretrained(save_dir)


('my_syntax_gpt_model/tokenizer_config.json',
 'my_syntax_gpt_model/special_tokens_map.json',
 'my_syntax_gpt_model/vocab.json',
 'my_syntax_gpt_model/merges.txt',
 'my_syntax_gpt_model/added_tokens.json',
 'my_syntax_gpt_model/tokenizer.json')

In [None]:
save_dir = "my_syntax_gpt_model"

model = SyntaxGPT.from_pretrained(save_dir).to(device)
tokenizer = GPT2TokenizerFast.from_pretrained(save_dir)


In [None]:
# ─────────────────────────────────────────────────────────────
# imports & setup
# ─────────────────────────────────────────────────────────────
import torch
from torch.utils.data import Dataset
from transformers import Trainer, TrainingArguments
from datasets import load_dataset
import pickle
from tqdm import tqdm

# assume tokenizer & model already loaded and moved to device:
#   tokenizer.pad_token = tokenizer.eos_token
#   model.to(device)
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)

# ─────────────────────────────────────────────────────────────
# load TREC and Stanza‑parsed docs
# ─────────────────────────────────────────────────────────────
trec = load_dataset("trec")
label_map = trec["train"].features["coarse_label"].names

with open("trec_train_docs.pkl", "rb") as f:
    parsed_train_docs = pickle.load(f)
with open("trec_test_docs.pkl", "rb") as f:
    parsed_test_docs = pickle.load(f)

assert len(parsed_train_docs) == len(trec["train"])
assert len(parsed_test_docs)  == len(trec["test"])

# ─────────────────────────────────────────────────────────────
# encode_doc_with_label: builds input_ids, combo_ids, labels
# ─────────────────────────────────────────────────────────────
def encode_doc_with_label(doc, label_text, max_len=64):
    # prompt tokens
    prompt_text = doc.text.strip()
    prompt_ids  = tokenizer(prompt_text, add_special_tokens=False)["input_ids"]
    # label tokens
    label_ids   = tokenizer.encode(label_text.strip(), add_special_tokens=False)

    # full input = prompt + label
    full_input_ids = prompt_ids + label_ids
    # mask prompt for loss, keep label tokens
    labels = [-100]*len(prompt_ids) + label_ids

    # get combo_ids for prompt only, pad to full length
    _, combo_ids = encode_doc(doc)
    combo_ids = combo_ids[:len(full_input_ids)]
    combo_ids += [0]*(len(full_input_ids) - len(combo_ids))

    # truncate/pad to max_len
    full_input_ids = full_input_ids[:max_len]
    labels         = labels[:max_len]
    combo_ids      = combo_ids[:max_len]

    pad = max_len - len(full_input_ids)
    full_input_ids += [tokenizer.pad_token_id]*pad
    labels         += [-100]*pad
    combo_ids      += [0]*pad

    return {
        "input_ids": full_input_ids,
        "combo_ids": combo_ids,
        "labels":    labels,
    }

# ─────────────────────────────────────────────────────────────
# dataset wrapper
# ─────────────────────────────────────────────────────────────
class TrecSyntaxDataset(Dataset):
    def __init__(self, parsed_docs, label_ids, max_len=64):
        self.samples = []
        for doc, label_id in tqdm(zip(parsed_docs, label_ids), total=len(parsed_docs)):
            sample = encode_doc_with_label(doc, label_map[label_id], max_len)
            self.samples.append(sample)

    def __getitem__(self, idx):
        return self.samples[idx]

    def __len__(self):
        return len(self.samples)

# ─────────────────────────────────────────────────────────────
# build datasets & collator
# ─────────────────────────────────────────────────────────────
train_set = TrecSyntaxDataset(parsed_train_docs, trec["train"]["coarse_label"])
val_set   = TrecSyntaxDataset(parsed_test_docs,  trec["test"]["coarse_label"])

def collate_trec(batch):
    ids   = torch.tensor([b["input_ids"] for b in batch])
    cmb   = torch.tensor([b["combo_ids"] for b in batch])
    labs  = torch.tensor([b["labels"]    for b in batch])
    mask  = (ids != tokenizer.pad_token_id).long()
    return {
        "input_ids":      ids,
        "combo_ids":      cmb,
        "attention_mask": mask,
        "labels":         labs,
    }

# ─────────────────────────────────────────────────────────────
# compute_metrics for Trainer
# ─────────────────────────────────────────────────────────────
from sklearn.metrics import accuracy_score, precision_recall_fscore_support

# def compute_metrics(eval_pred):
#     logits, labels = eval_pred
#     preds = logits.argmax(-1)
#     acc   = accuracy_score(labels, preds)
#     p, r, f1, _ = precision_recall_fscore_support(labels, preds, average="macro")
#     return {"accuracy": acc, "precision": p, "recall": r, "f1": f1}

# ─────────────────────────────────────────────────────────────
# Trainer setup & train
# ─────────────────────────────────────────────────────────────
args = TrainingArguments(
    output_dir="syntaxgpt_trec_ckpt",
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    num_train_epochs=3,
    learning_rate=5e-4,
    eval_strategy="epoch",
    logging_strategy="epoch",
    save_strategy="no",
    report_to="none",
    fp16=torch.cuda.is_available(),
    remove_unused_columns=False,
)

trainer = Trainer(
    model=model,
    args=args,
    train_dataset=train_set,
    eval_dataset=val_set,
    data_collator=collate_trec,
    # compute_metrics=compute_metrics,
)

trainer.train()

# ─────────────────────────────────────────────────────────────
# fixed inference: never feed the true label into generate()
# ─────────────────────────────────────────────────────────────
def predict_label(sample):
    # find where labels start (first non -100)
    prompt_len = next(i for i, x in enumerate(sample["labels"]) if x != -100)

    # slice off just the prompt
    input_ids = torch.tensor([ sample["input_ids"][:prompt_len] ]).to(device)
    combo_ids = torch.tensor([ sample["combo_ids"][:prompt_len] ]).to(device)
    model.stored_combo_ids = combo_ids

    # generate exactly one new token
    print(f"prompt: {tokenizer.decode(input_ids[0], skip_special_tokens=True).strip()}")
    with torch.no_grad():
        out = model.generate(
            input_ids=input_ids,
            attention_mask=(input_ids != tokenizer.pad_token_id).long(),
            max_new_tokens=4
        )[0].tolist()

    # decode only the newly produced token(s)
    new_ids = out[prompt_len:]
    return tokenizer.decode(new_ids, skip_special_tokens=True).strip().upper()

# run through the test set
model.eval()


100%|██████████| 5452/5452 [00:03<00:00, 1752.28it/s]
100%|██████████| 500/500 [00:00<00:00, 1931.13it/s]


Epoch,Training Loss,Validation Loss
1,1.556,1.223099
2,1.2513,1.07373
3,1.0063,0.873699


SyntaxGPT(
  (transformer): GPT2Model(
    (wte): Embedding(50257, 768)
    (wpe): Embedding(1024, 768)
    (drop): Dropout(p=0.1, inplace=False)
    (h): ModuleList(
      (0-11): 12 x GPT2Block(
        (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (attn): GPT2Attention(
          (c_attn): Conv1D(nf=2304, nx=768)
          (c_proj): Conv1D(nf=768, nx=768)
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout): Dropout(p=0.1, inplace=False)
        )
        (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (mlp): GPT2MLP(
          (c_fc): Conv1D(nf=3072, nx=768)
          (c_proj): Conv1D(nf=768, nx=3072)
          (act): NewGELUActivation()
          (dropout): Dropout(p=0.1, inplace=False)
        )
      )
    )
    (ln_f): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
  )
  (lm_head): Linear(in_features=768, out_features=50257, bias=False)
  (combo_embed): Embedding(1024, 768)
  (merge): Linear(in_fe

In [None]:
from sklearn.metrics import precision_recall_fscore_support
import torch.nn.functional as F

correct = 0
total = len(val_set)
all_preds = []
all_golds = []
losses = []

label_name_to_id = {name.upper(): i for i, name in enumerate(label_map)}

for sample, gold_idx in zip(val_set, trec["test"]["coarse_label"]):
    pred = predict_label(sample)
    true = label_map[gold_idx].upper()

    print(f"prompt: {tokenizer.decode(sample['input_ids'], skip_special_tokens=True).strip()}")
    print(f" predict: {pred}\n")

    if pred.startswith(true):  # ✅ soft match
        correct += 1
        pred = true  # overwrite pred to ensure consistency in metrics

    all_preds.append(pred)
    all_golds.append(true)

    # Optional: token-level loss
    try:
        input_ids = torch.tensor([sample["input_ids"]]).to(device)
        labels = torch.tensor([sample["labels"]]).to(device)
        attn_mask = (input_ids != tokenizer.pad_token_id).long()
        model.stored_combo_ids = torch.tensor([sample["combo_ids"]]).to(device)

        with torch.no_grad():
            output = model(input_ids=input_ids, attention_mask=attn_mask, labels=labels)
            losses.append(output.loss.item())
    except Exception:
        pass

# 🎯 Accuracy
acc = correct / total

# 🎯 Classification Metrics
gold_ids = [label_name_to_id.get(label, -1) for label in all_golds]
pred_ids = [label_name_to_id.get(p, -1) for p in all_preds]

filtered_gold_ids = []
filtered_pred_ids = []
for g, p in zip(gold_ids, pred_ids):
    if g != -1 and p != -1:
        filtered_gold_ids.append(g)
        filtered_pred_ids.append(p)

p, r, f1, _ = precision_recall_fscore_support(filtered_gold_ids, filtered_pred_ids, average="macro")

# 📢 Final Results
print(f"\n🔍 Evaluation Results (max_new_tokens=4 + startswith logic):")
print(f"Accuracy:  {acc:.4f}")
print(f"Avg Loss:  {sum(losses)/len(losses):.4f}" if losses else "Loss: (not computed)")
print(f"Recall:    {r:.4f}")
print(f"F1 Score:  {f1:.4f}")


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: How far is it from Denver to Aspen ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: How far is it from Denver to Aspen ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:NUM
 predict: NUMCCC

prompt: Here is a question: What county is Modesto , California in ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: What county is Modesto , California in ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:LOC
 predict: DESCCC

prompt: Here is a question: Who was Galileo ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: Who was Galileo ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:HUM
 predict: HUMUMUM

prompt: Here is a question: What is an atom ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: What is an atom ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:DESC
 predict: DESCCC

prompt: Here is a question: When did Hawaii become a state ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: When did Hawaii become a state ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:NUM
 predict: NUMCCC

prompt: Here is a question: How tall is the Sears Building ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: How tall is the Sears Building ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:NUM
 predict: NUMCCC

prompt: Here is a question: George Bush purchased a small interest in which baseball team ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: George Bush purchased a small interest in which baseball team ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:HUM
 predict: ENTYYY

prompt: Here is a question: What is Australia 's national flower ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: What is Australia 's national flower ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:ENTY
 predict: ENTUMUMUM

prompt: Here is a question: Why does the moon turn orange ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: Why does the moon turn orange ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:DESC
 predict: DESCCC

prompt: Here is a question: What is autism ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What is autism ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:DESC
 predict: DESCCC



Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: What city had a world fair in 1900 ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What city had a world fair in 1900 ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:LOC
 predict: HUMUMUM

prompt: Here is a question: What person 's head is on a dime ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: What person 's head is on a dime ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:HUM
 predict: HYYY

prompt: Here is a question: What is the average weight of a Yellow Labrador ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What is the average weight of a Yellow Labrador ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:NUM
 predict: ENTYYY

prompt: Here is a question: Who was the first man to fly across the Pacific Ocean ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: Who was the first man to fly across the Pacific Ocean ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:HUM
 predict: HUMUMUM

prompt: Here is a question: When did Idaho become a state ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: When did Idaho become a state ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:NUM
 predict: NUMCCC

prompt: Here is a question: What is the life expectancy for crickets ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What is the life expectancy for crickets ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:NUM
 predict: ENTCCC

prompt: Here is a question: What metal has the highest melting point ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: What metal has the highest melting point ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:ENTY
 predict: ENTYYY

prompt: Here is a question: Who developed the vaccination against polio ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: Who developed the vaccination against polio ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:HUM
 predict: HUMUMUM

prompt: Here is a question: What is epilepsy ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What is epilepsy ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:DESC
 predict: DESCCC

prompt: Here is a question: What year did the Titanic sink ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: What year did the Titanic sink ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:NUM
 predict: ENTYYY

prompt: Here is a question: Who was the first American to walk in space ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: Who was the first American to walk in space ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:HUM
 predict: HUMUMUM

prompt: Here is a question: What is a biosphere ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What is a biosphere ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:DESC
 predict: DESCCC

prompt: Here is a question: What river in the US is known as the Big Muddy ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: What river in the US is known as the Big Muddy ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:LOC
 predict: ENTYYY

prompt: Here is a question: What is bipolar disorder ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What is bipolar disorder ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:DESC
 predict: ENTYYY

prompt: Here is a question: What is cholesterol ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What is cholesterol ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:DESC
 predict: DESCCC

prompt: Here is a question: Who developed the Macintosh computer ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: Who developed the Macintosh computer ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:HUM
 predict: HUMUMUM

prompt: Here is a question: What is caffeine ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What is caffeine ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:DESC
 predict: DESCCC

prompt: Here is a question: What imaginary line is halfway between the North and South Poles ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What imaginary line is halfway between the North and South Poles ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:LOC
 predict: HUMUMUM



Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: Where is John Wayne airport ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: Where is John Wayne airport ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:LOC
 predict: ENTYYY

prompt: Here is a question: What hemisphere is the Philippines in ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What hemisphere is the Philippines in ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:LOC
 predict: DESCCC

prompt: Here is a question: What is the average speed of the horses at the Kentucky Derby ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: What is the average speed of the horses at the Kentucky Derby ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:NUM
 predict: ENTYYY

prompt: Here is a question: Where are the Rocky Mountains ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: Where are the Rocky Mountains ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:LOC
 predict: DESCCC

prompt: Here is a question: What are invertebrates ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What are invertebrates ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:DESC
 predict: DESCCC

prompt: Here is a question: What is the temperature at the center of the earth ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: What is the temperature at the center of the earth ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:NUM
 predict: ENTCCC

prompt: Here is a question: When did John F. Kennedy get elected as President ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: When did John F. Kennedy get elected as President ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:NUM
 predict: NUMCCC

prompt: Here is a question: How old was Elvis Presley when he died ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: How old was Elvis Presley when he died ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:NUM
 predict: NUMCCC

prompt: Here is a question: Where is the Orinoco River ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: Where is the Orinoco River ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:LOC
 predict: DESCCC

prompt: Here is a question: How far is the service line from the net in tennis ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: How far is the service line from the net in tennis ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:NUM
 predict: NUMCCC

prompt: Here is a question: How much fiber should you have per day ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: How much fiber should you have per day ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:NUM
 predict: NUMCCC

prompt: Here is a question: How many Great Lakes are there ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: How many Great Lakes are there ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:NUM
 predict: NUMCCC

prompt: Here is a question: Material called linen is made from what plant ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: Material called linen is made from what plant ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:ENTY
 predict: ENTYYY

prompt: Here is a question: What is Teflon ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: What is Teflon ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:DESC
 predict: DESCCC

prompt: Here is a question: What is amitriptyline ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What is amitriptyline ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:DESC
 predict: DESCCC

prompt: Here is a question: What is a shaman ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What is a shaman ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:DESC
 predict: DESCCC

prompt: Here is a question: What is the proper name for a female walrus ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: What is the proper name for a female walrus ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:ENTY
 predict: ENTYYY

prompt: Here is a question: What is a group of turkeys called ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What is a group of turkeys called ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:ENTY
 predict: ENTYYY

prompt: Here is a question: How long did Rip Van Winkle sleep ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: How long did Rip Van Winkle sleep ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:NUM
 predict: NUMCCC

prompt: Here is a question: What are triglycerides ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: What are triglycerides ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:DESC
 predict: DESCCC

prompt: Here is a question: How many liters in a gallon ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: How many liters in a gallon ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:NUM
 predict: DESCCC

prompt: Here is a question: What is the name of the chocolate company in San Francisco ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What is the name of the chocolate company in San Francisco ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:HUM
 predict: ENTCCC



Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: What are amphibians ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What are amphibians ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:DESC
 predict: DESCCC

prompt: Here is a question: Who discovered x-rays ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: Who discovered x-rays ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:HUM
 predict: HUMUMUM

prompt: Here is a question: Which comedian 's signature line is `` Can we talk '' ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: Which comedian 's signature line is `` Can we talk '' ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:HUM
 predict: HYYY

prompt: Here is a question: What is fibromyalgia ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What is fibromyalgia ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:DESC
 predict: DESCCC

prompt: Here is a question: What is done with worn or outdated flags ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: What is done with worn or outdated flags ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:DESC
 predict: ENTYYY

prompt: Here is a question: What does cc in engines mean ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What does cc in engines mean ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:DESC
 predict: DESCCC

prompt: Here is a question: When did Elvis Presley die ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: When did Elvis Presley die ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:NUM
 predict: NUMCCC

prompt: Here is a question: What is the capital of Yugoslavia ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What is the capital of Yugoslavia ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:LOC
 predict: DESCCC

prompt: Here is a question: Where is Milan ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: Where is Milan ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:LOC
 predict: DESCCC

prompt: Here is a question: What is the speed hummingbirds fly ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What is the speed hummingbirds fly ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:NUM
 predict: DESCCC

prompt: Here is a question: What is the oldest city in the United States ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: What is the oldest city in the United States ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:LOC
 predict: DESCCC

prompt: Here is a question: What was W.C. Fields ' real name ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What was W.C. Fields ' real name ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:HUM
 predict: HUMUMUM

prompt: Here is a question: What river flows between Fargo , North Dakota and Moorhead , Minnesota ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What river flows between Fargo , North Dakota and Moorhead , Minnesota ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:LOC
 predict: DESCCC

prompt: Here is a question: What do bats eat ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: What do bats eat ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:ENTY
 predict: DESCCC

prompt: Here is a question: What state did the Battle of Bighorn take place in ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What state did the Battle of Bighorn take place in ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:LOC
 predict: ENTYYY

prompt: Here is a question: Who was Abraham Lincoln ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: Who was Abraham Lincoln ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:HUM
 predict: HUMUMUM

prompt: Here is a question: What do you call a newborn kangaroo ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What do you call a newborn kangaroo ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:ENTY
 predict: ENTYYY

prompt: Here is a question: What are spider veins ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: What are spider veins ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:DESC
 predict: DESCCC

prompt: Here is a question: What day and month did John Lennon die ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What day and month did John Lennon die ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:NUM
 predict: ENTYYY

prompt: Here is a question: What strait separates North America from Asia ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: What strait separates North America from Asia ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:LOC
 predict: DESCCC

prompt: Here is a question: What is the population of Seattle ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What is the population of Seattle ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:NUM
 predict: DESCCC

prompt: Here is a question: How much was a ticket for the Titanic ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: How much was a ticket for the Titanic ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:NUM
 predict: NUMCCC

prompt: Here is a question: What is the largest city in the world ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What is the largest city in the world ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:LOC
 predict: DESCCC

prompt: Here is a question: What American composer wrote the music for `` West Side Story '' ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What American composer wrote the music for `` West Side Story '' ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:HUM
 predict: HUMUMUM



Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: Where is the Mall of the America ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: Where is the Mall of the America ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:LOC
 predict: DESCCC

prompt: Here is a question: What is the pH scale ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What is the pH scale ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:DESC
 predict: DESCCC



Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: What type of currency is used in Australia ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What type of currency is used in Australia ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:ENTY
 predict: ENTYYY

prompt: Here is a question: How tall is the Gateway Arch in St. Louis , MO ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: How tall is the Gateway Arch in St. Louis , MO ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:NUM
 predict: NUMCCC



Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: How much does the human adult female brain weigh ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: How much does the human adult female brain weigh ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:NUM
 predict: NUMCCC

prompt: Here is a question: Who was the first governor of Alaska ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: Who was the first governor of Alaska ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:HUM
 predict: HUMUMUM

prompt: Here is a question: What is a prism ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: What is a prism ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:DESC
 predict: DESCCC

prompt: Here is a question: When was the first liver transplant ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: When was the first liver transplant ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:NUM
 predict: HUMUMUM

prompt: Here is a question: Who was elected president of South Africa in 1994 ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: Who was elected president of South Africa in 1994 ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:HUM
 predict: HUMUMUM



Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: What is the population of China ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What is the population of China ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:NUM
 predict: DESCCC

prompt: Here is a question: When was Rosa Parks born ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: When was Rosa Parks born ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:NUM
 predict: NUMCCC

prompt: Here is a question: Why is a ladybug helpful ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: Why is a ladybug helpful ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:DESC
 predict: DESCCC

prompt: Here is a question: What is amoxicillin ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What is amoxicillin ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:DESC
 predict: DESCCC



Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: Who was the first female United States Representative ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: Who was the first female United States Representative ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:HUM
 predict: HUMUMUM

prompt: Here is a question: What are xerophytes ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What are xerophytes ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:DESC
 predict: DESCCC

prompt: Here is a question: What country did Ponce de Leon come from ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: What country did Ponce de Leon come from ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:LOC
 predict: ENTYYY

prompt: Here is a question: The U.S. Department of Treasury first issued paper currency for the U.S. during which war ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: The U.S. Department of Treasury first issued paper currency for the U.S. during which war ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:ENTY
 predict: ENTYYY

prompt: Here is a question: What is desktop publishing ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: What is desktop publishing ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:DESC
 predict: DESCCC

prompt: Here is a question: What is the temperature of the sun 's surface ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What is the temperature of the sun 's surface ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:NUM
 predict: HYYY

prompt: Here is a question: What year did Canada join the United Nations ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: What year did Canada join the United Nations ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:NUM
 predict: ENTYYY

prompt: Here is a question: What is the oldest university in the US ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What is the oldest university in the US ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:HUM
 predict: DESCCC

prompt: Here is a question: Where is Prince Edward Island ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: Where is Prince Edward Island ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:LOC
 predict: HUMUMUM

prompt: Here is a question: Mercury , what year was it discovered ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: Mercury , what year was it discovered ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:NUM
 predict: HUMUMUM

prompt: Here is a question: What is cryogenics ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: What is cryogenics ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:DESC
 predict: DESCCC

prompt: Here is a question: What are coral reefs ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What are coral reefs ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:DESC
 predict: ENTYYY

prompt: Here is a question: What is the longest major league baseball-winning streak ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: What is the longest major league baseball-winning streak ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:ENTY
 predict: ENTYYY

prompt: Here is a question: What is neurology ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What is neurology ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:DESC
 predict: DESCCC

prompt: Here is a question: Who invented the calculator ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: Who invented the calculator ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:HUM
 predict: HUMUMUM

prompt: Here is a question: How do you measure earthquakes ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: How do you measure earthquakes ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:DESC
 predict: NUMCCC

prompt: Here is a question: Who is Duke Ellington ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: Who is Duke Ellington ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:HUM
 predict: HUMUMUM

prompt: Here is a question: What county is Phoenix , AZ in ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What county is Phoenix , AZ in ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:LOC
 predict: DESCCC

prompt: Here is a question: What is a micron ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What is a micron ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:DESC
 predict: DESCCC

prompt: Here is a question: The sun 's core , what is the temperature ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: The sun 's core , what is the temperature ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:NUM
 predict: ENTYYY

prompt: Here is a question: What is the Ohio state bird ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What is the Ohio state bird ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:ENTY
 predict: DESCCC

prompt: Here is a question: When were William Shakespeare 's twins born ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: When were William Shakespeare 's twins born ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:NUM
 predict: NUMCCC

prompt: Here is a question: What is the highest dam in the U.S. ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: What is the highest dam in the U.S. ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:LOC
 predict: DESCCC

prompt: Here is a question: What color is a poison arrow frog ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What color is a poison arrow frog ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:ENTY
 predict: DESCCC

prompt: Here is a question: What is acupuncture ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: What is acupuncture ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:DESC
 predict: DESCCC

prompt: Here is a question: What is the length of the coastline of the state of Alaska ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What is the length of the coastline of the state of Alaska ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:NUM
 predict: DESCCC

prompt: Here is a question: What is the name of Neil Armstrong 's wife ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: What is the name of Neil Armstrong 's wife ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:HUM
 predict: ENTUMUMUM

prompt: Here is a question: What is Hawaii 's state flower ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What is Hawaii 's state flower ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:ENTY
 predict: HYYY

prompt: Here is a question: Who won Ms. American in 1989 ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: Who won Ms. American in 1989 ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:HUM
 predict: HUMUMUM

prompt: Here is a question: When did the Hindenberg crash ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: When did the Hindenberg crash ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:NUM
 predict: NUMCCC

prompt: Here is a question: What mineral helps prevent osteoporosis ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What mineral helps prevent osteoporosis ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:ENTY
 predict: ENTYYY



Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: What was the last year that the Chicago Cubs won the World Series ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What was the last year that the Chicago Cubs won the World Series ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:NUM
 predict: HUMUMUM

prompt: Here is a question: Where is Perth ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: Where is Perth ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:LOC
 predict: DESCCC

prompt: Here is a question: What year did WWII begin ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: What year did WWII begin ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:NUM
 predict: ENTYYY

prompt: Here is a question: What is the diameter of a golf ball ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What is the diameter of a golf ball ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:NUM
 predict: DESCCC

prompt: Here is a question: What is an eclipse ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What is an eclipse ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:DESC
 predict: DESCCC

prompt: Here is a question: Who discovered America ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: Who discovered America ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:HUM
 predict: HUMUMUM

prompt: Here is a question: What is the earth 's diameter ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What is the earth 's diameter ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:NUM
 predict: HYYY

prompt: Here is a question: Which president was unmarried ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: Which president was unmarried ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:HUM
 predict: HUMUMUM



Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: How wide is the Milky Way galaxy ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: How wide is the Milky Way galaxy ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:NUM
 predict: NUMCCC

prompt: Here is a question: During which season do most thunderstorms occur ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: During which season do most thunderstorms occur ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:NUM
 predict: ENTYYY

prompt: Here is a question: What is Wimbledon ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: What is Wimbledon ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:DESC
 predict: DESCCC

prompt: Here is a question: What is the gestation period for a cat ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What is the gestation period for a cat ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:NUM
 predict: DESCCC

prompt: Here is a question: How far is a nautical mile ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: How far is a nautical mile ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:NUM
 predict: NUMCCC

prompt: Here is a question: Who was the abolitionist who led the raid on Harper 's Ferry in 1859 ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: Who was the abolitionist who led the raid on Harper 's Ferry in 1859 ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:HUM
 predict: HUMUMUM

prompt: Here is a question: What does target heart rate mean ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What does target heart rate mean ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:DESC
 predict: DESCCC

prompt: Here is a question: What was the first satellite to go into space ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: What was the first satellite to go into space ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:ENTY
 predict: HUMUMUM

prompt: Here is a question: What is foreclosure ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What is foreclosure ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:DESC
 predict: DESCCC

prompt: Here is a question: What is the major fault line near Kentucky ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What is the major fault line near Kentucky ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:ENTY
 predict: ENTYYY

prompt: Here is a question: Where is the Holland Tunnel ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: Where is the Holland Tunnel ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:LOC
 predict: DESCCC

prompt: Here is a question: Who wrote the hymn `` Amazing Grace '' ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: Who wrote the hymn `` Amazing Grace '' ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:HUM
 predict: HUMUMUM

prompt: Here is a question: What position did Willie Davis play in baseball ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: What position did Willie Davis play in baseball ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:HUM
 predict: ENTYYY

prompt: Here is a question: What are platelets ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What are platelets ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:DESC
 predict: DESCCC

prompt: Here is a question: What is severance pay ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What is severance pay ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:DESC
 predict: DESCCC



Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: What is the name of Roy Roger 's dog ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What is the name of Roy Roger 's dog ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:ENTY
 predict: ENTUMUMUM

prompt: Here is a question: Where are the National Archives ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: Where are the National Archives ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:LOC
 predict: DESCCC

prompt: Here is a question: What is a baby turkey called ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: What is a baby turkey called ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:ENTY
 predict: ENTYYY

prompt: Here is a question: What is poliomyelitis ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What is poliomyelitis ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:DESC
 predict: DESCCC

prompt: Here is a question: What is the longest bone in the human body ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: What is the longest bone in the human body ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:ENTY
 predict: ENTYYY

prompt: Here is a question: Who is a German philosopher ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: Who is a German philosopher ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:HUM
 predict: DESCCC

prompt: Here is a question: What were Christopher Columbus ' three ships ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What were Christopher Columbus ' three ships ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:ENTY
 predict: HUMUMUM

prompt: Here is a question: What does Phi Beta Kappa mean ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: What does Phi Beta Kappa mean ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:DESC
 predict: DESCCC

prompt: Here is a question: What is nicotine ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What is nicotine ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:DESC
 predict: DESCCC

prompt: Here is a question: What is another name for vitamin B1 ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: What is another name for vitamin B1 ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:ENTY
 predict: ENTCCC

prompt: Here is a question: Who discovered radium ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: Who discovered radium ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:HUM
 predict: HUMUMUM

prompt: Here is a question: What are sunspots ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: What are sunspots ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:DESC
 predict: DESCCC

prompt: Here is a question: When was Algeria colonized ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: When was Algeria colonized ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:NUM
 predict: NUMCCC

prompt: Here is a question: What baseball team was the first to make numbers part of their uniform ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: What baseball team was the first to make numbers part of their uniform ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:HUM
 predict: HUMUMUM

prompt: Here is a question: What continent is Egypt on ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What continent is Egypt on ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:LOC
 predict: DESCCC

prompt: Here is a question: What is the capital of Mongolia ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What is the capital of Mongolia ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:LOC
 predict: DESCCC



Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: What is nanotechnology ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What is nanotechnology ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:DESC
 predict: DESCCC

prompt: Here is a question: In the late 1700 's British convicts were used to populate which colony ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: In the late 1700 's British convicts were used to populate which colony ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:LOC
 predict: HUMUMUM

prompt: Here is a question: What state is the geographic center of the lower 48 states ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: What state is the geographic center of the lower 48 states ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:LOC
 predict: ENTYYY

prompt: Here is a question: What is an obtuse angle ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What is an obtuse angle ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:DESC
 predict: ENTYYY

prompt: Here is a question: What are polymers ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: What are polymers ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:DESC
 predict: DESCCC

prompt: Here is a question: When is hurricane season in the Caribbean ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: When is hurricane season in the Caribbean ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:NUM
 predict: NUMCCC

prompt: Here is a question: Where is the volcano Mauna Loa ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: Where is the volcano Mauna Loa ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:LOC
 predict: DESCCC



Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: What is another astronomic term for the Northern Lights ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What is another astronomic term for the Northern Lights ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:ENTY
 predict: ENTYYY

prompt: Here is a question: What peninsula is Spain part of ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: What peninsula is Spain part of ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:LOC
 predict: DESCCC

prompt: Here is a question: When was Lyndon B. Johnson born ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: When was Lyndon B. Johnson born ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:NUM
 predict: NUMCCC

prompt: Here is a question: What is acetaminophen ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: What is acetaminophen ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:DESC
 predict: DESCCC

prompt: Here is a question: What state has the least amount of rain per year ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What state has the least amount of rain per year ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:LOC
 predict: ENTYYY

prompt: Here is a question: Who founded American Red Cross ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: Who founded American Red Cross ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:HUM
 predict: HUMUMUM

prompt: Here is a question: What year did the Milwaukee Braves become the Atlanta Braves ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What year did the Milwaukee Braves become the Atlanta Braves ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:NUM
 predict: ENTYYY

prompt: Here is a question: How fast is alcohol absorbed ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: How fast is alcohol absorbed ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:NUM
 predict: NUMCCC

prompt: Here is a question: When is the summer solstice ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: When is the summer solstice ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:NUM
 predict: NUMCCC

prompt: Here is a question: What is supernova ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What is supernova ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:DESC
 predict: DESCCC

prompt: Here is a question: Where is the Shawnee National Forest ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: Where is the Shawnee National Forest ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:LOC
 predict: DESCCC

prompt: Here is a question: What U.S. state 's motto is `` Live free or Die '' ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What U.S. state 's motto is `` Live free or Die '' ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:LOC
 predict: ENTYYY

prompt: Here is a question: Where is the Lourve ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: Where is the Lourve ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:LOC
 predict: DESCCC

prompt: Here is a question: When was the first stamp issued ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: When was the first stamp issued ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:NUM
 predict: NUMCCC

prompt: Here is a question: What primary colors do you mix to make orange ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: What primary colors do you mix to make orange ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:ENTY
 predict: HUMUMUM

prompt: Here is a question: How far is Pluto from the sun ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: How far is Pluto from the sun ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:NUM
 predict: NUMCCC

prompt: Here is a question: What body of water are the Canary Islands in ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: What body of water are the Canary Islands in ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:LOC
 predict: ENTYYY

prompt: Here is a question: What is neuropathy ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What is neuropathy ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:DESC
 predict: DESCCC

prompt: Here is a question: Where is the Euphrates River ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: Where is the Euphrates River ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:LOC
 predict: DESCCC

prompt: Here is a question: What is cryptography ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What is cryptography ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:DESC
 predict: DESCCC

prompt: Here is a question: What is natural gas composed of ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: What is natural gas composed of ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:ENTY
 predict: ENTYYY

prompt: Here is a question: Who is the Prime Minister of Canada ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: Who is the Prime Minister of Canada ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:HUM
 predict: ENTYYY

prompt: Here is a question: What French ruler was defeated at the battle of Waterloo ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: What French ruler was defeated at the battle of Waterloo ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:HUM
 predict: HUMUMUM

prompt: Here is a question: What is leukemia ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What is leukemia ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:DESC
 predict: DESCCC

prompt: Here is a question: Where did Howard Hughes die ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: Where did Howard Hughes die ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:LOC
 predict: ENTYYY

prompt: Here is a question: What is the birthstone for June ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What is the birthstone for June ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:ENTY
 predict: DESCCC

prompt: Here is a question: What is the sales tax in Minnesota ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What is the sales tax in Minnesota ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:ENTY
 predict: DESCCC



Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: What is the distance in miles from the earth to the sun ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What is the distance in miles from the earth to the sun ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:NUM
 predict: ENTCCC

prompt: Here is a question: What is the average life span for a chicken ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What is the average life span for a chicken ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:NUM
 predict: ENTYYY

prompt: Here is a question: When was the first Wal-Mart store opened ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: When was the first Wal-Mart store opened ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:NUM
 predict: NUMCCC

prompt: Here is a question: What is relative humidity ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What is relative humidity ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:DESC
 predict: ENTYYY

prompt: Here is a question: What city has the zip code of 35824 ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What city has the zip code of 35824 ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:LOC
 predict: ENTYYY



Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: What currency is used in Algeria ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What currency is used in Algeria ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:ENTY
 predict: ENTYYY

prompt: Here is a question: Who invented the hula hoop ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: Who invented the hula hoop ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:HUM
 predict: HUMUMUM

prompt: Here is a question: What was the most popular toy in 1957 ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: What was the most popular toy in 1957 ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:ENTY
 predict: HUMUMUM

prompt: Here is a question: What is pastrami made of ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What is pastrami made of ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:ENTY
 predict: DESYYY

prompt: Here is a question: What is the name of the satellite that the Soviet Union sent into space in 1957 ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What is the name of the satellite that the Soviet Union sent into space in 1957 ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:ENTY
 predict: HYYY

prompt: Here is a question: What city 's newspaper is called `` The Enquirer '' ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: What city 's newspaper is called `` The Enquirer '' ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:LOC
 predict: HUMUMUM

prompt: Here is a question: Who invented the slinky ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: Who invented the slinky ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:HUM
 predict: HUMUMUM

prompt: Here is a question: What are the animals that don 't have backbones called ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: What are the animals that don 't have backbones called ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:ENTY
 predict: ENTYYY

prompt: Here is a question: What is the melting point of copper ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What is the melting point of copper ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:NUM
 predict: DESCCC

prompt: Here is a question: Where is the volcano Olympus Mons located ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: Where is the volcano Olympus Mons located ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:LOC
 predict: DESCCC

prompt: Here is a question: Who was the 23rd president of the United States ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: Who was the 23rd president of the United States ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:HUM
 predict: HUMUMUM

prompt: Here is a question: What is the average body temperature ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: What is the average body temperature ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:NUM
 predict: ENTYYY

prompt: Here is a question: What does a defibrillator do ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What does a defibrillator do ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:DESC
 predict: DESCCC

prompt: Here is a question: What is the effect of acid rain ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: What is the effect of acid rain ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:DESC
 predict: DESCCC

prompt: Here is a question: What year did the United States abolish the draft ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What year did the United States abolish the draft ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:NUM
 predict: ENTYYY

prompt: Here is a question: How fast is the speed of light ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: How fast is the speed of light ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:NUM
 predict: NUMCCC

prompt: Here is a question: What province is Montreal in ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What province is Montreal in ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:LOC
 predict: DESCCC

prompt: Here is a question: What New York City structure is also known as the Twin Towers ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: What New York City structure is also known as the Twin Towers ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:LOC
 predict: ENTYYY

prompt: Here is a question: What is fungus ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What is fungus ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:DESC
 predict: DESCCC

prompt: Here is a question: What is the most frequently spoken language in the Netherlands ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What is the most frequently spoken language in the Netherlands ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:ENTY
 predict: ENTYYY



Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: What is sodium chloride ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What is sodium chloride ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:DESC
 predict: DESCCC

prompt: Here is a question: What are the spots on dominoes called ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What are the spots on dominoes called ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:ENTY
 predict: ENTYYY



Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: How many pounds in a ton ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: How many pounds in a ton ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:NUM
 predict: NUMCCC

prompt: Here is a question: What is influenza ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What is influenza ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:DESC
 predict: DESCCC



Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: What is ozone depletion ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What is ozone depletion ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:DESC
 predict: DESCCC

prompt: Here is a question: What year was the Mona Lisa painted ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What year was the Mona Lisa painted ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:NUM
 predict: HUMUMUM

prompt: Here is a question: What does `` Sitting Shiva '' mean ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: What does `` Sitting Shiva '' mean ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:DESC
 predict: HUMUMUM

prompt: Here is a question: What is the electrical output in Madrid , Spain ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What is the electrical output in Madrid , Spain ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:ENTY
 predict: ENTYYY

prompt: Here is a question: Which mountain range in North America stretches from Maine to Georgia ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: Which mountain range in North America stretches from Maine to Georgia ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:LOC
 predict: DESCCC

prompt: Here is a question: What is plastic made of ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What is plastic made of ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:ENTY
 predict: ENTYYY

prompt: Here is a question: What is the population of Nigeria ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What is the population of Nigeria ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:NUM
 predict: DESCCC

prompt: Here is a question: What does your spleen do ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: What does your spleen do ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:DESC
 predict: DESCCC

prompt: Here is a question: Where is the Grand Canyon ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: Where is the Grand Canyon ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:LOC
 predict: DESCCC

prompt: Here is a question: Who invented the telephone ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: Who invented the telephone ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:HUM
 predict: HUMUMUM

prompt: Here is a question: What year did the U.S. buy Alaska ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What year did the U.S. buy Alaska ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:NUM
 predict: ENTYYY

prompt: Here is a question: What is the name of the leader of Ireland ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What is the name of the leader of Ireland ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:HUM
 predict: ENTCCC



Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: What is phenylalanine ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What is phenylalanine ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:DESC
 predict: DESCCC

prompt: Here is a question: How many gallons of water are there in a cubic foot ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: How many gallons of water are there in a cubic foot ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:NUM
 predict: NUMCCC



Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: What are the two houses of the Legislative branch ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What are the two houses of the Legislative branch ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:ENTY
 predict: ENTYYY

prompt: Here is a question: What is sonar ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: What is sonar ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:DESC
 predict: DESCCC

prompt: Here is a question: In Poland , where do most people live ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: In Poland , where do most people live ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:LOC
 predict: DESCCC

prompt: Here is a question: What is phosphorus ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: What is phosphorus ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:DESC
 predict: DESCCC

prompt: Here is a question: What is the location of the Sea of Tranquility ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What is the location of the Sea of Tranquility ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:LOC
 predict: DESCCC

prompt: Here is a question: How fast is sound ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: How fast is sound ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:NUM
 predict: NUMCCC

prompt: Here is a question: What French province is cognac produced in ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: What French province is cognac produced in ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:LOC
 predict: ENTYYY

prompt: Here is a question: What is Valentine 's Day ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What is Valentine 's Day ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:DESC
 predict: DESCCC

prompt: Here is a question: What causes gray hair ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What causes gray hair ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:DESC
 predict: ENTYYY

prompt: Here is a question: What is hypertension ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: What is hypertension ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:DESC
 predict: DESCCC

prompt: Here is a question: What is bandwidth ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What is bandwidth ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:DESC
 predict: DESCCC

prompt: Here is a question: What is the longest suspension bridge in the U.S. ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What is the longest suspension bridge in the U.S. ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:LOC
 predict: DESCCC

prompt: Here is a question: What is a parasite ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: What is a parasite ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:DESC
 predict: DESCCC

prompt: Here is a question: What is home equity ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What is home equity ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:DESC
 predict: DESCCC

prompt: Here is a question: What do meteorologists do ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: What do meteorologists do ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:DESC
 predict: DESCCC

prompt: Here is a question: What is the criterion for being legally blind ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What is the criterion for being legally blind ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:ENTY
 predict: HUMUMUM

prompt: Here is a question: Who is the tallest man in the world ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: Who is the tallest man in the world ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:HUM
 predict: DESCCC

prompt: Here is a question: What are the twin cities ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What are the twin cities ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:LOC
 predict: DESCCC

prompt: Here is a question: What did Edward Binney and Howard Smith invent in 1903 ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: What did Edward Binney and Howard Smith invent in 1903 ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:ENTY
 predict: ENTYYY

prompt: Here is a question: What is the statue of liberty made of ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What is the statue of liberty made of ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:ENTY
 predict: ENTYYY

prompt: Here is a question: What is pilates ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: What is pilates ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:DESC
 predict: DESCCC

prompt: Here is a question: What planet is known as the `` red '' planet ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What planet is known as the `` red '' planet ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:LOC
 predict: ENTYYY

prompt: Here is a question: What is the depth of the Nile river ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What is the depth of the Nile river ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:NUM
 predict: DESCCC



Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: What is the colorful Korean traditional dress called ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What is the colorful Korean traditional dress called ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:ENTY
 predict: ENTYYY

prompt: Here is a question: What is Mardi Gras ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What is Mardi Gras ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:DESC
 predict: DESCCC

prompt: Here is a question: Mexican pesos are worth what in U.S. dollars ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: Mexican pesos are worth what in U.S. dollars ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:NUM
 predict: ENTYYY

prompt: Here is a question: Who was the first African American to play for the Brooklyn Dodgers ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: Who was the first African American to play for the Brooklyn Dodgers ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:HUM
 predict: HUMUMUM

prompt: Here is a question: Who was the first Prime Minister of Canada ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: Who was the first Prime Minister of Canada ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:HUM
 predict: HUMUMUM

prompt: Here is a question: How many Admirals are there in the U.S. Navy ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: How many Admirals are there in the U.S. Navy ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:NUM
 predict: NUMCCC

prompt: Here is a question: What instrument did Glenn Miller play ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What instrument did Glenn Miller play ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:ENTY
 predict: ENTYYY

prompt: Here is a question: How old was Joan of Arc when she died ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: How old was Joan of Arc when she died ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:NUM
 predict: NUMCCC

prompt: Here is a question: What does the word fortnight mean ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: What does the word fortnight mean ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:DESC
 predict: DESCCC

prompt: Here is a question: What is dianetics ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What is dianetics ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:DESC
 predict: DESCCC

prompt: Here is a question: What is the capital of Ethiopia ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What is the capital of Ethiopia ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:LOC
 predict: DESCCC



Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: For how long is an elephant pregnant ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: For how long is an elephant pregnant ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:NUM
 predict: DESCCC

prompt: Here is a question: How did Janice Joplin die ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: How did Janice Joplin die ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:DESC
 predict: NUMCCC

prompt: Here is a question: What is the primary language in Iceland ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: What is the primary language in Iceland ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:ENTY
 predict: ENTYYY

prompt: Here is a question: What is the difference between AM radio stations and FM radio stations ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What is the difference between AM radio stations and FM radio stations ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:DESC
 predict: ENTYYY

prompt: Here is a question: What is osteoporosis ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What is osteoporosis ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:DESC
 predict: DESCCC

prompt: Here is a question: Who was the first woman governor in the U.S. ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: Who was the first woman governor in the U.S. ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:HUM
 predict: HUMUMUM

prompt: Here is a question: What is peyote ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What is peyote ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:DESC
 predict: DESCCC

prompt: Here is a question: What is the esophagus used for ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What is the esophagus used for ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:DESC
 predict: DESCCC

prompt: Here is a question: What is viscosity ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: What is viscosity ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:DESC
 predict: DESCCC

prompt: Here is a question: What year did Oklahoma become a state ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What year did Oklahoma become a state ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:NUM
 predict: ENTYYY

prompt: Here is a question: What is the abbreviation for Texas ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What is the abbreviation for Texas ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:ABBR
 predict: DESCCC

prompt: Here is a question: What is a mirror made out of ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: What is a mirror made out of ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:ENTY
 predict: DESCCC

prompt: Here is a question: Where on the body is a mortarboard worn ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: Where on the body is a mortarboard worn ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:LOC
 predict: ENTYYY

prompt: Here is a question: What was J.F.K. 's wife 's name ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What was J.F.K. 's wife 's name ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:HUM
 predict: HUMUMUM

prompt: Here is a question: What does I.V. stand for ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: What does I.V. stand for ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:ABBR
 predict: DESCCC

prompt: Here is a question: What is the chunnel ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What is the chunnel ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:DESC
 predict: DESCCC

prompt: Here is a question: Where is Hitler buried ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: Where is Hitler buried ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:LOC
 predict: HYYY

prompt: Here is a question: What are antacids ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: What are antacids ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:DESC
 predict: DESCCC

prompt: Here is a question: What is pulmonary fibrosis ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What is pulmonary fibrosis ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:DESC
 predict: ENTYYY

prompt: Here is a question: What are Quaaludes ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What are Quaaludes ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:DESC
 predict: DESCCC

prompt: Here is a question: What is naproxen ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: What is naproxen ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:DESC
 predict: DESCCC

prompt: Here is a question: What is strep throat ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What is strep throat ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:DESC
 predict: ENTYYY

prompt: Here is a question: What is the largest city in the U.S. ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What is the largest city in the U.S. ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:LOC
 predict: DESCCC

prompt: Here is a question: What is foot and mouth disease ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: What is foot and mouth disease ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:ENTY
 predict: DESCCC

prompt: Here is a question: What is the life expectancy of a dollar bill ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What is the life expectancy of a dollar bill ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:NUM
 predict: DESCCC

prompt: Here is a question: What do you call a professional map drawer ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What do you call a professional map drawer ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:ENTY
 predict: ENTYYY

prompt: Here is a question: What are Aborigines ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: What are Aborigines ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:DESC
 predict: DESCCC

prompt: Here is a question: What is hybridization ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What is hybridization ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:DESC
 predict: DESCCC

prompt: Here is a question: What color is indigo ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: What color is indigo ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:ENTY
 predict: DESCCC

prompt: Here is a question: How old do you have to be in order to rent a car in Italy ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: How old do you have to be in order to rent a car in Italy ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:NUM
 predict: NUMCCC

prompt: Here is a question: What does a barometer measure ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: What does a barometer measure ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:ENTY
 predict: DESCCC

prompt: Here is a question: What color is a giraffe 's tongue ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What color is a giraffe 's tongue ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:ENTY
 predict: HYYY

prompt: Here is a question: What does USPS stand for ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What does USPS stand for ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:ABBR
 predict: DESCCC

prompt: Here is a question: What year did the NFL go on strike ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: What year did the NFL go on strike ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:NUM
 predict: ENTYYY

prompt: Here is a question: What is solar wind ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What is solar wind ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:DESC
 predict: ENTYYY

prompt: Here is a question: What date did Neil Armstrong land on the moon ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: What date did Neil Armstrong land on the moon ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:NUM
 predict: ENTYYY

prompt: Here is a question: When was Hiroshima bombed ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: When was Hiroshima bombed ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:NUM
 predict: NUMCCC

prompt: Here is a question: Where is the Savannah River ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: Where is the Savannah River ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:LOC
 predict: DESCCC

prompt: Here is a question: Who was the first woman killed in the Vietnam War ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: Who was the first woman killed in the Vietnam War ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:HUM
 predict: HUMUMUM

prompt: Here is a question: What planet has the strongest magnetic field of all the planets ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: What planet has the strongest magnetic field of all the planets ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:LOC
 predict: ENTYYY

prompt: Here is a question: Who is the governor of Alaska ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: Who is the governor of Alaska ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:HUM
 predict: DESCCC

prompt: Here is a question: What year did Mussolini seize power in Italy ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What year did Mussolini seize power in Italy ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:NUM
 predict: ENTYYY

prompt: Here is a question: What is the capital of Persia ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: What is the capital of Persia ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:LOC
 predict: DESCCC

prompt: Here is a question: Where is the Eiffel Tower ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: Where is the Eiffel Tower ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:LOC
 predict: DESCCC

prompt: Here is a question: How many hearts does an octopus have ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: How many hearts does an octopus have ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:NUM
 predict: NUMCCC



Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: What is pneumonia ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What is pneumonia ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:DESC
 predict: DESCCC

prompt: Here is a question: What is the deepest lake in the US ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: What is the deepest lake in the US ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:LOC
 predict: DESCCC

prompt: Here is a question: What is a fuel cell ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What is a fuel cell ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:DESC
 predict: DESCCC

prompt: Here is a question: Who was the first U.S. president to appear on TV ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: Who was the first U.S. president to appear on TV ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:HUM
 predict: HUMUMUM

prompt: Here is a question: Where is the Little League Museum ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: Where is the Little League Museum ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:LOC
 predict: HUMUMUM

prompt: Here is a question: What are the two types of twins ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: What are the two types of twins ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:ENTY
 predict: ENTYYY

prompt: Here is a question: What is the brightest star ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What is the brightest star ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:LOC
 predict: DESCCC

prompt: Here is a question: What is diabetes ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: What is diabetes ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:DESC
 predict: DESCCC

prompt: Here is a question: When was President Kennedy shot ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: When was President Kennedy shot ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:NUM
 predict: NUMCCC

prompt: Here is a question: What is TMJ ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: What is TMJ ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:ABBR
 predict: DESCCC

prompt: Here is a question: What color is yak milk ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What color is yak milk ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:ENTY
 predict: DESCCC

prompt: Here is a question: What date was Dwight D. Eisenhower born ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: What date was Dwight D. Eisenhower born ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:NUM
 predict: HUMUMUM

prompt: Here is a question: What does the technical term ISDN mean ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What does the technical term ISDN mean ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:ABBR
 predict: DESCCC

prompt: Here is a question: Why is the sun yellow ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: Why is the sun yellow ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:DESC
 predict: DESCCC

prompt: Here is a question: What is the conversion rate between dollars and pounds ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What is the conversion rate between dollars and pounds ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:NUM
 predict: DESCCC

prompt: Here is a question: When was Abraham Lincoln born ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: When was Abraham Lincoln born ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:NUM
 predict: NUMCCC

prompt: Here is a question: What is the Milky Way ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What is the Milky Way ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:DESC
 predict: ENTYYY

prompt: Here is a question: What is mold ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What is mold ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:DESC
 predict: DESCCC

prompt: Here is a question: What year was Mozart born ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: What year was Mozart born ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:NUM
 predict: HUMUMUM

prompt: Here is a question: What is a group of frogs called ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What is a group of frogs called ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:ENTY
 predict: ENTYYY

prompt: Here is a question: What is the name of William Penn 's ship ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: What is the name of William Penn 's ship ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:ENTY
 predict: ENTYYY

prompt: Here is a question: What is the melting point of gold ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What is the melting point of gold ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:NUM
 predict: DESCCC

prompt: Here is a question: What is the street address of the White House ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: What is the street address of the White House ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:LOC
 predict: ENTYYY

prompt: Here is a question: What is semolina ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What is semolina ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:DESC
 predict: DESCCC

prompt: Here is a question: What fruit is Melba sauce made from ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: What fruit is Melba sauce made from ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:ENTY
 predict: HUMUMUM

prompt: Here is a question: What is Ursa Major ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What is Ursa Major ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:DESC
 predict: HUMUMUM

prompt: Here is a question: What is the percentage of water content in the human body ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What is the percentage of water content in the human body ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:NUM
 predict: ENTYYY

prompt: Here is a question: How much does water weigh ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: How much does water weigh ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:NUM
 predict: NUMCCC

prompt: Here is a question: What was President Lyndon Johnson 's reform program called ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What was President Lyndon Johnson 's reform program called ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:ENTY
 predict: HUMUMUM

prompt: Here is a question: What is the murder rate in Windsor , Ontario ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What is the murder rate in Windsor , Ontario ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:NUM
 predict: DESCCC



Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: Who is the only president to serve 2 non-consecutive terms ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: Who is the only president to serve 2 non-consecutive terms ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:HUM
 predict: HYYY

prompt: Here is a question: What is the population of Australia ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: What is the population of Australia ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:NUM
 predict: DESCCC

prompt: Here is a question: Who painted the ceiling of the Sistine Chapel ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: Who painted the ceiling of the Sistine Chapel ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:HUM
 predict: HUMUMUM

prompt: Here is a question: Name a stimulant . What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: Name a stimulant . What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:ENTY
 predict: ENTYYY

prompt: Here is a question: What is the effect of volcanoes on the climate ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: What is the effect of volcanoes on the climate ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:DESC
 predict: ENTCCC

prompt: Here is a question: What year did the Andy Griffith show begin ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What year did the Andy Griffith show begin ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:NUM
 predict: ENTYYY

prompt: Here is a question: What is acid rain ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What is acid rain ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:DESC
 predict: ENTYYY

prompt: Here is a question: What is the date of Mexico 's independence ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: What is the date of Mexico 's independence ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:NUM
 predict: ENTYYY

prompt: Here is a question: What is the location of Lake Champlain ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What is the location of Lake Champlain ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:LOC
 predict: DESCCC

prompt: Here is a question: What is the Illinois state flower ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What is the Illinois state flower ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:ENTY
 predict: DESCCC

prompt: Here is a question: What is Maryland 's state bird ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: What is Maryland 's state bird ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:ENTY
 predict: ENTYYY

prompt: Here is a question: What is quicksilver ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What is quicksilver ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:DESC
 predict: DESCCC

prompt: Here is a question: Who wrote `` The Divine Comedy '' ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: Who wrote `` The Divine Comedy '' ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:HUM
 predict: HUMUMUM

prompt: Here is a question: What is the speed of light ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: What is the speed of light ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:NUM
 predict: DESCCC

prompt: Here is a question: What is the width of a football field ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What is the width of a football field ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:NUM
 predict: DESCCC

prompt: Here is a question: Why in tennis are zero points called love ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: Why in tennis are zero points called love ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:DESC
 predict: DESCCC

prompt: Here is a question: What kind of dog was Toto in the Wizard of Oz ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: What kind of dog was Toto in the Wizard of Oz ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:ENTY
 predict: HUMUMUM

prompt: Here is a question: What is a thyroid ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What is a thyroid ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:DESC
 predict: DESCCC

prompt: Here is a question: What does ciao mean ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What does ciao mean ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:DESC
 predict: DESCCC

prompt: Here is a question: What is the only artery that carries blue blood from the heart to the lungs ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: What is the only artery that carries blue blood from the heart to the lungs ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:ENTY
 predict: ENTYYY

prompt: Here is a question: How often does Old Faithful erupt at Yellowstone National Park ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: How often does Old Faithful erupt at Yellowstone National Park ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:NUM
 predict: DESCCC

prompt: Here is a question: What is acetic acid ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: What is acetic acid ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:DESC
 predict: ENTYYY

prompt: Here is a question: What is the elevation of St. Louis , MO ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What is the elevation of St. Louis , MO ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:NUM
 predict: DESYYY

prompt: Here is a question: What color does litmus paper turn when it comes into contact with a strong acid ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What color does litmus paper turn when it comes into contact with a strong acid ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:ENTY
 predict: ENTYYY

prompt: Here is a question: What are the colors of the German flag ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: What are the colors of the German flag ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:ENTY
 predict: ENTYYY

prompt: Here is a question: What is the Moulin Rouge ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What is the Moulin Rouge ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:DESC
 predict: ENTYYY

prompt: Here is a question: What soviet seaport is on the Black Sea ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What soviet seaport is on the Black Sea ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:LOC
 predict: ENTYYY

prompt: Here is a question: What is the atomic weight of silver ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: What is the atomic weight of silver ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:NUM
 predict: ENTYYY

prompt: Here is a question: What currency do they use in Brazil ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What currency do they use in Brazil ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:ENTY
 predict: ENTYYY

prompt: Here is a question: What are pathogens ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What are pathogens ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:DESC
 predict: DESCCC

prompt: Here is a question: What is mad cow disease ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: What is mad cow disease ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:DESC
 predict: ENTYYY

prompt: Here is a question: Name a food high in zinc . What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: Name a food high in zinc . What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:ENTY
 predict: HUMUMUM

prompt: Here is a question: When did North Carolina enter the union ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: When did North Carolina enter the union ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:NUM
 predict: DESCCC

prompt: Here is a question: Where do apple snails live ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: Where do apple snails live ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:LOC
 predict: DESCCC

prompt: Here is a question: What are ethics ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What are ethics ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:DESC
 predict: DESCCC

prompt: Here is a question: What does CPR stand for ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What does CPR stand for ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:ABBR
 predict: DESCCC

prompt: Here is a question: What is an annuity ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: What is an annuity ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:DESC
 predict: DESCCC

prompt: Here is a question: Who killed John F. Kennedy ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: Who killed John F. Kennedy ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:HUM
 predict: HUMUMUM

prompt: Here is a question: Who was the first vice president of the U.S. ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: Who was the first vice president of the U.S. ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:HUM
 predict: HUMUMUM

prompt: Here is a question: What birthstone is turquoise ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: What birthstone is turquoise ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:ENTY
 predict: DESCCC

prompt: Here is a question: Who was the first US President to ride in an automobile to his inauguration ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: Who was the first US President to ride in an automobile to his inauguration ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:HUM
 predict: HUMUMUM

prompt: Here is a question: How old was the youngest president of the United States ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: How old was the youngest president of the United States ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:NUM
 predict: NUMCCC

prompt: Here is a question: When was Ulysses S. Grant born ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: When was Ulysses S. Grant born ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:NUM
 predict: NUMCCC

prompt: Here is a question: What is Muscular Dystrophy ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What is Muscular Dystrophy ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:DESC
 predict: ENTYYY

prompt: Here is a question: Who lived in the Neuschwanstein castle ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: Who lived in the Neuschwanstein castle ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:HUM
 predict: HUMUMUM



Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: What is propylene glycol ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What is propylene glycol ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:DESC
 predict: ENTYYY

prompt: Here is a question: What is a panic disorder ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: What is a panic disorder ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:DESC
 predict: DESCCC

prompt: Here is a question: Who invented the instant Polaroid camera ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: Who invented the instant Polaroid camera ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:HUM
 predict: HUMUMUM

prompt: Here is a question: What is a carcinogen ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: What is a carcinogen ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:DESC
 predict: DESCCC

prompt: Here is a question: What is a baby lion called ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What is a baby lion called ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:ENTY
 predict: ENTYYY

prompt: Here is a question: What is the world 's population ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What is the world 's population ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:NUM
 predict: HYYY

prompt: Here is a question: What is nepotism ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: What is nepotism ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:DESC
 predict: DESCCC

prompt: Here is a question: What is die-casting ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What is die-casting ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:DESC
 predict: DESYYY

prompt: Here is a question: What is myopia ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What is myopia ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:DESC
 predict: DESCCC

prompt: Here is a question: What is the sales tax rate in New York ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: What is the sales tax rate in New York ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:NUM
 predict: ENTYYY

prompt: Here is a question: Developing nations comprise what percentage of the world 's population ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: Developing nations comprise what percentage of the world 's population ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:NUM
 predict: HUMUMUM

prompt: Here is a question: What is the fourth highest mountain in the world ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What is the fourth highest mountain in the world ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:LOC
 predict: DESCCC

prompt: Here is a question: What is Shakespeare 's nickname ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: What is Shakespeare 's nickname ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:HUM
 predict: HUMUMUM

prompt: Here is a question: What is the heaviest naturally occurring element ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What is the heaviest naturally occurring element ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:ENTY
 predict: ENTYYY

prompt: Here is a question: When is Father 's Day ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: When is Father 's Day ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:NUM
 predict: NUMCCC

prompt: Here is a question: What does the acronym NASA stand for ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: What does the acronym NASA stand for ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:ABBR
 predict: DESCCC

prompt: Here is a question: How long is the Columbia River in miles ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: How long is the Columbia River in miles ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:NUM
 predict: NUMCCC

prompt: Here is a question: What city 's newspaper is called `` The Star '' ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What city 's newspaper is called `` The Star '' ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:LOC
 predict: HUMUMUM

prompt: Here is a question: What is carbon dioxide ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: What is carbon dioxide ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:DESC
 predict: DESCCC

prompt: Here is a question: Where is the Mason/Dixon line ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: Where is the Mason/Dixon line ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:LOC
 predict: HYYY

prompt: Here is a question: When was the Boston tea party ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: When was the Boston tea party ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:NUM
 predict: NUMCCC

prompt: Here is a question: What is metabolism ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: What is metabolism ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:DESC
 predict: DESCCC

prompt: Here is a question: Which U.S.A. president appeared on `` Laugh-In '' ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: Which U.S.A. president appeared on `` Laugh-In '' ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:HUM
 predict: HUMUMUM

prompt: Here is a question: What are cigarettes made of ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What are cigarettes made of ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:ENTY
 predict: ENTYYY

prompt: Here is a question: What is the capital of Zimbabwe ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: What is the capital of Zimbabwe ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:LOC
 predict: DESCCC

prompt: Here is a question: What does NASA stand for ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What does NASA stand for ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:ABBR
 predict: DESCCC

prompt: Here is a question: What is the state flower of Michigan ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What is the state flower of Michigan ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:ENTY
 predict: DESCCC

prompt: Here is a question: What are semiconductors ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: What are semiconductors ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:DESC
 predict: DESCCC

prompt: Here is a question: What is nuclear power ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What is nuclear power ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:DESC
 predict: ENTYYY

prompt: Here is a question: What is a tsunami ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What is a tsunami ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:DESC
 predict: DESCCC

prompt: Here is a question: Who is the congressman from state of Texas on the armed forces committee ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: Who is the congressman from state of Texas on the armed forces committee ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:HUM
 predict: ENTYYY

prompt: Here is a question: Who was president in 1913 ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: Who was president in 1913 ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:HUM
 predict: HUMUMUM

prompt: Here is a question: When was the first kidney transplant ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: When was the first kidney transplant ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:NUM
 predict: HUMUMUM

prompt: Here is a question: What are Canada 's two territories ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: What are Canada 's two territories ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:LOC
 predict: ENTYYY

prompt: Here is a question: What was the name of the plane Lindbergh flew solo across the Atlantic ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What was the name of the plane Lindbergh flew solo across the Atlantic ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:ENTY
 predict: HUMUMUM

prompt: Here is a question: What is genocide ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What is genocide ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:DESC
 predict: DESCCC

prompt: Here is a question: What continent is Argentina on ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: What continent is Argentina on ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:LOC
 predict: DESCCC

prompt: Here is a question: What monastery was raided by Vikings in the late eighth century ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What monastery was raided by Vikings in the late eighth century ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:ENTY
 predict: HUMUMUM

prompt: Here is a question: What is an earthquake ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What is an earthquake ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:DESC
 predict: DESCCC

prompt: Here is a question: Where is the tallest roller coaster located ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: Where is the tallest roller coaster located ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:LOC
 predict: DESYYY

prompt: Here is a question: What are enzymes ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What are enzymes ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:DESC
 predict: DESCCC

prompt: Here is a question: Who discovered oxygen ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: Who discovered oxygen ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:HUM
 predict: HUMUMUM

prompt: Here is a question: What is bangers and mash ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What is bangers and mash ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:DESC
 predict: DESCCC

prompt: Here is a question: What is the name given to the Tiger at Louisiana State University ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What is the name given to the Tiger at Louisiana State University ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:ENTY
 predict: ENTYYY

prompt: Here is a question: Where are the British crown jewels kept ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: Where are the British crown jewels kept ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:LOC
 predict: DESYYY

prompt: Here is a question: Who was the first person to reach the North Pole ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: Who was the first person to reach the North Pole ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:HUM
 predict: HUMUMUM

prompt: Here is a question: What is an ulcer ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: What is an ulcer ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:DESC
 predict: DESCCC

prompt: Here is a question: What is vertigo ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What is vertigo ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:DESC
 predict: DESCCC

prompt: Here is a question: What is the spirometer test ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What is the spirometer test ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:DESC
 predict: DESCCC

prompt: Here is a question: When is the official first day of summer ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: When is the official first day of summer ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:NUM
 predict: ENTYYY

prompt: Here is a question: What does the abbreviation SOS mean ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What does the abbreviation SOS mean ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:ABBR
 predict: DESCCC

prompt: Here is a question: What is the smallest bird in Britain ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: What is the smallest bird in Britain ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:ENTY
 predict: DESCCC

prompt: Here is a question: Who invented Trivial Pursuit ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: Who invented Trivial Pursuit ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:HUM
 predict: HUMUMUM

prompt: Here is a question: What gasses are in the troposphere ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: What gasses are in the troposphere ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:ENTY
 predict: DESYYY

prompt: Here is a question: Which country has the most water pollution ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: Which country has the most water pollution ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:LOC
 predict: ENTYYY

prompt: Here is a question: What is the scientific name for elephant ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What is the scientific name for elephant ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:ENTY
 predict: ENTYYY



Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: Who is the actress known for her role in the movie `` Gypsy '' ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: Who is the actress known for her role in the movie `` Gypsy '' ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:HUM
 predict: ENTYYY

prompt: Here is a question: What breed of hunting dog did the Beverly Hillbillies own ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What breed of hunting dog did the Beverly Hillbillies own ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:ENTY
 predict: HYYY

prompt: Here is a question: What is the rainiest place on Earth ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: What is the rainiest place on Earth ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:LOC
 predict: DESCCC

prompt: Here is a question: Who was the first African American to win the Nobel Prize in literature ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: Who was the first African American to win the Nobel Prize in literature ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:HUM
 predict: HUMUMUM

prompt: Here is a question: When is St. Patrick 's Day ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: When is St. Patrick 's Day ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:NUM
 predict: NUMCCC

prompt: Here is a question: What was FDR 's dog 's name ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: What was FDR 's dog 's name ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:ENTY
 predict: HUMUMUM

prompt: Here is a question: What colors need to be mixed to get the color pink ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What colors need to be mixed to get the color pink ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:ENTY
 predict: ENTYYY

prompt: Here is a question: What is the most popular sport in Japan ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What is the most popular sport in Japan ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:ENTY
 predict: ENTYYY

prompt: Here is a question: What is the active ingredient in baking soda ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: What is the active ingredient in baking soda ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:ENTY
 predict: ENTYYY

prompt: Here is a question: When was Thomas Jefferson born ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: When was Thomas Jefferson born ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:NUM
 predict: NUMCCC

prompt: Here is a question: How cold should a refrigerator be ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: How cold should a refrigerator be ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:NUM
 predict: NUMCCC

prompt: Here is a question: When was the telephone invented ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: When was the telephone invented ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:NUM
 predict: NUMCCC

prompt: Here is a question: What is the most common eye color ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What is the most common eye color ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:ENTY
 predict: ENTYYY

prompt: Here is a question: Where was the first golf course in the United States ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: Where was the first golf course in the United States ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:LOC
 predict: HUMUMUM

prompt: Here is a question: What is schizophrenia ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: What is schizophrenia ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:DESC
 predict: DESCCC

prompt: Here is a question: What is angiotensin ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What is angiotensin ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:DESC
 predict: DESCCC

prompt: Here is a question: What did Jesse Jackson organize ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What did Jesse Jackson organize ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:HUM
 predict: ENTYYY

prompt: Here is a question: What is New York 's state bird ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: What is New York 's state bird ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:ENTY
 predict: ENTYYY

prompt: Here is a question: What is the National Park in Utah ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What is the National Park in Utah ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:LOC
 predict: DESCCC

prompt: Here is a question: What is Susan B. Anthony 's birthday ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What is Susan B. Anthony 's birthday ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:NUM
 predict: ENTUMUMUM

prompt: Here is a question: In which state would you find the Catskill Mountains ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: In which state would you find the Catskill Mountains ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:LOC
 predict: ENTYYY

prompt: Here is a question: What do you call a word that is spelled the same backwards and forwards ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What do you call a word that is spelled the same backwards and forwards ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:ENTY
 predict: ENTYYY

prompt: Here is a question: What are pediatricians ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What are pediatricians ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:DESC
 predict: DESCCC

prompt: Here is a question: What chain store is headquartered in Bentonville , Arkansas ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: What chain store is headquartered in Bentonville , Arkansas ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:HUM
 predict: ENTYYY

prompt: Here is a question: What are solar cells ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What are solar cells ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:DESC
 predict: ENTYYY

prompt: Here is a question: What is compounded interest ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What is compounded interest ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:DESC
 predict: DESCCC



Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: What are capers ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What are capers ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:DESC
 predict: DESCCC

prompt: Here is a question: What is an antigen ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What is an antigen ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:DESC
 predict: DESCCC

prompt: Here is a question: What currency does Luxembourg use ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: What currency does Luxembourg use ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:ENTY
 predict: ENTYYY

prompt: Here is a question: What is the population of Venezuela ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What is the population of Venezuela ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:NUM
 predict: DESCCC

prompt: Here is a question: What type of polymer is used for bulletproof vests ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What type of polymer is used for bulletproof vests ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:ENTY
 predict: ENTYYY

prompt: Here is a question: What currency does Argentina use ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: What currency does Argentina use ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:ENTY
 predict: ENTYYY

prompt: Here is a question: What is a thermometer ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What is a thermometer ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:DESC
 predict: DESCCC

prompt: Here is a question: What Canadian city has the largest population ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What Canadian city has the largest population ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:LOC
 predict: ENTYYY

prompt: Here is a question: What color are crickets ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: What color are crickets ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:ENTY
 predict: DESYYY

prompt: Here is a question: Which country gave New York the Statue of Liberty ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: Which country gave New York the Statue of Liberty ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:LOC
 predict: HUMUMUM

prompt: Here is a question: What was the name of the first U.S. satellite sent into space ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What was the name of the first U.S. satellite sent into space ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:ENTY
 predict: HUMUMUM

prompt: Here is a question: What precious stone is a form of pure carbon ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: What precious stone is a form of pure carbon ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:ENTY
 predict: ENTUMYUM

prompt: Here is a question: What kind of gas is in a fluorescent bulb ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What kind of gas is in a fluorescent bulb ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:ENTY
 predict: ENTYYY

prompt: Here is a question: What is rheumatoid arthritis ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What is rheumatoid arthritis ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:DESC
 predict: DESCCC



Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: What river runs through Rowe , Italy ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What river runs through Rowe , Italy ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:LOC
 predict: DESCCC

prompt: Here is a question: What is cerebral palsy ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What is cerebral palsy ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:DESC
 predict: ENTYYY

prompt: Here is a question: What city is also known as `` The Gateway to the West '' ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: What city is also known as `` The Gateway to the West '' ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:LOC
 predict: ENTYYY

prompt: Here is a question: How far away is the moon ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: How far away is the moon ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:NUM
 predict: NUMCCC

prompt: Here is a question: What is the source of natural gas ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What is the source of natural gas ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:ENTY
 predict: ENTYYY

prompt: Here is a question: In what spacecraft did U.S. astronaut Alan Shepard make his historic 1961 flight ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: In what spacecraft did U.S. astronaut Alan Shepard make his historic 1961 flight ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:ENTY
 predict: ENTYYY

prompt: Here is a question: What is pectin ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What is pectin ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:DESC
 predict: DESCCC

prompt: Here is a question: What is bio-diversity ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What is bio-diversity ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:DESC
 predict: DESCCC

prompt: Here is a question: What 's the easiest way to remove wallpaper ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: What 's the easiest way to remove wallpaper ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:ENTY
 predict: ENTYYY

prompt: Here is a question: What year did the Titanic start on its journey ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What year did the Titanic start on its journey ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:NUM
 predict: ENTYYY

prompt: Here is a question: How much of an apple is water ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: How much of an apple is water ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:NUM
 predict: NUMCCC

prompt: Here is a question: Who was the 22nd President of the US ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: Who was the 22nd President of the US ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:HUM
 predict: HUMUMUM

prompt: Here is a question: What is the money they use in Zambia ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What is the money they use in Zambia ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:ENTY
 predict: ENTYYY

prompt: Here is a question: How many feet in a mile ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: How many feet in a mile ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:NUM
 predict: NUMCCC

prompt: Here is a question: What is the birthstone of October ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


prompt: Here is a question: What is the birthstone of October ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:ENTY
 predict: DESCCC

prompt: Here is a question: What is e-coli ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:
prompt: Here is a question: What is e-coli ? What is the best label among DESC, ENTY, ABBR, HUM, LOC, NUM? Answer:DESC
 predict: DESCCC


🔍 Evaluation Results (max_new_tokens=4 + startswith logic):
Accuracy:  0.5320
Loss: (not computed)
Recall:    1.0000
F1 Score:  1.0000
