# Evaluation of the BERT token-classification model
The model fails to mark any positive tokens. Here it is tested on selected examples from the training data.

In [71]:
import transformers
from transformers import AutoTokenizer, AutoModel
from transformers import AutoModelForTokenClassification, TrainingArguments, Trainer

In [19]:
# Selected sentences used as manual test cases.
# Each entry is a (tokens, gold_labels) tuple: `tokens` is the pre-split sentence and
# `gold_labels` holds one sentiment label per token, position by position.
manual_examples = [
    (
        ["See", "what", "you", "can", "make", "of", "that", ",", "friend", "Watson", ".", '"'],
        ["NEUTRAL", "NEUTRAL", "NEUTRAL", "NEUTRAL", "NEUTRAL", "NEUTRAL", "NEUTRAL", "NEUTRAL", "POSITIVE", "NEUTRAL", "NEUTRAL", "NEUTRAL"]
    ),
    (
        ["He", "wheeled", "round", "upon", "his", "stool", ",", "with", "a", "steaming", "test-tube", "in", "his", "hand", ",", "and", "a", "gleam", "of", "amusement", "in", "his", "deep-set", "eyes", "."],
        ["NEUTRAL", "NEUTRAL", "NEUTRAL", "NEUTRAL", "NEUTRAL", "NEUTRAL", "NEUTRAL", "NEUTRAL", "NEUTRAL", "NEUTRAL", "NEUTRAL", "NEUTRAL", "NEUTRAL", "NEUTRAL", "NEUTRAL", "NEUTRAL", "NEUTRAL", "POSITIVE", "NEUTRAL", "POSITIVE", "NEUTRAL", "NEUTRAL", "NEUTRAL", "NEUTRAL", "NEUTRAL"]
    ),
    (
        ["I", "looked", "with", "amazement", "at", "the", "absurd", "hieroglyphics", "upon", "the", "paper", "."],
        ["NEUTRAL", "NEUTRAL", "NEUTRAL", "POSITIVE", "NEUTRAL", "NEUTRAL", "NEGATIVE", "NEUTRAL", "NEUTRAL", "NEUTRAL", "NEUTRAL", "NEUTRAL"]
    )
]

# Fail fast if a sentence and its gold labels drift out of alignment while the
# lists above are edited by hand; a silent off-by-one would corrupt any comparison.
assert all(len(tokens) == len(gold) for tokens, gold in manual_examples), \
    "every example needs exactly one gold label per token"


## Model evaluation

In [64]:
# Load the tokenizer and the token-classification model from the same checkpoint
# so that vocabulary and weights belong together.
# NOTE(review): presumably this is the locally saved output of the training run — confirm.
MODEL_CHECKPOINT = "BERT_token_model"
tokenizer = AutoTokenizer.from_pretrained(MODEL_CHECKPOINT)
model = AutoModelForTokenClassification.from_pretrained(MODEL_CHECKPOINT)

In [54]:
# Class-id -> label table: a predicted class id `i` is reported as `label_list[i]`.
# NOTE(review): the order must match the label ids the model was trained with —
# confirm against `model.config.id2label` before trusting the predictions.
label_list = [
    "NEGATIVE",
    "NEUTRAL",
    "POSITIVE",
]

In [66]:
import torch

def predict_sentiment(model, tokenizer, examples, label_list):
    """Predict one sentiment label per word for pre-tokenised sentences.

    Every sentence is encoded on its own (a batch of one). The highest-scoring
    class is taken for each sub-token, and the prediction of the *first*
    sub-token of a word is used as the label of that word.

    Args:
        model: Callable invoked as ``model(**encoding)``; its result must expose
            a ``logits`` attribute. ``model.eval()`` is called before inference.
            Logits are assumed to be ``(1, seq_len, num_labels)`` — TODO confirm
            for models other than the one used in this notebook.
        tokenizer: Callable invoked with ``is_split_into_words=True``,
            ``return_tensors="pt"`` and ``truncation=True``; its result must
            provide ``word_ids(0)``.
        examples: Iterable of ``(tokens, gold_labels)`` pairs. The gold labels
            are unpacked but not used; only ``tokens`` is fed to the model.
        label_list: Sequence mapping a predicted class id to its label string.

    Returns:
        A list with one ``(tokens, predicted_labels)`` tuple per example, in
        input order. ``predicted_labels`` can be shorter than ``tokens`` when
        some words received no sub-token (e.g. after truncation); a
        ``UserWarning`` is emitted in that case.
    """
    import warnings

    model.eval()
    results = []

    for tokens, _gold_labels in examples:
        encoding = tokenizer(tokens, is_split_into_words=True, return_tensors="pt", truncation=True)
        with torch.no_grad():
            output = model(**encoding)

        # Select batch row 0 explicitly. A bare .squeeze() would also drop the
        # sequence axis when the encoding has length 1, turning the result into
        # a scalar that cannot be indexed below.
        predictions = torch.argmax(output.logits, dim=-1)[0].tolist()
        word_ids = encoding.word_ids(0)

        final_preds = []
        previous_word_idx = None
        for idx, word_idx in enumerate(word_ids):
            # Positions without a word id (None) are skipped.
            if word_idx is None:
                continue
            # Only the first sub-token of each word decides the word's label.
            if word_idx != previous_word_idx:
                final_preds.append(label_list[predictions[idx]])
                previous_word_idx = word_idx

        # Surface a mismatch instead of silently returning misaligned lists.
        if len(final_preds) != len(tokens):
            warnings.warn(
                f"predictions cover {len(final_preds)} of {len(tokens)} words; "
                "the sentence may have been truncated"
            )
        results.append((tokens, final_preds))
    return results


In [68]:
predict_sentiment(model, tokenizer, manual_examples, label_list)

[(['See',
   'what',
   'you',
   'can',
   'make',
   'of',
   'that',
   ',',
   'friend',
   'Watson',
   '.',
   '"'],
  ['NEUTRAL',
   'NEUTRAL',
   'NEUTRAL',
   'NEUTRAL',
   'NEUTRAL',
   'NEUTRAL',
   'NEUTRAL',
   'NEUTRAL',
   'NEUTRAL',
   'NEUTRAL',
   'NEUTRAL',
   'NEUTRAL']),
 (['He',
   'wheeled',
   'round',
   'upon',
   'his',
   'stool',
   ',',
   'with',
   'a',
   'steaming',
   'test-tube',
   'in',
   'his',
   'hand',
   ',',
   'and',
   'a',
   'gleam',
   'of',
   'amusement',
   'in',
   'his',
   'deep-set',
   'eyes',
   '.'],
  ['NEUTRAL',
   'NEUTRAL',
   'NEUTRAL',
   'NEUTRAL',
   'NEUTRAL',
   'NEUTRAL',
   'NEUTRAL',
   'NEUTRAL',
   'NEUTRAL',
   'NEUTRAL',
   'NEUTRAL',
   'NEUTRAL',
   'NEUTRAL',
   'NEUTRAL',
   'NEUTRAL',
   'NEUTRAL',
   'NEUTRAL',
   'NEUTRAL',
   'NEUTRAL',
   'NEUTRAL',
   'NEUTRAL',
   'NEUTRAL',
   'NEUTRAL',
   'NEUTRAL',
   'NEUTRAL']),
 (['I',
   'looked',
   'with',
   'amazement',
   'at',
   'the',
   'absurd',
  