# Set and verify the number of labels 

Make sure the number of classes the model predicts matches the number of labels specified when the model was created.


In [7]:
import os
import sys
import numpy as np
import transformers
from transformers import (
    DistilBertTokenizerFast,
    TFDistilBertModel,
    TFDistilBertForSequenceClassification,
)

# --------------------------------------------------------------------------------
# Restrict transformers logging to errors only
# (https://huggingface.co/transformers/main_classes/logging.html)
# --------------------------------------------------------------------------------
os.environ.update({'TRANSFORMERS_VERBOSITY': "error"})
transformers.logging.set_verbosity(transformers.logging.ERROR)

In [4]:
# Load the pretrained fast tokenizer for the 'distilbert-base-uncased' checkpoint.
# It is a module-level object used by tokenize() and decode() below.
tokenizer = DistilBertTokenizerFast.from_pretrained('distilbert-base-uncased')


def tokenize(sentences, max_length=256, padding='max_length'):
    """Run the module-level Huggingface tokenizer over the given text.

    Args:
        sentences: String or list of strings to tokenize.
        max_length: Value forwarded to the tokenizer as ``max_length``
            (truncation is always enabled).
        padding: Padding method ['do_not_pad'|'longest'|'max_length'].

    Returns:
        Whatever the tokenizer call returns, requested as TensorFlow
        tensors (``return_tensors="tf"``).
    """
    tokenizer_options = dict(
        truncation=True,
        padding=padding,
        max_length=max_length,
        return_tensors="tf",
    )
    return tokenizer(sentences, **tokenizer_options)

def decode(tokens):
    """Turn tokens back into text via the module-level tokenizer's ``decode``.

    Args:
        tokens: Passed unchanged to ``tokenizer.decode`` (presumably a
            sequence of token ids -- confirm against the caller).

    Returns:
        The value returned by ``tokenizer.decode``.
    """
    decoded = tokenizer.decode(tokens)
    return decoded

In [5]:
# Two example sentences, padded only up to the longest one in the batch.
sample_tokens = tokenize(
    sentences=["i say hello", "you say good bye"],
    padding='longest',
)

2021-07-07 10:31:23.025522: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory
2021-07-07 10:31:23.025652: W tensorflow/stream_executor/cuda/cuda_driver.cc:326] failed call to cuInit: UNKNOWN ERROR (303)
2021-07-07 10:31:23.025701: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (ubuntu): /proc/driver/nvidia/version does not exist
2021-07-07 10:31:23.027684: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [9]:
# Number of labels requested from the classification model; the last axis of
# the logits is checked against it at the end of this cell.
NUM_LABELS = 3

model = TFDistilBertForSequenceClassification.from_pretrained(
    'distilbert-base-uncased',
    num_labels=NUM_LABELS,
)
output = model(sample_tokens)

# Show the logits and their shape for the sample batch.
print("output['logits']", output['logits'])
print("output['logits'].shape", output['logits'].shape)

# The last dimension of the logits must equal the requested number of labels.
assert output['logits'].shape[-1] == NUM_LABELS

output['logits'] [[0.07671005 0.01616533 0.09229472]
 [0.07690848 0.02542419 0.11838599]]
output['logits'].shape (2, 3)
