In [12]:
from transformers import BertTokenizer, BertForTokenClassification

# Load model and tokenizer for NER task
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = BertForTokenClassification.from_pretrained('bert-base-uncased', output_hidden_states=True)

text = "Hello, my dog is cute"
inputs = tokenizer(text, return_tensors='pt')
outputs = model(**inputs)

# Last hidden states for token-wise predictions (NER)
last_hidden_states = outputs.hidden_states[-1]
print(last_hidden_states)

Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


tensor([[[-0.1144,  0.1937,  0.1250,  ..., -0.3827,  0.2107,  0.5407],
         [ 0.5308,  0.3207,  0.3665,  ..., -0.0036,  0.7579,  0.0388],
         [-0.4877,  0.8849,  0.4256,  ..., -0.6976,  0.4458,  0.1231],
         ...,
         [-0.7003, -0.1815,  0.3297,  ..., -0.4838,  0.0680,  0.8901],
         [-1.0355, -0.2567, -0.0317,  ...,  0.3197,  0.3999,  0.1795],
         [ 0.6080,  0.2610, -0.3131,  ...,  0.0311, -0.6283, -0.1994]]],
       grad_fn=<NativeLayerNormBackward0>)


In [13]:
from transformers import BertTokenizer, BertForSequenceClassification

# Load model and tokenizer for sequence classification
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = BertForSequenceClassification.from_pretrained('bert-base-uncased')

text = "Hello, my dog is cute"
inputs = tokenizer(text, return_tensors='pt')
outputs = model(**inputs)

# Get the logits (output for classification)
logits = outputs.logits
print(logits)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


tensor([[ 0.2192, -0.2210]], grad_fn=<AddmmBackward0>)


In [14]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
import torch.nn.functional as F

# Load fine-tuned model
model_name = "textattack/bert-base-uncased-SST-2"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)

# Sample text
text = "I absolutely don't like that movie. It was the worst!"

# Tokenize and encode the text
inputs = tokenizer(text, return_tensors='pt')

# Inference
with torch.no_grad():
    outputs = model(**inputs)
    logits = outputs.logits
    probs = F.softmax(logits, dim=1)
    predicted_class = torch.argmax(probs, dim=1).item()

labels = model.config.id2label
print(f"Predicted class: {labels[predicted_class]} prob={probs[0][predicted_class]:.4f}")

Predicted class: LABEL_0 prob=0.9992


In [15]:
import tensorflow as tf
from transformers import TFBertModel, BertTokenizer

# Define the custom classifier
class BertClassifier(tf.keras.Model):
    def __init__(self, model_name='bert-base-uncased', num_classes=2):
        super(BertClassifier, self).__init__()
        self.bert = TFBertModel.from_pretrained(model_name, from_pt=True)
        self.dropout = tf.keras.layers.Dropout(0.3)
        self.classifier = tf.keras.layers.Dense(num_classes, activation='softmax')

    def call(self, inputs, training=False):
        outputs = self.bert(
            input_ids=inputs['input_ids'],
            attention_mask=inputs['attention_mask'],
            token_type_ids=inputs.get('token_type_ids', None),
            training=training
        )

        # Extract CLS token output
        cls_output = outputs.pooler_output
        x = self.dropout(cls_output, training=training)
        return self.classifier(x)

# Initialize tokenizer and model
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
text = "I really wanna visit your hometown!!!"

# Tokenize the input text
encoded_input = tokenizer(text, return_tensors='tf', padding=True, truncation=True)

# Create the model
model = BertClassifier(num_classes=2)

# Disable training (for inference)
model.trainable = False

# Perform inference
logits = model(encoded_input, training=False)
predicted_class = tf.argmax(logits, axis=1).numpy()[0]
confidence = tf.reduce_max(logits, axis=1).numpy()[0]

print(f"Predicted class: {predicted_class}, confidence: {confidence:.4f}")


TensorFlow and JAX classes are deprecated and will be removed in Transformers v5. We recommend migrating to PyTorch classes or pinning your version of Transformers.


pytorch_model.bin:   0%|          | 0.00/440M [00:00<?, ?B/s]

TensorFlow and JAX classes are deprecated and will be removed in Transformers v5. We recommend migrating to PyTorch classes or pinning your version of Transformers.
Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFBertModel: ['cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing TFBertModel from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertModel from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
Al

Predicted class: 1, confidence: 0.8030
