In [None]:
# Understanding BERT: fine-tuning it for sentiment classification
# Reference: "The Illustrated BERT" - https://jalammar.github.io/illustrated-bert/

In [None]:
import tensorflow as tf
from transformers import TFBertForSequenceClassification, BertTokenizer
from sklearn.model_selection import train_test_split
import numpy as np

# Seed Python, NumPy and TensorFlow in one call so that the randomly
# initialised classification head (and dropout during training) give the
# same results on a fresh "Restart & Run All".
SEED = 42
tf.keras.utils.set_random_seed(SEED)

# Toy sentiment dataset: texts[i] is labelled by labels[i]
# (1 = positive review, 0 = negative review).
texts = [
    "This movie is great and I loved it!",
    "Terrible film, very boring.",
    "Amazing storyline and acting!",
    "I didn't enjoy this at all.",
    "Fantastic experience, highly recommend!",
    "Really bad, waste of time.",

    "One of the best movies I've seen this year!",
    "Completely disappointing and predictable.",
    "Brilliant direction and stunning visuals.",
    "I fell asleep halfway through, so dull.",
    "Heartwarming and beautifully shot.",
    "Poor acting and weak script.",

    "Absolutely loved the plot twists!",
    "Not worth the hype at all.",
    "Engaging from start to finish!",
    "The worst film I’ve ever watched.",
    "Incredible performances by the cast!",
    "Script was a mess and pacing was off."
]
labels = [1, 0, 1, 0, 1, 0,
          1, 0, 1, 0, 1, 0,
          1, 0, 1, 0, 1, 0]

# A silent length mismatch would mislabel every later example.
assert len(texts) == len(labels), "texts and labels must be aligned"

# Split data
# With only 18 examples the 20% hold-out is just 4 reviews, so stratify on
# the labels to guarantee both classes appear in the validation set.
train_texts, val_texts, train_labels, val_labels = train_test_split(
    texts, labels, test_size=0.2, random_state=SEED, stratify=labels
)

# Load tokenizer and model
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
# num_labels=2 requests a two-class head on top of the pretrained encoder;
# that head is newly initialised and must be trained before use.
model = TFBertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=2)
# Tokenize data
def tokenize_texts(texts, max_len=128):
    """Encode a batch of strings with the notebook-level ``tokenizer``.

    Args:
        texts: The strings to encode; passed to the tokenizer unchanged.
        max_len: Maximum number of tokens per sequence. Longer inputs are
            truncated to this length.

    Returns:
        An ``(input_ids, attention_mask)`` pair taken from the tokenizer
        output, requested as TensorFlow tensors.
    """
    tokenizer_options = dict(
        max_length=max_len,
        padding=True,        # pad up to the longest sequence in this call
        truncation=True,
        return_tensors='tf',
    )
    batch = tokenizer(texts, **tokenizer_options)
    return batch['input_ids'], batch['attention_mask']

# Hyperparameters, kept together so they are easy to find and tune.
BATCH_SIZE = 2
EPOCHS = 10
LEARNING_RATE = 2e-5

train_input_ids, train_attention_mask = tokenize_texts(train_texts)
val_input_ids, val_attention_mask = tokenize_texts(val_texts)

# Convert labels to tensors
train_labels = tf.convert_to_tensor(train_labels, dtype=tf.int32)
val_labels = tf.convert_to_tensor(val_labels, dtype=tf.int32)

# Prepare datasets
# Keras ignores fit(shuffle=...) when given a tf.data.Dataset, so shuffle
# explicitly. Without this every epoch replays the same batches in the same
# order. The buffer covers the whole training set, giving a full shuffle
# that is redrawn each epoch.
train_dataset = (
    tf.data.Dataset.from_tensor_slices((
        {'input_ids': train_input_ids, 'attention_mask': train_attention_mask},
        train_labels,
    ))
    .shuffle(buffer_size=len(train_texts), seed=42)
    .batch(BATCH_SIZE)
    .prefetch(tf.data.AUTOTUNE)
)

# Validation data is only evaluated, so its order does not matter.
val_dataset = (
    tf.data.Dataset.from_tensor_slices((
        {'input_ids': val_input_ids, 'attention_mask': val_attention_mask},
        val_labels,
    ))
    .batch(BATCH_SIZE)
    .prefetch(tf.data.AUTOTUNE)
)

# Compile model
optimizer = tf.keras.optimizers.Adam(learning_rate=LEARNING_RATE)
model.compile(
    optimizer=optimizer,
    # The model outputs raw logits, hence from_logits=True.
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=['accuracy']
)

# Train model
model.fit(
    train_dataset,
    validation_data=val_dataset,
    epochs=EPOCHS,
    verbose=2
)

# Inference function
def predict_sentiment(text, model, tokenizer, max_len=128):
    """Classify a single piece of text as "Positive" or "Negative".

    Args:
        text: The string to classify.
        model: Callable that accepts a dict with ``input_ids`` and
            ``attention_mask`` and returns an object exposing ``.logits``.
        tokenizer: Callable that encodes a list of strings into a mapping
            containing ``input_ids`` and ``attention_mask``.
        max_len: Maximum number of tokens; longer input is truncated.

    Returns:
        "Positive" if the highest-scoring class index is 1, else "Negative".

    Side effects:
        Prints the raw logits.
    """
    batch = tokenizer(
        [text],  # wrap in a list so the model receives a batch of one
        return_tensors='tf',
        truncation=True,
        padding=True,
        max_length=max_len,
    )
    model_inputs = {key: batch[key] for key in ('input_ids', 'attention_mask')}
    logits = model(model_inputs).logits
    # Take the winning class for the first (and only) row of the batch.
    class_index = tf.argmax(logits, axis=-1).numpy()[0]
    print(logits)
    if class_index == 1:
        return "Positive"
    return "Negative"

# Smoke-test the trained classifier on a sentence that is not in the dataset.
test_text = "This is an awesome movie!"
result = predict_sentiment(test_text, model, tokenizer)
print(f"Text: {test_text}", f"Predicted Sentiment: {result}", sep="\n")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

TensorFlow and JAX classes are deprecated and will be removed in Transformers v5. We recommend migrating to PyTorch classes or pinning your version of Transformers.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
TensorFlow and JAX classes are deprecated and will be removed in Transformers v5. We recommend migrating to PyTorch classes or pinning your version of Transformers.


Epoch 1/10
7/7 - 61s - loss: 0.6558 - accuracy: 0.6429 - val_loss: 0.5947 - val_accuracy: 0.7500 - 61s/epoch - 9s/step
Epoch 2/10
7/7 - 11s - loss: 0.5466 - accuracy: 0.8571 - val_loss: 0.5422 - val_accuracy: 0.5000 - 11s/epoch - 2s/step
Epoch 3/10
7/7 - 10s - loss: 0.4246 - accuracy: 0.9286 - val_loss: 0.4903 - val_accuracy: 0.7500 - 10s/epoch - 1s/step
Epoch 4/10
7/7 - 11s - loss: 0.3362 - accuracy: 1.0000 - val_loss: 0.4154 - val_accuracy: 0.7500 - 11s/epoch - 2s/step
Epoch 5/10
7/7 - 11s - loss: 0.2461 - accuracy: 1.0000 - val_loss: 0.2747 - val_accuracy: 1.0000 - 11s/epoch - 2s/step
Epoch 6/10
7/7 - 11s - loss: 0.1728 - accuracy: 1.0000 - val_loss: 0.1792 - val_accuracy: 1.0000 - 11s/epoch - 2s/step
Epoch 7/10
7/7 - 11s - loss: 0.1155 - accuracy: 1.0000 - val_loss: 0.1225 - val_accuracy: 1.0000 - 11s/epoch - 2s/step
Epoch 8/10
7/7 - 11s - loss: 0.0759 - accuracy: 1.0000 - val_loss: 0.0944 - val_accuracy: 1.0000 - 11s/epoch - 2s/step
Epoch 9/10
7/7 - 11s - loss: 0.0533 - accuracy: 

In [None]:
# Probe the classifier with a review that mixes negative and positive phrases.
test_text = "Movie is very boring in first half and in the second half the movie is worth watching "
result = predict_sentiment(test_text, model, tokenizer)
print(f"Text: {test_text}", f"Predicted Sentiment: {result}", sep="\n")

tf.Tensor([[ 1.0387546 -1.0510042]], shape=(1, 2), dtype=float32)
Text: Movie is very boring in first half and in the second half the movie is worth watching 
Predicted Sentiment: Negative


In [None]:
# Inference function
def predict_sentiment(text, model, tokenizer, max_len=128):
    """Classify one text and print the intermediate values along the way.

    Args:
        text: The string to classify.
        model: Callable that accepts a dict with ``input_ids`` and
            ``attention_mask`` and returns an object exposing ``.logits``.
        tokenizer: Callable that encodes a list of strings into a mapping
            containing ``input_ids`` and ``attention_mask``.
        max_len: Maximum number of tokens; longer input is truncated.

    Returns:
        "Positive" if the most probable class index is 1, else "Negative".

    Side effects:
        Prints the full model output, the softmax probabilities and the
        predicted class index.
    """
    batch = tokenizer(
        [text],  # batch of one
        return_tensors='tf',
        truncation=True,
        padding=True,
        max_length=max_len,
    )
    outputs = model({name: batch[name] for name in ('input_ids', 'attention_mask')})
    logits = outputs.logits
    print(outputs)

    # Turn the raw logits into class probabilities along the class axis.
    probabilities = tf.nn.softmax(logits, axis=-1)
    print("Softmax Probabilities:", probabilities)

    # Index of the most probable class for the single input row.
    class_index = tf.argmax(probabilities, axis=-1).numpy()[0]
    print("Predicted Class Index:", class_index)

    label = "Negative"
    if class_index == 1:
        label = "Positive"
    return label

# Check the verbose predictor on a very short, clearly negative input.
test_text = "worst movie"
result = predict_sentiment(test_text, model, tokenizer)
print(f"Text: {test_text}", f"Predicted Sentiment: {result}", sep="\n")

TFSequenceClassifierOutput(loss=None, logits=<tf.Tensor: shape=(1, 2), dtype=float32, numpy=array([[ 2.0526283, -1.1462734]], dtype=float32)>, hidden_states=None, attentions=None)
Softmax Probabilities: tf.Tensor([[0.9607929  0.03920708]], shape=(1, 2), dtype=float32)
Predicted Class Index: 0
Text: worst movie
Predicted Sentiment: Negative


In [None]:
import tensorflow as tf
from transformers import TFBertModel, BertTokenizer
from sklearn.model_selection import train_test_split
import numpy as np

# Seed Python, NumPy and TensorFlow in one call so that the randomly
# initialised Dense head and the dropout masks used later in this cell are
# reproducible on a fresh "Restart & Run All".
SEED = 42
tf.keras.utils.set_random_seed(SEED)

# Dataset
# texts[i] is labelled by labels[i] (1 = positive review, 0 = negative review).
texts = [
    "This movie is great and I loved it!",
    "Terrible film, very boring.",
    "Amazing storyline and acting!",
    "I didn't enjoy this at all.",
    "Fantastic experience, highly recommend!",
    "Really bad, waste of time.",
    "One of the best movies I've seen this year!",
    "Completely disappointing and predictable.",
    "Brilliant direction and stunning visuals.",
    "I fell asleep halfway through, so dull.",
    "Heartwarming and beautifully shot.",
    "Poor acting and weak script.",
    "Absolutely loved the plot twists!",
    "Not worth the hype at all.",
    "Engaging from start to finish!",
    "The worst film I’ve ever watched.",
    "Incredible performances by the cast!",
    "Script was a mess and pacing was off."
]
labels = [1, 0, 1, 0, 1, 0,
          1, 0, 1, 0, 1, 0,
          1, 0, 1, 0, 1, 0]

# A silent length mismatch would mislabel every later example.
assert len(texts) == len(labels), "texts and labels must be aligned"

# Train/Val Split
# With only 18 examples the 20% hold-out is just 4 reviews, so stratify on
# the labels to guarantee both classes appear in the validation set.
train_texts, val_texts, train_labels, val_labels = train_test_split(
    texts, labels, test_size=0.2, random_state=SEED, stratify=labels
)

# Load tokenizer
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

# Tokenization function
def tokenize_texts(texts, max_len=128):
    """Encode a batch of strings to fixed-length inputs with ``tokenizer``.

    Args:
        texts: The strings to encode; passed to the tokenizer unchanged.
        max_len: Target sequence length. Shorter inputs are padded up to it
            and longer inputs are truncated down to it.

    Returns:
        An ``(input_ids, attention_mask)`` pair taken from the tokenizer
        output, requested as TensorFlow tensors.
    """
    tokenizer_options = dict(
        max_length=max_len,
        padding='max_length',  # always pad to max_len, not the longest text
        truncation=True,
        return_tensors='tf',
    )
    batch = tokenizer(texts, **tokenizer_options)
    return batch['input_ids'], batch['attention_mask']

# Hyperparameters, kept together so they are easy to find and tune.
BATCH_SIZE = 2
EPOCHS = 10
LEARNING_RATE = 2e-5

# Tokenize data
train_input_ids, train_attention_mask = tokenize_texts(train_texts)
val_input_ids, val_attention_mask = tokenize_texts(val_texts)

# Convert labels to tensors
train_labels = tf.convert_to_tensor(train_labels, dtype=tf.int32)
val_labels = tf.convert_to_tensor(val_labels, dtype=tf.int32)

# Prepare datasets
# Keras ignores fit(shuffle=...) when given a tf.data.Dataset, so shuffle
# explicitly. Without this every epoch replays the same batches in the same
# order. The buffer covers the whole training set, giving a full shuffle
# that is redrawn each epoch.
train_dataset = (
    tf.data.Dataset.from_tensor_slices((
        {'input_ids': train_input_ids, 'attention_mask': train_attention_mask},
        train_labels,
    ))
    .shuffle(buffer_size=len(train_texts), seed=42)
    .batch(BATCH_SIZE)
    .prefetch(tf.data.AUTOTUNE)
)

# Validation data is only evaluated, so its order does not matter.
val_dataset = (
    tf.data.Dataset.from_tensor_slices((
        {'input_ids': val_input_ids, 'attention_mask': val_attention_mask},
        val_labels,
    ))
    .batch(BATCH_SIZE)
    .prefetch(tf.data.AUTOTUNE)
)

# ===== Custom Model with Softmax =====

# Load base BERT model (encoder only, no task-specific head)
bert_model = TFBertModel.from_pretrained('bert-base-uncased')

# Input layers with a dynamic sequence dimension (fix for a shape error).
# The layer names match the dataset's dict keys.
input_ids = tf.keras.layers.Input(shape=(None,), dtype=tf.int32, name="input_ids")
attention_mask = tf.keras.layers.Input(shape=(None,), dtype=tf.int32, name="attention_mask")

# Get pooled output from BERT
bert_outputs = bert_model(input_ids, attention_mask=attention_mask)
# pooler_output is the [CLS] hidden state after BERT's pooler (dense + tanh),
# not the raw [CLS] token embedding.
pooled_output = bert_outputs.pooler_output

# Optional dropout
dropout = tf.keras.layers.Dropout(0.3)(pooled_output)

# Classification head with softmax: the model emits class probabilities
output = tf.keras.layers.Dense(2, activation='softmax')(dropout)

# Build model
model = tf.keras.Model(inputs=[input_ids, attention_mask], outputs=output)

# Compile model
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=LEARNING_RATE),
    # The string loss uses from_logits=False, which matches the softmax output.
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy']
)

# ===== Train the Model =====
model.fit(
    train_dataset,
    validation_data=val_dataset,
    epochs=EPOCHS,
    verbose=2
)

# ===== Inference Function =====
def predict_sentiment(text, model, tokenizer, max_len=128):
    """Classify one text with a model that outputs class probabilities.

    Args:
        text: The string to classify.
        model: Callable that accepts a dict with ``input_ids`` and
            ``attention_mask`` and returns an object whose ``.numpy()``
            yields per-class scores with one row per input.
        tokenizer: Callable that encodes a list of strings into a mapping
            containing ``input_ids`` and ``attention_mask``.
        max_len: Sequence length the input is padded or truncated to.

    Returns:
        "Positive" if the highest-scoring class index is 1, else "Negative".

    Side effects:
        Prints the probabilities returned by the model.
    """
    batch = tokenizer(
        [text],  # batch of one
        return_tensors='tf',
        truncation=True,
        padding='max_length',
        max_length=max_len,
    )
    model_inputs = {key: batch[key] for key in ('input_ids', 'attention_mask')}
    probs = model(model_inputs).numpy()
    # Winning class for the first (and only) row of the batch.
    class_index = np.argmax(probs, axis=-1)[0]
    print(f"Probabilities: {probs}")
    if class_index == 1:
        return "Positive"
    return "Negative"

# ===== Test Inference =====
# Smoke-test the custom softmax model on a sentence that is not in the dataset.
test_text = "This is an awesome movie!"
result = predict_sentiment(test_text, model, tokenizer)
print(f"\nText: {test_text}", f"Predicted Sentiment: {result}", sep="\n")


Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFBertModel: ['cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing TFBertModel from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertModel from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
All the weights of TFBertModel were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertModel for predictions w

Epoch 1/10
7/7 - 72s - loss: 0.8493 - accuracy: 0.5000 - val_loss: 0.6067 - val_accuracy: 0.5000 - 72s/epoch - 10s/step
Epoch 2/10
7/7 - 28s - loss: 0.4315 - accuracy: 0.9286 - val_loss: 0.3550 - val_accuracy: 1.0000 - 28s/epoch - 4s/step
Epoch 3/10
7/7 - 29s - loss: 0.2153 - accuracy: 1.0000 - val_loss: 0.2881 - val_accuracy: 0.7500 - 29s/epoch - 4s/step
Epoch 4/10
7/7 - 28s - loss: 0.2663 - accuracy: 0.9286 - val_loss: 0.4678 - val_accuracy: 0.7500 - 28s/epoch - 4s/step
Epoch 5/10
7/7 - 28s - loss: 0.0442 - accuracy: 1.0000 - val_loss: 0.1196 - val_accuracy: 1.0000 - 28s/epoch - 4s/step
Epoch 6/10
7/7 - 36s - loss: 0.0590 - accuracy: 1.0000 - val_loss: 0.0447 - val_accuracy: 1.0000 - 36s/epoch - 5s/step
Epoch 7/10
7/7 - 31s - loss: 0.0197 - accuracy: 1.0000 - val_loss: 0.0257 - val_accuracy: 1.0000 - 31s/epoch - 4s/step
Epoch 8/10
7/7 - 28s - loss: 0.0127 - accuracy: 1.0000 - val_loss: 0.0209 - val_accuracy: 1.0000 - 28s/epoch - 4s/step
Epoch 9/10
7/7 - 30s - loss: 0.0373 - accuracy: