In [6]:
import torch
from transformers import BertTokenizer, BertForSequenceClassification
from torch.utils.data import DataLoader, Dataset
from torch.optim import AdamW
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Example Dataset
texts = [
    "I love programming.", "Python is great.", "I dislike bugs.", "Debugging is fun.",
    "I hate syntax errors.", "Learning new algorithms is exciting."
]
labels = [1, 1, 0, 1, 0, 1]  # 1 for positive, 0 for negative

# Split dataset into training and testing sets
train_texts, test_texts, train_labels, test_labels = train_test_split(texts, labels, test_size=0.2)

# Load TinyBERT tokenizer and model
model_name = "huawei-noah/TinyBERT_General_4L_312D"
# model_name = "prajjwal1/bert-tiny"
tokenizer = BertTokenizer.from_pretrained(model_name)
model = BertForSequenceClassification.from_pretrained(model_name, num_labels=2)

# Create a custom dataset class
class TextDataset(Dataset):
    def __init__(self, texts, labels, tokenizer, max_len=128):
        self.texts = texts
        self.labels = labels
        self.tokenizer = tokenizer
        self.max_len = max_len

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        text = self.texts[idx]
        label = self.labels[idx]
        encoding = self.tokenizer.encode_plus(
            text,
            add_special_tokens=True,
            max_length=self.max_len,
            return_token_type_ids=False,
            padding='max_length',
            truncation=True,
            return_attention_mask=True,
            return_tensors='pt'
        )
        return {
            'input_ids': encoding['input_ids'].flatten(),
            'attention_mask': encoding['attention_mask'].flatten(),
            'label': torch.tensor(label, dtype=torch.long)
        }

# Create DataLoaders for training and testing
train_dataset = TextDataset(train_texts, train_labels, tokenizer)
test_dataset = TextDataset(test_texts, test_labels, tokenizer)

train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=4, shuffle=False)

# Set up the optimizer
optimizer = AdamW(model.parameters(), lr=2e-5)

# Training loop
model.train()
for epoch in range(5):  # Train for 3 epochs
    total_loss = 0
    for batch in train_loader:
        optimizer.zero_grad()
        input_ids = batch['input_ids']
        attention_mask = batch['attention_mask']
        labels = batch['label']
        outputs = model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)
        loss = outputs.loss
        total_loss += loss.item()
        loss.backward()
        optimizer.step()
    print(f"Epoch {epoch+1}, Loss: {total_loss / len(train_loader)}")

# Evaluation loop
model.eval()
all_preds = []
all_labels = []
with torch.no_grad():
    for batch in test_loader:
        input_ids = batch['input_ids']
        attention_mask = batch['attention_mask']
        labels = batch['label']
        outputs = model(input_ids=input_ids, attention_mask=attention_mask)
        logits = outputs.logits
        preds = torch.argmax(logits, dim=-1)
        all_preds.extend(preds.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())

# Calculate accuracy
accuracy = accuracy_score(all_labels, all_preds)
print(f"Test Accuracy: {accuracy * 100:.2f}%")


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at prajjwal1/bert-tiny and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1, Loss: 0.7930697798728943
Epoch 2, Loss: 0.7836299538612366
Epoch 3, Loss: 0.7841544151306152
Epoch 4, Loss: 0.777379035949707
Epoch 5, Loss: 0.780327558517456
Epoch 6, Loss: 0.7920302152633667
Epoch 7, Loss: 0.7413243651390076
Epoch 8, Loss: 0.7690589427947998
Epoch 9, Loss: 0.7615028023719788
Epoch 10, Loss: 0.7510889768600464
Epoch 11, Loss: 0.7531700134277344
Epoch 12, Loss: 0.7237653136253357
Epoch 13, Loss: 0.7229478359222412
Epoch 14, Loss: 0.7254393100738525
Epoch 15, Loss: 0.7428480386734009
Epoch 16, Loss: 0.7404139041900635
Epoch 17, Loss: 0.7138047814369202
Epoch 18, Loss: 0.6985945701599121
Epoch 19, Loss: 0.7457035779953003
Epoch 20, Loss: 0.7351582646369934
Epoch 21, Loss: 0.7621902227401733
Epoch 22, Loss: 0.6532132029533386
Epoch 23, Loss: 0.7011977434158325
Epoch 24, Loss: 0.7127758264541626
Epoch 25, Loss: 0.6701503992080688
Epoch 26, Loss: 0.6117813587188721
Epoch 27, Loss: 0.6664173603057861
Epoch 28, Loss: 0.6300570964813232
Epoch 29, Loss: 0.6477042436599

In [18]:
import torch
from transformers import BertTokenizer, BertForSequenceClassification

# Load TinyBERT tokenizer and model for sequence classification
model_name = "huawei-noah/TinyBERT_General_4L_312D"
# model_name = "prajjwal1/bert-tiny"
tokenizer = BertTokenizer.from_pretrained(model_name)
model = BertForSequenceClassification.from_pretrained(model_name, num_labels=2)

# Ensure the model is in evaluation mode
model.eval()

# Define a function to perform inference
def classify_text(text):
    # Tokenize the input text
    inputs = tokenizer(text, return_tensors="pt", max_length=128, truncation=True, padding=True)
    
    # Perform inference
    with torch.no_grad():
        outputs = model(**inputs)
    
    # Get the logits
    logits = outputs.logits
    
    # Get the predicted class
    predicted_class = torch.argmax(logits, dim=-1).item()
    
    return predicted_class

# Example usage
text = "I love learning about artificial intelligence!"
predicted_class = classify_text(text)

# Print the predicted class
print(f"Predicted class: {predicted_class}")

# Interpret the class (assuming binary classification: 0 = negative, 1 = positive)
if predicted_class == 1:
    print("The sentiment is positive.")
else:
    print("The sentiment is negative.")


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at huawei-noah/TinyBERT_General_4L_312D and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Predicted class: 1
The sentiment is positive.


In [12]:
print(model)

BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 128, padding_idx=0)
      (position_embeddings): Embedding(512, 128)
      (token_type_embeddings): Embedding(2, 128)
      (LayerNorm): LayerNorm((128,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-1): 2 x BertLayer(
          (attention): BertAttention(
            (self): BertSdpaSelfAttention(
              (query): Linear(in_features=128, out_features=128, bias=True)
              (key): Linear(in_features=128, out_features=128, bias=True)
              (value): Linear(in_features=128, out_features=128, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=128, out_features=128, bias=True)
              (LayerNorm): LayerNorm((128,), eps=1e-1