In [9]:
!pip install transformers torch scikit-learn





In [10]:
from transformers import BertTokenizer, BertForSequenceClassification
import torch

model_name = "bert-base-uncased"

# The checkpoint ships without a classification head, so 'classifier.weight'
# and 'classifier.bias' are freshly (randomly) initialised on load. Seed
# torch's global RNG first so a Restart & Run All starts from the same weights.
# The same global RNG is also what later shuffling/dropout draw from by
# default. NOTE(review): GPU kernels may still be non-deterministic.
SEED = 42
torch.manual_seed(SEED)

tokenizer = BertTokenizer.from_pretrained(model_name)
model = BertForSequenceClassification.from_pretrained(
    model_name,
    num_labels=2,  # binary sentiment: 0 = Negative, 1 = Positive
)

print("BERT loaded successfully ✅")


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


BERT loaded successfully ✅


In [11]:
import pandas as pd

# Tiny hand-labelled sentiment corpus: label 1 marks a positive sentence,
# label 0 a negative one.
positive_texts = [
    "I love this product",
    "This service is excellent",
    "I am very happy today",
]
negative_texts = [
    "I hate this experience",
    "This is very disappointing",
    "I am unhappy with the result",
]

data = {
    "text": positive_texts + negative_texts,
    "label": [1] * len(positive_texts) + [0] * len(negative_texts),
}

df = pd.DataFrame(data)
df


Unnamed: 0,text,label
0,I love this product,1
1,This service is excellent,1
2,I am very happy today,1
3,I hate this experience,0
4,This is very disappointing,0
5,I am unhappy with the result,0


In [12]:
from torch.utils.data import Dataset

class TextDataset(Dataset):
    """Map-style dataset that tokenizes one text per lookup.

    Args:
        texts: Sized, indexable collection of input strings.
        labels: Sized, indexable collection of integer class ids, one per text.
        tokenizer: Callable invoked as ``tokenizer(text, truncation=True,
            padding="max_length", max_length=max_len, return_tensors="pt")``
            that returns a mapping with ``"input_ids"`` and
            ``"attention_mask"`` tensors.
        max_len: Value forwarded to the tokenizer as ``max_length``.

    Raises:
        ValueError: If ``texts`` and ``labels`` have different lengths.
    """

    def __init__(self, texts, labels, tokenizer, max_len=128):
        # Fail fast: otherwise a mismatch only surfaces as an IndexError in
        # the middle of an epoch (or extra labels are silently ignored).
        if len(texts) != len(labels):
            raise ValueError(
                f"texts and labels must have the same length, "
                f"got {len(texts)} and {len(labels)}"
            )
        self.texts = texts
        self.labels = labels
        self.tokenizer = tokenizer
        self.max_len = max_len

    def __len__(self):
        """Return the number of examples."""
        return len(self.texts)

    def __getitem__(self, idx):
        """Tokenize example ``idx`` and return it as a dict of tensors.

        Returns:
            dict with keys ``"input_ids"``, ``"attention_mask"`` and
            ``"labels"`` (a ``torch.long`` scalar tensor).
        """
        encoding = self.tokenizer(
            self.texts[idx],
            truncation=True,
            padding="max_length",
            max_length=self.max_len,
            return_tensors="pt"
        )

        # The tokenizer output is expected to carry a leading batch axis of
        # size 1 for a single text. Drop only that axis: a bare squeeze()
        # would also collapse the sequence axis when max_len == 1.
        return {
            "input_ids": encoding["input_ids"].squeeze(0),
            "attention_mask": encoding["attention_mask"].squeeze(0),
            "labels": torch.tensor(self.labels[idx], dtype=torch.long)
        }

# Wrap the DataFrame columns (converted to plain Python lists) in the dataset.
# No text is tokenized at this point: TextDataset calls the tokenizer lazily,
# once per item, inside __getitem__.
dataset = TextDataset(
    texts=df["text"].tolist(),
    labels=df["label"].tolist(),
    tokenizer=tokenizer,
)

print("Tokenization done ✅")


Tokenization done ✅


In [13]:
from torch.utils.data import DataLoader

# Serve the dataset in shuffled mini-batches of two examples.
train_loader = DataLoader(dataset, shuffle=True, batch_size=2)

print("DataLoader ready ✅")


DataLoader ready ✅


In [14]:
from torch.optim import AdamW

# Train on the GPU when one is visible, otherwise stay on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

optimizer = AdamW(model.parameters(), lr=2e-5)

model.train()

for epoch in range(2):  # 2 epochs enough for demo
    print(f"Epoch {epoch+1}")

    for batch in train_loader:
        # Each batch is a dict with "input_ids", "attention_mask" and "labels"
        # (the keys TextDataset.__getitem__ returns), which are exactly the
        # keyword arguments passed to the model below.
        batch = {key: tensor.to(device) for key, tensor in batch.items()}

        optimizer.zero_grad()

        # `labels` is part of the batch, so the output exposes a `.loss`.
        outputs = model(**batch)
        loss = outputs.loss

        loss.backward()
        optimizer.step()

        print("Loss:", loss.item())

print("Training completed ✅")


Epoch 1
Loss: 0.47192442417144775
Loss: 0.9796375036239624
Loss: 0.5925489664077759
Epoch 2
Loss: 0.8599597215652466
Loss: 0.6507400274276733
Loss: 0.6747688055038452
Training completed ✅


In [15]:
def predict_sentiment(text):
    """Classify one sentence, or a batch of sentences, as Positive/Negative.

    Relies on the notebook-level ``model``, ``tokenizer`` and ``device``.
    Calling it switches ``model`` to eval mode and leaves it there.

    Args:
        text: A single string, or an iterable of strings to classify together.

    Returns:
        ``"Positive"`` or ``"Negative"`` when ``text`` is a string; otherwise
        a list with one such label per input, in order (``[]`` for an empty
        iterable).
    """
    single = isinstance(text, str)
    texts = [text] if single else list(text)

    # Nothing to classify: skip the tokenizer/model call on an empty batch.
    if not texts:
        return []

    model.eval()

    # padding=True pads every text to the longest one in this batch.
    encoding = tokenizer(
        texts,
        return_tensors="pt",
        truncation=True,
        padding=True,
        max_length=128
    )

    input_ids = encoding["input_ids"].to(device)
    attention_mask = encoding["attention_mask"].to(device)

    with torch.no_grad():
        outputs = model(
            input_ids=input_ids,
            attention_mask=attention_mask
        )

    # One predicted class index per row; tolist() (unlike item()) also works
    # when the batch holds more than one text.
    predictions = torch.argmax(outputs.logits, dim=1).tolist()
    sentiments = [
        "Positive" if prediction == 1 else "Negative"
        for prediction in predictions
    ]

    return sentiments[0] if single else sentiments


# Quick smoke test on a sentence that is not among the training texts.
test_text = "I really enjoyed this product"
print(f"Sentence: {test_text}")
print(f"Prediction: {predict_sentiment(test_text)}")


Sentence: I really enjoyed this product
Prediction: Positive
