In [2]:
pip install torch transformers datasets scikit-learn



In [None]:
import torch
from torch.utils.data import DataLoader, Dataset
from datasets import load_dataset
from transformers import BertTokenizer, BertForSequenceClassification
from sklearn.metrics import accuracy_score, precision_recall_fscore_support

In [None]:
# Seed PyTorch's random number generators so that steps drawing from them
# (for example the shuffled training DataLoader) are repeatable from a fresh
# kernel, as far as seeding alone allows.
SEED = 42
torch.manual_seed(SEED)

# Use the GPU when PyTorch can see one; otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [None]:
# Load the dataset published under the name "imdb"; later cells read its
# "train" and "test" splits.
dataset = load_dataset(path="imdb")

In [None]:
# Tokenizer that belongs to the "bert-base-uncased" checkpoint.
tokenizer = BertTokenizer.from_pretrained(
    pretrained_model_name_or_path="bert-base-uncased",
)

In [None]:
# Custom dataset class to convert tokenized datasets to PyTorch tensors
class IMDBDataset(Dataset):
    """Map-style dataset pairing tokenizer encodings with their labels.

    Args:
        encodings: Mapping from field name to an indexable collection holding
            one entry per example.
        labels: Indexable collection with one label per example.
    """

    def __init__(self, encodings, labels):
        self.labels = labels
        self.encodings = encodings

    def __len__(self):
        # The label collection defines how many examples there are.
        return len(self.labels)

    def __getitem__(self, idx):
        # Assemble one example: every encoding field plus its label, each
        # converted to a tensor.
        sample = {}
        for field, values in self.encodings.items():
            sample[field] = torch.tensor(values[idx])
        sample["label"] = torch.tensor(self.labels[idx])
        return sample

In [None]:
# Tokenize the "text" column of the train and test splits. Each column is
# copied into a plain Python list first, so the tokenizer always receives a
# list of strings whatever container type the datasets library returns for a
# column. Both splits share the same truncation/padding settings.
tokenizer_options = dict(truncation=True, padding=True, max_length=512)
train_encodings = tokenizer(list(dataset["train"]["text"]), **tokenizer_options)
val_encodings = tokenizer(list(dataset["test"]["text"]), **tokenizer_options)

In [None]:
# Pull the "label" column out of the train and test splits (in that order).
train_labels, val_labels = (dataset[split]["label"] for split in ("train", "test"))

In [None]:
# Wrap each split's encodings and labels so a DataLoader can index them.
train_dataset = IMDBDataset(encodings=train_encodings, labels=train_labels)
val_dataset = IMDBDataset(encodings=val_encodings, labels=val_labels)

In [None]:
# Batch iterators: shuffled batches of 16 for training, in-order batches of 64
# for validation.
train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=16,
    shuffle=True,
)
val_loader = DataLoader(
    dataset=val_dataset,
    batch_size=64,
)

In [None]:
# Load the "bert-base-uncased" checkpoint with a two-label sequence
# classification head and move it to the selected device. The result of
# .to() is assigned rather than left as the cell's last expression, so the
# notebook does not print the full module representation.
model = BertForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=2)
model = model.to(device)

In [None]:
# Cross-entropy loss on the logits, optimised with AdamW over all model
# parameters.
loss_function = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.AdamW(
    params=model.parameters(),
    lr=2e-5,
    weight_decay=0.01,
)

In [None]:
# Fine-tune the model for a fixed number of epochs. After each epoch, print
# the mean per-batch loss and the accuracy accumulated over that epoch's
# training batches.
num_epochs = 3
for epoch in range(num_epochs):
    # Enter training mode at the start of every epoch so the loop does not
    # depend on whatever mode the model was left in by earlier cells.
    model.train()
    total_loss = 0.0
    num_batches = 0
    correct = 0
    total = 0
    for batch in train_loader:
        # Only the token ids and the attention mask are fed to the model;
        # the label is handled separately for the loss.
        inputs = {key: val.to(device) for key, val in batch.items() if key in ["input_ids", "attention_mask"]}
        labels = batch["label"].to(device)

        optimizer.zero_grad()
        outputs = model(**inputs)
        loss = loss_function(outputs.logits, labels)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
        num_batches += 1
        preds = torch.argmax(outputs.logits, dim=-1)
        correct += (preds == labels).sum().item()
        total += labels.size(0)

    # Report the mean loss per batch rather than the raw sum, so the number
    # does not scale with the number of batches. max(..., 1) keeps an empty
    # loader from raising ZeroDivisionError.
    avg_loss = total_loss / max(num_batches, 1)
    epoch_accuracy = correct / max(total, 1)
    print(f"Epoch {epoch + 1}/{num_epochs}, Loss: {avg_loss:.4f}, Accuracy: {epoch_accuracy:.4f}")

In [None]:
def evaluate(model, val_loader):
    """Run ``model`` over ``val_loader`` and print classification metrics.

    The model is switched to eval mode and run without gradient tracking.
    Predictions are the argmax over the logits. Accuracy, precision, recall
    and F1 (binary averaging) are computed over all collected predictions and
    printed; nothing is returned.

    Args:
        model: Model called as ``model(input_ids=..., attention_mask=...)``
            whose output exposes ``.logits``.
        val_loader: Iterable of batches (dicts) containing "input_ids",
            "attention_mask" and "label" tensors.
    """
    model.eval()
    all_labels = []
    all_preds = []
    with torch.no_grad():
        for batch in val_loader:
            inputs = {key: val.to(device) for key, val in batch.items() if key in ["input_ids", "attention_mask"]}
            outputs = model(**inputs)
            preds = torch.argmax(outputs.logits, dim=-1)
            # Labels are only needed for the metrics computed below, so they
            # are read straight from the batch instead of being moved to the
            # device and copied back.
            all_labels.extend(batch["label"].cpu().tolist())
            all_preds.extend(preds.cpu().tolist())

    accuracy = accuracy_score(all_labels, all_preds)
    precision, recall, f1, _ = precision_recall_fscore_support(all_labels, all_preds, average="binary")
    print(f"Validation Accuracy: {accuracy:.4f}, Precision: {precision:.4f}, Recall: {recall:.4f}, F1 Score: {f1:.4f}")

In [None]:
# Print the metrics of the trained model on the validation loader.
evaluate(model=model, val_loader=val_loader)

In [None]:
def predict_sentiment(text):
    """Classify ``text`` with the module-level ``model`` and ``tokenizer``.

    Returns:
        A dict with two entries: "label" is "positive" when the index of the
        largest softmax probability is 1 and "negative" otherwise;
        "confidence" is that largest probability as a Python float.
    """
    model.eval()
    encoded = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=512)
    encoded = encoded.to(device)
    with torch.no_grad():
        logits = model(**encoded).logits
    probabilities = torch.nn.functional.softmax(logits, dim=-1)
    if torch.argmax(probabilities) == 1:
        sentiment = "positive"
    else:
        sentiment = "negative"
    return {"label": sentiment, "confidence": torch.max(probabilities).item()}


In [None]:
# Run one example sentence through the classifier and print the outcome.
text = "The movie was absolutely fantastic!"
result = predict_sentiment(text=text)
summary = f"Text: {text}\nSentiment: {result['label']}\nConfidence: {result['confidence']:.2f}"
print(summary)