<a href="https://colab.research.google.com/github/shubham3032002/sentiment-analysis-using-transfomers/blob/main/Untitled27.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
from datasets import load_dataset
import torch

# Load dataset
dataset = load_dataset("imdb")

# Extract training & testing data
train_texts = dataset["train"]["text"]
train_labels = dataset["train"]["label"]

test_texts = dataset["test"]["text"]
test_labels = dataset["test"]["label"]


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


README.md:   0%|          | 0.00/7.81k [00:00<?, ?B/s]

train-00000-of-00001.parquet:   0%|          | 0.00/21.0M [00:00<?, ?B/s]

test-00000-of-00001.parquet:   0%|          | 0.00/20.5M [00:00<?, ?B/s]

unsupervised-00000-of-00001.parquet:   0%|          | 0.00/42.0M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/25000 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/25000 [00:00<?, ? examples/s]

Generating unsupervised split:   0%|          | 0/50000 [00:00<?, ? examples/s]

In [5]:
from transformers import AutoTokenizer

# Load BERT tokenizer
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")

# Tokenize function
def tokenize_function(texts):
    return tokenizer(texts, padding="max_length", truncation=True, max_length=256, return_tensors="pt")

# Tokenize dataset
train_encodings = tokenize_function(train_texts)
test_encodings = tokenize_function(test_texts)


The cache for model files in Transformers v4.22.0 has been updated. Migrating your old cache. This is a one-time only operation. You can interrupt this and resume the migration later on by calling `transformers.utils.move_cache()`.


0it [00:00, ?it/s]

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

In [7]:
from torch.utils.data import Dataset, DataLoader

class IMDBDataset(Dataset):
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        return {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}, torch.tensor(self.labels[idx])

# Create datasets
train_dataset = IMDBDataset(train_encodings, train_labels)
test_dataset = IMDBDataset(test_encodings, test_labels)

# Create dataloaders
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)


In [20]:
import torch.nn as nn
import numpy as np

class PositionalEncoding(nn.Module):
    def __init__(self, d_model, max_len=5000):
        super(PositionalEncoding, self).__init__()
        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-np.log(10000.0) / d_model))
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        self.pe = pe.unsqueeze(0)  # Add batch dimension

    def forward(self, x):
        # Ensure that positional encoding is on the same device as x
        device = x.device
        self.pe = self.pe.to(device)
        return x + self.pe[:, :x.size(1), :]



In [21]:
class SelfAttention(nn.Module):
    def __init__(self, d_model):
        super(SelfAttention, self).__init__()
        self.qkv_linear = nn.Linear(d_model, d_model * 3)
        self.softmax = nn.Softmax(dim=-1)

    def forward(self, x):
        Q, K, V = torch.chunk(self.qkv_linear(x), 3, dim=-1)
        score = torch.matmul(Q, K.transpose(-2, -1)) / torch.sqrt(torch.tensor(K.shape[-1], dtype=torch.float))
        attn_weight = self.softmax(score)
        output = torch.matmul(attn_weight, V)
        return output


In [22]:
class MultiHeadAttention(nn.Module):
    def __init__(self, d_model, num_heads):
        super(MultiHeadAttention, self).__init__()
        self.num_heads = num_heads
        self.attention_heads = nn.ModuleList([SelfAttention(d_model) for _ in range(num_heads)])
        self.output_linear = nn.Linear(d_model * num_heads, d_model)

    def forward(self, x):
        head_outputs = [head(x) for head in self.attention_heads]
        concatenated = torch.cat(head_outputs, dim=-1)
        return self.output_linear(concatenated)


In [23]:
class FeedForwardNetwork(nn.Module):
    def __init__(self, d_model, hidden_dim):
        super(FeedForwardNetwork, self).__init__()
        self.fc1 = nn.Linear(d_model, hidden_dim)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_dim, d_model)

    def forward(self, x):
        return self.fc2(self.relu(self.fc1(x)))


In [24]:
class TransformerBlock(nn.Module):
    def __init__(self, d_model, num_heads, hidden_dim):
        super(TransformerBlock, self).__init__()
        self.attention = MultiHeadAttention(d_model, num_heads)
        self.ffn = FeedForwardNetwork(d_model, hidden_dim)
        self.norm1 = nn.LayerNorm(d_model)
        self.norm2 = nn.LayerNorm(d_model)

    def forward(self, x):
        x = self.norm1(x + self.attention(x))  # Residual Connection
        x = self.norm2(x + self.ffn(x))  # Residual Connection
        return x


In [25]:
class TransformerClassifier(nn.Module):
    def __init__(self, vocab_size, d_model=512, num_heads=8, num_layers=2, hidden_dim=2048, num_classes=2):
        super(TransformerClassifier, self).__init__()

        self.embedding = nn.Embedding(vocab_size, d_model)
        self.positional_encoding = PositionalEncoding(d_model)

        self.transformer_layers = nn.ModuleList([TransformerBlock(d_model, num_heads, hidden_dim) for _ in range(num_layers)])
        self.fc = nn.Linear(d_model, num_classes)

    def forward(self, x):
        x = self.embedding(x)
        x = self.positional_encoding(x)

        for layer in self.transformer_layers:
            x = layer(x)

        x = x.mean(dim=1)  # Global Average Pooling
        return self.fc(x)


In [38]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = TransformerClassifier(vocab_size=30522).to(device)

optimizer = torch.optim.Adam(model.parameters(), lr=3e-4)
criterion = nn.CrossEntropyLoss()

def train_model(model, train_loader, optimizer, criterion, epochs=10):
    model.train()
    for epoch in range(epochs):
        total_loss = 0
        for batch, labels in train_loader:
            batch, labels = batch["input_ids"].to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(batch)

            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            total_loss += loss.item()

        print(f"Epoch {epoch+1}/{epochs}, Loss: {total_loss/len(train_loader):.4f}")

train_model(model, train_loader, optimizer, criterion)
torch.save(model.state_dict(), "transformer_model.pth")


  return {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}, torch.tensor(self.labels[idx])


Epoch 1/10, Loss: 0.6120
Epoch 2/10, Loss: 0.4898
Epoch 3/10, Loss: 0.4519
Epoch 4/10, Loss: 0.4219
Epoch 5/10, Loss: 0.3976
Epoch 6/10, Loss: 0.3781
Epoch 7/10, Loss: 0.3681
Epoch 8/10, Loss: 0.3553
Epoch 9/10, Loss: 0.3639
Epoch 10/10, Loss: 0.3291


In [45]:
import torch
from transformers import AutoTokenizer

# Load model & tokenizer
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = TransformerClassifier(vocab_size=30522).to(device)
model.load_state_dict(torch.load("transformer_model.pth", map_location=device))
model.eval()

tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")

# Prediction function
def predict_sentiment(text):
    encoding = tokenizer(text, padding="max_length", truncation=True, max_length=256, return_tensors="pt")
    input_ids = encoding["input_ids"].to(device)

    with torch.no_grad():
        output = model(input_ids)
        prediction = torch.argmax(output, dim=1).item()

    return "Positive 😊" if prediction == 1 else "Negative 😞"

# Example usage
if __name__ == "__main__":
    text = "The movie was a complete bad "
    print(f"Sentiment: {predict_sentiment(text)}")


  model.load_state_dict(torch.load("transformer_model.pth", map_location=device))


Sentiment: Negative 😞
