# Sentiment Analysis

## Using build-from-scratch model and libraries

In [40]:
from bert import BertForSequenceClassification

In [41]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm
from typing import Dict, List, Tuple, Optional
import random

## Configuration

In [42]:
class Config:
    # Model settings for our custom BERT
    hidden_size = 256  # Smaller for demonstration
    num_hidden_layers = 4
    num_attention_heads = 8
    intermediate_size = 1024
    max_length = 128
    num_labels = 2
    
    # Training settings
    batch_size = 16
    learning_rate = 2e-5
    num_epochs = 3
    warmup_steps = 100
    
    # Device
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

config = Config()
print(f"Using device: {config.device}")

Using device: cpu


## Initialize customized BERT model

In [43]:
# Create config for custom BERT
bert_config = BertConfig(
    vocab_size=30522,  # This should match tokenizer's vocab_size
    hidden_size=config.hidden_size,
    num_hidden_layers=config.num_hidden_layers,
    num_attention_heads=config.num_attention_heads,
    intermediate_size=config.intermediate_size,
    max_position_embeddings=config.max_length
)

print("Initializing custom BERT model...")

Initializing custom BERT model...


In [44]:
class CustomBertForSentiment(nn.Module):
    """
    Wrapper to use custom BERT for sentiment analysis
    """
    def __init__(self, config: BertConfig, num_labels: int):
        super().__init__()
        
        self.bert_classifier = BertForSequenceClassification(config, num_labels)
        
        print(f"Custom BERT initialized with:")
        print(f"  Hidden size: {config.hidden_size}")
        print(f"  Layers: {config.num_hidden_layers}")
        print(f"  Attention heads: {config.num_attention_heads}")
        print(f"  Vocab size: {config.vocab_size}")
        print(f"  Max position: {config.max_position_embeddings}")
        
    def forward(self, input_ids, attention_mask=None, token_type_ids=None, labels=None):
        self.bert_classifier.forward()
        

## Dataset class

In [45]:
class SentimentDataset(Dataset):
    """Dataset compatible with our custom BERT implementation"""
    
    def __init__(self, texts: List[str], labels: List[int], tokenizer, max_length: int):
        self.texts = texts
        self.labels = labels
        self.tokenizer = tokenizer
        self.max_length = max_length
    
    def __len__(self):
        return len(self.texts)
    
    def __getitem__(self, idx):
        text = str(self.texts[idx])
        label = self.labels[idx]
        
        # Tokenize in a way compatible with our custom BERT
        encoding = self.tokenizer.encode_plus(
            text,
            add_special_tokens=True,
            max_length=self.max_length,
            padding='max_length',
            truncation=True,
            return_tensors='pt'
        )
        
        return {
            'input_ids': encoding['input_ids'].flatten(),
            'attention_mask': encoding['attention_mask'].flatten(),
            'token_type_ids': encoding.get('token_type_ids', torch.zeros_like(encoding['input_ids'])).flatten(),
            'labels': torch.tensor(label, dtype=torch.long)
        }

## Training functions

In [46]:
# helper function
def get_cosine_schedule_with_warmup(optimizer, num_warmup_steps, num_training_steps):
    """Cosine learning rate schedule with warmup"""
    def lr_lambda(current_step):
        if current_step < num_warmup_steps:
            return float(current_step) / float(max(1, num_warmup_steps))
        progress = float(current_step - num_warmup_steps) / float(max(1, num_training_steps - num_warmup_steps))
        return max(0.0, 0.5 * (1.0 + np.cos(np.pi * progress)))
    

In [47]:
def train_custom_bert(model, train_loader, val_loader, config, num_epochs=3):
    """Training loop specifically for our custom BERT implementation"""
    
    # Optimizer setup
    optimizer = torch.optim.AdamW(model.parameters(), lr=config.learning_rate)
    
    # Learning rate scheduler
    total_steps = len(train_loader) * num_epochs
    scheduler = get_cosine_schedule_with_warmup(
        optimizer, 
        num_warmup_steps=config.warmup_steps,
        num_training_steps=total_steps
    )
    
    train_losses = []
    val_losses = []
    
    for epoch in range(num_epochs):
        print(f"\nEpoch {epoch + 1}/{num_epochs}")
        
        # Training
        model.train()
        total_train_loss = 0
        
        for batch in tqdm(train_loader, desc="Training"):
            # Move to device
            input_ids = batch['input_ids'].to(config.device)
            attention_mask = batch['attention_mask'].to(config.device)
            token_type_ids = batch['token_type_ids'].to(config.device)
            labels = batch['labels'].to(config.device)
            
            # Forward pass using our custom BERT
            outputs = model(
                input_ids=input_ids,
                attention_mask=attention_mask,
                token_type_ids=token_type_ids,
                labels=labels
            )
            
            loss = outputs['loss']
            
            # Backward pass
            optimizer.zero_grad()
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
            optimizer.step()
            scheduler.step()
            
            total_train_loss += loss.item()
        
        avg_train_loss = total_train_loss / len(train_loader)
        train_losses.append(avg_train_loss)
        
        # Validation
        model.eval()
        total_val_loss = 0
        predictions = []
        true_labels = []
        
        with torch.no_grad():
            for batch in tqdm(val_loader, desc="Validation"):
                input_ids = batch['input_ids'].to(config.device)
                attention_mask = batch['attention_mask'].to(config.device)
                token_type_ids = batch['token_type_ids'].to(config.device)
                labels = batch['labels'].to(config.device)
                
                outputs = model(
                    input_ids=input_ids,
                    attention_mask=attention_mask,
                    token_type_ids=token_type_ids,
                    labels=labels
                )
                
                loss = outputs['loss']
                logits = outputs['logits']
                
                total_val_loss += loss.item()
                
                preds = torch.argmax(logits, dim=-1)
                predictions.extend(preds.cpu().numpy())
                true_labels.extend(labels.cpu().numpy())
        
        avg_val_loss = total_val_loss / len(val_loader)
        val_losses.append(avg_val_loss)
        
        val_accuracy = accuracy_score(true_labels, predictions)
        
        print(f"Train Loss: {avg_train_loss:.4f}")
        print(f"Val Loss: {avg_val_loss:.4f}")
        print(f"Val Accuracy: {val_accuracy:.4f}")
    
    return train_losses, val_losses

## Pipeline

In [48]:
def load_pretrained_weights_to_custom_bert(custom_model, pretrained_model_name='bert-base-uncased'):
    """
    Shows how to load pre-trained weights into our custom BERT implementation
    This bridges the gap between our implementation and existing weights
    """
    
    print("\nLoading pre-trained weights into custom BERT...")
    
    # Load pre-trained model
    from transformers import BertModel
    pretrained = BertModel.from_pretrained(pretrained_model_name)
    
    # Map the weights
    custom_model.bert.embeddings.word_embeddings.weight = pretrained.embeddings.word_embeddings.weight
    custom_model.bert.embeddings.position_embeddings.weight = pretrained.embeddings.position_embeddings.weight
    custom_model.bert.embeddings.token_type_embeddings.weight = pretrained.embeddings.token_type_embeddings.weight
    custom_model.bert.embeddings.LayerNorm.weight = pretrained.embeddings.LayerNorm.weight
    custom_model.bert.embeddings.LayerNorm.bias = pretrained.embeddings.LayerNorm.bias
    
    for i in range(config.num_hidden_layers):
        custom_model.bert.encoder.layer[i].attention.self.query.weight = pretrained.encoder.layer[i].attention.self.query.weight
        custom_model.bert.encoder.layer[i].attention.self.key.weight = pretrained.encoder.layer[i].attention.self.key.weight
        custom_model.bert.encoder.layer[i].attention.self.value.weight = pretrained.encoder.layer[i].attention.self.value.weight
        custom_model.bert.encoder.layer[i].attention.output.dense.weight = pretrained.encoder.layer[i].attention.output.dense.weight
        custom_model.bert.encoder.layer[i].attention.output.LayerNorm.weight = pretrained.encoder.layer[i].attention.output.LayerNorm.weight
        custom_model.bert.encoder.layer[i].attention.output.LayerNorm.bias = pretrained.encoder.layer[i].attention.output.LayerNorm.bias
        custom_model.bert.encoder.layer[i].intermediate.dense.weight = pretrained.encoder.layer[i].intermediate.dense.weight
        custom_model.bert.encoder.layer[i].output.dense.weight = pretrained.encoder.layer[i].output.dense.weight
        custom_model.bert.encoder.layer[i].output.LayerNorm.weight = pretrained.encoder.layer[i].output.LayerNorm.weight
        custom_model.bert.encoder.layer[i].output.LayerNorm.bias = pretrained.encoder.layer[i].output.LayerNorm.bias
    
    custom_model.bert.pooler.dense.weight = pretrained.pooler.dense.weight
    custom_model.bert.pooler.dense.bias = pretrained.pooler.dense.bias
    
    print("Weights loaded successfully!")

    return custom_model

In [49]:
def demonstrate_custom_bert_usage():
    """
    This function demonstrates how to use the custom BERT implementation
    from bert.ipynb for a real task
    """
    
    print("\n" + "="*60)
    print("USING CUSTOM BERT IMPLEMENTATION FOR SENTIMENT ANALYSIS")
    print("="*60)
    
    # 1. Load the custom BERT model
    print("\n1. Loading custom BERT model from bert.ipynb...")
    
    # You would actually do:
    # from bert import BertForSequenceClassification, BertConfig
    # model = BertForSequenceClassification(bert_config, num_labels=2)
    
    # 2. Prepare data
    print("\n2. Preparing sentiment data...")
    
    # Sample data
    texts = [
        "This movie was fantastic! Best I've seen all year.",
        "Terrible film. Complete waste of time.",
        "Not bad, had some good moments.",
        "Absolutely loved every minute of it!",
        "Boring and predictable. Would not recommend."
    ] * 20  # Replicate for larger dataset
    
    labels = [1, 0, 1, 1, 0] * 20  # 1: positive, 0: negative
    
    # 3. Create custom tokenizer or use BERT tokenizer
    print("\n3. Setting up tokenizer...")
    from transformers import BertTokenizer
    tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
    
    # 4. Create datasets
    print("\n4. Creating datasets...")
    X_train, X_val, y_train, y_val = train_test_split(texts, labels, test_size=0.2, random_state=42)
    
    train_dataset = SentimentDataset(X_train, y_train, tokenizer, config.max_length)
    val_dataset = SentimentDataset(X_val, y_val, tokenizer, config.max_length)
    
    train_loader = DataLoader(train_dataset, batch_size=config.batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=config.batch_size, shuffle=False)
    
    # 5. Initialize model
    print("\n5. Initializing custom BERT for sentiment classification...")
    model = CustomBertForSentiment(bert_config, num_labels=2)
    
    # 6. Training would happen here
    print("\n6. Training process (demonstration only)...")
    print("   In practice, you would:")
    print("   - Load your BertForSequenceClassification from this file")
    print("   - Train it using the train_custom_bert function")
    print("   - Monitor loss and accuracy")
    # load BertForSequenceClassification 
    model = BertForSequenceClassification(bert_config, num_labels=2)
    # load pretrained weights
    model = load_pretrained_weights_to_custom_bert(model, bert_config)
    
    # train the model
    train_custom_bert(model, train_loader, val_loader, config)
    
    # 7. Inference example
    print("\n7. Inference with custom BERT:")
    print("   Once trained, you can use it like:")
    print("   ```python")
    print("   model.eval()")
    print("   with torch.no_grad():")
    print("       outputs = model(input_ids, attention_mask, token_type_ids)")
    print("       predictions = torch.argmax(outputs['logits'], dim=-1)")
    print("   ```")
    
    model.eval()
    with torch.no_grad():
        outputs = model(input_ids, attention_mask, token_type_ids)
        predictions = torch.argmax(outputs['logits'], dim=-1)
    print(predictions)
    

    


In [50]:
if __name__ == "__main__":
    demonstrate_custom_bert_usage()
    
    print("\n" + "="*60)
    print("KEY INTEGRATION POINTS:")
    print("="*60)
    print("\n1. Import your custom BERT classes from bert.ipynb")
    print("2. Initialize BertConfig with your desired settings")
    print("3. Create BertForSequenceClassification with num_labels=2")
    print("4. Use the same training loop as shown above")
    print("5. The model architecture is YOUR implementation, not Hugging Face's")
    
    print("\nTo fully integrate:")
    print("- Save bert.ipynb as bert.py")
    print("- Import: from bert import BertForSequenceClassification, BertConfig")
    print("- Use exactly as shown in this demonstration")


USING CUSTOM BERT IMPLEMENTATION FOR SENTIMENT ANALYSIS

1. Loading custom BERT model from bert.ipynb...

2. Preparing sentiment data...

3. Setting up tokenizer...

4. Creating datasets...

5. Initializing custom BERT for sentiment classification...
Custom BERT initialized with:
  Hidden size: 256
  Layers: 4
  Attention heads: 8
  Vocab size: 30522
  Max position: 128

6. Training process (demonstration only)...
   In practice, you would:
   - Load your BertForSequenceClassification from this file
   - Train it using the train_custom_bert function
   - Monitor loss and accuracy

Loading pre-trained weights into custom BERT...


HFValidationError: Repo id must use alphanumeric chars or '-', '_', '.', '--' and '..' are forbidden, '-' and '.' cannot start or end the name, max length is 96: 'BertConfig(vocab_size=30522, hidden_size=256, num_hidden_layers=4, num_attention_heads=8, intermediate_size=1024, hidden_dropout_prob=0.1, attention_probs_dropout_prob=0.1, max_position_embeddings=128, type_vocab_size=2, initializer_range=0.02, layer_norm_eps=1e-12)'.