# Model Architectures

## LSTM (Long Short-Term Memory) Networks Basics

In [None]:
import torch
import torch.nn as nn
import numpy as np

# Define the LSTM model
class LSTMModel(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, output_size):
        super(LSTMModel, self).__init__()
        self.hidden_size = hidden_size # number of features in the hidden state
        self.num_layers = num_layers # number of LSTM layers
        
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True) # batch_first=True means that the first dimension of the input and output tensors is the batch dimension
        self.fc = nn.Linear(hidden_size, output_size) # fully connected layer
        
    def forward(self, x):
        # Initialize hidden and cell states
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size)
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size)
        
        # Forward propagate LSTM
        out, _ = self.lstm(x, (h0, c0))
        
        # Decode the hidden state of the last time step
        out = self.fc(out[:, -1, :])
        return out

# Example usage for sequence prediction
input_size = 10
hidden_size = 20
num_layers = 2
output_size = 1
seq_length = 30
batch_size = 32

# Initialize the model
model = LSTMModel(input_size, hidden_size, num_layers, output_size)

# Generate a sample input tensor
sample_input = torch.randn(batch_size, seq_length, input_size)

# Compute the output
output = model(sample_input)
print(f"LSTM output shape: {output.shape}")

LSTM output shape: torch.Size([32, 1])


## NLP Sentiment Analysis Example with LSTM

In [None]:
import torch.nn.functional as F
from collections import Counter
import re

# Define the LSTM model
class SentimentLSTM(nn.Module):
    # Set the parameters for the LSTM model
    def __init__(self, vocab_size, embedding_dim, hidden_size, num_layers, output_size, dropout=0.3):
        super(SentimentLSTM, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        
        # Embedding layer to convert word indices to dense vectors
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        
        # LSTM layer
        self.lstm = nn.LSTM(embedding_dim, hidden_size, num_layers, 
                           batch_first=True, dropout=dropout if num_layers > 1 else 0)
        
        # Dropout for regularization
        self.dropout = nn.Dropout(dropout)
        
        # Output layer for classification
        self.fc = nn.Linear(hidden_size, output_size)
        
    def forward(self, x):
        # Convert word indices to embeddings
        embedded = self.embedding(x)
        
        # LSTM forward pass
        lstm_out, (hidden, cell) = self.lstm(embedded)
        
        # Use the last hidden state for classification
        last_hidden = lstm_out[:, -1, :]
        
        # Apply dropout
        output = self.dropout(last_hidden)
        
        # Final classification layer
        output = self.fc(output)
        
        return output


In [4]:
# Text preprocessing utilities
class TextPreprocessor:
    def __init__(self):
        self.word_to_idx = {}
        self.idx_to_word = {}
        self.vocab_size = 0
        
    def build_vocab(self, texts, min_freq=2):
        """Build vocabulary from training texts"""
        # Tokenize and count words
        word_counts = Counter()
        for text in texts:
            tokens = self.tokenize(text)
            word_counts.update(tokens)
        
        # Build vocabulary (words that appear at least min_freq times)
        self.word_to_idx = {'<PAD>': 0, '<UNK>': 1}
        self.idx_to_word = {0: '<PAD>', 1: '<UNK>'}
        idx = 2
        
        for word, count in word_counts.items():
            if count >= min_freq:
                self.word_to_idx[word] = idx
                self.idx_to_word[idx] = word
                idx += 1
        
        self.vocab_size = len(self.word_to_idx)
        
    def tokenize(self, text):
        """Simple tokenization (can be enhanced with NLTK/spaCy)"""
        text = re.sub(r'[^a-zA-Z\s]', '', text.lower())
        return text.split()
    
    def text_to_sequence(self, text, max_length=100):
        """Convert text to sequence of word indices"""
        tokens = self.tokenize(text)
        # Convert words to indices
        sequence = [self.word_to_idx.get(word, self.word_to_idx['<UNK>']) for word in tokens]
        
        # Pad or truncate to max_length
        if len(sequence) > max_length:
            sequence = sequence[:max_length]
        else:
            sequence.extend([self.word_to_idx['<PAD>']] * (max_length - len(sequence)))
            
        return torch.tensor(sequence, dtype=torch.long)


In [5]:
# Sample dataset for sentiment analysis
sample_texts = [
    "I love this movie, it's absolutely amazing!",
    "This film is terrible, worst I've ever seen.",
    "Great acting and wonderful storyline.",
    "Boring and predictable plot.",
    "Fantastic cinematography and excellent performances!",
    "Waste of time, very disappointing.",
    "I really enjoyed watching this.",
    "Not worth the money, very bad quality.",
    "Outstanding movie with brilliant direction.",
    "Poor script and bad acting.",
    "Incredible story that kept me engaged throughout.",
    "Completely overrated and boring.",
    "Beautiful visuals and amazing soundtrack.",
    "Terrible dialogue and poor character development.",
    "One of the best films I've ever watched!",
    "Disappointed with the ending, very confusing.",
    "Excellent cast and brilliant performances.",
    "Wasted two hours of my life on this garbage.",
    "Masterpiece of modern cinema!",
    "Absolutely horrible, avoid at all costs."
]

# Labels: 1 for positive, 0 for negative
sample_labels = [1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0]

print(f"Dataset size: {len(sample_texts)} samples")
print(f"Positive samples: {sum(sample_labels)}")
print(f"Negative samples: {len(sample_labels) - sum(sample_labels)}")

# Initialize preprocessor and build vocabulary
preprocessor = TextPreprocessor()
preprocessor.build_vocab(sample_texts, min_freq=1)  # Lower threshold for small dataset

print(f"Vocabulary size: {preprocessor.vocab_size}")
print("Sample vocabulary:", list(preprocessor.word_to_idx.keys())[:10])

Dataset size: 20 samples
Positive samples: 10
Negative samples: 10
Vocabulary size: 86
Sample vocabulary: ['<PAD>', '<UNK>', 'i', 'love', 'this', 'movie', 'its', 'absolutely', 'amazing', 'film']


In [6]:
# Convert texts to sequences and prepare data
max_length = 20
sequences = torch.stack([preprocessor.text_to_sequence(text, max_length) for text in sample_texts])
labels = torch.tensor(sample_labels, dtype=torch.long)

print(f"Sequences shape: {sequences.shape}")
print(f"Labels shape: {labels.shape}")
print(f"Sample sequence: {sequences[0]}")
print(f"Sample text: '{sample_texts[0]}'")

# Model parameters for sentiment analysis
vocab_size = preprocessor.vocab_size
embedding_dim = 100
hidden_size = 128
num_layers = 2
output_size = 2  # binary classification (positive/negative)

# Initialize sentiment model
sentiment_model = SentimentLSTM(vocab_size, embedding_dim, hidden_size, num_layers, output_size)

print(f"\nModel Architecture:")
print(f"- Vocabulary size: {vocab_size}")
print(f"- Embedding dimension: {embedding_dim}")
print(f"- Hidden size: {hidden_size}")
print(f"- Number of layers: {num_layers}")
print(f"- Output classes: {output_size}")
print(f"- Total parameters: {sum(p.numel() for p in sentiment_model.parameters()):,}")

Sequences shape: torch.Size([20, 20])
Labels shape: torch.Size([20])
Sample sequence: tensor([2, 3, 4, 5, 6, 7, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
Sample text: 'I love this movie, it's absolutely amazing!'

Model Architecture:
- Vocabulary size: 86
- Embedding dimension: 100
- Hidden size: 128
- Number of layers: 2
- Output classes: 2
- Total parameters: 258,714


In [10]:
# Save the sentiment model (state dict is preferred for PyTorch models)
torch.save(sentiment_model.state_dict(), "sentiment_model.pth")

In [7]:
# Training setup
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(sentiment_model.parameters(), lr=0.001)

def train_model(model, sequences, labels, epochs=200, print_every=50):
    """Train the sentiment analysis model"""
    model.train()
    training_losses = []
    
    for epoch in range(epochs):
        optimizer.zero_grad()
        
        # Forward pass
        outputs = model(sequences)
        loss = criterion(outputs, labels)
        
        # Backward pass
        loss.backward()
        optimizer.step()
        
        training_losses.append(loss.item())
        
        if (epoch + 1) % print_every == 0:
            # Calculate accuracy
            with torch.no_grad():
                predictions = torch.argmax(outputs, dim=1)
                accuracy = (predictions == labels).float().mean().item()
                print(f'Epoch [{epoch+1}/{epochs}], Loss: {loss.item():.4f}, Accuracy: {accuracy:.4f}')
    
    return model, training_losses

# Train the model
print("Training the sentiment analysis model...")
print("=" * 50)
trained_model, losses = train_model(sentiment_model, sequences, labels)
print("=" * 50)
print("Training completed!")


Training the sentiment analysis model...
Epoch [50/200], Loss: 0.6615, Accuracy: 0.6000
Epoch [100/200], Loss: 0.0025, Accuracy: 1.0000
Epoch [150/200], Loss: 0.0003, Accuracy: 1.0000
Epoch [200/200], Loss: 0.0002, Accuracy: 1.0000
Training completed!


In [8]:
# Evaluation and prediction functions
def predict_sentiment(model, preprocessor, text, max_length=20):
    """Predict sentiment for a single text"""
    model.eval()
    with torch.no_grad():
        sequence = preprocessor.text_to_sequence(text, max_length).unsqueeze(0)  # Add batch dimension
        output = model(sequence)
        probabilities = F.softmax(output, dim=1)
        predicted_class = torch.argmax(probabilities, dim=1).item()
        confidence = probabilities[0][predicted_class].item()
        
        sentiment = "Positive" if predicted_class == 1 else "Negative"
        return sentiment, confidence, probabilities[0].tolist()

def evaluate_model(model, sequences, labels):
    """Evaluate model performance on given data"""
    model.eval()
    with torch.no_grad():
        outputs = model(sequences)
        predictions = torch.argmax(outputs, dim=1)
        accuracy = (predictions == labels).float().mean().item()
        
        # Calculate per-class metrics
        positive_correct = ((predictions == 1) & (labels == 1)).sum().item()
        positive_total = (labels == 1).sum().item()
        negative_correct = ((predictions == 0) & (labels == 0)).sum().item()
        negative_total = (labels == 0).sum().item()
        
        positive_accuracy = positive_correct / positive_total if positive_total > 0 else 0
        negative_accuracy = negative_correct / negative_total if negative_total > 0 else 0
        
        return {
            'overall_accuracy': accuracy,
            'positive_accuracy': positive_accuracy,
            'negative_accuracy': negative_accuracy,
            'predictions': predictions.tolist(),
            'probabilities': F.softmax(outputs, dim=1).tolist()
        }

# Evaluate on training data
eval_results = evaluate_model(trained_model, sequences, labels)
print(f"Training Set Performance:")
print(f"- Overall Accuracy: {eval_results['overall_accuracy']:.4f}")
print(f"- Positive Class Accuracy: {eval_results['positive_accuracy']:.4f}")
print(f"- Negative Class Accuracy: {eval_results['negative_accuracy']:.4f}")


Training Set Performance:
- Overall Accuracy: 1.0000
- Positive Class Accuracy: 1.0000
- Negative Class Accuracy: 1.0000


In [11]:
# Test the model with new examples
test_texts = [
    "This movie is absolutely fantastic and incredible!",
    "I hate this boring and terrible film.",
    "Not bad, could be better though.",
    "Amazing story with great characters and wonderful acting!",
    "The worst movie I have ever seen in my entire life.",
    "Pretty good, I enjoyed it overall.",
    "Mediocre film with some decent moments.",
    "Brilliant masterpiece that everyone should watch!",
    "Complete garbage, total waste of time.",
    "An okay movie, nothing special but watchable.",
    "I'm not sure about this one, it's just okay.",
    "This movie is a masterpiece, I'm blown away!",
    "Terrible, I'm so disappointed.",
    "I'm not sure what to think about this one.",
]

print("=" * 70)
print("SENTIMENT ANALYSIS PREDICTIONS")
print("=" * 70)

for i, text in enumerate(test_texts, 1):
    sentiment, confidence, probs = predict_sentiment(trained_model, preprocessor, text)
    neg_prob, pos_prob = probs
    
    print(f"{i:2d}. Text: '{text}'")
    print(f"    Prediction: {sentiment} (Confidence: {confidence:.3f})")
    print(f"    Probabilities: Negative={neg_prob:.3f}, Positive={pos_prob:.3f}")
    print("-" * 70)


SENTIMENT ANALYSIS PREDICTIONS
 1. Text: 'This movie is absolutely fantastic and incredible!'
    Prediction: Positive (Confidence: 1.000)
    Probabilities: Negative=0.000, Positive=1.000
----------------------------------------------------------------------
 2. Text: 'I hate this boring and terrible film.'
    Prediction: Positive (Confidence: 1.000)
    Probabilities: Negative=0.000, Positive=1.000
----------------------------------------------------------------------
 3. Text: 'Not bad, could be better though.'
    Prediction: Negative (Confidence: 1.000)
    Probabilities: Negative=1.000, Positive=0.000
----------------------------------------------------------------------
 4. Text: 'Amazing story with great characters and wonderful acting!'
    Prediction: Positive (Confidence: 1.000)
    Probabilities: Negative=0.000, Positive=1.000
----------------------------------------------------------------------
 5. Text: 'The worst movie I have ever seen in my entire life.'
    Predictio

### Key Improvements for Real-World Applications

1. **Larger Dataset**: Use thousands of samples instead of 20
2. **Pre-trained Embeddings**: Use GloVe, Word2Vec, or FastText embeddings
3. **Advanced Preprocessing**: Use NLTK/spaCy for tokenization, stemming, lemmatization
4. **Train/Validation Split**: Proper data splitting for model evaluation
5. **Bidirectional LSTM**: Process sequences in both directions
6. **Attention Mechanism**: Focus on important words
7. **Regularization**: Add more dropout, weight decay, early stopping
8. **Hyperparameter Tuning**: Grid search for optimal parameters

### Applications of LSTM in NLP:
- **Sentiment Analysis**: Customer reviews, social media posts
- **Language Modeling**: Next word prediction, text generation
- **Machine Translation**: Sequence-to-sequence models
- **Named Entity Recognition**: Identifying entities in text
- **Text Summarization**: Extracting key information
- **Question Answering**: Understanding and responding to queries
- **Speech Recognition**: Converting audio to text
