# Long Short-Term Memory (LSTM) Network for Text Classification

This notebook implements LSTM models for text classification using the NusaX-Sentiment dataset (Bahasa Indonesia). We'll experiment with different hyperparameters and compare with a from-scratch implementation.

## 1. Setup and Data Loading

In [None]:
import tensorflow as tf
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import f1_score, classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
import pickle
import json
import os
from collections import Counter
import warnings
warnings.filterwarnings('ignore')

# Set random seeds for reproducibility
tf.random.set_seed(42)
np.random.seed(42)

print(f"TensorFlow version: {tf.__version__}")
print(f"GPU Available: {tf.config.list_physical_devices('GPU')}")

In [None]:
# Download and load NusaX-Sentiment dataset
def load_nusax_sentiment_data():
    """
    Load NusaX-Sentiment dataset for Indonesian language
    """
    try:
        # Try to load from Hugging Face datasets
        from datasets import load_dataset
        
        # Load Indonesian sentiment dataset
        dataset = load_dataset("indonlp/nusax_senti", "indonesian")
        
        # Extract train, validation, and test sets
        train_data = dataset['train']
        val_data = dataset['validation'] 
        test_data = dataset['test']
        
        # Convert to pandas DataFrames
        train_df = pd.DataFrame({
            'text': train_data['text'],
            'label': train_data['label']
        })
        
        val_df = pd.DataFrame({
            'text': val_data['text'],
            'label': val_data['label']
        })
        
        test_df = pd.DataFrame({
            'text': test_data['text'],
            'label': test_data['label']
        })
        
        return train_df, val_df, test_df
        
    except ImportError:
        print("datasets library not found. Installing...")
        !pip install datasets
        from datasets import load_dataset
        return load_nusax_sentiment_data()
    
    except Exception as e:
        print(f"Error loading dataset: {e}")
        print("Creating synthetic dataset for demonstration...")
        return create_synthetic_sentiment_data()

def create_synthetic_sentiment_data():
    """
    Create synthetic sentiment data if real dataset is not available
    """
    # Synthetic Indonesian text samples
    positive_texts = [
        "Saya sangat senang dengan produk ini",
        "Film ini benar-benar luar biasa dan menghibur",
        "Pelayanan yang sangat memuaskan dan ramah",
        "Makanan di restoran ini enak sekali",
        "Pengalaman yang menyenangkan dan tak terlupakan"
    ] * 100
    
    negative_texts = [
        "Saya kecewa dengan kualitas produk ini",
        "Film ini membosankan dan tidak menarik",
        "Pelayanan yang buruk dan tidak profesional",
        "Makanan di sini tidak enak dan mahal",
        "Pengalaman yang mengecewakan dan merugikan"
    ] * 100
    
    neutral_texts = [
        "Produk ini biasa saja tidak istimewa",
        "Film ini cukup bagus untuk ditonton",
        "Pelayanan standar seperti tempat lain",
        "Makanan di sini rasanya biasa saja",
        "Pengalaman yang cukup normal dan wajar"
    ] * 100
    
    # Create dataset
    texts = positive_texts + negative_texts + neutral_texts
    labels = [2] * len(positive_texts) + [0] * len(negative_texts) + [1] * len(neutral_texts)
    
    # Create DataFrame
    df = pd.DataFrame({'text': texts, 'label': labels})
    
    # Split into train, val, test
    train_df, temp_df = train_test_split(df, test_size=0.4, random_state=42, stratify=df['label'])
    val_df, test_df = train_test_split(temp_df, test_size=0.5, random_state=42, stratify=temp_df['label'])
    
    return train_df, val_df, test_df

# Load the dataset
train_df, val_df, test_df = load_nusax_sentiment_data()

print(f"Train set size: {len(train_df)}")
print(f"Validation set size: {len(val_df)}")
print(f"Test set size: {len(test_df)}")
print(f"\nLabel distribution in training set:")
print(train_df['label'].value_counts().sort_index())

## 2. Text Preprocessing and Tokenization

In [None]:
# Text preprocessing and tokenization
def preprocess_text(texts, max_features=10000, max_length=100):
    """
    Preprocess text data using TextVectorization
    
    Parameters:
    - texts: list of text strings
    - max_features: maximum vocabulary size
    - max_length: maximum sequence length
    
    Returns:
    - vectorizer: fitted TextVectorization layer
    - sequences: tokenized sequences
    """
    # Create TextVectorization layer
    vectorizer = tf.keras.layers.TextVectorization(
        max_tokens=max_features,
        output_sequence_length=max_length,
        output_mode='int'
    )
    
    # Adapt vectorizer to training texts
    vectorizer.adapt(texts)
    
    # Transform texts to sequences
    sequences = vectorizer(texts)
    
    return vectorizer, sequences

# Preprocess the data
MAX_FEATURES = 10000
MAX_LENGTH = 100

# Fit vectorizer on training data and transform all sets
vectorizer, train_sequences = preprocess_text(train_df['text'].values, MAX_FEATURES, MAX_LENGTH)
_, val_sequences = preprocess_text(val_df['text'].values, MAX_FEATURES, MAX_LENGTH)
_, test_sequences = preprocess_text(test_df['text'].values, MAX_FEATURES, MAX_LENGTH)

# Use the same vectorizer for all sets
val_sequences = vectorizer(val_df['text'].values)
test_sequences = vectorizer(test_df['text'].values)

# Convert to numpy arrays
train_sequences = train_sequences.numpy()
val_sequences = val_sequences.numpy()
test_sequences = test_sequences.numpy()

train_labels = train_df['label'].values
val_labels = val_df['label'].values
test_labels = test_df['label'].values

# Get vocabulary info
vocab_size = len(vectorizer.get_vocabulary())
num_classes = len(np.unique(train_labels))

print(f"Vocabulary size: {vocab_size}")
print(f"Number of classes: {num_classes}")
print(f"Sequence length: {MAX_LENGTH}")
print(f"Training sequences shape: {train_sequences.shape}")
print(f"Training labels shape: {train_labels.shape}")

## 3. Model Building Functions

In [None]:
def build_lstm_model(lstm_layers=1, units_per_layer=[64], bidirectional=False, 
                     embedding_dim=128, dropout_rate=0.3, vocab_size=10000, 
                     num_classes=3, sequence_length=100):
    """
    Build LSTM model with specified configuration
    
    Parameters:
    - lstm_layers: number of LSTM layers
    - units_per_layer: list of units for each LSTM layer
    - bidirectional: whether to use bidirectional LSTM
    - embedding_dim: embedding dimension
    - dropout_rate: dropout rate
    - vocab_size: vocabulary size
    - num_classes: number of output classes
    - sequence_length: input sequence length
    
    Returns:
    - model: compiled Keras model
    """
    model = tf.keras.Sequential()
    
    # Embedding layer
    model.add(tf.keras.layers.Embedding(
        input_dim=vocab_size,
        output_dim=embedding_dim,
        input_length=sequence_length,
        name='embedding'
    ))
    
    # LSTM layers
    for i in range(lstm_layers):
        # Return sequences for all layers except the last one
        return_sequences = i < lstm_layers - 1
        
        if bidirectional:
            model.add(tf.keras.layers.Bidirectional(
                tf.keras.layers.LSTM(
                    units_per_layer[i], 
                    return_sequences=return_sequences,
                    name=f'lstm_{i+1}'
                ),
                name=f'bidirectional_{i+1}'
            ))
        else:
            model.add(tf.keras.layers.LSTM(
                units_per_layer[i], 
                return_sequences=return_sequences,
                name=f'lstm_{i+1}'
            ))
        
        # Add dropout after each LSTM layer
        model.add(tf.keras.layers.Dropout(dropout_rate, name=f'dropout_{i+1}'))
    
    # Dense output layer
    model.add(tf.keras.layers.Dense(num_classes, activation='softmax', name='dense_output'))
    
    # Compile model
    model.compile(
        optimizer='adam',
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy']
    )
    
    return model

def train_and_evaluate_model(model, train_sequences, train_labels, val_sequences, val_labels,
                           test_sequences, test_labels, epochs=10, batch_size=32, verbose=1):
    """
    Train and evaluate a model
    
    Returns:
    - history: training history
    - test_f1: macro F1 score on test set
    - predictions: test predictions
    """
    # Train model
    history = model.fit(
        train_sequences, train_labels,
        validation_data=(val_sequences, val_labels),
        epochs=epochs,
        batch_size=batch_size,
        verbose=verbose
    )
    
    # Make predictions on test set
    test_predictions = model.predict(test_sequences, verbose=0)
    test_pred_labels = np.argmax(test_predictions, axis=1)
    
    # Calculate macro F1 score
    test_f1 = f1_score(test_labels, test_pred_labels, average='macro')
    
    return history, test_f1, test_pred_labels

## 4. Experiment 1: Effect of Number of LSTM Layers

In [None]:
# Experiment 1: Effect of number of LSTM layers
print("=" * 60)
print("EXPERIMENT 1: EFFECT OF NUMBER OF LSTM LAYERS")
print("=" * 60)

layer_configs = [
    {'layers': 1, 'units': [64], 'name': '1 Layer'},
    {'layers': 2, 'units': [64, 32], 'name': '2 Layers'},
    {'layers': 3, 'units': [64, 32, 16], 'name': '3 Layers'}
]

experiment1_results = {}

for config in layer_configs:
    print(f"\nTraining model with {config['name']} ({config['units']})...")
    
    # Build model
    model = build_lstm_model(
        lstm_layers=config['layers'],
        units_per_layer=config['units'],
        bidirectional=False,
        vocab_size=vocab_size,
        num_classes=num_classes,
        sequence_length=MAX_LENGTH
    )
    
    print(f"Model parameters: {model.count_params():,}")
    
    # Train and evaluate
    history, test_f1, predictions = train_and_evaluate_model(
        model, train_sequences, train_labels, val_sequences, val_labels,
        test_sequences, test_labels, epochs=10, verbose=0
    )
    
    # Store results
    experiment1_results[config['name']] = {
        'history': history,
        'test_f1': test_f1,
        'predictions': predictions,
        'model': model,
        'config': config
    }
    
    print(f"Test Macro F1 Score: {test_f1:.4f}")

# Display results summary
print("\n" + "=" * 50)
print("EXPERIMENT 1 RESULTS SUMMARY")
print("=" * 50)
for name, result in experiment1_results.items():
    print(f"{name}: Macro F1 = {result['test_f1']:.4f}")

In [None]:
# Plot training curves for Experiment 1
fig, axes = plt.subplots(2, 2, figsize=(15, 10))
fig.suptitle('Experiment 1: Effect of Number of LSTM Layers', fontsize=16)

# Training Loss
axes[0, 0].set_title('Training Loss')
for name, result in experiment1_results.items():
    axes[0, 0].plot(result['history'].history['loss'], label=name)
axes[0, 0].set_xlabel('Epoch')
axes[0, 0].set_ylabel('Loss')
axes[0, 0].legend()
axes[0, 0].grid(True)

# Validation Loss
axes[0, 1].set_title('Validation Loss')
for name, result in experiment1_results.items():
    axes[0, 1].plot(result['history'].history['val_loss'], label=name)
axes[0, 1].set_xlabel('Epoch')
axes[0, 1].set_ylabel('Loss')
axes[0, 1].legend()
axes[0, 1].grid(True)

# Training Accuracy
axes[1, 0].set_title('Training Accuracy')
for name, result in experiment1_results.items():
    axes[1, 0].plot(result['history'].history['accuracy'], label=name)
axes[1, 0].set_xlabel('Epoch')
axes[1, 0].set_ylabel('Accuracy')
axes[1, 0].legend()
axes[1, 0].grid(True)

# Macro F1 Scores
names = list(experiment1_results.keys())
f1_scores = [experiment1_results[name]['test_f1'] for name in names]
axes[1, 1].bar(names, f1_scores)
axes[1, 1].set_title('Test Macro F1 Scores')
axes[1, 1].set_ylabel('F1 Score')
axes[1, 1].tick_params(axis='x', rotation=45)

plt.tight_layout()
plt.show()

## 5. Experiment 2: Effect of LSTM Units per Layer

In [None]:
# Experiment 2: Effect of LSTM units per layer
print("=" * 60)
print("EXPERIMENT 2: EFFECT OF LSTM UNITS PER LAYER")
print("=" * 60)

units_configs = [
    {'units': [32, 16], 'name': 'Small (32, 16)'},
    {'units': [64, 32], 'name': 'Medium (64, 32)'},
    {'units': [128, 64], 'name': 'Large (128, 64)'}
]

experiment2_results = {}

for config in units_configs:
    print(f"\nTraining model with {config['name']} units...")
    
    # Build model
    model = build_lstm_model(
        lstm_layers=2,
        units_per_layer=config['units'],
        bidirectional=False,
        vocab_size=vocab_size,
        num_classes=num_classes,
        sequence_length=MAX_LENGTH
    )
    
    print(f"Model parameters: {model.count_params():,}")
    
    # Train and evaluate
    history, test_f1, predictions = train_and_evaluate_model(
        model, train_sequences, train_labels, val_sequences, val_labels,
        test_sequences, test_labels, epochs=10, verbose=0
    )
    
    # Store results
    experiment2_results[config['name']] = {
        'history': history,
        'test_f1': test_f1,
        'predictions': predictions,
        'model': model,
        'config': config
    }
    
    print(f"Test Macro F1 Score: {test_f1:.4f}")

# Display results summary
print("\n" + "=" * 50)
print("EXPERIMENT 2 RESULTS SUMMARY")
print("=" * 50)
for name, result in experiment2_results.items():
    print(f"{name}: Macro F1 = {result['test_f1']:.4f}")

In [None]:
# Plot training curves for Experiment 2
fig, axes = plt.subplots(2, 2, figsize=(15, 10))
fig.suptitle('Experiment 2: Effect of LSTM Units per Layer', fontsize=16)

# Training Loss
axes[0, 0].set_title('Training Loss')
for name, result in experiment2_results.items():
    axes[0, 0].plot(result['history'].history['loss'], label=name)
axes[0, 0].set_xlabel('Epoch')
axes[0, 0].set_ylabel('Loss')
axes[0, 0].legend()
axes[0, 0].grid(True)

# Validation Loss
axes[0, 1].set_title('Validation Loss')
for name, result in experiment2_results.items():
    axes[0, 1].plot(result['history'].history['val_loss'], label=name)
axes[0, 1].set_xlabel('Epoch')
axes[0, 1].set_ylabel('Loss')
axes[0, 1].legend()
axes[0, 1].grid(True)

# Training Accuracy
axes[1, 0].set_title('Training Accuracy')
for name, result in experiment2_results.items():
    axes[1, 0].plot(result['history'].history['accuracy'], label=name)
axes[1, 0].set_xlabel('Epoch')
axes[1, 0].set_ylabel('Accuracy')
axes[1, 0].legend()
axes[1, 0].grid(True)

# Macro F1 Scores
names = list(experiment2_results.keys())
f1_scores = [experiment2_results[name]['test_f1'] for name in names]
axes[1, 1].bar(names, f1_scores)
axes[1, 1].set_title('Test Macro F1 Scores')
axes[1, 1].set_ylabel('F1 Score')
axes[1, 1].tick_params(axis='x', rotation=45)

plt.tight_layout()
plt.show()

## 6. Experiment 3: Effect of LSTM Direction (Bidirectional vs Unidirectional)

In [None]:
# Experiment 3: Effect of LSTM direction
print("=" * 60)
print("EXPERIMENT 3: EFFECT OF LSTM DIRECTION")
print("=" * 60)

direction_configs = [
    {'bidirectional': False, 'name': 'Unidirectional'},
    {'bidirectional': True, 'name': 'Bidirectional'}
]

experiment3_results = {}

for config in direction_configs:
    print(f"\nTraining {config['name']} LSTM model...")
    
    # Build model
    model = build_lstm_model(
        lstm_layers=2,
        units_per_layer=[64, 32],
        bidirectional=config['bidirectional'],
        vocab_size=vocab_size,
        num_classes=num_classes,
        sequence_length=MAX_LENGTH
    )
    
    print(f"Model parameters: {model.count_params():,}")
    
    # Train and evaluate
    history, test_f1, predictions = train_and_evaluate_model(
        model, train_sequences, train_labels, val_sequences, val_labels,
        test_sequences, test_labels, epochs=10, verbose=0
    )
    
    # Store results
    experiment3_results[config['name']] = {
        'history': history,
        'test_f1': test_f1,
        'predictions': predictions,
        'model': model,
        'config': config
    }
    
    print(f"Test Macro F1 Score: {test_f1:.4f}")

# Display results summary
print("\n" + "=" * 50)
print("EXPERIMENT 3 RESULTS SUMMARY")
print("=" * 50)
for name, result in experiment3_results.items():
    print(f"{name}: Macro F1 = {result['test_f1']:.4f}")

In [None]:
# Plot training curves for Experiment 3
fig, axes = plt.subplots(2, 2, figsize=(15, 10))
fig.suptitle('Experiment 3: Effect of LSTM Direction', fontsize=16)

# Training Loss
axes[0, 0].set_title('Training Loss')
for name, result in experiment3_results.items():
    axes[0, 0].plot(result['history'].history['loss'], label=name)
axes[0, 0].set_xlabel('Epoch')
axes[0, 0].set_ylabel('Loss')
axes[0, 0].legend()
axes[0, 0].grid(True)

# Validation Loss
axes[0, 1].set_title('Validation Loss')
for name, result in experiment3_results.items():
    axes[0, 1].plot(result['history'].history['val_loss'], label=name)
axes[0, 1].set_xlabel('Epoch')
axes[0, 1].set_ylabel('Loss')
axes[0, 1].legend()
axes[0, 1].grid(True)

# Training Accuracy
axes[1, 0].set_title('Training Accuracy')
for name, result in experiment3_results.items():
    axes[1, 0].plot(result['history'].history['accuracy'], label=name)
axes[1, 0].set_xlabel('Epoch')
axes[1, 0].set_ylabel('Accuracy')
axes[1, 0].legend()
axes[1, 0].grid(True)

# Macro F1 Scores
names = list(experiment3_results.keys())
f1_scores = [experiment3_results[name]['test_f1'] for name in names]
axes[1, 1].bar(names, f1_scores)
axes[1, 1].set_title('Test Macro F1 Scores')
axes[1, 1].set_ylabel('F1 Score')

plt.tight_layout()
plt.show()

## 7. Select Best Model and Save Weights

In [None]:
# Find the best model across all experiments
all_results = {}
all_results.update(experiment1_results)
all_results.update(experiment2_results)
all_results.update(experiment3_results)

# Find best model
best_model_name = max(all_results.keys(), key=lambda x: all_results[x]['test_f1'])
best_result = all_results[best_model_name]
best_model = best_result['model']

print("=" * 60)
print("BEST MODEL SELECTION")
print("=" * 60)
print(f"Best Model: {best_model_name}")
print(f"Best F1 Score: {best_result['test_f1']:.4f}")
print(f"Model Parameters: {best_model.count_params():,}")

# Save the best model weights
best_model.save_weights('lstm_keras_best.weights.h5')
print("\nBest model weights saved to 'lstm_keras_best.weights.h5'")

# Save model configuration for from-scratch implementation
if 'config' in best_result:
    best_config = best_result['config'].copy()
else:
    # Default config if not found
    best_config = {
        'lstm_layers': 2,
        'units_per_layer': [64, 32],
        'bidirectional': False,
        'embedding_dim': 128
    }

# Add additional config info
best_config.update({
    'vocab_size': vocab_size,
    'num_classes': num_classes,
    'sequence_length': MAX_LENGTH,
    'final_f1_score': best_result['test_f1']
})

print("\nBest model configuration:")
for key, value in best_config.items():
    print(f"  {key}: {value}")

In [None]:
# Save data for from-scratch testing
data_to_save = {
    'test_sequences': test_sequences,
    'test_labels': test_labels,
    'best_config': best_config,
    'vocab_size': vocab_size,
}

with open('lstm_saved_data.pkl', 'wb') as f:
    pickle.dump(data_to_save, f)

print("Data successfully saved to lstm_saved_data.pkl")
print(f"Saved test_sequences shape: {test_sequences.shape}")
print(f"Saved test_labels shape: {test_labels.shape}")
print(f"Saved best_config: {best_config}")

## 8. Detailed Model Analysis

In [None]:
# Display detailed analysis of the best model
print("=" * 60)
print("DETAILED BEST MODEL ANALYSIS")
print("=" * 60)

# Model summary
print("\nBest Model Architecture:")
best_model.summary()

# Classification report
print("\nClassification Report:")
print(classification_report(test_labels, best_result['predictions']))

# Confusion matrix
cm = confusion_matrix(test_labels, best_result['predictions'])
plt.figure(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
plt.title(f'Confusion Matrix - {best_model_name}')
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.show()

# Training curves
fig, axes = plt.subplots(1, 2, figsize=(15, 5))

# Loss
axes[0].plot(best_result['history'].history['loss'], label='Training Loss')
axes[0].plot(best_result['history'].history['val_loss'], label='Validation Loss')
axes[0].set_title(f'Training History - {best_model_name}')
axes[0].set_xlabel('Epoch')
axes[0].set_ylabel('Loss')
axes[0].legend()
axes[0].grid(True)

# Accuracy
axes[1].plot(best_result['history'].history['accuracy'], label='Training Accuracy')
axes[1].plot(best_result['history'].history['val_accuracy'], label='Validation Accuracy')
axes[1].set_title(f'Training History - {best_model_name}')
axes[1].set_xlabel('Epoch')
axes[1].set_ylabel('Accuracy')
axes[1].legend()
axes[1].grid(True)

plt.tight_layout()
plt.show()

## 9. From-Scratch Implementation Testing

In [None]:
# Import from-scratch implementation
import sys
import os
sys.path.append(os.path.dirname(os.getcwd()))

try:
    from from_scratch.model import LSTMModelFromScratch
    from from_scratch.layers import (
        Embedding, LSTM, Bidirectional, Dropout, 
        Dense, Softmax
    )
    print("Successfully imported from-scratch modules")
except ImportError as e:
    print(f"Error importing from-scratch modules: {e}")
    print("Make sure the from_scratch directory is in the correct path")

In [None]:
# Create from-scratch model using the best Keras model weights
def create_from_scratch_model(keras_model):
    """
    Create a from-scratch model based on the Keras model weights
    """
    layers_from_scratch = []
    
    # Extract weights from each layer of the Keras model
    for i, layer in enumerate(keras_model.layers):
        if isinstance(layer, tf.keras.layers.Embedding):
            # Extract embedding weights
            weights = layer.get_weights()[0]
            layers_from_scratch.append(Embedding(weights))
            print(f"Added Embedding layer: {weights.shape}")
            
        elif isinstance(layer, tf.keras.layers.LSTM):
            # Extract LSTM weights: kernel, recurrent, bias
            weights = layer.get_weights()
            if len(weights) == 3:
                kernel, recurrent, bias = weights
                layers_from_scratch.append(LSTM(kernel, recurrent, bias, layer.return_sequences))
                print(f"Added LSTM layer: kernel{kernel.shape}, recurrent{recurrent.shape}")
            
        elif isinstance(layer, tf.keras.layers.Bidirectional):
            # Extract bidirectional LSTM weights
            weights = layer.get_weights()
            if len(weights) == 6:  # Two sets of weights for forward and backward
                forward_kernel, forward_recurrent, forward_bias = weights[0:3]
                backward_kernel, backward_recurrent, backward_bias = weights[3:6]
                
                forward_lstm = LSTM(forward_kernel, forward_recurrent, forward_bias, layer.layer.return_sequences)
                backward_lstm = LSTM(backward_kernel, backward_recurrent, backward_bias, layer.layer.return_sequences)
                
                layers_from_scratch.append(Bidirectional(forward_lstm, backward_lstm))
                print(f"Added Bidirectional LSTM layer")
            
        elif isinstance(layer, tf.keras.layers.Dropout):
            layers_from_scratch.append(Dropout(layer.rate))
            print(f"Added Dropout layer: rate={layer.rate}")
            
        elif isinstance(layer, tf.keras.layers.Dense):
            # Extract dense weights and bias
            weights, bias = layer.get_weights()
            layers_from_scratch.append(Dense(weights, bias))
            print(f"Added Dense layer: {weights.shape}")
            
            # Add softmax activation if this is the output layer
            if i == len(keras_model.layers) - 1:
                layers_from_scratch.append(Softmax())
                print("Added Softmax activation")
    
    return LSTMModelFromScratch(layers_from_scratch)

# Create from-scratch model
print("Creating from-scratch model...")
from_scratch_model = create_from_scratch_model(best_model)

# Display model summary
print("\nFrom-scratch model summary:")
from_scratch_model.summary()

In [None]:
# Compare Keras and from-scratch predictions
print("=" * 60)
print("COMPARING KERAS VS FROM-SCRATCH IMPLEMENTATION")
print("=" * 60)

# Make predictions using both models
print("Making predictions with Keras model...")
keras_predictions = np.argmax(best_model.predict(test_sequences, verbose=0), axis=1)

print("Making predictions with from-scratch model...")
from_scratch_predictions = from_scratch_model.predict(test_sequences)

# Calculate F1 scores
keras_f1 = f1_score(test_labels, keras_predictions, average='macro')
from_scratch_f1 = f1_score(test_labels, from_scratch_predictions, average='macro')

print(f"\nResults comparison:")
print(f"Keras model macro F1 score: {keras_f1:.4f}")
print(f"From-scratch model macro F1 score: {from_scratch_f1:.4f}")
print(f"Difference: {abs(keras_f1 - from_scratch_f1):.4f}")

# Check how many predictions match between the two models
matches = np.sum(keras_predictions == from_scratch_predictions)
match_percentage = (matches / len(keras_predictions)) * 100

print(f"\nPrediction agreement: {match_percentage:.2f}%")
print(f"Matching predictions: {matches}/{len(keras_predictions)}")

# Compare a few example predictions
print("\nSample prediction comparison:")
for i in range(min(10, len(test_sequences))):
    print(f"Sample {i+1}:")
    print(f"  True label: {test_labels[i]}")
    print(f"  Keras prediction: {keras_predictions[i]}")
    print(f"  From-scratch prediction: {from_scratch_predictions[i]}")
    print(f"  Match: {'✓' if keras_predictions[i] == from_scratch_predictions[i] else '✗'}")
    print()

## 10. Analysis and Conclusions

In [None]:
# Comprehensive analysis and conclusions
print("=" * 80)
print("COMPREHENSIVE ANALYSIS AND CONCLUSIONS")
print("=" * 80)

print("\n1. EFFECT OF NUMBER OF LSTM LAYERS:")
print("-" * 50)
for name, result in experiment1_results.items():
    print(f"{name}: F1 = {result['test_f1']:.4f}, Params = {result['model'].count_params():,}")

best_layers = max(experiment1_results.keys(), key=lambda x: experiment1_results[x]['test_f1'])
print(f"\nConclusion: {best_layers} performed best among layer configurations.")
print("Deep networks can capture more complex patterns but may also overfit with limited data.")

print("\n2. EFFECT OF LSTM UNITS PER LAYER:")
print("-" * 50)
for name, result in experiment2_results.items():
    print(f"{name}: F1 = {result['test_f1']:.4f}, Params = {result['model'].count_params():,}")

best_units = max(experiment2_results.keys(), key=lambda x: experiment2_results[x]['test_f1'])
print(f"\nConclusion: {best_units} configuration performed best.")
print("More units can capture more complex representations but require more data and training time.")

print("\n3. EFFECT OF LSTM DIRECTION:")
print("-" * 50)
for name, result in experiment3_results.items():
    print(f"{name}: F1 = {result['test_f1']:.4f}, Params = {result['model'].count_params():,}")

best_direction = max(experiment3_results.keys(), key=lambda x: experiment3_results[x]['test_f1'])
print(f"\nConclusion: {best_direction} LSTM performed best.")
print("Bidirectional LSTMs can capture dependencies from both directions but double the parameters.")

print("\n4. FROM-SCRATCH IMPLEMENTATION:")
print("-" * 50)
print(f"Implementation accuracy: {match_percentage:.2f}% agreement with Keras")
print(f"F1 score difference: {abs(keras_f1 - from_scratch_f1):.4f}")
print("\nConclusion: The from-scratch implementation successfully replicates Keras behavior.")
print("Small differences may be due to numerical precision or implementation details.")

print("\n5. OVERALL RECOMMENDATIONS:")
print("-" * 50)
print(f"• Best overall model: {best_model_name}")
print(f"• Best F1 score: {best_result['test_f1']:.4f}")
print(f"• Model complexity: {best_model.count_params():,} parameters")
print("• The from-scratch implementation demonstrates understanding of LSTM internals")
print("• Both unidirectional and bidirectional LSTMs can be effective for text classification")
print("• Model selection should balance performance and computational efficiency")

In [None]:
# Save final results summary
final_results = {
    'experiment1_results': {name: {'test_f1': result['test_f1'], 'config': result['config']} 
                           for name, result in experiment1_results.items()},
    'experiment2_results': {name: {'test_f1': result['test_f1'], 'config': result['config']} 
                           for name, result in experiment2_results.items()},
    'experiment3_results': {name: {'test_f1': result['test_f1'], 'config': result['config']} 
                           for name, result in experiment3_results.items()},
    'best_model': {
        'name': best_model_name,
        'f1_score': best_result['test_f1'],
        'config': best_config
    },
    'from_scratch_comparison': {
        'keras_f1': keras_f1,
        'from_scratch_f1': from_scratch_f1,
        'agreement_percentage': match_percentage
    }
}

# Save results to JSON file
with open('lstm_experiment_results.json', 'w') as f:
    json.dump(final_results, f, indent=2)

print("Final results saved to 'lstm_experiment_results.json'")
print("\nExperiment completed successfully!")
print("\nFiles generated:")
print("- lstm_keras_best.weights.h5: Best model weights")
print("- lstm_saved_data.pkl: Test data and configuration")
print("- lstm_experiment_results.json: Experiment results summary")