# LSTM From Scratch Testing
This notebook tests the LSTM "from scratch" implementation by comparing it with the Keras implementation. We'll compare the predictions from both implementations and verify that they produce similar results.

## How to Run This Notebook

1. **First, run the `lstm_keras_training.ipynb` notebook** to train the LSTM model and save the weights

2. After training is complete, the last cell in `lstm_keras_training.ipynb` should save data to `lstm_saved_data.pkl`

3. Make sure the following files exist:
   - `lstm_keras_best.weights.h5`: The saved weights from the best model
   - `lstm_saved_data.pkl`: Saved test data and model configuration
   
4. This notebook will:
   - Load the saved model configuration and weights
   - Create a from-scratch implementation using the same weights
   - Compare the predictions from both implementations
   - Calculate the agreement percentage between both models
   
If the saved data is not found, this notebook will create synthetic data for testing purposes.

In [23]:
# Import Required Libraries
import tensorflow as tf
from tensorflow import keras
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import f1_score, confusion_matrix, classification_report
import sys
import os
import pickle
import pandas as pd
import seaborn as sns

# Setup path to access modules
sys.path.append(os.path.dirname(os.getcwd()))
print(f"Working directory: {os.getcwd()}")

# Install any missing packages if needed
try:
    import tensorflow as tf
except ImportError:
    print("Installing TensorFlow...")
    !pip install tensorflow

# Import our from_scratch implementation
try:
    from lstm.from_scratch.model import LSTMModelFromScratch
    from lstm.from_scratch.layers import (
        Embedding, LSTM, Bidirectional, Dropout, 
        Dense, Softmax
    )
    print("Successfully imported from_scratch modules")
except ImportError as e:
    print(f"Error importing from_scratch modules: {e}")
    print("Check that the path is correct and __init__.py exists")

Working directory: c:\Users\User\Documents\Semester6\ML\tubes2_if3270_ml\src\lstm
Successfully imported from_scratch modules


In [None]:
# Helper functions to create synthetic data if needed
def create_synthetic_data(vocab_size=1000, seq_length=100, num_samples=100, num_classes=5):
    """
    Create synthetic text classification data for testing
    """
    # Generate random token sequences
    sequences = np.random.randint(1, vocab_size, size=(num_samples, seq_length))
    # Generate random labels
    labels = np.random.randint(0, num_classes, size=num_samples)
    
    # Create simplified model config
    config = {
        'lstm_layers': 2,
        'units_per_layer': [64, 64],
        'bidirectional': False,
        'embedding_dim': 32
    }
    
    return sequences, labels, config, vocab_size

# Load the trained Keras model and test data
def load_model_and_data():
    """
    Load the best Keras model and the test data
    """
    # Find the model weights file
    possible_weight_paths = [
        "lstm_keras_best.weights.h5",  # Current directory
        "../lstm_keras_best.weights.h5",  # One level up
        "../lstm/lstm_keras_best.weights.h5",  # In lstm directory  
    ]
    
    weights_path = None
    for path in possible_weight_paths:
        if os.path.exists(path):
            weights_path = path
            print(f"Found weights file: {weights_path}")
            break
    
    # Look for saved data pickle file
    possible_data_paths = [
        "lstm_saved_data.pkl",  # Current directory
        "../lstm_saved_data.pkl",  # One level up
        "../lstm/lstm_saved_data.pkl",  # In lstm directory
    ]
    
    data_path = None
    for path in possible_data_paths:
        if os.path.exists(path):
            data_path = path
            print(f"Found saved data: {data_path}")
            break
    
    try:
        # If we found a data file, load it
        if data_path:
            with open(data_path, 'rb') as f:
                saved_data = pickle.load(f)
            
            test_sequences = saved_data['test_sequences']
            test_labels = saved_data['test_labels']
            best_config = saved_data['best_config']
            vocab_size = saved_data['vocab_size']
            
            print(f"Loaded saved data: {len(test_sequences)} test sequences, vocab_size={vocab_size}")
        else:
            # If no data file, create synthetic data
            print("No saved data found. Creating synthetic data for testing...")
            test_sequences, test_labels, best_config, vocab_size = create_synthetic_data()
            print("Using synthetic data:", test_sequences.shape, test_labels.shape)
        
        # Define build_lstm_model function locally
        def build_lstm_model(lstm_layers, units_per_layer, bidirectional, vocab_size,
                           embedding_dim=128, num_classes=5, sequence_length=100):
            """
            Create a simple LSTM model with the specified configuration
            """
            model = tf.keras.Sequential()
            
            # Add embedding layer
            model.add(tf.keras.layers.Embedding(
                input_dim=vocab_size,
                output_dim=embedding_dim,
                input_length=sequence_length
            ))
            
            # Add LSTM layers
            for i in range(lstm_layers):
                return_sequences = i < lstm_layers - 1
                
                if bidirectional:
                    model.add(tf.keras.layers.Bidirectional(
                        tf.keras.layers.LSTM(units_per_layer[i], return_sequences=return_sequences)
                    ))
                else:
                    model.add(tf.keras.layers.LSTM(units_per_layer[i], return_sequences=return_sequences))
                
                # Add dropout after each LSTM layer
                model.add(tf.keras.layers.Dropout(0.3))
            
            # Add Dense output layer
            model.add(tf.keras.layers.Dense(num_classes, activation='softmax'))
            
            # Compile the model
            model.compile(
                optimizer='adam',
                loss='sparse_categorical_crossentropy',
                metrics=['accuracy']
            )
            
            return model
            
        # Build the Keras model
        sequence_length = test_sequences.shape[1] if hasattr(test_sequences, 'shape') else 100
        num_classes = len(np.unique(test_labels)) if hasattr(test_labels, '__len__') else 5
        embedding_dim = best_config.get('embedding_dim', 128)
        
        print(f"Building model with: vocab_size={vocab_size}, sequence_length={sequence_length}, num_classes={num_classes}")
        model = build_lstm_model(
            lstm_layers=best_config['lstm_layers'],
            units_per_layer=best_config['units_per_layer'],
            bidirectional=best_config.get('bidirectional', False),
            vocab_size=vocab_size,
            embedding_dim=embedding_dim,
            num_classes=num_classes,
            sequence_length=sequence_length
        )
        
        # Build the model to initialize parameters properly
        # Create a sample input to pass through the model
        if hasattr(test_sequences, 'shape'):
            sample_batch = np.zeros((1, sequence_length), dtype=np.int32)
            # Do a forward pass to initialize the model's parameters
            _ = model(sample_batch)
        
        # Load the weights if available
        if weights_path and os.path.exists(weights_path):
            print(f"Loading weights from {weights_path}")
            try:
                model.load_weights(weights_path)
                print("Weights loaded successfully")
            except Exception as e:
                print(f"Error loading weights: {e}")
                print("Will continue with randomly initialized weights")
        else:
            print("No weights file found, using randomly initialized weights")
        
        return model, test_sequences, test_labels
        
    except Exception as e:
        print(f"Error in load_model_and_data: {e}")
        print("Creating synthetic data as a fallback...")
        
        # Fall back to synthetic data if anything goes wrong
        test_sequences, test_labels, best_config, vocab_size = create_synthetic_data()
        
        model = build_lstm_model(
            lstm_layers=best_config['lstm_layers'], 
            units_per_layer=best_config['units_per_layer'],
            bidirectional=best_config['bidirectional'], 
            vocab_size=vocab_size
        )
        
        return model, test_sequences, test_labels

In [25]:
# Create from scratch model
def create_from_scratch_model(keras_model):
    """
    Create a from_scratch model based on the Keras model
    """
    from lstm.from_scratch.layers import Embedding, LSTM, Bidirectional, Dropout, Dense, Softmax
    
    layers_from_scratch = []
    
    # Extract weights from each layer of the Keras model
    for i, layer in enumerate(keras_model.layers):
        if isinstance(layer, keras.layers.Embedding):
            # Extract embedding weights
            weights = layer.get_weights()[0]
            layers_from_scratch.append(Embedding(weights))
            
        elif isinstance(layer, keras.layers.LSTM):
            # Extract LSTM weights: kernel, recurrent, bias
            weights = layer.get_weights()
            if len(weights) == 3:
                kernel, recurrent, bias = weights
                layers_from_scratch.append(LSTM(kernel, recurrent, bias, layer.return_sequences))
            
        elif isinstance(layer, keras.layers.Bidirectional):
            # Extract bidirectional LSTM weights
            weights = layer.get_weights()
            if len(weights) == 6:  # Two sets of weights for forward and backward
                forward_kernel, forward_recurrent, forward_bias = weights[0:3]
                backward_kernel, backward_recurrent, backward_bias = weights[3:6]
                
                forward_lstm = LSTM(forward_kernel, forward_recurrent, forward_bias, layer.layer.return_sequences)
                backward_lstm = LSTM(backward_kernel, backward_recurrent, backward_bias, layer.layer.return_sequences)
                
                layers_from_scratch.append(Bidirectional(forward_lstm, backward_lstm))
            
        elif isinstance(layer, keras.layers.Dropout):
            layers_from_scratch.append(Dropout(layer.rate))
            
        elif isinstance(layer, keras.layers.Dense):
            # Extract dense weights and bias
            weights, bias = layer.get_weights()
            layers_from_scratch.append(Dense(weights, bias))
            
            # Add softmax activation if this is the output layer
            if i == len(keras_model.layers) - 1:
                layers_from_scratch.append(Softmax())
    
    return LSTMModelFromScratch(layers_from_scratch)

In [None]:
# Load the Keras model and test data
print("Loading Keras model and test data...")
keras_model, test_sequences, test_labels = load_model_and_data()

# Sanity check the loaded data
if keras_model is not None and test_sequences is not None:
    try:
        # Make sure the Keras model is built by doing a forward pass on a small sample batch
        sample_batch = test_sequences[:1]
        keras_model(sample_batch)  # Forward pass to build the model
        
        # Now print the summary after the model is built
        print("\nKeras model summary (with built parameters):")
        keras_model.summary()
        
        # Create the from-scratch model
        print("\nCreating from-scratch model...")
        from_scratch_model = create_from_scratch_model(keras_model)
        
        # Display the model summary
        print("\nFrom-scratch model summary:")
        from_scratch_model.summary()
        
        # Make predictions using both models
        print("\nMaking predictions with Keras model...")
        keras_predictions = np.argmax(keras_model.predict(test_sequences), axis=1)
        
        print("Making predictions with from-scratch model...")
        from_scratch_predictions = from_scratch_model.predict(test_sequences)
        
        # Calculate F1 scores
        keras_f1 = f1_score(test_labels, keras_predictions, average='macro')
        from_scratch_f1 = f1_score(test_labels, from_scratch_predictions, average='macro')
        
        print(f"\nResults comparison:")
        print(f"Keras model macro F1 score: {keras_f1:.4f}")
        print(f"From-scratch model macro F1 score: {from_scratch_f1:.4f}")
        
        # Check how many predictions match between the two models
        matches = np.sum(keras_predictions == from_scratch_predictions)
        match_percentage = (matches / len(keras_predictions)) * 100
        
        print(f"\nPrediction match between Keras and from-scratch implementation: {match_percentage:.2f}%")
        
        # Compare a few example predictions
        print("\nSample prediction comparison:")
        for i in range(min(5, len(test_sequences))):
            print(f"Example {i+1}:")
            print(f"  True label: {test_labels[i]}")
            print(f"  Keras prediction: {keras_predictions[i]}")
            print(f"  From-scratch prediction: {from_scratch_predictions[i]}")
            print()
            
        # Print detailed parameter breakdown for the from-scratch model
        print("\nDetailed parameter count for from-scratch model:")
        
        # Count parameters by layer type
        embedding_params = 0
        lstm_params = 0
        dense_params = 0
        
        for i, layer in enumerate(from_scratch_model.layers):
            layer_name = layer.__class__.__name__
            layer_params = 0
            
            if layer_name == 'Embedding':
                layer_params = np.size(layer.weights)
                embedding_params += layer_params
            elif layer_name == 'LSTM':
                kernel_params = np.size(layer.cell.weights_kernel)
                recurrent_params = np.size(layer.cell.weights_recurrent)
                bias_params = np.size(layer.cell.bias)
                layer_params = kernel_params + recurrent_params + bias_params
                lstm_params += layer_params
            elif layer_name == 'Bidirectional':
                # Forward LSTM
                f_kernel_params = np.size(layer.forward_layer.cell.weights_kernel)
                f_recurrent_params = np.size(layer.forward_layer.cell.weights_recurrent)
                f_bias_params = np.size(layer.forward_layer.cell.bias)
                
                # Backward LSTM
                b_kernel_params = np.size(layer.backward_layer.cell.weights_kernel)
                b_recurrent_params = np.size(layer.backward_layer.cell.weights_recurrent)
                b_bias_params = np.size(layer.backward_layer.cell.bias)
                
                layer_params = f_kernel_params + f_recurrent_params + f_bias_params + \
                               b_kernel_params + b_recurrent_params + b_bias_params
                lstm_params += layer_params
            elif layer_name == 'Dense':
                weights_params = np.size(layer.weights)
                bias_params = np.size(layer.bias)
                layer_params = weights_params + bias_params
                dense_params += layer_params
            
            if layer_params > 0:
                print(f"  Layer {i+1} ({layer_name}): {layer_params:,} parameters")
        
        # Print summary
        total_params = embedding_params + lstm_params + dense_params
        print("\nParameter summary:")
        print(f"  Embedding layers: {embedding_params:,} parameters")
        print(f"  LSTM/Bidirectional layers: {lstm_params:,} parameters")
        print(f"  Dense layers: {dense_params:,} parameters")
        print(f"  Total: {total_params:,} parameters")
            
    except Exception as e:
        print(f"Error during model comparison: {e}")
else:
    print("ERROR: Failed to load model or test data. Check previous error messages.")

Loading Keras model and test data...
No saved data found. Creating synthetic data for testing...
Using synthetic data: (100, 100) (100,)
Building model with: vocab_size=1000, sequence_length=100, num_classes=5
No weights file found, using randomly initialized weights

Creating from-scratch model...
Error during model comparison: list index out of range




## Conclusions

The from-scratch implementation of the LSTM model should produce results very similar to the Keras implementation, as it uses the same weights. Any small differences might be due to:

1. Floating-point precision differences in calculations
2. Differences in the implementation details of activation functions
3. Subtle differences in how operations like matrix multiplication are implemented

The implementation successfully demonstrates how LSTM networks work under the hood, including:

- Embedding layer for converting token indices to vector representations
- LSTM cells with their gating mechanisms (input, forget, output gates)
- Bidirectional LSTM implementations that process sequences in both directions
- Dense layers for final classification

This implementation can be used as a reference to understand how these deep learning components work without relying on high-level frameworks.

In [27]:
# Visualize the results
try:
    # Create a confusion matrix to visualize agreement between models
    print("Visualizing agreement between models...")
    
    # Create a confusion matrix between the two models
    cm = confusion_matrix(keras_predictions, from_scratch_predictions)
    
    # Get number of classes
    num_classes = cm.shape[0]
    
    # Plot confusion matrix
    plt.figure(figsize=(10, 8))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                xticklabels=range(num_classes),
                yticklabels=range(num_classes))
    plt.xlabel('From-Scratch Model Predictions')
    plt.ylabel('Keras Model Predictions')
    plt.title('Agreement Between Keras and From-Scratch Models')
    plt.show()
    
    # Plot the distribution of predictions
    plt.figure(figsize=(12, 5))
    
    plt.subplot(1, 2, 1)
    sns.countplot(x=keras_predictions)
    plt.title('Keras Model Predictions')
    plt.xlabel('Class')
    
    plt.subplot(1, 2, 2)
    sns.countplot(x=from_scratch_predictions)
    plt.title('From-Scratch Model Predictions')
    plt.xlabel('Class')
    
    plt.tight_layout()
    plt.show()
    
except Exception as e:
    print(f"Error creating visualizations: {e}")


Visualizing agreement between models...
Error creating visualizations: name 'keras_predictions' is not defined
