# Next Word Prediction using LSTM and GRU

In [75]:
import os
import re
import pickle
import numpy as np
import requests
import tensorflow as tf
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Embedding, LSTM, GRU, Dense, Dropout
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split
from typing import List, Tuple
import matplotlib.pyplot as plt
from tqdm import tqdm
import warnings
warnings.filterwarnings('ignore')

# Set random seeds for reproducibility
np.random.seed(42)
tf.random.set_seed(42)

print(f"TensorFlow version: {tf.__version__}")
print(f"GPU Available: {tf.config.list_physical_devices('GPU')}")


TensorFlow version: 2.19.0
GPU Available: []


## 1. Data Collection

In [77]:
class DataCollector:
    """
    Handles collection and combination of text data from various sources.
    """

    def __init__(self, data_dir: str = "data"):
        """
        Initialize the data collector.

        Args:
            data_dir: Directory to store downloaded data
        """
        self.data_dir = data_dir
        os.makedirs(data_dir, exist_ok=True)

    def download_gutenberg_books(self, book_urls: List[str]) -> str:
        """
        Download books from Project Gutenberg.

        Args:
            book_urls: List of URLs to download

        Returns:
            Combined text content
        """
        combined_text = ""

        for i, url in enumerate(book_urls):
            try:
                print(f"Downloading book {i+1}/{len(book_urls)}...")
                response = requests.get(url, timeout=30)
                response.raise_for_status()

                # Basic cleaning of Gutenberg headers/footers
                text = response.text
                start_marker = "*** START OF THE PROJECT GUTENBERG"
                end_marker = "*** END OF THE PROJECT GUTENBERG"

                start_idx = text.find(start_marker)
                end_idx = text.find(end_marker)

                if start_idx != -1 and end_idx != -1:
                    text = text[start_idx:end_idx]

                combined_text += text + "\n\n"

            except Exception as e:
                print(f"Error downloading {url}: {e}")
                continue

        return combined_text

    def save_text_data(self, text: str, filename: str) -> None:
        """Save text data to file."""
        filepath = os.path.join(self.data_dir, filename)
        with open(filepath, 'w', encoding='utf-8') as f:
            f.write(text)
        print(f"Saved text data to {filepath}")

    def load_text_data(self, filename: str) -> str:
        """Load text data from file."""
        filepath = os.path.join(self.data_dir, filename)
        try:
            with open(filepath, 'r', encoding='utf-8') as f:
                return f.read()
        except FileNotFoundError:
            print(f"File {filepath} not found.")
            return ""

# Initialize data collector
data_collector = DataCollector()

# Download sample books from Project Gutenberg
gutenberg_urls = [
    "https://www.gutenberg.org/files/1342/1342-0.txt",  # Pride and Prejudice
    "https://www.gutenberg.org/files/11/11-0.txt",      # Alice in Wonderland
    "https://www.gutenberg.org/files/74/74-0.txt",      # The Adventures of Tom Sawyer
    "https://www.gutenberg.org/files/2701/2701-0.txt",  # Moby Dick
    "https://www.gutenberg.org/files/1661/1661-0.txt",  # The Adventures of Sherlock Holmes
]

# Download and combine text data
print("Collecting training data...")
raw_text = data_collector.download_gutenberg_books(gutenberg_urls)
data_collector.save_text_data(raw_text, "combined_books.txt")

print(f"Total characters collected: {len(raw_text):,}")
print(f"First 500 characters:\n{raw_text[:500]}")


Collecting training data...
Downloading book 1/5...
Downloading book 2/5...
Downloading book 3/5...
Downloading book 4/5...
Downloading book 5/5...
Saved text data to data/combined_books.txt
Total characters collected: 3,095,941
First 500 characters:
*** START OF THE PROJECT GUTENBERG EBOOK 1342 ***
                            [Illustration:

                             GEORGE ALLEN
                               PUBLISHER

                        156 CHARING CROSS ROAD
                                LONDON

                             RUSKIN HOUSE
                                   ]

                            [Illustration:

               _Reading Jane’s Letters._      _Chap 34._
                                   ]



## 2. Data Preprocessing

In [78]:
class TextPreprocessor:
    """
    Handles text preprocessing for next-word prediction models.
    """

    def __init__(self, max_vocab_size: int = 10000, sequence_length: int = 50):
        """
        Initialize the text preprocessor.

        Args:
            max_vocab_size: Maximum vocabulary size
            sequence_length: Length of input sequences
        """
        self.max_vocab_size = max_vocab_size
        self.sequence_length = sequence_length
        self.tokenizer = None
        self.vocab_size = 0

    def clean_text(self, text: str) -> str:
        """
        Clean and normalize text data.

        Args:
            text: Raw text string

        Returns:
            Cleaned text
        """
        # Convert to lowercase
        text = text.lower()

        # Remove special characters but keep basic punctuation
        text = re.sub(r'[^\w\s.,!?;:\'"()-]', ' ', text)

        # Replace multiple spaces with single space
        text = re.sub(r'\s+', ' ', text)

        # Remove extra whitespace
        text = text.strip()

        return text

    def create_tokenizer(self, text: str) -> None:
        """
        Create and fit tokenizer on text data.

        Args:
            text: Cleaned text data
        """
        self.tokenizer = Tokenizer(
            num_words=self.max_vocab_size,
            oov_token="<OOV>",
            filters='',  # We already cleaned the text
        )

        # Split into sentences for better tokenization
        sentences = text.split('.')
        self.tokenizer.fit_on_texts(sentences)

        # Update vocab size (add 1 for OOV token)
        self.vocab_size = min(len(self.tokenizer.word_index) + 1, self.max_vocab_size)

        print(f"Vocabulary size: {self.vocab_size}")
        print(f"Most common words: {list(self.tokenizer.word_index.keys())[:20]}")

    def create_sequences(self, text: str) -> Tuple[np.ndarray, np.ndarray]:
        """
        Create input sequences and targets for training.

        Args:
            text: Cleaned text data

        Returns:
            Tuple of (input_sequences, targets)
        """
        # Convert text to sequences
        sequences = self.tokenizer.texts_to_sequences([text])[0]

        # Create input-target pairs
        input_sequences = []
        targets = []

        print("Creating training sequences...")
        for i in tqdm(range(self.sequence_length, len(sequences))):
            input_seq = sequences[i-self.sequence_length:i]
            target = sequences[i]

            input_sequences.append(input_seq)
            targets.append(target)

        # Convert to numpy arrays
        X = np.array(input_sequences)
        y = np.array(targets)

        print(f"Created {len(X):,} training sequences")
        print(f"Input shape: {X.shape}")
        print(f"Target shape: {y.shape}")

        return X, y

    def save_tokenizer(self, filepath: str) -> None:
        """Save tokenizer to file."""
        with open(filepath, 'wb') as f:
            pickle.dump({
                'tokenizer': self.tokenizer,
                'vocab_size': self.vocab_size,
                'sequence_length': self.sequence_length,
                'max_vocab_size': self.max_vocab_size
            }, f)
        print(f"Tokenizer saved to {filepath}")

    def load_tokenizer(self, filepath: str) -> None:
        """Load tokenizer from file."""
        with open(filepath, 'rb') as f:
            data = pickle.load(f)
            self.tokenizer = data['tokenizer']
            self.vocab_size = data['vocab_size']
            self.sequence_length = data['sequence_length']
            self.max_vocab_size = data['max_vocab_size']
        print(f"Tokenizer loaded from {filepath}")

# Initialize preprocessor
preprocessor = TextPreprocessor(max_vocab_size=15000, sequence_length=40)

# Load and preprocess text data
raw_text = data_collector.load_text_data("combined_books.txt")
if not raw_text:
    print("No data found. Please run the data collection section first.")
else:
    # Clean text
    print("Cleaning text...")
    clean_text = preprocessor.clean_text(raw_text)

    # Create tokenizer
    print("Creating tokenizer...")
    preprocessor.create_tokenizer(clean_text)

    # Create training sequences
    print("Creating training sequences...")
    X, y = preprocessor.create_sequences(clean_text)

    # Save tokenizer
    os.makedirs("models", exist_ok=True)
    preprocessor.save_tokenizer("models/tokenizer.pkl")

# Split data into train and validation sets
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

print(f"Training data shape: {X_train.shape}")
print(f"Validation data shape: {X_val.shape}")
print(f"Training targets shape: {y_train.shape}")
print(f"Validation targets shape: {y_val.shape}")


Cleaning text...
Creating tokenizer...
Vocabulary size: 15000
Most common words: ['<OOV>', 'the', 'and', 'of', 'to', 'a', 'in', 'i', 'that', 'it', 'was', 'he', 'his', 'you', 'as', 'with', 'but', 'for', 'is', 's']
Creating training sequences...
Creating training sequences...


100%|██████████| 551204/551204 [00:00<00:00, 1078196.35it/s]


Created 551,204 training sequences
Input shape: (551204, 40)
Target shape: (551204,)
Tokenizer saved to models/tokenizer.pkl
Training data shape: (440963, 40)
Validation data shape: (110241, 40)
Training targets shape: (440963,)
Validation targets shape: (110241,)


## 3. Model Architecture and Training

In [None]:
class NextWordPredictor:
    """
    Next-word prediction model using LSTM or GRU.
    """

    def __init__(self, vocab_size: int, sequence_length: int, embedding_dim: int = 128):
        """
        Initialize the model.

        Args:
            vocab_size: Size of vocabulary
            sequence_length: Length of input sequences
            embedding_dim: Dimensionality of embeddings
        """
        self.vocab_size = vocab_size
        self.sequence_length = sequence_length
        self.embedding_dim = embedding_dim
        self.model = None
        self.history = None

    def build_lstm_model(self, lstm_units: int = 256, dropout_rate: float = 0.3) -> None:
        """
        Build LSTM-based model.

        Args:
            lstm_units: Number of LSTM units
            dropout_rate: Dropout rate for regularization
        """
        self.model = Sequential([
            Embedding(
                input_dim=self.vocab_size,
                output_dim=self.embedding_dim,
                input_length=self.sequence_length,
                name='embedding'
            ),
            LSTM(lstm_units, return_sequences=True, name='lstm_1'),
            Dropout(dropout_rate, name='dropout_1'),
            LSTM(lstm_units//2, name='lstm_2'),
            Dropout(dropout_rate, name='dropout_2'),
            Dense(self.vocab_size, activation='softmax', name='output')
        ])

        self.model.compile(
            optimizer=Adam(learning_rate=0.001),
            loss='sparse_categorical_crossentropy',
            metrics=['accuracy']
        )

        print("LSTM Model Architecture:")
        self.model.summary()

    def build_gru_model(self, gru_units: int = 256, dropout_rate: float = 0.3) -> None:
        """
        Build GRU-based model.

        Args:
            gru_units: Number of GRU units
            dropout_rate: Dropout rate for regularization
        """
        self.model = Sequential([
            Embedding(
                input_dim=self.vocab_size,
                output_dim=self.embedding_dim,
                input_length=self.sequence_length,
                name='embedding'
            ),
            GRU(gru_units, return_sequences=True, name='gru_1'),
            Dropout(dropout_rate, name='dropout_1'),
            GRU(gru_units//2, name='gru_2'),
            Dropout(dropout_rate, name='dropout_2'),
            Dense(self.vocab_size, activation='softmax', name='output')
        ])

        self.model.compile(
            optimizer=Adam(learning_rate=0.001),
            loss='sparse_categorical_crossentropy',
            metrics=['accuracy']
        )

        print("GRU Model Architecture:")
        self.model.summary()

    def train(self, X_train: np.ndarray, y_train: np.ndarray,
              X_val: np.ndarray, y_val: np.ndarray,
              model_name: str, epochs: int = 50, batch_size: int = 128) -> None:
        """
        Train the model with callbacks.

        Args:
            X_train: Training input sequences
            y_train: Training targets
            X_val: Validation input sequences
            y_val: Validation targets
            model_name: Name for saving model checkpoints
            epochs: Number of training epochs
            batch_size: Training batch size
        """
        # Create models directory
        os.makedirs("models", exist_ok=True)

        # Callbacks
        callbacks = [
            ModelCheckpoint(
                filepath=f"models/{model_name}_best.h5",
                monitor='val_loss',
                save_best_only=True,
                save_weights_only=False,
                verbose=1
            ),
            EarlyStopping(
                monitor='val_loss',
                patience=10,
                restore_best_weights=True,
                verbose=1
            ),
            ReduceLROnPlateau(
                monitor='val_loss',
                factor=0.5,
                patience=5,
                min_lr=1e-7,
                verbose=1
            )
        ]

        print(f"Training {model_name} model...")
        self.history = self.model.fit(
            X_train, y_train,
            validation_data=(X_val, y_val),
            epochs=epochs,
            batch_size=batch_size,
            callbacks=callbacks,
            verbose=1
        )

        # Save final model
        self.model.save(f"models/{model_name}_final.h5")
        print(f"Model saved as models/{model_name}_final.h5")

    def plot_training_history(self) -> None:
        """Plot training history."""
        if self.history is None:
            print("No training history available.")
            return

        fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 5))

        # Plot loss
        ax1.plot(self.history.history['loss'], label='Training Loss')
        ax1.plot(self.history.history['val_loss'], label='Validation Loss')
        ax1.set_title('Model Loss')
        ax1.set_xlabel('Epoch')
        ax1.set_ylabel('Loss')
        ax1.legend()
        ax1.grid(True)

        # Plot accuracy
        ax2.plot(self.history.history['accuracy'], label='Training Accuracy')
        ax2.plot(self.history.history['val_accuracy'], label='Validation Accuracy')
        ax2.set_title('Model Accuracy')
        ax2.set_xlabel('Epoch')
        ax2.set_ylabel('Accuracy')
        ax2.legend()
        ax2.grid(True)

        plt.tight_layout()
        plt.show()

# ## When to Use LSTM vs GRU in Production

print("""
## LSTM vs GRU: When to Use Each in Production

### LSTM (Long Short-Term Memory)
**Use LSTM when:**
- You have complex, long sequences with intricate dependencies
- Memory requirements are less of a concern
- You need the most expressive model (has forget gate, input gate, output gate)
- Working with tasks requiring fine-grained control over information flow
- You have sufficient computational resources

**Advantages:**
- More expressive with separate forget and input gates
- Better at capturing complex patterns in long sequences
- More control over information flow

**Disadvantages:**
- More parameters (slower training and inference)
- Higher memory requirements
- More prone to overfitting on smaller datasets

### GRU (Gated Recurrent Unit)
**Use GRU when:**
- You need faster training and inference
- Working with limited computational resources
- Dealing with shorter to medium-length sequences
- Want to reduce overfitting risk
- Performance is similar to LSTM but with fewer parameters

**Advantages:**
- Fewer parameters (faster training and inference)
- Lower memory requirements
- Less prone to overfitting
- Simpler architecture, easier to tune

**Disadvantages:**
- Less expressive than LSTM
- May not capture very complex, long-term dependencies as well

### Production Decision Guidelines:
1. **Start with GRU** for most applications (simpler, faster)
2. **Switch to LSTM** if you need better performance on complex sequences
3. **Consider computational constraints** in your production environment
4. **A/B test both** if performance is critical
""")

# Train LSTM Model
# print("\n" + "="*50)
# print("TRAINING LSTM MODEL")
# print("="*50)

# lstm_predictor = NextWordPredictor(
#     vocab_size=preprocessor.vocab_size,
#     sequence_length=preprocessor.sequence_length,
#     embedding_dim=128
# )
#
# lstm_predictor.build_lstm_model(lstm_units=256, dropout_rate=0.3)
# lstm_predictor.train(X_train, y_train, X_val, y_val,
#                     model_name="lstm_next_word", epochs=30, batch_size=64)
#
# # Plot LSTM training history
# lstm_predictor.plot_training_history()

# Train GRU Model
print("\n" + "="*50)
print("TRAINING GRU MODEL")
print("="*50)

gru_predictor = NextWordPredictor(
    vocab_size=preprocessor.vocab_size,
    sequence_length=preprocessor.sequence_length,
    embedding_dim=128
)

gru_predictor.build_gru_model(gru_units=256, dropout_rate=0.3)
gru_predictor.train(X_train, y_train, X_val, y_val,
                   model_name="gru_next_word", epochs=30, batch_size=64)

# Plot GRU training history
gru_predictor.plot_training_history()



## LSTM vs GRU: When to Use Each in Production

### LSTM (Long Short-Term Memory)
**Use LSTM when:**
- You have complex, long sequences with intricate dependencies
- Memory requirements are less of a concern
- You need the most expressive model (has forget gate, input gate, output gate)
- Working with tasks requiring fine-grained control over information flow
- You have sufficient computational resources

**Advantages:**
- More expressive with separate forget and input gates
- Better at capturing complex patterns in long sequences
- More control over information flow

**Disadvantages:**
- More parameters (slower training and inference)
- Higher memory requirements
- More prone to overfitting on smaller datasets

### GRU (Gated Recurrent Unit)
**Use GRU when:**
- You need faster training and inference
- Working with limited computational resources
- Dealing with shorter to medium-length sequences
- Want to reduce overfitting risk
- Performance is similar to LSTM but with fewer paramete

Training gru_next_word model...
Epoch 1/30
[1m6806/6891[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m7s[0m 91ms/step - accuracy: 0.1017 - loss: 6.5999

## 4. Model Prediction

In [65]:
class NextWordInference:
    """
    Handles inference for next-word prediction.
    """

    def __init__(self, model_path: str, tokenizer_path: str):
        """
        Initialize inference engine.

        Args:
            model_path: Path to trained model
            tokenizer_path: Path to tokenizer
        """
        self.model_path = model_path
        self.tokenizer_path = tokenizer_path
        self.model = None
        self.tokenizer = None
        self.sequence_length = None
        self.vocab_size = None

    def load_model_and_tokenizer(self) -> None:
        """Load trained model and tokenizer."""
        try:
            # Load model
            self.model = load_model(self.model_path)
            print(f"Model loaded from {self.model_path}")

            # Load tokenizer
            with open(self.tokenizer_path, 'rb') as f:
                data = pickle.load(f)
                self.tokenizer = data['tokenizer']
                self.sequence_length = data['sequence_length']
                self.vocab_size = data['vocab_size']

            print(f"Tokenizer loaded from {self.tokenizer_path}")

        except Exception as e:
            print(f"Error loading model/tokenizer: {e}")
            raise

    def preprocess_input(self, input_text: str) -> np.ndarray:
        """
        Preprocess input text for prediction.

        Args:
            input_text: Input text string

        Returns:
            Processed sequence array
        """
        # Clean and tokenize input
        input_text = input_text.lower().strip()
        sequence = self.tokenizer.texts_to_sequences([input_text])[0]

        # Pad or truncate to required length
        if len(sequence) > self.sequence_length:
            sequence = sequence[-self.sequence_length:]
        else:
            sequence = [0] * (self.sequence_length - len(sequence)) + sequence

        return np.array([sequence])

    def predict_next_word(self, input_text: str, top_k: int = 5) -> List[Tuple[str, float]]:
        """
        Predict next word(s) given input text.

        Args:
            input_text: Input text string
            top_k: Number of top predictions to return

        Returns:
            List of (word, probability) tuples
        """
        if self.model is None or self.tokenizer is None:
            self.load_model_and_tokenizer()

        # Preprocess input
        input_sequence = self.preprocess_input(input_text)

        # Get predictions
        predictions = self.model.predict(input_sequence, verbose=0)[0]

        # Get top k predictions
        top_indices = np.argsort(predictions)[-top_k:][::-1]

        # Convert indices to words
        word_index = self.tokenizer.word_index
        index_word = {v: k for k, v in word_index.items()}

        results = []
        for idx in top_indices:
            if idx in index_word:
                word = index_word[idx]
                probability = predictions[idx]
                results.append((word, float(probability)))

        return results

    def predict_single_word(self, input_text: str) -> str:
        """
        Predict single next word.

        Args:
            input_text: Input text string

        Returns:
            Predicted word
        """
        predictions = self.predict_next_word(input_text, top_k=1)
        return predictions[0][0] if predictions else "<unknown>"

# Test the inference functions
print("\n" + "="*50)
print("TESTING INFERENCE")
print("="*50)

# Test with LSTM model
lstm_inference = NextWordInference(
    model_path="models/lstm_next_word_best.h5",
    tokenizer_path="models/tokenizer.pkl"
)

try:
    lstm_inference.load_model_and_tokenizer()

    # Test sentences
    test_sentences = [
        "the quick brown fox",
        "once upon a time",
        "it was a dark and stormy",
        "to be or not to",
        "i think therefore i"
    ]

    print("LSTM Predictions:")
    print("-" * 40)
    for sentence in test_sentences:
        predictions = lstm_inference.predict_next_word(sentence, top_k=3)
        print(f"Input: '{sentence}'")
        for i, (word, prob) in enumerate(predictions, 1):
            print(f"  {i}. {word} ({prob:.3f})")
        print()

except Exception as e:
    print(f"Error in LSTM inference: {e}")

# Test with GRU model
gru_inference = NextWordInference(
    model_path="models/gru_next_word_best.h5",
    tokenizer_path="models/tokenizer.pkl"
)

try:
    gru_inference.load_model_and_tokenizer()

    print("GRU Predictions:")
    print("-" * 40)
    for sentence in test_sentences:
        predictions = gru_inference.predict_next_word(sentence, top_k=3)
        print(f"Input: '{sentence}'")
        for i, (word, prob) in enumerate(predictions, 1):
            print(f"  {i}. {word} ({prob:.3f})")
        print()

except Exception as e:
    print(f"Error in GRU inference: {e}")





TESTING INFERENCE
Model loaded from models/lstm_next_word_best.h5
Tokenizer loaded from models/tokenizer.pkl
LSTM Predictions:
----------------------------------------
Input: 'the quick brown fox'
  1. the (0.055)
  2. i (0.052)
  3. he (0.047)

Input: 'once upon a time'
  1. of (0.143)
  2. and (0.086)
  3. to (0.053)

Input: 'it was a dark and stormy'
  1. i (0.075)
  2. he (0.058)
  3. she (0.051)





Input: 'to be or not to'
  1. be (0.042)
  2. not (0.037)
  3. <OOV> (0.034)

Input: 'i think therefore i'
  1. be (0.077)
  2. not (0.055)
  3. <OOV> (0.051)

Model loaded from models/gru_next_word_best.h5
Tokenizer loaded from models/tokenizer.pkl
GRU Predictions:
----------------------------------------
Input: 'the quick brown fox'
  1. to (0.067)
  2. and (0.061)
  3. of (0.057)

Input: 'once upon a time'
  1. to (0.066)
  2. of (0.064)
  3. and (0.064)

Input: 'it was a dark and stormy'
  1. to (0.067)
  2. and (0.065)
  3. of (0.065)

Input: 'to be or not to'
  1. <OOV> (0.040)
  2. the (0.023)
  3. be (0.016)

Input: 'i think therefore i'
  1. <OOV> (0.040)
  2. the (0.023)
  3. be (0.017)



## 5. Model Evaluation

In [66]:
def evaluate_models() -> None:
    """Evaluate both models on validation set."""
    print("\n" + "="*50)
    print("MODEL EVALUATION")
    print("="*50)

    # Evaluate LSTM
    try:
        lstm_model = load_model("models/lstm_next_word_best.h5")
        lstm_loss, lstm_acc = lstm_model.evaluate(X_val, y_val, verbose=0)
        print(f"LSTM - Validation Loss: {lstm_loss:.4f}, Accuracy: {lstm_acc:.4f}")
    except Exception as e:
        print(f"Error evaluating LSTM: {e}")

    # Evaluate GRU
    try:
        gru_model = load_model("models/gru_next_word_best.h5")
        gru_loss, gru_acc = gru_model.evaluate(X_val, y_val, verbose=0)
        print(f"GRU - Validation Loss: {gru_loss:.4f}, Accuracy: {gru_acc:.4f}")
    except Exception as e:
        print(f"Error evaluating GRU: {e}")

evaluate_models()

print("\n" + "="*50)
print("TRAINING COMPLETE!")
print("="*50)
print("Files created:")
print("- models/lstm_next_word_best.h5 (Best LSTM model)")
print("- models/gru_next_word_best.h5 (Best GRU model)")
print("- models/tokenizer.pkl (Tokenizer and preprocessing config)")




MODEL EVALUATION




LSTM - Validation Loss: 6.1113, Accuracy: 0.1007
GRU - Validation Loss: 6.4292, Accuracy: 0.0586

TRAINING COMPLETE!
Files created:
- models/lstm_next_word_best.h5 (Best LSTM model)
- models/gru_next_word_best.h5 (Best GRU model)
- models/tokenizer.pkl (Tokenizer and preprocessing config)
