# English LSTM Training (GPU/Metal)

Train an LSTM model for English next-word prediction, using TensorFlow with Metal GPU acceleration.

In [None]:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

import json
import pickle
from pathlib import Path
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau

# Report the TensorFlow build and whether a GPU is visible to it.
print(f"TensorFlow: {tf.__version__}")

# The tensorflow-metal plugin exposes the Apple GPU under the standard 'GPU'
# device type. 'Metal' is not a device type, so querying it returns an empty
# list even when Metal acceleration is active; report GPU visibility instead.
gpus = tf.config.list_physical_devices('GPU')
print(f"GPUs: {gpus}")
print(f"GPU acceleration: {'enabled' if gpus else 'disabled (CPU only)'}")

## Configuration

In [None]:
# --- Filesystem locations (edit these to match your setup) ---
OUTPUT_DIR = Path('models')     # trained models and history are written here
INPUT_DIR = Path('processed')   # prepared vocabulary and sequences are read from here

# --- Architecture ---
# Keep these in sync with the Myanmar LSTM so the native engine stays compatible.
SEQUENCE_LENGTH = 5
EMBEDDING_DIM = LSTM_UNITS = 256

# --- Training loop ---
EPOCHS = 20
BATCH_SIZE = 128          # raise this when training on a GPU
VALIDATION_SPLIT = 0.1
MAX_SEQUENCES = None      # set to an int (e.g., 1000000) to cap the number of sequences

## Load Data

In [None]:
# Load the word -> index vocabulary.
# Decode explicitly as UTF-8 so the result does not depend on the platform's
# default text encoding (vocabulary entries may contain non-ASCII characters).
with open(INPUT_DIR / 'en_word_indices.json', 'r', encoding='utf-8') as f:
    word_to_idx = json.load(f)

# Load the prepared training sequences.
# NOTE(security): unpickling executes arbitrary code embedded in the file;
# only load pickles produced by your own data-preparation step.
with open(INPUT_DIR / 'en_sequences.pkl', 'rb') as f:
    sequences = pickle.load(f)

# NOTE(review): this assumes the ids in word_to_idx are contiguous 0..N-1, so
# that len() is a strict upper bound on every token id — confirm against the
# script that generated the vocabulary.
vocab_size = len(word_to_idx)
print(f"Vocabulary size: {vocab_size:,}")
print(f"Total sequences: {len(sequences):,}")

## Prepare Training Data

In [None]:
# When a cap is configured and the dataset exceeds it, keep a random subset
# (sampled without replacement).
if MAX_SEQUENCES and MAX_SEQUENCES < len(sequences):
    print(f"Limiting to {MAX_SEQUENCES:,} sequences")
    subset_idx = np.random.choice(len(sequences), MAX_SEQUENCES, replace=False)
    sequences = [sequences[j] for j in subset_idx]

# Pack everything into one int32 matrix, then split each row into its context
# (every token except the last) and its target (the last token). Targets stay
# as integer ids rather than one-hot vectors.
sequences = np.array(sequences, dtype=np.int32)
X, y = sequences[:, :-1], sequences[:, -1]

print(f"X shape: {X.shape} ({X.nbytes / 1024 / 1024:.1f} MB)")
print(f"y shape: {y.shape} ({y.nbytes / 1024 / 1024:.1f} MB)")

## Build Model

In [None]:
# Assemble the network one layer at a time:
# token embedding -> single LSTM layer -> softmax over the whole vocabulary.
model = Sequential()
model.add(Embedding(vocab_size, EMBEDDING_DIM))
model.add(LSTM(LSTM_UNITS))
model.add(Dense(vocab_size, activation='softmax'))

# Sparse categorical cross-entropy takes integer class ids as targets,
# so no one-hot encoding of the labels is needed.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

model.summary()

## Train Model

In [None]:
# Make sure the checkpoint directory exists before training starts.
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

# Keep only the checkpoint with the highest validation accuracy.
checkpoint_cb = ModelCheckpoint(
    str(OUTPUT_DIR / 'en_lstm_best.keras'),
    monitor='val_accuracy',
    mode='max',
    save_best_only=True,
    verbose=1,
)

# Stop after 5 epochs without validation-loss improvement and roll the
# weights back to the best epoch seen.
early_stop_cb = EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True,
    verbose=1,
)

# Halve the learning rate after 2 stagnant epochs, never going below 1e-6.
lr_decay_cb = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.5,
    patience=2,
    min_lr=1e-6,
    verbose=1,
)

callbacks = [checkpoint_cb, early_stop_cb, lr_decay_cb]

history = model.fit(
    X,
    y,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    validation_split=VALIDATION_SPLIT,
    callbacks=callbacks,
    verbose=1,
)

## Save Final Model

In [None]:
# Persist the model as it stands at the end of training.
model.save(str(OUTPUT_DIR / 'en_lstm_final.keras'))
print(f"Saved to {OUTPUT_DIR / 'en_lstm_final.keras'}")

# Persist the per-epoch metrics. Values are converted to plain Python floats
# first so that json can serialize them.
serializable_history = {
    metric: list(map(float, values))
    for metric, values in history.history.items()
}
with open(OUTPUT_DIR / 'training_history.json', 'w') as f:
    json.dump(serializable_history, f, indent=2)

## Evaluate with Sample Predictions

In [None]:
# Invert the vocabulary so token ids can be printed as words.
idx_to_word = {v: k for k, v in word_to_idx.items()}

# Pick up to 10 distinct examples at random.
sample_indices = np.random.choice(len(X), min(10, len(X)), replace=False)

# Run ONE batched forward pass for all samples instead of calling
# model.predict() once per example inside the loop: predict() carries
# per-call overhead and is meant for batches, so calling it per row is slow.
sample_inputs = X[sample_indices]
sample_targets = y[sample_indices]
pred_indices = np.argmax(model.predict(sample_inputs, verbose=0), axis=1)

for input_seq, true_next, pred_idx in zip(sample_inputs, sample_targets, pred_indices):
    # Id 0 is omitted from the displayed context.
    # NOTE(review): presumably 0 is the padding id — confirm against the vocabulary.
    input_words = [idx_to_word.get(i, '<UNK>') for i in input_seq if i != 0]
    true_word = idx_to_word.get(true_next, '<UNK>')
    pred_word = idx_to_word.get(pred_idx, '<UNK>')

    print(f"'{' '.join(input_words)}' -> True: '{true_word}', Pred: '{pred_word}'")

## Plot Training History

In [None]:
import matplotlib.pyplot as plt

# Two side-by-side panels: loss on the left, accuracy on the right.
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 4))

# Each entry: (axis, training-metric key, validation-metric key, panel title).
panels = (
    (ax1, 'loss', 'val_loss', 'Loss'),
    (ax2, 'accuracy', 'val_accuracy', 'Accuracy'),
)
for axis, train_key, val_key, title in panels:
    axis.plot(history.history[train_key], label='Train')
    axis.plot(history.history[val_key], label='Val')
    axis.set_title(title)
    axis.legend()

plt.tight_layout()
plt.show()

## Export to Native Format

Run this after training to convert the best checkpoint to the native format for mobile:

In [None]:
!python export_to_native.py --model models/en_lstm_best.keras --output output/