In [1]:
# Import necessary libraries
import numpy as np
import json
import os
import tensorflow as tf
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import (
    LSTM, Dense, Dropout, BatchNormalization, 
    Input, Attention, Flatten, Permute, Multiply, Lambda
)
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.optimizers import Adam
from sklearn.metrics import classification_report
import matplotlib.pyplot as plt

2025-05-31 17:26:10.076857: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


# Configurations and Constants

In [2]:
AI_READY_DATA_DIR = "Dataset/ai_ready_data"  # Preprocessed data directory
MODEL_SAVE_PATH = "lstm_location_predictor.keras"  # Trained Keras model
ONNX_MODEL_SAVE_PATH = "lstm_location_predictor.onnx" # ONNX model

# LSTM Model Hyperparameters
LSTM_UNITS_1 = 128       # Number of units in the first LSTM layer
LSTM_UNITS_2 = 64        # Number of units in the second LSTM layer
DENSE_UNITS_1 = 128      # Number of units in the first Dense layer
DENSE_UNITS_2 = 64       # Number of units in the second Dense layer
DROPOUT_RATE = 0.3       # Dropout rate for regularization
LEARNING_RATE = 0.001    # Learning rate for the Adam optimizer
BATCH_SIZE = 64          # Number of samples per gradient update
EPOCHS = 50              # Maximum number of epochs for training (EarlyStopping will monitor)
USE_ATTENTION = False    # Whether to use an attention mechanism (as per proposal "optional")

# Load the preprocessed Data

In [3]:
def load_data(data_dir):
    try:
        X_train = np.load(os.path.join(data_dir, "X_train.npy"), allow_pickle=True)
        y_train = np.load(os.path.join(data_dir, "y_train.npy"), allow_pickle=True)
        
        # Handle potentially empty validation/test sets if data split was small
        X_val_path = os.path.join(data_dir, "X_val.npy")
        X_val = np.load(X_val_path, allow_pickle=True) if os.path.exists(X_val_path) and os.path.getsize(X_val_path) > 0 else np.array([])
        
        y_val_path = os.path.join(data_dir, "y_val.npy")
        y_val = np.load(y_val_path, allow_pickle=True) if os.path.exists(y_val_path) and os.path.getsize(y_val_path) > 0 else np.array([])
        
        X_test_path = os.path.join(data_dir, "X_test.npy")
        X_test = np.load(X_test_path, allow_pickle=True) if os.path.exists(X_test_path) and os.path.getsize(X_test_path) > 0 else np.array([])
        
        y_test_path = os.path.join(data_dir, "y_test.npy")
        y_test = np.load(y_test_path, allow_pickle=True) if os.path.exists(y_test_path) and os.path.getsize(y_test_path) > 0 else np.array([])

        with open(os.path.join(data_dir, "grid_id_to_index.json"), "r") as f:
            grid_id_to_index = json.load(f)

        print("Data loaded successfully.")
        print(f"X_train shape: {X_train.shape}, y_train shape: {y_train.shape}")
        if X_val.size > 0:
            print(f"X_val shape: {X_val.shape}, y_val shape: {y_val.shape}")
        if X_test.size > 0:
            print(f"X_test shape: {X_test.shape}, y_test shape: {y_test.shape}")
        
        return X_train, y_train, X_val, y_val, X_test, y_test, grid_id_to_index
    except FileNotFoundError as e:
        print(f"Error: Data file not found. {e}")
        exit()
    except Exception as e:
        print(f"An error occurred while loading data: {e}")
        exit()

# Define Model Archtecture

In [4]:
def build_model(sequence_length, num_features, num_classes):
    print("Building LSTM model...")
    
    inputs = Input(shape=(sequence_length, num_features))
    
    # First LSTM layer
    x = LSTM(LSTM_UNITS_1, return_sequences=(LSTM_UNITS_2 > 0 or USE_ATTENTION))(inputs)
    x = BatchNormalization()(x)
    x = Dropout(DROPOUT_RATE)(x)
    
    # Optional Attention Layer (simple self-attention on LSTM output)
    if USE_ATTENTION:
        if not (LSTM_UNITS_2 > 0): # If no second LSTM, LSTM1 output needs to be sequence for attention
            pass
        
        # A simple attention mechanism
        attention_probs = Dense(sequence_length, activation='softmax', name='attention_vec')(x)
        
        # Simpler attention: weighted sum of LSTM outputs
        # This requires LSTM_UNITS_1 to return_sequences=True
        attention_mul = Multiply()([x, Permute((2,1))(Dense(num_features, activation='softmax')(x))]) # Element-wise multiplication after permuting dense output
        attention_mul = Lambda(lambda xin: tf.keras.backend.sum(xin, axis=1))(attention_mul) # Sum over time steps
        
        # If using tf.keras.layers.Attention directly:
        # attention_result = Attention()([x, x])
        # x = Flatten()(attention_result) 

        x = attention_mul # Use the weighted sum as input to the next layer
        
    # Optional second LSTM layer
    if LSTM_UNITS_2 > 0:
        if USE_ATTENTION:
            print("Warning: Using Attention with a second LSTM layer requires careful architecture. Current setup assumes Attention is followed by Dense layers.")
        else: # No attention, standard stacked LSTM
            x = LSTM(LSTM_UNITS_2, return_sequences=False)(x) # return_sequences=False for the last LSTM before Dense
            x = BatchNormalization()(x)
            x = Dropout(DROPOUT_RATE)(x)

    # Dense layers
    x = Dense(DENSE_UNITS_1, activation='relu')(x)
    x = BatchNormalization()(x)
    x = Dropout(DROPOUT_RATE)(x)
    
    if DENSE_UNITS_2 > 0:
        x = Dense(DENSE_UNITS_2, activation='relu')(x)
        x = BatchNormalization()(x)
        x = Dropout(DROPOUT_RATE)(x)
        
    # Output layer
    outputs = Dense(num_classes, activation='softmax')(x)
    
    model = Model(inputs=inputs, outputs=outputs)
    
    # Compile the model
    optimizer = Adam(learning_rate=LEARNING_RATE)
    model.compile(optimizer=optimizer, 
                  loss='sparse_categorical_crossentropy', # Use this for integer targets
                  metrics=['accuracy'])
    
    print("Model built and compiled successfully.")
    model.summary()
    return model

# Train the Model

In [5]:
def train_model(model, X_train, y_train, X_val, y_val):
    """Trains the LSTM model."""
    print("Starting model training...")
    
    callbacks = []
    if X_val.size > 0 and y_val.size > 0:
        early_stopping = EarlyStopping(monitor='val_loss', patience=10, verbose=1, restore_best_weights=True)
        callbacks.append(early_stopping)
    else: # No validation set, train for full epochs or use training loss for early stopping (less ideal)
        early_stopping = EarlyStopping(monitor='loss', patience=10, verbose=1, restore_best_weights=True)
        callbacks.append(early_stopping)
        print("Warning: No validation data provided. Early stopping will monitor training loss.")

    
    model_checkpoint = ModelCheckpoint(MODEL_SAVE_PATH, monitor='val_loss' if X_val.size > 0 else 'loss', 
                                       save_best_only=True, verbose=1)
    callbacks.append(model_checkpoint)
    
    history = model.fit(X_train, y_train,
                        epochs=EPOCHS,
                        batch_size=BATCH_SIZE,
                        validation_data=(X_val, y_val) if X_val.size > 0 and y_val.size > 0 else None,
                        callbacks=callbacks,
                        verbose=1)
    
    print("Model training completed.")
    return history

# Evaluation

In [6]:
def evaluate_model(model, X_test, y_test, num_classes):
    """Evaluates the trained model on the test set."""
    if X_test.size == 0 or y_test.size == 0:
        print("No test data to evaluate.")
        return

    print("Evaluating model on test data...")
    loss, accuracy = model.evaluate(X_test, y_test, verbose=0)
    print(f"Test Loss: {loss:.4f}")
    print(f"Test Accuracy: {accuracy:.4f}")

    # Classification report and confusion matrix can be very large for many classes
    if num_classes <= 50: # Arbitrary threshold to avoid excessive output
        try:
            y_pred_probs = model.predict(X_test)
            y_pred_classes = np.argmax(y_pred_probs, axis=1)
            print("\nClassification Report:")
            print(classification_report(y_test, y_pred_classes, zero_division=0))
            # print("\nConfusion Matrix (sample):") # Full matrix too large
            # print(confusion_matrix(y_test, y_pred_classes)) # Might be too large to print meaningfully
        except Exception as e:
            print(f"Could not generate classification report/confusion matrix: {e}")
    else:
        print(f"Skipping classification report due to large number of classes ({num_classes}).")
        print("Consider implementing Top-N accuracy or other relevant metrics for high-cardinality classification.")

# Plot the training history

In [7]:
def plot_history(history):
    """Plots training and validation loss and accuracy."""
    if history is None:
        print("No training history to plot.")
        return

    plt.figure(figsize=(12, 4))
    
    # Plot Loss
    plt.subplot(1, 2, 1)
    plt.plot(history.history['loss'], label='Train Loss')
    if 'val_loss' in history.history:
        plt.plot(history.history['val_loss'], label='Validation Loss')
    plt.title('Model Loss')
    plt.ylabel('Loss')
    plt.xlabel('Epoch')
    plt.legend(loc='upper right')
    
    # Plot Accuracy
    plt.subplot(1, 2, 2)
    plt.plot(history.history['accuracy'], label='Train Accuracy')
    if 'val_accuracy' in history.history:
        plt.plot(history.history['val_accuracy'], label='Validation Accuracy')
    plt.title('Model Accuracy')
    plt.ylabel('Accuracy')
    plt.xlabel('Epoch')
    plt.legend(loc='lower right')
    
    plt.tight_layout()
    plt.savefig("training_history.png")
    print("Training history plot saved as training_history.png")
    # plt.show() # Uncomment if running in an environment that supports interactive plots

# Save Model in ONNX format

In [8]:
def save_model_onnx(keras_model_path, onnx_model_path, sequence_length, num_features):
    """Converts and saves the Keras model to ONNX format."""
    try:
        import tf2onnx
        import tensorflow as tf # Ensure tf is available for tf.keras.models.load_model

        print(f"Loading Keras model from {keras_model_path} for ONNX conversion...")
        model = tf.keras.models.load_model(keras_model_path) # Load the best saved Keras model

        # Define the input signature for the ONNX model
        # Batch size is dynamic (None), sequence length and num_features are fixed.
        spec = (tf.TensorSpec((None, sequence_length, num_features), tf.float32, name="input"),)
        
        print(f"Converting Keras model to ONNX format (saving to {onnx_model_path})...")
        model_proto, _ = tf2onnx.convert.from_keras(model, input_signature=spec, opset=13) # opset can be adjusted
        
        with open(onnx_model_path, "wb") as f:
            f.write(model_proto.SerializeToString())
        print(f"Model successfully converted and saved to {onnx_model_path}")

    except ImportError:
        print("tf2onnx library not found. Skipping ONNX conversion.")
        print("To install: pip install tf2onnx")
    except Exception as e:
        print(f"An error occurred during ONNX conversion: {e}")

# The main execution segment

In [9]:
if __name__ == "__main__":
    # Load data
    X_train_scaled, y_train, X_val_scaled, y_val, X_test_scaled, y_test, grid_id_to_idx = load_data(AI_READY_DATA_DIR)

    if X_train_scaled.size == 0:
        print("No training data loaded. Exiting.")
        exit()

    # Determine model parameters
    sequence_len = X_train_scaled.shape[1]
    num_feat = X_train_scaled.shape[2]
    num_classes_output = len(grid_id_to_idx)
    
    print(f"Sequence Length: {sequence_len}, Number of Features: {num_feat}, Number of Classes: {num_classes_output}")

    # Build the model
    lstm_model = build_model(sequence_len, num_feat, num_classes_output)
    
    # Train the model
    training_history = train_model(lstm_model, X_train_scaled, y_train, X_val_scaled, y_val)
    
    # Plot training history
    plot_history(training_history)

    # Load the best model saved by ModelCheckpoint for evaluation and ONNX conversion
    print(f"Loading best saved Keras model from {MODEL_SAVE_PATH} for final evaluation and ONNX export...")
    try:
        best_model = tf.keras.models.load_model(MODEL_SAVE_PATH)
    except Exception as e:
        print(f"Error loading the saved Keras model: {e}")
        print("Proceeding with the model from memory if available, but ONNX export might fail or use last state.")
        best_model = lstm_model # Fallback to model in memory

    # Evaluate the best model
    if X_test_scaled.size > 0 and y_test.size > 0:
        evaluate_model(best_model, X_test_scaled, y_test, num_classes_output)
    else:
        print("Skipping evaluation on test set as it's empty.")
        
    # Save in ONNX format (optional)
    save_model_onnx(MODEL_SAVE_PATH, ONNX_MODEL_SAVE_PATH, sequence_len, num_feat)

    print("LSTM Model Script Execution Finished.")

An error occurred while loading data: Object arrays cannot be loaded when allow_pickle=False


TypeError: cannot unpack non-iterable NoneType object