In [1]:
import numpy as np
import json
import os
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, Input
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import EarlyStopping
import matplotlib.pyplot as plt

2025-06-06 10:45:21.485887: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


# Configurations

In [2]:
# Directory that holds the preprocessed arrays (.npy) and JSON metadata.
ai_ready_dir = "Dataset/ai_ready_data"

# Status flag consulted by the loading step. It starts from the directory
# check so a missing directory is reflected in the flag instead of only being
# printed and then overwritten with True.
data_loaded_successfully = os.path.isdir(ai_ready_dir)
if not data_loaded_successfully:
    print(f"Error: Directory '{ai_ready_dir}' not found")

# File paths
X_train_file = os.path.join(ai_ready_dir, "X_train.npy")
y_train_file = os.path.join(ai_ready_dir, "y_train.npy")
X_val_file = os.path.join(ai_ready_dir, "X_val.npy")
y_val_file = os.path.join(ai_ready_dir, "y_val.npy")
X_test_file = os.path.join(ai_ready_dir, "X_test.npy")
y_test_file = os.path.join(ai_ready_dir, "y_test.npy")
grid_map_file = os.path.join(ai_ready_dir, "grid_id_to_index.json")
# Loaded for reference only; not used when building the model architecture.
feature_cols_file = os.path.join(ai_ready_dir, "feature_columns.json")

# Load the preprocessed Data

In [3]:
try:
    # Load datasets
    print("Loading data...")
    X_train_scaled = np.load(X_train_file, allow_pickle=True)
    y_train = np.load(y_train_file, allow_pickle=True)
    X_val_scaled = np.load(X_val_file, allow_pickle=True)
    y_val = np.load(y_val_file, allow_pickle=True)
    
    # Load test set
    X_test_scaled = None
    y_test = None
    if os.path.exists(X_test_file) and os.path.exists(y_test_file):
        X_test_scaled = np.load(X_test_file, allow_pickle=True)
        y_test = np.load(y_test_file, allow_pickle=True)
        print("Test data loaded.")
    else:
        print("Test data not found, proceeding without it for evaluation.")

    # Load grid_id to index mapping to determine num_classes
    with open(grid_map_file, "r") as f:
        grid_id_to_index = json.load(f)
    num_classes = len(grid_id_to_index)

    # Load feature columns 
    with open(feature_cols_file, "r") as f:
        feature_columns = json.load(f)
    
    print("Data loaded successfully.")

except FileNotFoundError as e:
    print(f"Error loading data files: {e}")
    print("Please ensure all .npy and .json files are in the 'ai_ready_data' directory.")
    data_loaded_successfully = False
except Exception as e:
    print(f"An unexpected error occurred during data loading: {e}")
    data_loaded_successfully = False

if data_loaded_successfully:
    print(f"X_train_scaled shape: {X_train_scaled.shape}")
    print(f"y_train shape: {y_train.shape}")
    print(f"X_val_scaled shape: {X_val_scaled.shape}")
    print(f"y_val shape: {y_val.shape}")
    if X_test_scaled is not None:
        print(f"X_test_scaled shape: {X_test_scaled.shape}")
        print(f"y_test shape: {y_test.shape}")
    print(f"Number of features: {X_train_scaled.shape[2]}")
    print(f"Sequence length: {X_train_scaled.shape[1]}")
    print(f"Number of unique grid_ids (classes): {num_classes}")
    # Ensure input data is float32
    if X_train_scaled.dtype == 'object':
        print("Attempting to convert X_train_scaled to float32...")
        try:
            X_train_scaled = X_train_scaled.astype(np.float32)
            print(f"X_train_scaled dtype after conversion: {X_train_scaled.dtype}")
        except ValueError as ve:
            print(f"ValueError during X_train_scaled conversion: {ve}")
            print("This often means non-numeric strings are present. Please check data preparation.")
            data_loaded_successfully = False # Halt further processing
    if X_val_scaled.dtype == 'object':
        print("Attempting to convert X_val_scaled to float32...")
        try:
            X_val_scaled = X_val_scaled.astype(np.float32)
            print(f"X_val_scaled dtype after conversion: {X_val_scaled.dtype}")
        except ValueError as ve:
            print(f"ValueError during X_val_scaled conversion: {ve}")
            data_loaded_successfully = False # Halt further processing
            
    if X_test_scaled is not None and X_test_scaled.dtype == 'object':
        print("Attempting to convert X_test_scaled to float32...")
        try:
            X_test_scaled = X_test_scaled.astype(np.float32)
            print(f"X_test_scaled dtype after conversion: {X_test_scaled.dtype}")
        except ValueError as ve:
            print(f"ValueError during X_test_scaled conversion: {ve}")
            # data_loaded_successfully = False # Potentially halt if test is crucial now

Loading data...
Test data loaded.
Data loaded successfully.
X_train_scaled shape: (4792, 10, 252)
y_train shape: (4792,)
X_val_scaled shape: (1026, 10, 252)
y_val shape: (1026,)
X_test_scaled shape: (1028, 10, 252)
y_test shape: (1028,)
Number of features: 252
Sequence length: 10
Number of unique grid_ids (classes): 212350
Attempting to convert X_train_scaled to float32...
X_train_scaled dtype after conversion: float32
Attempting to convert X_val_scaled to float32...
X_val_scaled dtype after conversion: float32
Attempting to convert X_test_scaled to float32...
X_test_scaled dtype after conversion: float32


# Data Preprocessing for LSTM

In [4]:
# One-hot encode the integer class labels, as required by the
# categorical_crossentropy loss.
# NOTE(review): every encoded array has shape (n_samples, num_classes). In the
# recorded run that is (4792, 212350) for the training split, roughly 4 GB,
# which matches the 4070324800-byte allocation warning logged during training.
# Integer labels with a sparse categorical loss would avoid this, but that
# also requires changing the loss used at compile time. TODO: consider.
y_train_encoded = to_categorical(y_train, num_classes=num_classes)
y_val_encoded = to_categorical(y_val, num_classes=num_classes)

# Always bind y_test_encoded (None when there is no test split) so later
# `y_test_encoded is not None` checks are meaningful and no stale value from a
# previous run of this cell survives.
y_test_encoded = (
    to_categorical(y_test, num_classes=num_classes) if y_test is not None else None
)

print(f"y_train_encoded shape: {y_train_encoded.shape}")
print(f"y_val_encoded shape: {y_val_encoded.shape}")
if y_test_encoded is not None:
    print(f"y_test_encoded shape: {y_test_encoded.shape}")

# Define sequence_length and num_features from the loaded data
# (axis 1 and axis 2 of the training inputs respectively).
sequence_length = X_train_scaled.shape[1]
num_features = X_train_scaled.shape[2]

y_train_encoded shape: (4792, 212350)
y_val_encoded shape: (1026, 212350)
y_test_encoded shape: (1028, 212350)


# Define Model Architecture

In [5]:
# Seed the Python, NumPy and TensorFlow random generators before any layer is
# created so that runs are repeatable across kernel restarts.
SEED = 42
tf.keras.utils.set_random_seed(SEED)

# Architecture: a single LSTM layer summarising each input sequence, one
# dense hidden layer, and a softmax over all classes. Dropout follows both
# the LSTM and the hidden dense layer.
model = Sequential([
    # Input: (sequence_length, num_features) per sample.
    Input(shape=(sequence_length, num_features)),
    # return_sequences=False: only the final step's output is passed on,
    # because this is the only LSTM layer.
    LSTM(128, return_sequences=False),
    Dropout(0.3),
    # Dense hidden layer
    Dense(128, activation='relu'),
    Dropout(0.3),
    # Output layer: one probability per class.
    Dense(num_classes, activation='softmax'),
])

2025-06-06 10:45:27.474173: W external/local_xla/xla/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 108723200 exceeds 10% of free system memory.
2025-06-06 10:45:27.499715: W external/local_xla/xla/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 108723200 exceeds 10% of free system memory.
2025-06-06 10:45:27.513892: W external/local_xla/xla/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 108723200 exceeds 10% of free system memory.


# Model compilation

In [6]:
# Multi-class setup: Adam optimiser, categorical cross-entropy loss on the
# one-hot targets, and accuracy tracked as the reported metric.
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Print the layer-by-layer overview of the compiled model.
model.summary()

# Train the Model

In [7]:
# Starting-point hyperparameters: 20 epochs, batches of 64. Early stopping
# watches validation loss, waits 5 epochs without improvement, and then
# restores the best weights seen.
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True,
)

history = model.fit(
    x=X_train_scaled,
    y=y_train_encoded,
    validation_data=(X_val_scaled, y_val_encoded),
    epochs=20,      # may need more for convergence
    batch_size=64,  # tune to memory and dataset size
    callbacks=[early_stopping],
)

# Draw loss and accuracy curves side by side, one panel per metric.
print("\nPlotting training history...")
fig, (loss_ax, accuracy_ax) = plt.subplots(1, 2, figsize=(12, 4))

# (axes, train key, validation key, train label, validation label, title, y label)
panels = [
    (loss_ax, 'loss', 'val_loss', 'Train Loss', 'Validation Loss', 'Model Loss', 'Loss'),
    (accuracy_ax, 'accuracy', 'val_accuracy', 'Train Accuracy', 'Validation Accuracy',
     'Model Accuracy', 'Accuracy'),
]
for ax, train_key, val_key, train_label, val_label, panel_title, y_label in panels:
    ax.plot(history.history[train_key], label=train_label)
    ax.plot(history.history[val_key], label=val_label)
    ax.set_title(panel_title)
    ax.set_xlabel('Epoch')
    ax.set_ylabel(y_label)
    ax.legend()

fig.tight_layout()

# Write the figure to disk; call plt.show() instead/as well when the
# environment displays plots directly.
plot_filename = "training_history.png"
fig.savefig(plot_filename)
print(f"Training history plot saved to {plot_filename}")

2025-06-06 10:46:53.379570: W external/local_xla/xla/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 4070324800 exceeds 10% of free system memory.


Epoch 1/20


2025-06-06 10:47:54.183585: W external/local_xla/xla/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 108723200 exceeds 10% of free system memory.


[1m12/75[0m [32m━━━[0m[37m━━━━━━━━━━━━━━━━━[0m [1m25s[0m 406ms/step - accuracy: 0.4687 - loss: 12.1535

KeyboardInterrupt: 

# Evaluation

In [None]:
# Evaluate on the test split only when both its inputs and its encoded
# labels are available; otherwise report that evaluation is skipped.
if X_test_scaled is None or y_test_encoded is None:
    print("\nSkipping model evaluation on test data as it was not loaded.")
else:
    print("\nEvaluating model on test data...")
    test_loss, test_accuracy = model.evaluate(
        X_test_scaled,
        y_test_encoded,
        verbose=0,
    )
    print(f"Test Loss: {test_loss:.4f}")
    print(f"Test Accuracy: {test_accuracy:.4f}")

# Persist the model to a .keras file and print how to reload it.
model_save_path = "lstm_location_predictor.keras"
model.save(model_save_path)
print(f"\nTrained model saved to {model_save_path}")
print("You can later load it using: tf.keras.models.load_model('lstm_location_predictor.keras')")
print("\nFor deployment, consider converting to ONNX format as mentioned in your project proposal.")

# Save Model in ONNX format