In [1]:
import numpy as np

# Load input (features) and output (targets)
data = np.load("X.npy")  # Shape: (samples, time_steps, features)
target = np.load("ynpy.npy")  # Shape: (samples, output_dim)

print(f"Data shape: {data.shape}")  # Should be (samples, 5, 3)
print(f"Target shape: {target.shape}")  # Should be (samples, 3)

Data shape: (50, 7830, 3)
Target shape: (50, 7830, 3)


In [2]:

import tensorflow as tf

In [3]:


def create_time_windows(imu_data, gps_data, input_window=5):
    """
    Convert IMU and GPS time-series data into overlapping sequences for LSTM training.

    Args:
    - imu_data (np.array): IMU data of shape (runs, timesteps, imu_features)
    - gps_data (np.array): GPS data of shape (runs, timesteps, gps_features)
    - input_window (int): Number of time steps per sequence

    Returns:
    - X: Input sequences of shape (samples, input_window, imu_features + gps_features)
    - y: Corresponding target GPS values of shape (samples, gps_features)
    """
    X, y = [], []

    num_runs, timesteps, imu_features = imu_data.shape
    gps_features = gps_data.shape[2]

    for run in range(num_runs):
        for i in range(timesteps - input_window):
            # Extract past IMU and GPS time window
            imu_seq = imu_data[run, i:i+input_window, :]
            gps_seq = gps_data[run, i:i+input_window, :]

            # Concatenate IMU and GPS for input
            input_seq = np.concatenate([imu_seq, gps_seq], axis=-1)
            X.append(input_seq)

            # Extract target (next time step GPS values)
            y.append(gps_data[run, i+input_window, :])

    return np.array(X), np.array(y)

# Load IMU and GPS data
imu_data = np.load("X.npy")  # Shape: (runs, timesteps, imu_features)
gps_data = np.load("ynpy.npy")  # Shape: (runs, timesteps, gps_features)

# Convert data into time-windowed sequences
X, y = create_time_windows(imu_data, gps_data, input_window=5)

print(f"Processed Data Shape: {X.shape}")  # (samples, 5, imu_features + gps_features)
print(f"Processed Labels Shape: {y.shape}")  # (samples, gps_features)

# Save processed data
np.save("X_processed.npy", X)
np.save("y_processed.npy", y)

Processed Data Shape: (391250, 5, 6)
Processed Labels Shape: (391250, 3)


In [4]:
from sklearn.preprocessing import MinMaxScaler, StandardScaler
import numpy as np

# Initialize scalers
scaler = MinMaxScaler()  # Use MinMaxScaler for GPS coordinates
imu_scaler = StandardScaler()  # Use StandardScaler for IMU data if necessary

# Flatten the data to fit the scaler
X_flattened = X.reshape(-1, X.shape[-1])
y_flattened = y.reshape(-1, y.shape[-1])

# Apply normalization
X_scaled = scaler.fit_transform(X_flattened)
y_scaled = scaler.fit_transform(y_flattened)

# Reshape back to original dimensions
X_scaled = X_scaled.reshape(X.shape)
y_scaled = y_scaled.reshape(y.shape)

In [5]:
from sklearn.model_selection import train_test_split

# Split data (80% Train, 20% Test)
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y_scaled, test_size=0.2, random_state=42, shuffle=True)

# Print shapes
print(f"X_train shape: {X_train.shape}, X_test shape: {X_test.shape}")
print(f"y_train shape: {y_train.shape}, y_test shape: {y_test.shape}")

X_train shape: (313000, 5, 6), X_test shape: (78250, 5, 6)
y_train shape: (313000, 3), y_test shape: (78250, 3)


In [6]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, LSTM, Dense, Dot, Activation, Concatenate, Flatten

# Define input shapes
time_steps = X.shape[1]  # Sequence length (past 5 seconds of data)
num_features = X.shape[2]  # Number of input features per time step (IMU + GPS)
output_dim = y.shape[1]
# Define inputs
inputs = Input(shape=(time_steps, num_features), name="Input_Features")

# Encoder (LSTM)
encoder_lstm = LSTM(64, return_sequences=True, return_state=True, name="Encoder_LSTM")
encoder_outputs, state_h, state_c = encoder_lstm(inputs)

# Attention Mechanism (Local Attention)
# Calculate attention scores (alignment scores)
attention_scores = Dense(1, activation='tanh', name="Attention_Scores")(encoder_outputs)

#Normalize scores to obtain attention weights
attention_weights = Activation('softmax', name="Attention_Weights")(attention_scores)

#Compute the context vector as the weighted sum of encoder outputs
context_vector = Dot(axes=1, name="Context_Vector")([attention_weights, encoder_outputs])

# Decoder (Fully Connected Layer)
decoder_dense = Dense(64, activation='relu', name="Decoder_Dense")(context_vector)

# Output Layer (Latitude, Longitude, Altitude)
output_layer = Dense(3, activation='linear', name="Trajectory_Output")(decoder_dense)

model = Model(inputs=inputs, outputs=output_layer, name="Trajectory_Prediction_Model")
model.compile(optimizer='adam', loss='mse', metrics=['mae'])
model.summary()

In [10]:
from tensorflow.keras.callbacks import EarlyStopping

# Early stopping to monitor validation loss and stop if no improvement
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

# Train the model
history = model.fit(
    X_train, y_train,
    epochs=150,  # Maximum epochs
    batch_size=64,
    validation_split=0.2,  # 20% of training data for validation
    callbacks=[early_stopping]  # Use early stopping
)

# Save the trained model
model.save('trajectory_prediction_model.h5')

Epoch 1/150
[1m3912/3913[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 7ms/step - loss: 0.0880 - mae: 0.2444



[1m3913/3913[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 8ms/step - loss: 0.0880 - mae: 0.2444 - val_loss: 0.0878 - val_mae: 0.2491
Epoch 2/150
[1m3913/3913[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 9ms/step - loss: 0.0877 - mae: 0.2440 - val_loss: 0.0875 - val_mae: 0.2436
Epoch 3/150
[1m3913/3913[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m43s[0m 10ms/step - loss: 0.0876 - mae: 0.2437 - val_loss: 0.0874 - val_mae: 0.2420
Epoch 4/150
[1m3913/3913[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m35s[0m 8ms/step - loss: 0.0876 - mae: 0.2440 - val_loss: 0.0874 - val_mae: 0.2437
Epoch 5/150
[1m3913/3913[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 8ms/step - loss: 0.0876 - mae: 0.2438 - val_loss: 0.0874 - val_mae: 0.2421
Epoch 6/150
[1m3913/3913[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m35s[0m 9ms/step - loss: 0.0877 - mae: 0.2442 - val_loss: 0.0874 - val_mae: 0.2450
Epo



In [11]:
# Evaluate the model on the test set
test_loss, test_mae = model.evaluate(X_test, y_test, verbose=1)

print(f"Test Loss (MSE): {test_loss}")
print(f"Test Mean Absolute Error (MAE): {test_mae}")

[1m2446/2446[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 3ms/step - loss: 0.0871 - mae: 0.2432
Test Loss (MSE): 0.08756671845912933
Test Mean Absolute Error (MAE): 0.24403154850006104


In [13]:
y_pred = model.predict(X_test)
y_pred = np.squeeze(y_pred)

[1m  28/2446[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m4s[0m 2ms/step    



[1m2446/2446[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 2ms/step


In [14]:

from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import numpy as np

# Compute MAE, RMSE, and R^2 score for Latitude, Longitude, and Altitude
mae = mean_absolute_error(y_test, y_pred, multioutput='raw_values')
rmse = np.sqrt(mean_squared_error(y_test, y_pred, multioutput='raw_values'))
r2 = r2_score(y_test, y_pred, multioutput='raw_values')

# Print results
print(f"Mean Absolute Error (MAE): Latitude={mae[0]:.6f}, Longitude={mae[1]:.6f}, Altitude={mae[2]:.6f}")
print(f"Root Mean Squared Error (RMSE): Latitude={rmse[0]:.6f}, Longitude={rmse[1]:.6f}, Altitude={rmse[2]:.6f}")
print(f"R² Score: Latitude={r2[0]:.6f}, Longitude={r2[1]:.6f}, Altitude={r2[2]:.6f}")

Mean Absolute Error (MAE): Latitude=0.267549, Longitude=0.248628, Altitude=0.207124
Root Mean Squared Error (RMSE): Latitude=0.300495, Longitude=0.290920, Altitude=0.285379
R² Score: Latitude=0.024613, Longitude=0.021223, Altitude=0.027897
