# 5. Predict Speed and Acceleration

In [1]:
# Papermill parameters
# Every value defaults to None here and is expected to be overridden at
# execution time; nothing below works while the paths are still None.
# Object-store path of the route CSV that is read into `df_route`.
INPUT_TOPOLOGY_PATH = None
# Object-store path of the pickled model bundle (a dict holding the keys
# "scaler", "speed_model" and "feature_cols").
MODEL_FILE_PATH = None
# Object-store path the prediction CSV is written to.
OUTPUT_PREDICTIONS_PATH = None
# Passed to s3fs as the client `endpoint_url`.
MINIO_ENDPOINT = None
# Passed to s3fs as `key`.
MINIO_ACCESS_KEY = None
# Passed to s3fs as `secret`.
MINIO_SECRET_KEY = None


In [2]:
import s3fs
import pandas as pd
import pickle
import numpy as np
from geopy.distance import geodesic
from datetime import datetime
import googlemaps
import requests
import json


ModuleNotFoundError: No module named 's3fs'

In [None]:
# Build the S3-compatible filesystem handle from the injected MinIO settings.
s3_client_options = {'endpoint_url': MINIO_ENDPOINT}
fs = s3fs.S3FileSystem(
    key=MINIO_ACCESS_KEY,
    secret=MINIO_SECRET_KEY,
    client_kwargs=s3_client_options,
)

# Read the route topology CSV straight from the object store.
with fs.open(INPUT_TOPOLOGY_PATH, 'rb') as topology_file:
    df_route = pd.read_csv(topology_file)



In [None]:
# Quick sanity check of what was loaded: column names, then the first rows.
route_column_names = df_route.columns.tolist()
route_preview = df_route.head()
print("Route columns:", route_column_names)
print(route_preview)

In [None]:
# NOTE(review): pickle.load can execute arbitrary code embedded in the file,
# so MODEL_FILE_PATH must only ever point at a trusted artifact.
with fs.open(MODEL_FILE_PATH, 'rb') as model_file:
    model = pickle.load(model_file)

# The bundle is a dict; pull out the three pieces the prediction step needs.
scaler, speed_model, feature_cols = (
    model[bundle_key] for bundle_key in ("scaler", "speed_model", "feature_cols")
)

In [None]:
def predict_speed_and_accel_for_route(df_route, scaler, speed_model, feature_cols,
                                      initial_speed=5.0):
    """Predict a speed for every route point, one row at a time.

    The prediction is autoregressive: the speed predicted for row ``i`` is fed
    back as ``speed_mps_prev1`` of row ``i + 1`` and ``speed_mps_prev2`` of row
    ``i + 2``, so rows must be processed sequentially.

    Rows are addressed by position, so the frame may carry any index (it does
    not have to be a default ``RangeIndex``) and an empty frame is returned
    empty instead of silently gaining rows.

    Parameters
    ----------
    df_route : pandas.DataFrame
        Route points. It is modified in place (columns are added/overwritten)
        and must contain whatever ``add_missing_features_for_prediction``
        requires plus every column named in ``feature_cols``.
    scaler : object
        Fitted transformer exposing ``transform(X)``.
    speed_model : object
        Fitted regressor exposing ``predict(X)``.
    feature_cols : sequence of str
        Feature column names, in the order the scaler/model expect.
    initial_speed : float, default 5.0
        Speed assigned to the first row and used as the lag value wherever no
        earlier prediction exists.

    Returns
    -------
    pandas.DataFrame
        The same frame, with ``predicted_speed``, ``speed_mps_prev1``,
        ``speed_mps_prev2`` and ``predicted_accel`` filled in.
        ``predicted_accel`` is the plain difference between consecutive
        predicted speeds (it is not divided by any time step).
    """
    feature_cols = list(feature_cols)
    n_rows = len(df_route)

    # Initialize predicted speed: zero everywhere except the starting row.
    speeds = np.zeros(n_rows, dtype=float)
    speeds[:1] = initial_speed  # slice assignment is a no-op on an empty route
    df_route["predicted_speed"] = speeds

    # Add missing dynamic columns
    df_route = add_missing_features_for_prediction(df_route)

    # Work on NumPy buffers: positional access is index-agnostic and avoids
    # per-row DataFrame lookups that may yield object-dtype rows.
    features = df_route[feature_cols].to_numpy(dtype=float, copy=True)
    prev1 = df_route["speed_mps_prev1"].to_numpy(dtype=float).copy()
    prev2 = df_route["speed_mps_prev2"].to_numpy(dtype=float).copy()
    prev1[:1] = initial_speed
    prev2[:1] = initial_speed

    # The lag columns only influence the model when listed as features.
    column_position = {name: pos for pos, name in enumerate(feature_cols)}
    prev1_pos = column_position.get("speed_mps_prev1")
    prev2_pos = column_position.get("speed_mps_prev2")

    for i in range(1, n_rows):
        # Set previous speeds from what has been predicted so far.
        prev1[i] = speeds[i - 1]
        prev2[i] = speeds[i - 2] if i >= 2 else initial_speed
        if prev1_pos is not None:
            features[i, prev1_pos] = prev1[i]
        if prev2_pos is not None:
            features[i, prev2_pos] = prev2[i]

        # Scale + predict a single-row batch; ravel tolerates (1,) and (1, 1).
        x_scaled = scaler.transform(features[i:i + 1])
        speeds[i] = np.ravel(speed_model.predict(x_scaled))[0]

    df_route["predicted_speed"] = speeds
    df_route["speed_mps_prev1"] = prev1
    df_route["speed_mps_prev2"] = prev2

    # Compute acceleration as the step-to-step change in predicted speed.
    df_route["predicted_accel"] = df_route["predicted_speed"].diff().fillna(0)

    return df_route

def calculate_bearing(lat1, lon1, lat2, lon2):
    """Return the initial bearing from point 1 to point 2.

    Inputs are latitudes/longitudes in degrees; the result is in degrees,
    normalized into the range [0, 360).
    """
    phi_start = np.radians(lat1)
    phi_end = np.radians(lat2)
    delta_lambda = np.radians(lon2 - lon1)

    # First and second arguments of arctan2, in that order.
    east = np.sin(delta_lambda) * np.cos(phi_end)
    north = (
        np.cos(phi_start) * np.sin(phi_end)
        - np.sin(phi_start) * np.cos(phi_end) * np.cos(delta_lambda)
    )

    # arctan2 yields (-180, 180]; shift and wrap to a compass-style angle.
    signed_bearing_deg = np.degrees(np.arctan2(east, north))
    return (signed_bearing_deg + 360) % 360

def add_missing_features_for_prediction(df, initial_speed=5.0, default_traffic_level=0.5):
    """Add the per-row dynamic feature columns needed at prediction time.

    The frame is modified in place and also returned.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``position_lat``, ``position_long``, ``distance_m`` and
        ``predicted_speed``. ``traffic_level`` is optional.
    initial_speed : float, default 5.0
        Value used for the lagged-speed columns where no earlier row exists.
    default_traffic_level : float, default 0.5
        Value used for missing ``traffic_level`` entries, or for the whole
        column when it is absent.

    Returns
    -------
    pandas.DataFrame
        ``df`` with ``delta_lat``, ``delta_lon``, ``delta_dist``,
        ``speed_mps_prev1``, ``speed_mps_prev2`` and ``traffic_level`` set.

    Raises
    ------
    KeyError
        If a required input column is missing. The check runs before any
        column is written, so a failing call leaves ``df`` untouched.
    """
    required_cols = ("position_lat", "position_long", "distance_m", "predicted_speed")
    missing_cols = [col for col in required_cols if col not in df.columns]
    if missing_cols:
        raise KeyError(f"Missing required route columns: {missing_cols}")

    # Row-to-row coordinate deltas; the first row has no predecessor -> 0.
    df["delta_lat"] = df["position_lat"].diff().fillna(0)
    df["delta_lon"] = df["position_long"].diff().fillna(0)
    df["delta_dist"] = df["distance_m"].fillna(0)

    # Lagged speeds, back-filled with the starting speed at the route head.
    df["speed_mps_prev1"] = df["predicted_speed"].shift(1).fillna(initial_speed)
    df["speed_mps_prev2"] = df["predicted_speed"].shift(2).fillna(initial_speed)

    # Fill gaps in traffic_level, or create the column when it is absent.
    if "traffic_level" in df.columns:
        df["traffic_level"] = df["traffic_level"].fillna(default_traffic_level)
    else:
        df["traffic_level"] = default_traffic_level
    return df


In [None]:
# --- STATIC FEATURE ENGINEERING (Required for ML inference) ---

# Pair every route point with its predecessor once; both the distance and the
# bearing features are derived from these (previous, current) coordinate pairs.
consecutive_points = [
    (
        (df_route.loc[row - 1, "position_lat"], df_route.loc[row - 1, "position_long"]),
        (df_route.loc[row, "position_lat"], df_route.loc[row, "position_long"]),
    )
    for row in range(1, len(df_route))
]

# 1. Step distance in meters (0.0 for the first point) and its running total
distances = [0.0] + [
    geodesic(prev_point, cur_point).meters
    for prev_point, cur_point in consecutive_points
]
df_route["distance_m"] = distances
df_route["distance_cum_m"] = df_route["distance_m"].cumsum()

# 2. Elevation gain: altitude change relative to the previous point
df_route["elev_gain_m"] = df_route["enhanced_altitude"].diff().fillna(0)

# 3. Bearing from the previous point to the current one (0.0 for the first point)
bearings = [0.0] + [
    calculate_bearing(prev_point[0], prev_point[1], cur_point[0], cur_point[1])
    for prev_point, cur_point in consecutive_points
]
df_route["bearing"] = bearings

# 4. Heading change: absolute bearing difference, folded so it never exceeds 180
raw_heading_delta = df_route["bearing"].diff().abs().fillna(0)
df_route["heading_change"] = raw_heading_delta.apply(
    lambda delta: min(delta, 360 - delta)
)

# 5. Turn count: number of >30 degree heading changes in a trailing 30-point window
df_route["is_turn"] = df_route["heading_change"].gt(30).astype(int)
df_route["turn_count"] = df_route["is_turn"].rolling(window=30, min_periods=1).sum()

# 6. Initial traffic level (will be updated later)
df_route["traffic_level"] = 0.8
# --- END STATIC FEATURE ENGINEERING ---

In [None]:
# Run the sequential speed/acceleration prediction over the engineered route.
df_pred = predict_speed_and_accel_for_route(df_route, scaler, speed_model, feature_cols)


In [None]:
# Multiply by 3.6 for km/h (presumably predicted_speed is in m/s, as the
# "*_mps_*" feature names suggest — TODO confirm).
df_pred["speed_kmh"] = df_pred["predicted_speed"].mul(3.6)

In [None]:
# Persist the predictions as CSV (without the index column) to the object store.
with fs.open(OUTPUT_PREDICTIONS_PATH, 'w') as predictions_file:
    df_pred.to_csv(predictions_file, index=False)
