In [20]:
###############################
# TEST / INFERENCE CODE
###############################

import os
import glob
import json
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras import models
from sklearn.preprocessing import RobustScaler
from scipy.signal import savgol_filter
from geopy.distance import geodesic
import warnings
from scipy.stats import skew, kurtosis
from sklearn.ensemble import IsolationForest
import folium

warnings.filterwarnings("ignore", category=RuntimeWarning)

# -----------------------------
# 1. Helper Functions
# -----------------------------
def apply_kalman_filter(series, process_variance=1e-5, measurement_variance=1e-2):
    """Smooth a 1-D signal with a scalar Kalman filter.

    The state model is a constant value: each step first inflates the error
    estimate by ``process_variance`` and then blends the previous estimate
    with the new measurement using the resulting gain.

    Args:
        series: 1-D array-like of measurements (list, NumPy array or pandas
            Series). It is converted to a float array first, so a Series is
            always read by position, whatever its index labels are.
        process_variance: Amount added to the error estimate before every
            update.
        measurement_variance: Measurement-noise variance used in the gain.

    Returns:
        numpy.ndarray of floats with one estimate per input sample (empty
        when ``series`` is empty).
    """
    # Positional float view of the input; plain ``series[i]`` would be a
    # label lookup on a pandas Series with a non-default index.
    measurements = np.asarray(series, dtype=float)
    n = len(measurements)
    estimates = np.zeros(n)
    error_estimate = 1.0
    # The filter is seeded with the first measurement.
    estimate = measurements[0] if n else 0.0
    for i in range(n):
        error_estimate += process_variance
        kalman_gain = error_estimate / (error_estimate + measurement_variance)
        estimate = estimate + kalman_gain * (measurements[i] - estimate)
        error_estimate = (1 - kalman_gain) * error_estimate
        estimates[i] = estimate
    return estimates

def z_thresh_detection(series, threshold=3):
    """Flag samples whose z-score magnitude exceeds ``threshold``.

    Args:
        series: Numeric pandas Series (any object offering ``mean()``,
            ``std()`` and element-wise arithmetic works the same way).
        threshold: Absolute z-score above which a sample is flagged.

    Returns:
        Tuple ``(anomaly_mask, z_scores)``: ``anomaly_mask`` holds 1 for
        flagged samples and 0 otherwise; ``z_scores`` holds the standardized
        values. When the standard deviation is zero or not finite (constant
        or single-element input) the z-scores are all 0 instead of NaN, so
        nothing is flagged.
    """
    mean_val = series.mean()
    std_val = series.std()
    centered = series - mean_val
    if not np.isfinite(std_val) or std_val == 0:
        # No spread to standardize against; avoid a 0/0 division.
        z_scores = centered * 0.0
    else:
        z_scores = centered / std_val
    anomaly_mask = (np.abs(z_scores) > threshold).astype(int)
    return anomaly_mask, z_scores

def isolation_forest_anomaly_detection(df, contamination=0.01):
    """Add an ``isof_anomaly`` column computed by an IsolationForest.

    The forest is fitted on the ``acc_magnitude`` and ``jerk`` columns (NaNs
    replaced by 0) with a fixed ``random_state`` of 42.

    Args:
        df: DataFrame containing ``acc_magnitude`` and ``jerk``. It is
            modified in place.
        contamination: Passed through to ``IsolationForest``.

    Returns:
        The same DataFrame, with ``isof_anomaly`` set to 1 where the forest
        predicted -1 and 0 elsewhere.
    """
    # IsolationForest cannot be fitted on NaNs, so neutralize them first.
    model_inputs = df[['acc_magnitude', 'jerk']].fillna(0)
    forest = IsolationForest(contamination=contamination, random_state=42)
    is_outlier = forest.fit_predict(model_inputs) == -1
    df['isof_anomaly'] = is_outlier.astype(int)
    return df

def load_and_preprocess_df(df, process_variance=1e-5, measurement_variance=1e-2,
                           sg_window_length=21, sg_polyorder=3):
    """Clean, smooth and annotate raw sensor rows for window feature extraction.

    Processing steps, in order:

    1. Drop rows whose ``latitude``/``longitude`` is missing or not finite.
    2. Parse ``timestamp`` (first as epoch seconds, then as a generic
       datetime); if the column is absent, synthesize one sample per second.
    3. Rename raw sensor columns (``accelerometerX`` -> ``acc_x``, ...,
       ``potholes`` -> ``pothole_label``) and create any missing sensor or
       label column filled with 0.
    4. Smooth the six sensor channels with ``apply_kalman_filter`` and, when
       there are at least ``sg_window_length`` samples, a Savitzky-Golay
       filter. The unfiltered ``acc_x`` is kept as ``acc_x_raw``.
    5. Derive ``acc_magnitude``, ``gyro_magnitude``, ``jerk`` (time
       derivative of ``acc_magnitude``) and ``distance`` (geodesic meters
       from the previous row).
    6. Replace infinities, forward/backward fill, and robust-scale the sensor
       channels, magnitudes and jerk.
    7. Add anomaly indicators: ``z_anomaly``, ``z_score``, ``z_diff``,
       ``z_diff_anomaly``, ``rolling_mean``, ``rolling_std``, ``std_anomaly``,
       ``g_zero_flag`` and ``isof_anomaly``.

    Args:
        df: Raw DataFrame; must contain ``latitude`` and ``longitude``. It is
            not modified.
        process_variance: Forwarded to ``apply_kalman_filter``.
        measurement_variance: Forwarded to ``apply_kalman_filter``.
        sg_window_length: Savitzky-Golay window length.
        sg_polyorder: Savitzky-Golay polynomial order.

    Returns:
        A new DataFrame with the columns described above.

    Raises:
        ValueError: If fewer than two rows have valid coordinates (a time
            derivative cannot be computed).
    """
    rename_map = {
        'accelerometerX': 'acc_x',
        'accelerometerY': 'acc_y',
        'accelerometerZ': 'acc_z',
        'gyroX': 'gyro_x',
        'gyroY': 'gyro_y',
        'gyroZ': 'gyro_z',
        'potholes': 'pothole_label',
    }
    sensor_cols = ['acc_x', 'acc_y', 'acc_z', 'gyro_x', 'gyro_y', 'gyro_z']

    # Explicit copy: the columns assigned below must land on an independent
    # frame, not on a filtered view of the caller's data.
    df = df.dropna(subset=['latitude', 'longitude'])
    df = df[np.isfinite(df['latitude']) & np.isfinite(df['longitude'])].copy()
    if len(df) < 2:
        raise ValueError(
            "At least two rows with finite latitude/longitude are required "
            f"for preprocessing; got {len(df)}."
        )

    if 'timestamp' in df.columns:
        try:
            df['timestamp'] = pd.to_datetime(df['timestamp'], unit='s')
        except (ValueError, TypeError, OverflowError):
            # Not epoch seconds; let pandas infer the format instead.
            df['timestamp'] = pd.to_datetime(df['timestamp'])
    else:
        df['timestamp'] = pd.date_range(start='2025-01-01', periods=len(df), freq='s')

    # rename() ignores keys that are not present, so the full map is safe.
    df = df.rename(columns=rename_map)
    for col in sensor_cols + ['pothole_label']:
        if col not in df.columns:
            df[col] = 0

    df['acc_x_raw'] = df['acc_x'].copy()
    for col in sensor_cols:
        filtered = apply_kalman_filter(df[col].values, process_variance, measurement_variance)
        if len(filtered) >= sg_window_length:
            df[col] = savgol_filter(filtered, window_length=sg_window_length,
                                    polyorder=sg_polyorder, mode='nearest')
        else:
            df[col] = filtered

    df['acc_magnitude'] = np.linalg.norm(df[['acc_x', 'acc_y', 'acc_z']].to_numpy(), axis=1)
    df['gyro_magnitude'] = np.linalg.norm(df[['gyro_x', 'gyro_y', 'gyro_z']].to_numpy(), axis=1)

    # np.gradient expects the sample *coordinates* (elapsed time), not the
    # per-sample time differences. Repeated timestamps give a zero spacing and
    # therefore inf/NaN here; those are cleaned up a few lines below.
    # NOTE(review): if the model was trained on features from a pipeline that
    # passed time differences to np.gradient, apply the same correction there
    # and re-validate the model.
    elapsed_seconds = (df['timestamp'] - df['timestamp'].min()).dt.total_seconds().to_numpy()
    df['jerk'] = np.gradient(df['acc_magnitude'].to_numpy(), elapsed_seconds)

    # Geodesic distance (meters) between consecutive rows; the first row is 0.
    coords = list(zip(df['latitude'].to_numpy(), df['longitude'].to_numpy()))
    df['distance'] = [0.0] + [
        geodesic(prev_point, curr_point).meters
        for prev_point, curr_point in zip(coords, coords[1:])
    ]

    df = df.replace([np.inf, -np.inf], np.nan)
    df = df.ffill().bfill().dropna(
        subset=['acc_x', 'acc_y', 'acc_z', 'gyro_x', 'gyro_y', 'gyro_z', 'latitude', 'longitude']
    )

    # NOTE(review): the scaler is fitted on the data passed in, so scaling
    # statistics come from this batch only - confirm this matches training.
    scaler = RobustScaler()
    cols_to_scale = sensor_cols + ['acc_magnitude', 'gyro_magnitude', 'jerk']
    df[cols_to_scale] = scaler.fit_transform(df[cols_to_scale])

    df['z_anomaly'], df['z_score'] = z_thresh_detection(df['acc_magnitude'], threshold=3)
    df['z_diff'] = df['z_score'].diff().fillna(0)
    df['z_diff_anomaly'] = (np.abs(df['z_diff']) > 2).astype(int)
    window_size = 50
    rolling = df['acc_magnitude'].rolling(window=window_size, min_periods=1)
    df['rolling_mean'] = rolling.mean()
    df['rolling_std'] = rolling.std().fillna(0)
    # The small epsilon keeps the comparison meaningful when the rolling
    # standard deviation is exactly 0.
    df['std_anomaly'] = (
        np.abs(df['acc_magnitude'] - df['rolling_mean']) > 3 * (df['rolling_std'] + 1e-8)
    ).astype(int)
    threshold_low = 0.1
    df['g_zero_flag'] = (df['acc_magnitude'] < threshold_low).astype(int)

    # Ensure no NaN in the features used by IsolationForest
    df[['acc_magnitude', 'jerk']] = df[['acc_magnitude', 'jerk']].fillna(0)
    df = isolation_forest_anomaly_detection(df, contamination=0.01)
    return df

def extract_sliding_window_features(df, window_size=50, step=25):
    """Summarize a preprocessed DataFrame into one feature row per window.

    Windows of ``window_size`` consecutive rows are taken every ``step`` rows.
    For each window the timestamp, position and ``pothole_label`` of the
    center row are kept, together with statistics of ``acc_magnitude`` and
    ``jerk`` and the mean of each anomaly flag column.

    Args:
        df: DataFrame with the columns ``timestamp``, ``latitude``,
            ``longitude``, ``acc_magnitude``, ``jerk``, ``z_anomaly``,
            ``z_diff_anomaly``, ``std_anomaly``, ``g_zero_flag``,
            ``isof_anomaly`` and ``pothole_label``.
        window_size: Number of rows per window.
        step: Offset between the starts of consecutive windows.

    Returns:
        DataFrame with one row per window. When ``df`` has fewer rows than
        ``window_size`` the result is empty but still carries every feature
        column, so downstream column lookups keep working.
    """
    output_columns = [
        'timestamp', 'latitude', 'longitude',
        'acc_magnitude', 'acc_std', 'acc_min', 'acc_max',
        'acc_skew', 'acc_kurtosis',
        'jerk_mean', 'jerk_std',
        'z_anomaly_rate', 'z_diff_rate', 'std_anomaly_rate',
        'g_zero_rate', 'isof_anomaly_rate',
        'pothole_label',
    ]
    center_idx = window_size // 2  # same for every window
    feature_list = []
    for start in range(0, len(df) - window_size + 1, step):
        window = df.iloc[start:start + window_size]
        acc = window['acc_magnitude']
        jerk = window['jerk']
        feature_list.append({
            'timestamp': window['timestamp'].iloc[center_idx],
            'latitude': window['latitude'].iloc[center_idx],
            'longitude': window['longitude'].iloc[center_idx],
            'acc_magnitude': acc.mean(),
            'acc_std': acc.std(),
            'acc_min': acc.min(),
            'acc_max': acc.max(),
            'acc_skew': skew(acc),
            'acc_kurtosis': kurtosis(acc),
            'jerk_mean': jerk.mean(),
            'jerk_std': jerk.std(),
            'z_anomaly_rate': window['z_anomaly'].mean(),
            'z_diff_rate': window['z_diff_anomaly'].mean(),
            'std_anomaly_rate': window['std_anomaly'].mean(),
            'g_zero_rate': window['g_zero_flag'].mean(),
            'isof_anomaly_rate': window['isof_anomaly'].mean(),
            'pothole_label': window['pothole_label'].iloc[center_idx],
        })
    # Passing the column list keeps the schema even when no window fits.
    return pd.DataFrame(feature_list, columns=output_columns)

def create_sequences(features_df, feature_columns, sequence_length=10):
    """Build overlapping fixed-length sequences of window features.

    Rows are ordered by ``timestamp`` (a synthetic one-per-second timestamp
    is used when the column is missing) and every run of ``sequence_length``
    consecutive rows becomes one sequence.

    Args:
        features_df: DataFrame of per-window features. It is not modified.
        feature_columns: Names of the columns that make up each time step.
        sequence_length: Number of consecutive rows per sequence.

    Returns:
        Tuple ``(X_seq, centers)``. ``X_seq`` has shape
        ``(n_sequences, sequence_length, len(feature_columns))``; ``centers``
        holds, for each sequence, the row at offset ``sequence_length // 2``
        with a fresh 0-based index. When there are fewer rows than
        ``sequence_length`` both results are empty and ``X_seq`` still has
        three dimensions.
    """
    if 'timestamp' not in features_df.columns:
        # assign() returns a new frame, leaving the caller's DataFrame intact.
        features_df = features_df.assign(
            timestamp=pd.date_range(start='2025-01-01', periods=len(features_df), freq='s')
        )
    # A stable sort keeps the incoming order of rows with equal timestamps.
    features_df = features_df.sort_values(by='timestamp', kind='mergesort').reset_index(drop=True)

    n_sequences = len(features_df) - sequence_length + 1
    if n_sequences <= 0:
        empty_sequences = np.empty((0, sequence_length, len(feature_columns)))
        return empty_sequences, features_df.iloc[0:0].reset_index(drop=True)

    feature_values = features_df[feature_columns].values
    X_seq = np.stack([feature_values[i:i + sequence_length] for i in range(n_sequences)])
    # Each sequence is represented by its center row in the returned frame.
    half = sequence_length // 2
    center_indices = [i + half for i in range(n_sequences)]
    return X_seq, features_df.iloc[center_indices].reset_index(drop=True)


# -----------------------------
# 2. Load Dynamic Filtering Parameters
# -----------------------------
# JSON is UTF-8 by specification; state the encoding explicitly instead of
# relying on the platform default.
with open(r"C:\Users\Rick Halder\Desktop\SpeedyCare\Best Work\filtering_params.json", "r", encoding="utf-8") as f:
    filtering_params = json.load(f)

# Filter settings forwarded to load_and_preprocess_df below.
opt_process_variance     = filtering_params["process_variance"]
opt_measurement_variance = filtering_params["measurement_variance"]
opt_sg_window_length     = filtering_params["sg_window_length"]
opt_sg_polyorder         = filtering_params["sg_polyorder"]

print("Loaded filtering parameters:", filtering_params)

# -----------------------------
# 3. Load Inference Data (CSV files without the potholes column)
# -----------------------------
inference_folder_path = r"C:\Users\Rick Halder\Desktop\SpeedyCare\Best Work\New folder"  # <-- Update this path
file_pattern = os.path.join(inference_folder_path, "*.csv")
# glob.glob returns matches in arbitrary order. The files are concatenated and
# then filtered row by row, so sort them to make the result reproducible.
csv_files = sorted(glob.glob(file_pattern))

df_list = []
for file in csv_files:
    try:
        df = pd.read_csv(file)
    except Exception as e:
        # Best effort: report the unreadable file and continue with the rest.
        print(f"Error reading {file}: {e}")
        continue
    # Remember which file every row came from.
    df["source_file"] = os.path.basename(file)
    df_list.append(df)

if not df_list:
    raise ValueError("No CSV files were successfully read for inference.")

inference_data = pd.concat(df_list, ignore_index=True)
print(f"Inference DataFrame shape: {inference_data.shape}")

# -----------------------------
# 4. Preprocess Inference Data
# -----------------------------
# Apply the same filtering parameters that were loaded from the JSON file.
inference_data_processed = load_and_preprocess_df(
    inference_data,
    process_variance=opt_process_variance,
    measurement_variance=opt_measurement_variance,
    sg_window_length=opt_sg_window_length,
    sg_polyorder=opt_sg_polyorder
)
print("Processed inference data shape:", inference_data_processed.shape)

# -----------------------------
# 5. Extract Features and Create Sequences
# -----------------------------
features_df_infer = extract_sliding_window_features(inference_data_processed, window_size=50, step=25)
# Columns fed to the model, in the order used to build each time step.
feature_columns = [
    'acc_magnitude', 'acc_std', 'acc_min', 'acc_max', 
    'acc_skew', 'acc_kurtosis',
    'jerk_mean', 'jerk_std',
    'z_anomaly_rate', 'z_diff_rate', 'std_anomaly_rate', 
    'g_zero_rate', 'isof_anomaly_rate'
]
# After this call features_df_infer holds one row per sequence (its center window).
X_seq_infer, features_df_infer = create_sequences(features_df_infer, feature_columns, sequence_length=10)
if len(X_seq_infer) == 0:
    # Fail here with a clear message instead of an opaque error inside model.predict.
    raise ValueError(
        "Not enough inference data to build a single sequence: "
        f"{len(inference_data_processed)} processed rows is too few for "
        "window_size=50, step=25 and sequence_length=10."
    )
print("Inference sequence shape:", X_seq_infer.shape)

# -----------------------------
# 6. Load Trained Model and Predict (using native Keras format)
# -----------------------------
# Restore the trained network from its native .keras file.
model_path = r"C:\Users\Rick Halder\Desktop\SpeedyCare\Best Work\trained_hybrid_model.keras"
trained_model = tf.keras.models.load_model(model_path)

# Scores above 0.5 are labelled 1 (pothole), everything else 0.
# NOTE(review): assumes the model emits a single score per sequence - confirm.
predictions = trained_model.predict(X_seq_infer)
predicted_labels = (predictions.flatten() > 0.5).astype(int)

# -----------------------------
# 7. Merge Predictions into the Feature DataFrame
# -----------------------------
# features_df_infer has one row per sequence, so the labels line up by position.
features_df_infer['Predicted_Pothole'] = predicted_labels

# Write the labelled rows to a CSV file in the current working directory.
output_csv = "predicted_inference_data.csv"
features_df_infer.to_csv(output_csv, index=False)
print("Predicted inference data saved as 'predicted_inference_data.csv'")

# -----------------------------
# 8. Generate a Folium Map for Predicted Potholes
# -----------------------------
# Center the map on the mean position, or on (0, 0) when no coordinates exist.
has_coordinates = (
    features_df_infer['latitude'].notnull().any()
    and features_df_infer['longitude'].notnull().any()
)
if has_coordinates:
    center_lat, center_lon = features_df_infer['latitude'].mean(), features_df_infer['longitude'].mean()
else:
    center_lat, center_lon = 0, 0

m = folium.Map(location=[center_lat, center_lon], zoom_start=13)

# One red circle per row that was predicted to be a pothole.
predicted_potholes = features_df_infer[features_df_infer['Predicted_Pothole'] == 1]
marker_fields = zip(
    predicted_potholes['latitude'],
    predicted_potholes['longitude'],
    predicted_potholes['timestamp'],
)
for lat, lon, stamp in marker_fields:
    marker = folium.CircleMarker(
        location=[lat, lon],
        radius=5,
        color='red',
        fill=True,
        fill_color='red',
        fill_opacity=0.7,
        popup=f"Timestamp: {stamp}"
    )
    marker.add_to(m)

m.save("predicted_potholes_map.html")
print("Folium map saved as 'predicted_potholes_map.html'")


Loaded filtering parameters: {'process_variance': 0.0006715429033260829, 'measurement_variance': 0.05235353908705457, 'sg_window_length': 51, 'sg_polyorder': 1}
Inference DataFrame shape: (5000, 12)
Processed inference data shape: (5000, 26)
Inference sequence shape: (190, 10, 13)
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step
Predicted inference data saved as 'predicted_inference_data.csv'
Folium map saved as 'predicted_potholes_map.html'
