In [5]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVR
from sklearn.metrics import mean_absolute_error, r2_score
import joblib # For saving/loading models and scalers
import os
import numpy as np

# --- Configuration ---
# Path to the CSV file generated by the previous feature extraction script
# IMPORTANT: Adjust this path if your CSV is in a different location.
CSV_FILE_PATH = "C:/Users/BCI-Lab/Downloads/teamA_dataset/_out_dataset/lane_line_features_extracted.csv"

# Output paths for the trained model and scaler
MODEL_OUTPUT_DIR = "trained_model"
SVR_MODEL_FILENAME = "svr_deviation_model.joblib"
SCALER_FILENAME = "feature_scaler.joblib"

# --- Main Script ---
if __name__ == "__main__":
    print("--- Starting SVR Model Training for Lane Deviation ---")

    # Ensure the output directory exists
    os.makedirs(MODEL_OUTPUT_DIR, exist_ok=True)
    model_save_path = os.path.join(MODEL_OUTPUT_DIR, SVR_MODEL_FILENAME)
    scaler_save_path = os.path.join(MODEL_OUTPUT_DIR, SCALER_FILENAME)

    # 1. Load Data
    try:
        df = pd.read_csv(CSV_FILE_PATH)
        print(f"Successfully loaded data from: {CSV_FILE_PATH}")
        print(f"Initial dataset shape: {df.shape}")
    except FileNotFoundError:
        print(f"ERROR: CSV file not found at '{CSV_FILE_PATH}'.")
        print("Please ensure the feature extraction script has been run and the path is correct.")
        exit()

    # 2. Handle Missing Values and Define Features/Target
    # The 'deviation_score' is NaN when no line is detected.
    # For training, we should only use instances where a line was actually detected.
    df_cleaned = df.dropna(subset=['deviation_score']).copy() # Drop rows where target is NaN

    # Ensure other features also have appropriate values (e.g., 0 or -1 where no line was detected)
    # The feature extraction script already assigns these default values, so we just use them.

    # Define features (X) and target (y)
    # Exclude 'filename' as it's not a numerical feature for the model.
    # Exclude 'deviation_score' as it's our target variable.
    # 'centroid_x_cropped' can be redundant with 'deviation_score' but might add context, let's keep it.
    
    feature_columns = [
        'is_line_detected',
        'centroid_x_cropped',
        'centroid_y_cropped',
        'num_line_segments',
        'total_line_pixel_length',
        'avg_line_angle_deg',
        'std_line_angle_deg',
        'min_line_y_cropped',
        'max_line_y_cropped',
        'fitted_line_slope_deg',
        'fitted_line_x_intercept_at_bottom_cropped',
        'fitted_line_x_intercept_at_top_cropped'
    ]

    # Filter out columns that might not exist if data extraction had issues
    feature_columns = [col for col in feature_columns if col in df_cleaned.columns]
    
    X = df_cleaned[feature_columns]
    y = df_cleaned['deviation_score']

    print(f"\nDataset shape after dropping rows with NaN deviation score: {df_cleaned.shape}")
    print(f"Features (X) shape: {X.shape}, Target (y) shape: {y.shape}")
    print(f"Features being used: {feature_columns}")

    if X.empty:
        print("ERROR: No valid data remaining after cleaning. Cannot train the model.")
        exit()

    # 3. Data Preprocessing - Scaling Features
    # SVR is sensitive to feature scaling.
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)
    
    print("\nFeatures scaled using StandardScaler.")

    # 4. Train-Test Split
    # Using a random state for reproducibility
    X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

    print(f"Training data shape: {X_train.shape}, Test data shape: {X_test.shape}")

    # 5. SVR Model Training
    # SVR with RBF kernel is a good starting point.
    # C: Regularization parameter. Higher C means less regularization.
    # epsilon: Epsilon-tube within which no penalty is associated in the training loss function.
    #          Determines the margin of tolerance.
    svr_model = SVR(kernel='linear', C=1.0, epsilon=0.05)
    
    print("\nTraining SVR model...")
    svr_model.fit(X_train, y_train)
    print("SVR model training complete.")

    # 6. Evaluation
    y_pred = svr_model.predict(X_test)

    mae = mean_absolute_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)

    print("\n--- Model Evaluation on Test Set ---")
    print(f"Mean Absolute Error (MAE): {mae:.4f}")
    print(f"R-squared (R2) Score: {r2:.4f}")
    print("\nNote: MAE represents the average absolute difference between predicted and actual values.")
    print("R-squared indicates the proportion of variance in the target that is predictable from the features.")

    # 7. Model and Scaler Dumping
    joblib.dump(svr_model, model_save_path)
    joblib.dump(scaler, scaler_save_path)

    print(f"\n✅ Trained SVR model dumped to: {model_save_path}")
    print(f"✅ Fitted StandardScaler dumped to: {scaler_save_path}")
    print("\n--- Training Complete ---")
    print("You can now load these files in a new script to make predictions on new, unseen data.")

--- Starting SVR Model Training for Lane Deviation ---
Successfully loaded data from: C:/Users/BCI-Lab/Downloads/teamA_dataset/_out_dataset/lane_line_features_extracted.csv
Initial dataset shape: (1291, 14)

Dataset shape after dropping rows with NaN deviation score: (923, 14)
Features (X) shape: (923, 12), Target (y) shape: (923,)
Features being used: ['is_line_detected', 'centroid_x_cropped', 'centroid_y_cropped', 'num_line_segments', 'total_line_pixel_length', 'avg_line_angle_deg', 'std_line_angle_deg', 'min_line_y_cropped', 'max_line_y_cropped', 'fitted_line_slope_deg', 'fitted_line_x_intercept_at_bottom_cropped', 'fitted_line_x_intercept_at_top_cropped']

Features scaled using StandardScaler.
Training data shape: (738, 12), Test data shape: (185, 12)

Training SVR model...
SVR model training complete.

--- Model Evaluation on Test Set ---
Mean Absolute Error (MAE): 0.0217
R-squared (R2) Score: 0.9902

Note: MAE represents the average absolute difference between predicted and actua