In [8]:
from google.colab import drive

# Attach Google Drive to the Colab filesystem; the model artefacts loaded in
# later cells are read from paths under this mount point.
drive.mount(mountpoint='/content/drive')


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [9]:
!pip install findspark
!pip install pyspark

import os
import findspark
from pyspark.sql import SparkSession

# Ask Spark itself whether a session is live instead of checking whether the
# name `spark` happens to exist in the notebook namespace: a leftover variable
# that points at a stopped session would otherwise be reported as "already
# exists" and never rebuilt. Either branch (re)binds `spark`.
_active_session = SparkSession.getActiveSession()

if _active_session is None:
    print("Spark session not found, creating a new one...")
    findspark.init()
    spark = (
        SparkSession.builder
        .appName("CarInsuranceClaimEstimator")
        .config("spark.driver.memory", "4g")
        .config("spark.executor.memory", "4g")
        # No spark.hadoop.fs.defaultFS override: the HDFS configuration was
        # removed because artefacts are saved locally rather than on HDFS.
        .getOrCreate()
    )
    print("Spark session created.")
else:
    spark = _active_session
    print("Spark session already exists.")

Spark session already exists.


In [10]:
from pyspark.ml import PipelineModel

# Directory on the mounted Drive that holds the saved Spark ML pipeline.
MODEL_PATH = "/content/drive/MyDrive/car_insurance_gbt_model_saved"

# Restore the fitted pipeline from disk and confirm once it is in memory.
gbt_live_model = PipelineModel.load(path=MODEL_PATH)
print("Model loaded successfully!")


Model loaded successfully!


In [11]:
import joblib

# Pickled classifier stored on the mounted Drive.
KNN_PATH = "/content/drive/MyDrive/damage_type_knn.pkl"

# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code -- only point KNN_PATH at artefacts you produced or trust.
knn_clf = joblib.load(filename=KNN_PATH)

print("KNN classifier loaded!")


KNN classifier loaded!


In [12]:
!pip install -q gradio

import os
import numpy as np
import gradio as gr
import joblib

from pyspark.ml import PipelineModel
from pyspark.sql.types import (
    StructType, StructField, StringType,
    IntegerType, FloatType, ArrayType
)
from pyspark.sql.functions import udf
from pyspark.ml.linalg import Vectors, VectorUDT

from tensorflow.keras.applications.efficientnet import EfficientNetB0, preprocess_input
from tensorflow.keras.preprocessing import image


# 1. Load the three models used by the app.
#
# GBT_PATH has to be defined in this cell: it was previously commented out, so
# the load below raised NameError on a fresh kernel and only worked when the
# name was left over from an earlier run.
GBT_PATH = "/content/drive/MyDrive/car_insurance_gbt_model_saved"
KNN_PATH = "/content/drive/MyDrive/damage_type_knn.pkl"

# Status marks are written as escapes ("\u2714" is a heavy check mark) so the
# source stays ASCII and the symbol cannot be garbled by a re-encoding.
print("Loading GBT regression model...")
gbt_live_model = PipelineModel.load(GBT_PATH)
print("\u2714 Loaded GBT model")

print("Loading KNN damage-type classifier...")
# NOTE(review): joblib.load unpickles the file -- only load trusted artefacts.
knn_clf = joblib.load(KNN_PATH)
print("\u2714 Loaded KNN model")

print("Loading EfficientNet feature extractor...")
# include_top=False with pooling="avg": the network is used as a feature
# extractor that returns one pooled vector per image, not class scores.
cnn_live = EfficientNetB0(weights="imagenet", include_top=False, pooling="avg")
print("\u2714 Loaded CNN model")



# 2. Convert uploaded image -> EfficientNet feature vector

def pil_to_features(pil_img):
    """Turn an uploaded PIL image into a float32 feature vector.

    The image is converted to RGB, resized to 224x224, given a leading batch
    axis, run through ``preprocess_input`` and then through ``cnn_live``.
    The first (only) row of the prediction is returned as ``float32``.
    """
    rgb_224 = pil_img.convert("RGB").resize((224, 224))

    # Add a leading batch axis of size 1 before preprocessing / prediction.
    batch = np.array(rgb_224).astype("float32")[np.newaxis, ...]
    batch = preprocess_input(batch)

    embedding = cnn_live.predict(batch, verbose=0)[0]
    return embedding.astype("float32")



# 3. MAIN PREDICTION FUNCTION

def predict_claim(
    img,
    driver_age,
    car_year,
    mileage,
    auto_damage_type,
    manual_damage_type
):
    """Estimate a repair cost for an uploaded damage photo.

    Parameters
    ----------
    img : PIL image or None
        Uploaded photo; forwarded to ``pil_to_features``.
    driver_age, car_year, mileage : number
        Converted to ``int`` before being placed in the Spark row.
    auto_damage_type : bool
        If truthy the damage type is predicted by ``knn_clf`` from the image
        features; otherwise ``manual_damage_type`` is used as given.
    manual_damage_type : str
        Damage type to use when ``auto_damage_type`` is falsy.

    Returns
    -------
    str
        A Markdown report (vector statistics, damage type, severity, vehicle
        info, predicted cost and a conclusion line), or a one-line warning
        when the image is missing or an input cannot be used.
    """
    # Warning/arrow symbols are written as escapes ("\u26a0" warning sign,
    # "\u27a1" right arrow) so they cannot be garbled by a re-encoding.
    if img is None:
        return "\u26a0 Please upload a car damage image."

    # Validate the numeric inputs before any model work: an empty or
    # non-numeric field (e.g. None) would otherwise raise inside int() and
    # surface in the UI as an opaque error instead of a readable message.
    try:
        driver_age = int(driver_age)
        car_year = int(car_year)
        mileage = int(mileage)
    except (TypeError, ValueError, OverflowError):
        return "\u26a0 Please enter numeric values for driver age, car year and mileage."

    # Without this guard, str(None) would be sent to the model as a damage type.
    if not auto_damage_type and not manual_damage_type:
        return "\u26a0 Please select a damage type or enable auto-detect."

    # A. Extract image features
    features = pil_to_features(img)

    # B. Compute severity using vector math
    vector_magnitude = float(np.linalg.norm(features))
    vector_variance = float(np.var(features))

    damage_index = 0.6 * vector_magnitude + 50.0 * vector_variance
    severity_raw = np.log1p(damage_index) / 2.0
    # Severity is clamped to the 0.5 .. 5.0 range shown in the report.
    severity_score = float(np.clip(severity_raw, 0.5, 5.0))

    # C. Determine damage type
    if auto_damage_type:
        damage_type = knn_clf.predict(features.reshape(1, -1))[0]
        source = "Auto (KNN)"
    else:
        damage_type = manual_damage_type
        source = "Manual selection"

    # D. Single-row Spark dataframe for the GBT pipeline
    schema = StructType([
        StructField("claim_id",       StringType(), True),
        StructField("image_path",     StringType(), True),
        StructField("features",       ArrayType(FloatType()), True),
        StructField("driver_age",     IntegerType(), True),
        StructField("car_year",       IntegerType(), True),
        StructField("mileage",        IntegerType(), True),
        StructField("damage_type",    StringType(), True),
        StructField("severity_score", FloatType(), True),
    ])

    row = [(
        "ui_claim_1",
        "uploaded_image",
        features.tolist(),
        driver_age,
        car_year,
        mileage,
        str(damage_type),
        float(severity_score),
    )]

    sdf = spark.createDataFrame(row, schema)
    # Add a dense-vector copy of the feature array as "features_vec".
    to_vec = udf(lambda xs: Vectors.dense(xs), VectorUDT())
    sdf = sdf.withColumn("features_vec", to_vec("features"))

    # E. Price prediction
    pred_df = gbt_live_model.transform(sdf)
    result = pred_df.select("damage_type", "severity_score", "prediction").collect()[0]
    cost = float(result["prediction"])

    # F. Format output
    if cost > 5000:
        conclusion = "\u27a1 **Conclusion: MAJOR DAMAGE / POSSIBLE TOTAL LOSS**"
    elif cost > 2500:
        conclusion = "\u27a1 **Conclusion: Significant Repair Required**"
    else:
        conclusion = "\u27a1 **Conclusion: Minor to Moderate Repair**"

    output = [
        "###  Image Vector Analysis",
        f"- Magnitude: **{vector_magnitude:.4f}**",
        f"- Variance: **{vector_variance:.6f}**",
        f"- Damage Index: **{damage_index:.4f}**\n",
        "###  AI Diagnosis",
        f"- Damage Type: **{damage_type}**  _(Source: {source})_",
        f"- Severity Score: **{severity_score:.2f} / 5.0**\n",
        "###  Vehicle Info",
        f"- Driver Age: **{driver_age}**",
        f"- Car Year: **{car_year}**",
        f"- Mileage: **{mileage:,} miles**\n",
        "###  Estimated Repair Cost",
        f"- **${cost:,.2f}**",
        conclusion,
    ]

    return "\n".join(output)



# 4. BUILD GRADIO INTERFACE

# Choices offered in the manual damage-type dropdown.
damage_options = ["scratch", "crack", "dent", "tire_flat", "glass_shatter"]

# The inputs are listed in the same order as predict_claim's parameters:
# image, driver age, car year, mileage, auto-detect flag, manual damage type.
iface = gr.Interface(
    fn=predict_claim,
    inputs=[
        gr.Image(type="pil", label="Upload a damaged car image"),
        gr.Slider(18, 80, value=35, step=1, label="Driver Age"),
        gr.Slider(2000, 2024, value=2015, step=1, label="Car Year"),
        gr.Number(value=60000, label="Mileage"),
        gr.Checkbox(value=True, label="Auto-detect damage type"),
        gr.Dropdown(damage_options, value="scratch",
                    label="Manual damage type (if auto OFF)"),
    ],
    outputs=gr.Markdown(label="AI Assessment"),
    # "\U0001F697" is the car emoji; written as an escape so the title cannot
    # be garbled by a re-encoding of the notebook source.
    title="\U0001F697 AI Car Damage Claim Estimator",
    description="Upload a photo and enter car details to estimate repair cost.",
)

iface.launch()


Loading GBT regression model...
✔ Loaded GBT model
Loading KNN damage-type classifier...
✔ Loaded KNN model
Loading EfficientNet feature extractor...
✔ Loaded CNN model
It looks like you are running Gradio on a hosted Jupyter notebook, which requires `share=True`. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://5160666987a596bff8.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


