**Old car Price Prediction**

**Step 1 : Data Processing**



1.   Load dataset
2.   Convert data types appropriately
3.   Handle missing values using mean




**Step 2. Exploratory Data Analysis**


1.   Feature relationships with price
2.   Correlation Analysis

**Step 3. Machine Learning Model Developement**

**Step 4. Data Visulization**

**Step 5. Price Prediction Filter**

**Final Code**

In [3]:
! pip install gradio

Collecting gradio
  Downloading gradio-5.25.2-py3-none-any.whl.metadata (16 kB)
Collecting aiofiles<25.0,>=22.0 (from gradio)
  Downloading aiofiles-24.1.0-py3-none-any.whl.metadata (10 kB)
Collecting fastapi<1.0,>=0.115.2 (from gradio)
  Downloading fastapi-0.115.12-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.5.0-py3-none-any.whl.metadata (3.0 kB)
Collecting gradio-client==1.8.0 (from gradio)
  Downloading gradio_client-1.8.0-py3-none-any.whl.metadata (7.1 kB)
Collecting groovy~=0.1 (from gradio)
  Downloading groovy-0.1.2-py3-none-any.whl.metadata (6.1 kB)
Collecting pydub (from gradio)
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting python-multipart>=0.0.18 (from gradio)
  Downloading python_multipart-0.0.20-py3-none-any.whl.metadata (1.8 kB)
Collecting ruff>=0.9.3 (from gradio)
  Downloading ruff-0.11.6-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (25 kB)
Collecting safehttpx<0.2.0,>=0.1.6 (

In [5]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
import gradio as gr
import time

# Load the dataset
path = "https://archive.ics.uci.edu/ml/machine-learning-databases/autos/imports-85.data"
df = pd.read_csv(path, header=None)

# Define column names
headers = ["symboling", "normalized_losses", "make", "fuel_type", "aspiration",
           "num_doors", "body_style", "drive_wheels", "engine_location",
           "wheel_base", "length", "width", "height", "curb_weight",
           "engine_type", "num_cylinders", "engine_size", "fuel_system",
           "bore", "stroke", "compression_ratio", "horsepower", "peak_rpm",
           "city_mpg", "highway_mpg", "price"]
df.columns = headers

# Data Cleaning
numeric_cols = ["normalized_losses", "bore", "stroke", "horsepower", "peak_rpm", "price"]
for col in numeric_cols:
    df[col] = pd.to_numeric(df[col], errors='coerce')
    df[col].fillna(df[col].mean(), inplace=True)

# Feature selection
features = ["normalized_losses", "wheel_base", "engine_size", "bore", "stroke",
            "compression_ratio", "horsepower", "peak_rpm"]
X = df[features]
Y = df["price"]

# Train Model
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.25, random_state=5)
model = LinearRegression()
model.fit(X_train, Y_train)

# Prediction function with simulated delay and message
def predict_car_price(normalized_losses, wheel_base, engine_size, bore, stroke, compression_ratio, horsepower, peak_rpm):
    time.sleep(1.5)
    input_data = np.array([[normalized_losses, wheel_base, engine_size, bore, stroke, compression_ratio, horsepower, peak_rpm]])
    prediction = model.predict(input_data)
    return round(prediction[0], 2)

# Visualization functions

def price_by_make():
    fig, ax = plt.subplots(figsize=(10, 5))
    sns.boxplot(data=df, x='make', y='price', palette='Oranges', ax=ax)
    ax.set_title("Car Make vs Price")
    plt.xticks(rotation=45)
    return fig

def price_by_drive():
    fig, ax = plt.subplots(figsize=(7, 5))
    sns.boxplot(data=df, x='drive_wheels', y='price', palette='Oranges', ax=ax)
    ax.set_title("Drive Wheels vs Price")
    return fig

def price_distribution():
    fig, ax = plt.subplots(figsize=(8, 5))
    sns.histplot(df['price'], bins=30, kde=True, color="orange", ax=ax)
    ax.set_title("Car Price Distribution")
    return fig

def correlation_heatmap():
    fig, ax = plt.subplots(figsize=(10, 8))
    sns.heatmap(df[features + ["price"]].corr(), annot=True, cmap="Oranges", ax=ax)
    ax.set_title("Feature Correlation Heatmap")
    return fig

def fueltype_price_box():
    fig, ax = plt.subplots(figsize=(7, 5))
    sns.boxplot(data=df, x='fuel_type', y='price', palette='Oranges', ax=ax)
    ax.set_title("Fuel Type vs Price")
    return fig

def make_average_price():
    avg_price = df.groupby("make")["price"].mean().sort_values(ascending=False).head(10)
    fig, ax = plt.subplots(figsize=(9, 5))
    sns.barplot(x=avg_price.values, y=avg_price.index, palette="Oranges", ax=ax)
    ax.set_title("Top 10 Car Makes by Average Price")
    return fig

def horsepower_vs_price():
    fig, ax = plt.subplots(figsize=(8, 5))
    sns.scatterplot(data=df, x='horsepower', y='price', hue='fuel_type', palette='Oranges', ax=ax)
    ax.set_title("Horsepower vs Price")
    return fig

def engine_size_vs_price():
    fig, ax = plt.subplots(figsize=(8, 5))
    sns.scatterplot(data=df, x='engine_size', y='price', hue='make', palette='Oranges', legend=False, ax=ax)
    ax.set_title("Engine Size vs Price")
    return fig

def body_style_vs_price():
    fig, ax = plt.subplots(figsize=(7, 5))
    sns.boxplot(data=df, x='body_style', y='price', palette='Oranges', ax=ax)
    ax.set_title("Body Style vs Price")
    return fig

def get_visualization(chart_type):
    if chart_type == "Price Distribution":
        return price_distribution()
    elif chart_type == "Correlation Heatmap":
        return correlation_heatmap()
    elif chart_type == "Fuel Type vs Price":
        return fueltype_price_box()
    elif chart_type == "Top 10 Car Makes by Avg Price":
        return make_average_price()
    elif chart_type == "Horsepower vs Price":
        return horsepower_vs_price()
    elif chart_type == "Engine Size vs Price":
        return engine_size_vs_price()
    elif chart_type == "Body Style vs Price":
        return body_style_vs_price()
    elif chart_type == "Drive Type vs Price":
        return price_by_drive()
    elif chart_type == "Make vs Price":
        return price_by_make()

# Filter function with statistics

def filter_cars(min_price, max_price, fuel_type, body_style):
    filtered = df[(df['price'] >= min_price) & (df['price'] <= max_price)]
    if fuel_type != "All":
        filtered = filtered[filtered['fuel_type'] == fuel_type]
    if body_style != "All":
        filtered = filtered[filtered['body_style'] == body_style]

    stats = {
        "Total Cars": len(filtered),
        "Petrol Cars": len(filtered[filtered['fuel_type'] == 'gas']),
        "Diesel Cars": len(filtered[filtered['fuel_type'] == 'diesel']),
        "Hatchbacks": len(filtered[filtered['body_style'] == 'hatchback'])
    }

    stats_df = pd.DataFrame(list(stats.items()), columns=["Category", "Count"])

    return filtered[["make", "body_style", "fuel_type", "engine_size", "horsepower", "price"]].sort_values(by="price"), stats_df

# Gradio interface with enhancements
with gr.Blocks(theme=gr.themes.Soft(primary_hue="orange", font=["Verdana", "Arial", "sans-serif"])) as demo:
    gr.Markdown("""
    <div style='text-align:center;'>
        <img src='https://cdn-icons-png.flaticon.com/512/743/743008.png' width='80'/>
        <h1 style='color:#FF7F00; font-family:Verdana;'>🚗 Car Price Prediction Dashboard</h1>
        <p style='font-size:16px;'>✨ Predict car prices, explore features, and visualize insights ✨</p>
    </div>
    """)

    with gr.Tab("🔮 Predict Car Price"):
        with gr.Accordion("Enter Car Specifications", open=True):
            with gr.Row():
                nl = gr.Number(label="Normalized Losses", value=100)
                wb = gr.Number(label="Wheel Base", value=95)
                es = gr.Number(label="Engine Size", value=130)
                bore = gr.Number(label="Bore", value=3.2)
                stroke = gr.Number(label="Stroke", value=3.0)
            with gr.Row():
                cr = gr.Number(label="Compression Ratio", value=9.0)
                hp = gr.Number(label="Horsepower", value=120)
                rpm = gr.Number(label="Peak RPM", value=5200)

        predict_btn = gr.Button("🔍 Predict Now", variant="primary")
        price_output = gr.Number(label="Predicted Price (USD)", interactive=False)
        predict_btn.click(fn=predict_car_price, inputs=[nl, wb, es, bore, stroke, cr, hp, rpm], outputs=price_output)

    with gr.Tab("📊 Visual Insights"):
        chart_selector = gr.Dropdown(
            choices=[
                "Price Distribution",
                "Correlation Heatmap",
                "Fuel Type vs Price",
                "Top 10 Car Makes by Avg Price",
                "Horsepower vs Price",
                "Engine Size vs Price",
                "Body Style vs Price",
                "Drive Type vs Price",
                "Make vs Price"
            ],
            label="Select a Chart",
            value="Price Distribution"
        )
        chart_output = gr.Plot()
        chart_selector.change(fn=get_visualization, inputs=chart_selector, outputs=chart_output)

    with gr.Tab("🎯 Filter Cars"):
        gr.Markdown("<h3 style='color:#FF7F00;'>Filter Cars by Attributes</h3>")
        min_p = gr.Number(label="Minimum Price ($)", value=5000)
        max_p = gr.Number(label="Maximum Price ($)", value=20000)
        fuel_filter = gr.Dropdown(choices=["All"] + sorted(df['fuel_type'].dropna().unique()), label="Fuel Type", value="All")
        body_filter = gr.Dropdown(choices=["All"] + sorted(df['body_style'].dropna().unique()), label="Body Style", value="All")
        filter_btn = gr.Button("Filter Now")
        filter_table = gr.Dataframe()
        stats_table = gr.Dataframe(label="Filter Statistics")
        filter_btn.click(fn=filter_cars, inputs=[min_p, max_p, fuel_filter, body_filter], outputs=[filter_table, stats_table])

# Launch
if __name__ == "__main__":
    demo.launch()

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df[col].fillna(df[col].mean(), inplace=True)


It looks like you are running Gradio on a hosted a Jupyter notebook. For the Gradio app to work, sharing must be enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://144e421815bce5abb3.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)
