In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import numpy as np
import pickle

# Sample data loading (replace with your actual path)
data = pd.read_excel('/content/drive/MyDrive/CarDekho/Colab/FeaturesEngineered.xlsx')

# Define columns
important_numerical_cols = ['Width', 'MaxPower', 'ManufactureYear', 'TurningRadius', 'KilometersDriven', 'Length',
                            'WheelBase', 'KerbWeight', 'Torque', 'Engine', 'Height', 'Acceleration', 'Mileage',
                            'CargoVolume', 'PreviousOwners', 'Seats', 'Doors', 'Car_Age', 'TopSpeed', 'Mileage_per_Year']
important_categorical_cols = ['City', 'FuelType', 'BodyType', 'manufacturer', 'CarModel', 'VariantName', 'Color',
                              'EngineType', 'TransmissionType', 'DriveType', 'FuelSupplySystem', 'RearBrakeType',
                              'TyreType', 'SteeringType', 'HeadLights', 'Locking', 'GearBox', 'Insurance']

# Define X and y
X = data[important_numerical_cols + important_categorical_cols]
y = data['Price']

# Separate numerical and categorical data
X_numerical = X[important_numerical_cols]
X_categorical = X[important_categorical_cols]

# Scale numerical data
scaler = StandardScaler()
X_numerical_scaled = scaler.fit_transform(X_numerical)

# Encode categorical data
encoder = OneHotEncoder(handle_unknown='ignore')
X_categorical_encoded = encoder.fit_transform(X_categorical).toarray()

# Combine processed numerical and categorical data
X_processed = np.hstack([X_numerical_scaled, X_categorical_encoded])

# Split data
X_train, X_test, y_train, y_test = train_test_split(X_processed, y, test_size=0.3, random_state=0)

# Train the model
model = RandomForestRegressor()
model.fit(X_train, y_train)

# Evaluate the model
y_pred = model.predict(X_test)
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f"Test MAE: {mae:.4f}")
print(f"Test MSE: {mse:.4f}")
print(f"Test R^2: {r2:.4f}")

# Save the trained model, scaler, and encoder
with open('car_price_model.pkl', 'wb') as file:
    pickle.dump(model, file)

with open('scaler.pkl', 'wb') as file:
    pickle.dump(scaler, file)

with open('encoder.pkl', 'wb') as file:
    pickle.dump(encoder, file)

print("Model, scaler, and encoder saved successfully.")


Test MAE: 0.7609
Test MSE: 1.7960
Test R^2: 0.9222
Model, scaler, and encoder saved successfully.


In [2]:
!pip install streamlit

Collecting streamlit
  Downloading streamlit-1.38.0-py2.py3-none-any.whl.metadata (8.5 kB)
Collecting tenacity<9,>=8.1.0 (from streamlit)
  Downloading tenacity-8.5.0-py3-none-any.whl.metadata (1.2 kB)
Collecting gitpython!=3.1.19,<4,>=3.0.7 (from streamlit)
  Downloading GitPython-3.1.43-py3-none-any.whl.metadata (13 kB)
Collecting pydeck<1,>=0.8.0b4 (from streamlit)
  Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)
Collecting watchdog<5,>=2.1.5 (from streamlit)
  Downloading watchdog-4.0.2-py3-none-manylinux2014_x86_64.whl.metadata (38 kB)
Collecting gitdb<5,>=4.0.1 (from gitpython!=3.1.19,<4,>=3.0.7->streamlit)
  Downloading gitdb-4.0.11-py3-none-any.whl.metadata (1.2 kB)
Collecting smmap<6,>=3.0.1 (from gitdb<5,>=4.0.1->gitpython!=3.1.19,<4,>=3.0.7->streamlit)
  Downloading smmap-5.0.1-py3-none-any.whl.metadata (4.3 kB)
Downloading streamlit-1.38.0-py2.py3-none-any.whl (8.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.7/8.7 MB[0m [31m40.7 MB

In [3]:
import streamlit as st
import pandas as pd
import numpy as np
import pickle

# Load the saved model, scaler, and encoder
with open('car_price_model.pkl', 'rb') as file:
    model = pickle.load(file)

with open('scaler.pkl', 'rb') as file:
    scaler = pickle.load(file)

with open('encoder.pkl', 'rb') as file:
    encoder = pickle.load(file)

# Define feature input function
def get_user_input():
    st.sidebar.header("Enter Car Features")

    # Numerical features input
    width = st.sidebar.number_input("Width", min_value=1000, max_value=2500, value=1750)
    max_power = st.sidebar.number_input("Max Power (hp)", min_value=30, max_value=1000, value=150)
    manufacture_year = st.sidebar.slider("Manufacture Year", min_value=1990, max_value=2024, value=2020)
    turning_radius = st.sidebar.number_input("Turning Radius (m)", min_value=4.0, max_value=12.0, value=5.5)
    kilometers_driven = st.sidebar.number_input("Kilometers Driven", min_value=0, max_value=500000, value=50000)
    length = st.sidebar.number_input("Length (mm)", min_value=3000, max_value=6000, value=4000)
    wheel_base = st.sidebar.number_input("Wheel Base (mm)", min_value=2000, max_value=4000, value=2500)
    kerb_weight = st.sidebar.number_input("Kerb Weight (kg)", min_value=500, max_value=4000, value=1200)
    torque = st.sidebar.number_input("Torque (Nm)", min_value=50, max_value=1000, value=200)
    engine = st.sidebar.number_input("Engine Capacity (cc)", min_value=500, max_value=5000, value=1500)
    height = st.sidebar.number_input("Height (mm)", min_value=1000, max_value=3000, value=1500)
    acceleration = st.sidebar.number_input("Acceleration (0-100 km/h in seconds)", min_value=2.0, max_value=20.0, value=10.0)
    mileage = st.sidebar.number_input("Mileage (kmpl)", min_value=5.0, max_value=50.0, value=15.0)
    cargo_volume = st.sidebar.number_input("Cargo Volume (liters)", min_value=50, max_value=1000, value=300)
    previous_owners = st.sidebar.slider("Previous Owners", min_value=0, max_value=10, value=1)
    seats = st.sidebar.slider("Number of Seats", min_value=2, max_value=10, value=5)
    doors = st.sidebar.slider("Number of Doors", min_value=2, max_value=5, value=4)
    car_age = st.sidebar.slider("Car Age (years)", min_value=0, max_value=30, value=5)
    top_speed = st.sidebar.number_input("Top Speed (km/h)", min_value=50, max_value=400, value=180)
    mileage_per_year = st.sidebar.number_input("Mileage per Year (km)", min_value=0, max_value=50000, value=10000)

    # Categorical features input
    city = st.sidebar.selectbox("City", ['CityA', 'CityB', 'CityC'])  # Replace with actual city options
    fuel_type = st.sidebar.selectbox("Fuel Type", ['Petrol', 'Diesel', 'Electric', 'Hybrid'])
    body_type = st.sidebar.selectbox("Body Type", ['Sedan', 'SUV', 'Hatchback', 'Convertible', 'Coupe'])
    manufacturer = st.sidebar.selectbox("Manufacturer", ['Toyota', 'Ford', 'BMW', 'Audi', 'Mercedes'])  # Replace with actual options
    car_model = st.sidebar.text_input("Car Model", "ModelX")  # Example placeholder
    variant_name = st.sidebar.text_input("Variant Name", "VariantY")  # Example placeholder
    color = st.sidebar.selectbox("Color", ['Red', 'Blue', 'Green', 'Black', 'White'])
    engine_type = st.sidebar.selectbox("Engine Type", ['V6', 'V8', 'Electric', 'Hybrid'])
    transmission_type = st.sidebar.selectbox("Transmission Type", ['Manual', 'Automatic', 'Semi-Automatic'])
    drive_type = st.sidebar.selectbox("Drive Type", ['FWD', 'RWD', 'AWD'])
    fuel_supply_system = st.sidebar.selectbox("Fuel Supply System", ['Direct', 'Indirect'])
    rear_brake_type = st.sidebar.selectbox("Rear Brake Type", ['Drum', 'Disc'])
    tyre_type = st.sidebar.selectbox("Tyre Type", ['Radial', 'Cross Ply'])
    steering_type = st.sidebar.selectbox("Steering Type", ['Power', 'Manual'])
    head_lights = st.sidebar.selectbox("Head Lights", ['LED', 'Halogen'])
    locking = st.sidebar.selectbox("Locking", ['Central', 'Remote'])
    gear_box = st.sidebar.selectbox("Gear Box", ['5-speed', '6-speed'])

    # Create a dictionary of inputs
    features = {
        'Width': width,
        'MaxPower': max_power,
        'ManufactureYear': manufacture_year,
        'TurningRadius': turning_radius,
        'KilometersDriven': kilometers_driven,
        'Length': length,
        'WheelBase': wheel_base,
        'KerbWeight': kerb_weight,
        'Torque': torque,
        'Engine': engine,
        'Height': height,
        'Acceleration': acceleration,
        'Mileage': mileage,
        'CargoVolume': cargo_volume,
        'PreviousOwners': previous_owners,
        'Seats': seats,
        'Doors': doors,
        'Car_Age': car_age,
        'TopSpeed': top_speed,
        'Mileage_per_Year': mileage_per_year,
        'City': city,
        'FuelType': fuel_type,
        'BodyType': body_type,
        'manufacturer': manufacturer,
        'CarModel': car_model,
        'VariantName': variant_name,
        'Color': color,
        'EngineType': engine_type,
        'TransmissionType': transmission_type,
        'DriveType': drive_type,
        'FuelSupplySystem': fuel_supply_system,
        'RearBrakeType': rear_brake_type,
        'TyreType': tyre_type,
        'SteeringType': steering_type,
        'HeadLights': head_lights,
        'Locking': locking,
        'GearBox': gear_box
    }

    return pd.DataFrame(features, index=[0])

# Predict function
def predict_price(features):
    # Separate numerical and categorical features
    numerical_features = features[important_numerical_cols]
    categorical_features = features[important_categorical_cols]

    # Scale numerical features
    numerical_features_scaled = scaler.transform(numerical_features)

    # Encode categorical features
    categorical_features_encoded = encoder.transform(categorical_features).toarray()

    # Combine scaled and encoded features
    features_processed = np.hstack([numerical_features_scaled, categorical_features_encoded])

    # Predict the price
    prediction = model.predict(features_processed)

    return prediction[0]

# Streamlit UI
st.title("Car Price Prediction App")
st.write("Enter the car features in the sidebar and get an estimated price.")

# Get user input
user_input = get_user_input()

# Predict the price
if st.button("Predict Price"):
    try:
        prediction = predict_price(user_input)
        st.success(f"The estimated price of the car is: ${prediction:.2f}")
    except Exception as e:
        st.error(f"Error in prediction: {e}")


2024-08-31 14:31:31.628 
  command:

    streamlit run /usr/local/lib/python3.10/dist-packages/colab_kernel_launcher.py [ARGUMENTS]
2024-08-31 14:31:31.656 Session state does not function when running a script without `streamlit run`
