In [4]:

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
import matplotlib.pyplot as plt

# Load data
df = pd.read_csv("Quikr_car.csv")

# Clean and preprocess data
df = df[df['Price'] != 'Ask For Price']
df['Price'] = df['Price'].str.replace(',', '').astype(int)
df = df[df['Year'].str.isnumeric()]
df['Year'] = df['Year'].astype(int)
df = df[df['Kms_driven'].notnull()]
df['Kms_driven'] = df['Kms_driven'].str.replace(',', '').str.extract('(\d+)').astype(float)
df = df[df['Fuel_type'].notnull()]
df['Company'] = df['Name'].str.split().str[0]
df['Model'] = df['Name'].str.split().str[1]
df = df[['Company', 'Model', 'Year', 'Price', 'Kms_driven', 'Fuel_type']]
df.dropna(inplace=True)

# Features and labels
X = df[['Company', 'Model', 'Year', 'Kms_driven', 'Fuel_type']]
y = df['Price']

# One-hot encoding for categorical variables
X = pd.get_dummies(X, drop_first=True)

# Train-test split and model training
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=10)
model = LinearRegression()
model.fit(X_train, y_train)

# --------- Feature 1: Price Comparison with Market Average ----------
def compare_with_market(company, model_name, year, predicted_price):
    filtered = df[(df['Company'] == company) & (df['Model'] == model_name) & (df['Year'] == year)]
    if len(filtered) == 0:
        return "Market data not available"
    avg_market_price = filtered['Price'].mean()
    if predicted_price > avg_market_price * 1.10:
        return "Above Market Average"
    elif predicted_price < avg_market_price * 0.90:
        return "Below Market Average"
    else:
        return "At Market Rate"

# --------- Feature 2: Predict Resale Price After X Years ----------
def predict_resale_price(current_price, years=2, annual_depreciation_rate=0.15):
    return round(current_price * ((1 - annual_depreciation_rate) ** years), 2)

# --------- Example Usage ----------
# Input example
company = "Maruti"
model_name = "800"
year = 2010
kms_driven = 40000
fuel_type = "Petrol"

# Prepare input for model
input_df = pd.DataFrame([[company, model_name, year, kms_driven, fuel_type]],
                        columns=['Company', 'Model', 'Year', 'Kms_driven', 'Fuel_type'])
input_encoded = pd.get_dummies(input_df, drop_first=True).reindex(columns=X.columns, fill_value=0)

# Predict price
predicted_price = model.predict(input_encoded)[0]
print(f"Predicted Price: ₹{int(predicted_price)}")

# Feature 1: Market comparison
market_comparison = compare_with_market(company, model_name, year, predicted_price)
print("Market Comparison:", market_comparison)

# Feature 2: Resale price after 2 years
future_price = predict_resale_price(predicted_price, years=2)
print(f"Estimated Resale Price after 2 years: ₹{future_price}")


  df['Kms_driven'] = df['Kms_driven'].str.replace(',', '').str.extract('(\d+)').astype(float)


ValueError: invalid literal for int() with base 10: '₹380000'