In [2]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from datetime import datetime

In [3]:
# Load the used-car listings from the CSV that sits next to the notebook,
# then show the frame as the cell output for a quick visual check.
csv_path = './CAR DETAILS FROM CAR DEKHO.csv'
df = pd.read_csv(csv_path)
df

Unnamed: 0,name,year,selling_price,km_driven,fuel,seller_type,transmission,owner
0,Maruti 800 AC,2007,60000,70000,Petrol,Individual,Manual,First Owner
1,Maruti Wagon R LXI Minor,2007,135000,50000,Petrol,Individual,Manual,First Owner
2,Hyundai Verna 1.6 SX,2012,600000,100000,Diesel,Individual,Manual,First Owner
3,Datsun RediGO T Option,2017,250000,46000,Petrol,Individual,Manual,First Owner
4,Honda Amaze VX i-DTEC,2014,450000,141000,Diesel,Individual,Manual,Second Owner
...,...,...,...,...,...,...,...,...
4335,Hyundai i20 Magna 1.4 CRDi (Diesel),2014,409999,80000,Diesel,Individual,Manual,Second Owner
4336,Hyundai i20 Magna 1.4 CRDi,2014,409999,80000,Diesel,Individual,Manual,Second Owner
4337,Maruti 800 AC BSIII,2009,110000,83000,Petrol,Individual,Manual,Second Owner
4338,Hyundai Creta 1.6 CRDi SX Option,2016,865000,90000,Diesel,Individual,Manual,First Owner


In [4]:
# Reference year for age calculations, taken from the system clock at run time.
# NOTE(review): because this reads the wall clock, 'vehicle_age' (and the
# displayed outputs) shift when the notebook is re-run in a later calendar year.
cur=datetime.now().year
# Age of each car in years. This is an exact linear function of 'year'
# (cur - year), so it carries no information beyond the 'year' column itself.
df['vehicle_age']=cur-df['year']

In [5]:
# Remove the target column from the frame and keep it as a Series so it can be
# re-attached afterwards. pop() mutates df in place, so re-running this cell on
# its own raises KeyError once the column is gone.
last_column=df.pop('selling_price')

In [6]:
# Re-attach the target as the last column. The position is computed from the
# frame instead of being hard-coded, so the cell keeps working if a feature
# column is added or removed earlier in the notebook.
df.insert(len(df.columns), 'selling_price', last_column)

In [7]:
# Show the frame again to confirm 'vehicle_age' was added and 'selling_price'
# now sits in the last position.
df

Unnamed: 0,name,year,km_driven,fuel,seller_type,transmission,owner,vehicle_age,selling_price
0,Maruti 800 AC,2007,70000,Petrol,Individual,Manual,First Owner,17,60000
1,Maruti Wagon R LXI Minor,2007,50000,Petrol,Individual,Manual,First Owner,17,135000
2,Hyundai Verna 1.6 SX,2012,100000,Diesel,Individual,Manual,First Owner,12,600000
3,Datsun RediGO T Option,2017,46000,Petrol,Individual,Manual,First Owner,7,250000
4,Honda Amaze VX i-DTEC,2014,141000,Diesel,Individual,Manual,Second Owner,10,450000
...,...,...,...,...,...,...,...,...,...
4335,Hyundai i20 Magna 1.4 CRDi (Diesel),2014,80000,Diesel,Individual,Manual,Second Owner,10,409999
4336,Hyundai i20 Magna 1.4 CRDi,2014,80000,Diesel,Individual,Manual,Second Owner,10,409999
4337,Maruti 800 AC BSIII,2009,83000,Petrol,Individual,Manual,Second Owner,15,110000
4338,Hyundai Creta 1.6 CRDi SX Option,2016,90000,Diesel,Individual,Manual,First Owner,8,865000


In [8]:
# Feature matrix: every column except the free-text 'name' and the target.
# Columns are selected by name rather than by position so the result does not
# depend on where the earlier cells happened to leave each column.
X = df.drop(columns=['name', 'selling_price'])

In [9]:
# Display the feature matrix to check which columns were kept.
X

Unnamed: 0,year,km_driven,fuel,seller_type,transmission,owner,vehicle_age
0,2007,70000,Petrol,Individual,Manual,First Owner,17
1,2007,50000,Petrol,Individual,Manual,First Owner,17
2,2012,100000,Diesel,Individual,Manual,First Owner,12
3,2017,46000,Petrol,Individual,Manual,First Owner,7
4,2014,141000,Diesel,Individual,Manual,Second Owner,10
...,...,...,...,...,...,...,...
4335,2014,80000,Diesel,Individual,Manual,Second Owner,10
4336,2014,80000,Diesel,Individual,Manual,Second Owner,10
4337,2009,83000,Petrol,Individual,Manual,Second Owner,15
4338,2016,90000,Diesel,Individual,Manual,First Owner,8


In [10]:
# Regression target. Selected by name so it stays correct even if the column
# order of df changes.
y = df['selling_price']

In [11]:
# Display the target Series to check its name, length and dtype.
y

0        60000
1       135000
2       600000
3       250000
4       450000
         ...  
4335    409999
4336    409999
4337    110000
4338    865000
4339    225000
Name: selling_price, Length: 4340, dtype: int64

In [12]:
# Columns holding string labels. These are one-hot encoded; every other column
# reaches the regressor unchanged via remainder='passthrough'.
categorical_cols = ['fuel', 'seller_type', 'transmission', 'owner']
preprocessor = ColumnTransformer(
    transformers=[
        # handle_unknown='ignore': a label that never appeared in the rows used
        # for fitting (a rare category that landed only in the test split, or a
        # new value typed into a prediction input) is encoded as all zeros
        # instead of making predict() raise "Found unknown categories".
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_cols)
    ],
    remainder='passthrough'
)

# Create the pipeline with preprocessor and linear regression model, so the
# same encoding is applied automatically at fit and at predict time.
regr = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('regressor', LinearRegression())
])

In [13]:
# Display the feature matrix again; it has not been modified since it was built.
X

Unnamed: 0,year,km_driven,fuel,seller_type,transmission,owner,vehicle_age
0,2007,70000,Petrol,Individual,Manual,First Owner,17
1,2007,50000,Petrol,Individual,Manual,First Owner,17
2,2012,100000,Diesel,Individual,Manual,First Owner,12
3,2017,46000,Petrol,Individual,Manual,First Owner,7
4,2014,141000,Diesel,Individual,Manual,Second Owner,10
...,...,...,...,...,...,...,...
4335,2014,80000,Diesel,Individual,Manual,Second Owner,10
4336,2014,80000,Diesel,Individual,Manual,Second Owner,10
4337,2009,83000,Petrol,Individual,Manual,Second Owner,15
4338,2016,90000,Diesel,Individual,Manual,First Owner,8


In [14]:
# Hold out 30% of the rows for evaluation; the fixed random_state makes the
# split repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=0,
)

In [15]:
# Fit the whole pipeline (encoder + linear regression) on the training split
# only; the fitted pipeline is returned and shown as the cell output.
regr.fit(X_train,y_train)

Pipeline(steps=[('preprocessor',
                 ColumnTransformer(remainder='passthrough',
                                   transformers=[('cat', OneHotEncoder(),
                                                  ['fuel', 'seller_type',
                                                   'transmission',
                                                   'owner'])])),
                ('regressor', LinearRegression())])

In [16]:
# Predict selling prices for the held-out test rows.
y_pred=regr.predict(X_test)

In [17]:
# R^2 (coefficient of determination) of the predictions against the true
# prices of the test split.
accuracyr2=r2_score(y_test,y_pred)

In [18]:
# Report the R^2 score scaled by 100.
# NOTE(review): R^2 is a goodness-of-fit measure, not an accuracy, and it can be
# negative; consider relabelling this as "R2 score" and printing the raw value.
print(f"R2 Accuracy = {accuracyr2*100:.2f}%")

R2 Accuracy = 48.47%


In [23]:
# Ask for the two numeric inputs of the prediction. int() raises ValueError on
# non-numeric text, and input() blocks an unattended "Run All".
yr=int(input("Enter Year of Car Purchase"))
km=int(input("Enter Number of kilometers Car Driven"))
# Derive the age with the same reference year used for the training column.
# NOTE(review): nothing prevents a year later than `cur`, which yields a
# negative age.
age=cur-yr


In [24]:
# Single-row frame describing the car to price. The column names match the
# feature matrix used for fitting; the categorical values are fixed here and
# only year, mileage and age come from the prompts.
sample_record = {
    'year': yr,
    'km_driven': km,
    'fuel': 'Petrol',
    'seller_type': 'Dealer',
    'transmission': 'Manual',
    'owner': 'Second Owner',
    'vehicle_age': age,
}
sample_input = pd.DataFrame([sample_record])

In [25]:
# Run the fitted pipeline on the one-row sample; the result is an array with a
# single predicted price.
sample_pred = regr.predict(sample_input)

# Print the predicted selling price, formatted to two decimals.
# NOTE(review): a plain linear model is not bounded below, so this value may be
# negative for old or high-mileage inputs.
print(f'Predicted Selling Price = Rs {sample_pred[0]:.2f} /-')

Predicted Selling Price = Rs 263275.59 /-
