Importing Dependencies

In [95]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import LinearRegression, Lasso, Ridge
from sklearn.metrics import r2_score

Loading the dataset

In [96]:
# Path of the used-car listings CSV.
# NOTE(review): absolute path (presumably a Google Colab upload location);
# it only resolves in an environment with this exact layout.
DATA_PATH = "/content/sample_data/CAR DETAILS FROM CAR DEKHO.csv"
car_data = pd.read_csv(DATA_PATH)

In [97]:
# Preview the first five rows to check that the columns parsed as expected.
car_data.head(n=5)

Unnamed: 0,name,year,selling_price,km_driven,fuel,seller_type,transmission,owner
0,Maruti 800 AC,2007,60000,70000,Petrol,Individual,Manual,First Owner
1,Maruti Wagon R LXI Minor,2007,135000,50000,Petrol,Individual,Manual,First Owner
2,Hyundai Verna 1.6 SX,2012,600000,100000,Diesel,Individual,Manual,First Owner
3,Datsun RediGO T Option,2017,250000,46000,Petrol,Individual,Manual,First Owner
4,Honda Amaze VX i-DTEC,2014,450000,141000,Diesel,Individual,Manual,Second Owner


In [98]:
# Count missing values per column (isna is the same check as isnull).
car_data.isna().sum()

Unnamed: 0,0
name,0
year,0
selling_price,0
km_driven,0
fuel,0
seller_type,0
transmission,0
owner,0


In [99]:
# Summary statistics (count, mean, std, quartiles, min/max) for the numeric
# columns: year, selling_price and km_driven.
car_data.describe()

Unnamed: 0,year,selling_price,km_driven
count,4340.0,4340.0,4340.0
mean,2013.090783,504127.3,66215.777419
std,4.215344,578548.7,46644.102194
min,1992.0,20000.0,1.0
25%,2011.0,208749.8,35000.0
50%,2014.0,350000.0,60000.0
75%,2016.0,600000.0,90000.0
max,2020.0,8900000.0,806599.0


Data Preprocessing

In [100]:
# Replace each categorical text column with integer codes, in place.
#
# A separate LabelEncoder is fitted and kept per column: refitting a single
# shared instance overwrites its classes_, which would leave no way to
# inverse_transform the codes or to encode new rows consistently for any
# column except the last one fitted.
#
# NOTE(review): LabelEncoder assigns codes in sorted class order, so these
# integers impose an arbitrary ordering on nominal features; one-hot encoding
# may suit the linear models below better — confirm before relying on the
# coefficients.
categorical_columns = ['fuel', 'seller_type', 'transmission', 'owner']
encoders = {}  # column name -> fitted LabelEncoder
for column in categorical_columns:
    encoder = LabelEncoder()
    car_data[column] = encoder.fit_transform(car_data[column])
    encoders[column] = encoder
# `encoder` is left bound to the encoder fitted on 'owner'.

In [101]:
# Inspect column dtypes and non-null counts after encoding; the four encoded
# columns should now be integer-typed, leaving `name` as the only object column.
car_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4340 entries, 0 to 4339
Data columns (total 8 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   name           4340 non-null   object
 1   year           4340 non-null   int64 
 2   selling_price  4340 non-null   int64 
 3   km_driven      4340 non-null   int64 
 4   fuel           4340 non-null   int64 
 5   seller_type    4340 non-null   int64 
 6   transmission   4340 non-null   int64 
 7   owner          4340 non-null   int64 
dtypes: int64(7), object(1)
memory usage: 271.4+ KB


Splitting the data

In [102]:
# Target: the listed selling price.
Y = car_data['selling_price']
# Features: every remaining column except the free-text car name.
# (`columns=` already selects the axis, so no separate `axis` argument is needed.)
X = car_data.drop(columns=['name', 'selling_price'])

In [103]:
# Hold out 10% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.1,
    random_state=2,
)

In [104]:
# Sanity-check the split sizes: full feature matrix, training part, test part.
print(*(frame.shape for frame in (X, X_train, X_test)))

(4340, 6) (3906, 6) (434, 6)


Model Training

1. Linear Regression

In [105]:
# Ordinary least-squares baseline; fit() returns the estimator itself.
model = LinearRegression().fit(X_train, Y_train)
model

In [106]:
# Predict on both splits so train and test fit can be compared.
train_pred, test_pred = (model.predict(split) for split in (X_train, X_test))

In [107]:
# R^2 of the linear-regression predictions on each split.
linear_train_r2 = r2_score(Y_train, train_pred)
linear_test_r2 = r2_score(Y_test, test_pred)
print("Training r2 using Linear regression : ", linear_train_r2)
print("Testing r2 using Linear regression : ", linear_test_r2)

Training r2 using Linear regression :  0.4414460114370832
Testing r2 using Linear regression :  0.5148590930573664


2. Lasso

In [108]:
# L1-regularised linear model.
# NOTE(review): the scores recorded in this notebook for Lasso differ from the
# plain LinearRegression scores by less than 1e-7, so this penalty appears to
# have practically no effect — presumably because alpha=0.1 is tiny relative to
# the scale of selling_price. Consider standardising the features and tuning alpha.
lasso = Lasso(alpha=0.1).fit(X_train, Y_train)
lasso

In [109]:
# Lasso predictions for both splits (overwrites the previous model's predictions).
train_pred, test_pred = (lasso.predict(split) for split in (X_train, X_test))

In [110]:
# R^2 of the Lasso predictions on each split.
lasso_train_r2 = r2_score(Y_train, train_pred)
lasso_test_r2 = r2_score(Y_test, test_pred)
print("Training r2 using Lasso regression : ", lasso_train_r2)
print("Testing r2 using Lasso regression : ", lasso_test_r2)

Training r2 using Lasso regression :  0.4414460114366493
Testing r2 using Lasso regression :  0.5148590651938314


3. Ridge

In [111]:
# L2-regularised linear model.
# NOTE(review): the scores recorded in this notebook for Ridge are within 1e-4
# of the plain LinearRegression scores, so this penalty strength appears to
# change very little — presumably the features would need scaling and alpha
# tuning for the regularisation to matter.
ridge = Ridge(alpha=0.1).fit(X_train, Y_train)
ridge

In [112]:
# Ridge predictions for both splits (overwrites the previous model's predictions).
train_pred, test_pred = (ridge.predict(split) for split in (X_train, X_test))

In [113]:
# R^2 of the Ridge predictions on each split.
ridge_train_r2 = r2_score(Y_train, train_pred)
ridge_test_r2 = r2_score(Y_test, test_pred)
print("Training r2 using Ridge regression : ", ridge_train_r2)
print("Testing r2 using Ridge regression : ", ridge_test_r2)

Training r2 using Ridge regression :  0.44144599420653263
Testing r2 using Ridge regression :  0.5148476980073594
