<a href="https://colab.research.google.com/github/niharikaraghav/Car-price-prediction-model-/blob/main/CarPredictionProject.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Importing the dependencies

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Lasso
from sklearn import metrics

Data collection and processing

In [None]:
# Read the used-car listings CSV into a pandas DataFrame.
DATASET_CSV_PATH = '/content/used_car_dataset (2).csv'
car_dataset = pd.read_csv(DATASET_CSV_PATH)

In [None]:
# Preview the first five rows of the DataFrame.
car_dataset.head(n=5)

Unnamed: 0,car_name,car_price_in_rupees,kms_driven,fuel_type,city,year_of_manufacture
0,Hyundai Grand i10 Magna 1.2 Kappa VTVT [2017-2...,₹ 4.45 Lakh,"22,402 km",Petrol,Mumbai,2016
1,Maruti Suzuki Alto 800 Lxi,₹ 2.93 Lakh,"10,344 km",Petrol,Kolkata,2019
2,Tata Safari XZ Plus New,₹ 22.49 Lakh,"12,999 km",Diesel,Bangalore,2021
3,Maruti Suzuki Ciaz ZXI+,₹ 6.95 Lakh,"45,000 km",Petrol,Thane,2016
4,Jeep Compass Sport Plus 1.4 Petrol [2019-2020],₹ 12 Lakh,"11,193 km",Petrol,Kolkata,2019


In [None]:
# Check the number of rows and columns; shape is a (rows, columns) tuple.
car_dataset.shape


(2105, 6)

In [None]:
# Summarise each column: name, non-null count and dtype.
car_dataset.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2105 entries, 0 to 2104
Data columns (total 6 columns):
 #   Column               Non-Null Count  Dtype 
---  ------               --------------  ----- 
 0   car_name             2105 non-null   object
 1   car_price_in_rupees  2105 non-null   object
 2   kms_driven           2105 non-null   object
 3   fuel_type            2105 non-null   object
 4   city                 2105 non-null   object
 5   year_of_manufacture  2105 non-null   int64 
dtypes: int64(1), object(5)
memory usage: 98.8+ KB


In [None]:
# Count the missing values in each column.
car_dataset.isna().sum()

car_name               0
car_price_in_rupees    0
kms_driven             0
fuel_type              0
city                   0
year_of_manufacture    0
dtype: int64

In [None]:
# Show how often each fuel type and each car name occurs.
print(
    car_dataset['fuel_type'].value_counts(),
    car_dataset['car_name'].value_counts(),
    sep='\n',
)

Petrol        1348
Diesel         636
CNG             82
Petrol + 1      18
Electric        10
Diesel + 1       7
Hybrid           2
LPG              2
Name: fuel_type, dtype: int64
Maruti Suzuki Wagon R 1.0 VXI        25
Maruti Suzuki Alto 800 Lxi           23
Maruti Suzuki Wagon R 1.0 LXI CNG    21
Maruti Suzuki Baleno Delta 1.2       18
Honda City V                         17
                                     ..
Ford Fiesta Exi 1.6 Duratec Ltd       1
Renault Triber RXZ                    1
BMW 7 Series 730Ld                    1
Nissan Sunny XL D                     1
Maruti Suzuki Ciaz VXi+ AT            1
Name: car_name, Length: 946, dtype: int64


Encoding the data

The categorical `fuel_type` column is replaced with integer codes so that it can be used as a numeric model input.



In [None]:
# Encode the 'fuel_type' column as integer codes.
# 'Petrol + 1' and 'Diesel + 1' occur in the data (18 and 7 rows in the value
# counts) but were missing from the original mapping, which left the column as
# a mix of ints and strings (object dtype) that a regression model cannot use.
# NOTE(review): presumably these are dual-fuel variants; they get their own
# codes here -- confirm whether they should be merged with Petrol/Diesel.
FUEL_TYPE_CODES = {
    'Petrol': 0,
    'Diesel': 1,
    'CNG': 2,
    'Electric': 3,
    'Hybrid': 4,
    'LPG': 5,
    'Petrol + 1': 6,
    'Diesel + 1': 7,
}

# Values that are not keys of the mapping (e.g. codes produced by a previous
# run of this cell) pass through unchanged, so re-running the cell is safe.
# astype(int) raises ValueError if an unmapped label is still present instead
# of silently keeping a string in the column.
car_dataset['fuel_type'] = (
    car_dataset['fuel_type']
    .map(lambda label: FUEL_TYPE_CODES.get(label, label))
    .astype(int)
)

In [None]:
# Preview the first five rows again to see the encoded fuel_type column.
car_dataset.head(n=5)

Unnamed: 0,car_name,car_price_in_rupees,kms_driven,fuel_type,city,year_of_manufacture
0,Hyundai Grand i10 Magna 1.2 Kappa VTVT [2017-2...,₹ 4.45 Lakh,"22,402 km",0,Mumbai,2016
1,Maruti Suzuki Alto 800 Lxi,₹ 2.93 Lakh,"10,344 km",0,Kolkata,2019
2,Tata Safari XZ Plus New,₹ 22.49 Lakh,"12,999 km",1,Bangalore,2021
3,Maruti Suzuki Ciaz ZXI+,₹ 6.95 Lakh,"45,000 km",0,Thane,2016
4,Jeep Compass Sport Plus 1.4 Petrol [2019-2020],₹ 12 Lakh,"11,193 km",0,Kolkata,2019


In [None]:
# Show how many listings come from each city.
city_counts = car_dataset['city'].value_counts()
print(city_counts)

Bangalore      248
Pune           247
Mumbai         246
Ahmedabad      246
Kolkata        245
Hyderabad      245
Thane          244
Delhi          190
Chennai         78
Noida           41
Ambattur        19
Pallikarnai     17
Thiruvallur     16
Gurgaon          8
Poonamallee      8
Faridabad        7
Name: city, dtype: int64


Separating the features (X) from the target (Y)

In [None]:
# Features: every column except the car name and the price.
# NOTE: drop() returns a new DataFrame, so X is a snapshot of car_dataset as
# it is right now; columns of car_dataset that are reassigned further down
# the notebook are not reflected in X (or in Y).
X = car_dataset.drop(columns=['car_name', 'car_price_in_rupees'])

# Target: the price column.
Y = car_dataset.loc[:, 'car_price_in_rupees']

In [None]:
# Show the feature frame (pandas truncates the middle rows when printing).
print(X)

     kms_driven fuel_type       city  year_of_manufacture
0     22,402 km         0     Mumbai                 2016
1     10,344 km         0    Kolkata                 2019
2     12,999 km         1  Bangalore                 2021
3     45,000 km         0      Thane                 2016
4     11,193 km         0    Kolkata                 2019
...         ...       ...        ...                  ...
2100  42,158 km         1    Kolkata                 2015
2101  68,862 km         1  Hyderabad                 2013
2102  37,622 km         0    Chennai                 2018
2103  64,726 km         0     Mumbai                 2017
2104  29,150 km         0       Pune                 2017

[2105 rows x 4 columns]


In [None]:
# Show the target series (pandas truncates the middle rows when printing).
print(Y)

0        ₹ 4.45 Lakh
1        ₹ 2.93 Lakh
2       ₹ 22.49 Lakh
3        ₹ 6.95 Lakh
4          ₹ 12 Lakh
            ...     
2100      ₹ 3.6 Lakh
2101       ₹ 22 Lakh
2102     ₹ 8.38 Lakh
2103     ₹ 6.75 Lakh
2104     ₹ 8.76 Lakh
Name: car_price_in_rupees, Length: 2105, dtype: object


In [None]:
# Convert 'kms_driven' from text such as '22,402 km' to a float (22402.0).
# The original cell repeated this conversion twice; the second pass only
# round-tripped the already-numeric column through str, so one pass suffices.
# astype(str) keeps the cell safe to re-run once the column is numeric, and
# regex=False makes the replacements literal on every pandas version.
car_dataset['kms_driven'] = (
    car_dataset['kms_driven']
    .astype(str)
    .str.replace('km', '', regex=False)
    .str.replace(',', '', regex=False)
    .str.strip()
    .astype(float)
)

# Show only a preview rather than dumping the whole frame.
print("\nUpdated Dataset:")
print(car_dataset.head())


Updated Dataset:
                                               car_name  car_price_in_rupees  \
0     Hyundai Grand i10 Magna 1.2 Kappa VTVT [2017-2...                 4.45   
1                            Maruti Suzuki Alto 800 Lxi                 2.93   
2                               Tata Safari XZ Plus New                22.49   
3                               Maruti Suzuki Ciaz ZXI+                 6.95   
4        Jeep Compass Sport Plus 1.4 Petrol [2019-2020]                12.00   
...                                                 ...                  ...   
2100                         Ford Figo Titanium1.5 TDCi                 3.60   
2101                    MINI Cooper Countryman Cooper D                22.00   
2102                          Hyundai Verna 1.6 VTVT SX                 8.38   
2103                         Maruti Suzuki Ciaz VXi+ AT                 6.75   
2104                          Hyundai Verna 1.6 VTVT SX                 8.76   

      kms_driven fuel

In [None]:
def _price_to_lakh(raw):
    """Convert one raw price cell to a float expressed in lakh rupees.

    Parameters
    ----------
    raw : str or number
        Text such as '₹ 4.45 Lakh' or '₹ 1.2 Crore', or an already-converted
        number (when this cell is re-run).

    Returns
    -------
    float
        The price in lakh (1 lakh = 100,000 rupees; 1 crore = 100 lakh).

    Raises
    ------
    ValueError
        If the numeric part of the text cannot be parsed.
    """
    if not isinstance(raw, str):
        # Already numeric: a previous run of this cell converted the column.
        return float(raw)

    text = raw.replace('₹', '').replace(',', '').strip()
    if text.endswith('Crore'):
        # Previously the unit was stripped without scaling, which made crore
        # prices 100x too small relative to lakh prices.
        return float(text[:-len('Crore')]) * 100.0
    if text.endswith('Lakh'):
        return float(text[:-len('Lakh')])
    # NOTE(review): a value with no unit (e.g. '₹ 95,000') is assumed to be
    # plain rupees and is converted to lakh -- confirm against the raw data.
    return float(text) / 100_000.0


# Every price ends up as a float in the same unit (lakh), and the cell can be
# re-run without failing on the already-numeric column.
car_dataset['car_price_in_rupees'] = car_dataset['car_price_in_rupees'].map(_price_to_lakh)

Splitting the data

In [None]:
# Re-derive the features and target from car_dataset *now*, after the
# kms_driven and price columns have been converted to numbers. The X and Y
# created earlier were captured before that cleaning and still hold the raw
# strings (e.g. '87,642 km'), which is what made LinearRegression.fit() raise
# "could not convert string to float".
X = car_dataset.drop(columns=['car_name', 'car_price_in_rupees'])

# One-hot encode every column that is still non-numeric (at least 'city');
# numeric columns pass through unchanged. dtype=float keeps the indicator
# columns numeric instead of boolean.
X = pd.get_dummies(X, dtype=float)
Y = car_dataset['car_price_in_rupees']

# Hold out 10% of the rows for testing; random_state pins the shuffle so the
# split is reproducible.
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.1, random_state=2)

Model training
1. Linear Regression model

In [None]:
# Create an unfitted linear regression estimator.
lin_reg_model = LinearRegression()

In [None]:
# Fit the linear regression on the training features and target.
lin_reg_model.fit(X=X_train, y=Y_train)

ValueError: could not convert string to float: '87,642 km'

In [None]:
# Inspect the dtypes of the training features and of the training target.
print(X_train.dtypes, Y_train.dtypes, sep='\n')


kms_driven             object
fuel_type              object
city                   object
year_of_manufacture     int64
dtype: object
object


In [None]:
# Create a fresh, unfitted linear regression estimator (rebinds lin_reg_model).
lin_reg_model = LinearRegression()

In [None]:
# Confirm that the training features and target have the same number of rows.
print(X_train.shape, Y_train.shape, sep='\n')


(1894, 4)
(1894,)


In [None]:
# Count missing values in the training features (per column) and in the target.
print(X_train.isna().sum(), Y_train.isna().sum(), sep='\n')


kms_driven             0
fuel_type              0
city                   0
year_of_manufacture    0
dtype: int64
0


In [None]:
# Fit the linear regression on the training features and target.
lin_reg_model.fit(X=X_train, y=Y_train)

ValueError: could not convert string to float: '87,642 km'

Model Evaluation

In [None]:
# Predict prices for the rows the model was trained on.
training_data_prediction = lin_reg_model.predict(X=X_train)

ValueError: could not convert string to float: '87,642 km'

In [None]:
# Predict on the training data and actually score the result: the evaluation
# section previously only repeated the predict() call and never used the
# imported `metrics` module.
training_data_prediction = lin_reg_model.predict(X_train)

# R squared compares the predictions with the known training prices
# (1.0 would be a perfect fit).
train_r2_score = metrics.r2_score(Y_train, training_data_prediction)
print("R squared (training data):", train_r2_score)

ValueError: could not convert string to float: '87,642 km'