## Car Price Prediction

In [None]:
#Roadmap of the notebook. This is a bare string literal, so the notebook echoes its repr as the cell output.
'''
Main steps to be followed:

1) Setting up the data
2) Creating the model
3) Training the model
4) Prediction
'''

'\nMain steps to be followed:\n\n1) Setting up the data\n2) Creating the model\n3) Training the model\n4) Prediction\n'

In [None]:
#Importing the Libraries
import numpy as np
import pandas as pd

In [None]:
#Importing Linear Regression Model
from sklearn.linear_model import LinearRegression

In [None]:
#Importing the metrics module (provides r2_score for evaluating the model)
from sklearn import metrics

In [None]:
#Importing train test split function
from sklearn.model_selection import train_test_split

### Data Collection & Analysis (Setting up the data)

In [None]:
#Load the car listings from the CSV file into a DataFrame
car_ds = pd.read_csv(filepath_or_buffer="car data.csv")

In [None]:
#Preview the top of the dataset (first 5 rows)
car_ds.head(n=5)

Unnamed: 0,Car_Name,Year,Selling_Price,Present_Price,Kms_Driven,Fuel_Type,Seller_Type,Transmission,Owner
0,ritz,2014,3.35,5.59,27000,Petrol,Dealer,Manual,0
1,sx4,2013,4.75,9.54,43000,Diesel,Dealer,Manual,0
2,ciaz,2017,7.25,9.85,6900,Petrol,Dealer,Manual,0
3,wagon r,2011,2.85,4.15,5200,Petrol,Dealer,Manual,0
4,swift,2014,4.6,6.87,42450,Diesel,Dealer,Manual,0


In [None]:
#Dimensions of the dataset as a (rows, columns) tuple
car_ds.shape

(301, 9)

In [None]:
#Summary of the dataset: column names, non-null counts, dtypes and memory usage
car_ds.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 301 entries, 0 to 300
Data columns (total 9 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Car_Name       301 non-null    object 
 1   Year           301 non-null    int64  
 2   Selling_Price  301 non-null    float64
 3   Present_Price  301 non-null    float64
 4   Kms_Driven     301 non-null    int64  
 5   Fuel_Type      301 non-null    object 
 6   Seller_Type    301 non-null    object 
 7   Transmission   301 non-null    object 
 8   Owner          301 non-null    int64  
dtypes: float64(2), int64(3), object(4)
memory usage: 21.3+ KB


In [None]:
#Count the missing values in each column
car_ds.isna().sum()

Car_Name         0
Year             0
Selling_Price    0
Present_Price    0
Kms_Driven       0
Fuel_Type        0
Seller_Type      0
Transmission     0
Owner            0
dtype: int64

In [None]:
#Distribution of the Fuel_Type categories.
#value_counts() already lists every distinct value with its frequency, so the separate
#pd.unique() call (whose result was computed and discarded) has been removed.
car_ds['Fuel_Type'].value_counts()

Petrol    239
Diesel     60
CNG         2
Name: Fuel_Type, dtype: int64

In [None]:
#Distribution of the Transmission categories.
#value_counts() already lists every distinct value with its frequency, so the separate
#pd.unique() call (whose result was computed and discarded) has been removed.
car_ds['Transmission'].value_counts()

Manual       261
Automatic     40
Name: Transmission, dtype: int64

In [None]:
#Distribution of the Seller_Type categories.
#value_counts() already lists every distinct value with its frequency, so the separate
#pd.unique() call (whose result was computed and discarded) has been removed.
car_ds['Seller_Type'].value_counts()

Dealer        195
Individual    106
Name: Seller_Type, dtype: int64

### Encoding the Categorical Data

In [None]:
#Encode the categorical columns (Fuel_Type, Transmission, Seller_Type) as integer codes.
#A single replace() call returns a new frame instead of mutating car_ds in place three times,
#and the explicit astype() guarantees the encoded columns are int64 rather than relying on
#replace() to downcast the object columns. A category missing from the mapping now fails
#loudly in astype() instead of leaking a string into the model features.
category_codes = {
    'Fuel_Type': {'Petrol':0,'Diesel':1,'CNG':2},
    'Transmission': {'Manual':0,'Automatic':1},
    'Seller_Type': {'Dealer':0,'Individual':1},
}
car_ds = car_ds.replace(category_codes).astype(dict.fromkeys(category_codes, 'int64'))

In [None]:
#Show the dataset after encoding to check that the categorical columns now hold integer codes
car_ds

Unnamed: 0,Car_Name,Year,Selling_Price,Present_Price,Kms_Driven,Fuel_Type,Seller_Type,Transmission,Owner
0,ritz,2014,3.35,5.59,27000,0,0,0,0
1,sx4,2013,4.75,9.54,43000,1,0,0,0
2,ciaz,2017,7.25,9.85,6900,0,0,0,0
3,wagon r,2011,2.85,4.15,5200,0,0,0,0
4,swift,2014,4.60,6.87,42450,1,0,0,0
...,...,...,...,...,...,...,...,...,...
296,city,2016,9.50,11.60,33988,1,0,0,0
297,brio,2015,4.00,5.90,60000,0,0,0,0
298,city,2009,3.35,11.00,87934,0,0,0,0
299,city,2017,11.50,12.50,9000,1,0,0,0


### Creating the model

In [None]:
#Separate the input features from the target.
#Car_Name is dropped from the features and Selling_Price is the value to predict.
Y = car_ds['Selling_Price']
X = car_ds.drop(columns=['Car_Name','Selling_Price'])

In [None]:
#Inspect the feature matrix X (every column except Car_Name and Selling_Price)
print(X)

     Year  Present_Price  Kms_Driven  Fuel_Type  Seller_Type  Transmission  \
0    2014           5.59       27000          0            0             0   
1    2013           9.54       43000          1            0             0   
2    2017           9.85        6900          0            0             0   
3    2011           4.15        5200          0            0             0   
4    2014           6.87       42450          1            0             0   
..    ...            ...         ...        ...          ...           ...   
296  2016          11.60       33988          1            0             0   
297  2015           5.90       60000          0            0             0   
298  2009          11.00       87934          0            0             0   
299  2017          12.50        9000          1            0             0   
300  2016           5.90        5464          0            0             0   

     Owner  
0        0  
1        0  
2        0  
3        0 

In [None]:
#Inspect the target variable Y (the Selling_Price column)
print(Y)

0       3.35
1       4.75
2       7.25
3       2.85
4       4.60
       ...  
296     9.50
297     4.00
298     3.35
299    11.50
300     5.30
Name: Selling_Price, Length: 301, dtype: float64


In [None]:
#Hold out 20% of the rows for testing; the fixed random_state keeps the split reproducible
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=2,
)
#Sanity check of the resulting sizes
print(X.shape, X_train.shape, Y_test.shape)

(301, 7) (240, 7) (61,)


In [None]:
#Create the linear regression estimator; it is not fitted until fit() is called
model = LinearRegression()

In [None]:
#Fit the linear regression model on the training data
model.fit(X=X_train, y=Y_train)

In [None]:
#Predictions for the rows the model was trained on
training_data_prediction = model.predict(X=X_train)

In [None]:
#R-squared score of the model on the training data
training_data_accuracy = metrics.r2_score(
    y_true=Y_train,
    y_pred=training_data_prediction,
)
print("Accuracy score of training data ",training_data_accuracy)

Accuracy score of training data  0.8838169193709796


In [None]:
#Evaluate the model on the held-out testing data using the R-squared score
testing_data_prediction = model.predict(X_test)
testing_data_accuracy = metrics.r2_score(Y_test,testing_data_prediction)
#Label corrected: this score belongs to the testing data, not the training data
print("Accuracy score of testing data ",testing_data_accuracy)

Accuracy score of training data  0.8401532365377697


### Building a Predictive System

In [None]:
#A single car to price, with values in the same column order as X:
#Year, Present_Price, Kms_Driven, Fuel_Type, Seller_Type, Transmission, Owner
#(here Fuel_Type 0 = Petrol, Seller_Type 1 = Individual, Transmission 1 = Automatic)
input_data = (2015,7.5,44000,0,1,1,0)
input_data_as_numpy_array = np.array(input_data)
#We are predicting for one data point, so add a leading axis to get a single-row 2-D array
input_data_reshape = input_data_as_numpy_array[np.newaxis, :]

In [None]:
#The model was fitted on a DataFrame, so give the single input row the same column names.
#Passing the bare numpy array makes scikit-learn warn that X has no valid feature names.
input_features = pd.DataFrame(input_data_reshape, columns=X_train.columns)
prediction = model.predict(input_features)
print("Car Price according to the prediction ",prediction)

Car Price according to the prediction  [5.42885296]


