# Importing dependencies

In [23]:
import numpy as np 
import pandas as pd 
import matplotlib as plt 

In [24]:
# Path of the input CSV, given relative to the current working directory.
file_path = './data.csv'

# Parse the CSV with pandas' default options, then display the resulting frame.
data_frame = pd.read_csv(filepath_or_buffer=file_path)
data_frame

Unnamed: 0,name,year,selling_price,km_driven,fuel,seller_type,transmission,owner
0,Maruti 800 AC,2007,60000,70000,Petrol,Individual,Manual,First Owner
1,Maruti Wagon R LXI Minor,2007,135000,50000,Petrol,Individual,Manual,First Owner
2,Hyundai Verna 1.6 SX,2012,600000,100000,Diesel,Individual,Manual,First Owner
3,Datsun RediGO T Option,2017,250000,46000,Petrol,Individual,Manual,First Owner
4,Honda Amaze VX i-DTEC,2014,450000,141000,Diesel,Individual,Manual,Second Owner
...,...,...,...,...,...,...,...,...
4335,Hyundai i20 Magna 1.4 CRDi (Diesel),2014,409999,80000,Diesel,Individual,Manual,Second Owner
4336,Hyundai i20 Magna 1.4 CRDi,2014,409999,80000,Diesel,Individual,Manual,Second Owner
4337,Maruti 800 AC BSIII,2009,110000,83000,Petrol,Individual,Manual,Second Owner
4338,Hyundai Creta 1.6 CRDi SX Option,2016,865000,90000,Diesel,Individual,Manual,First Owner


# Cleaning the data set

In [25]:
# Show the dtype pandas assigned to each column of the loaded frame.
data_frame.dtypes

name             object
year              int64
selling_price     int64
km_driven         int64
fuel             object
seller_type      object
transmission     object
owner            object
dtype: object

### Name

In [26]:
# Show the distinct values found in the `name` column.
data_frame['name'].unique()

array(['Maruti 800 AC', 'Maruti Wagon R LXI Minor',
       'Hyundai Verna 1.6 SX', ..., 'Mahindra Verito 1.5 D6 BSIII',
       'Toyota Innova 2.5 VX (Diesel) 8 Seater BS IV',
       'Hyundai i20 Magna 1.4 CRDi'], dtype=object)

### One-Hot encoding for fuel type

In [27]:
# Show the distinct fuel labels; each one gets its own indicator column in the next cell.
data_frame['fuel'].unique()

array(['Petrol', 'Diesel', 'CNG', 'LPG', 'Electric'], dtype=object)

In [28]:
# One indicator column per fuel label.
# Keys are the new column names; values are the exact labels stored in `fuel`.
fuel_indicator_labels = {
    'petrol': "Petrol",
    'diesel': "Diesel",
    'cng': "CNG",
    'lpg': "LPG",
    'electric': "Electric",
}

for indicator_name, fuel_label in fuel_indicator_labels.items():
    # 1 where the row's fuel equals this label, 0 otherwise.
    data_frame[indicator_name] = data_frame["fuel"].eq(fuel_label).astype(int)

data_frame


Unnamed: 0,name,year,selling_price,km_driven,fuel,seller_type,transmission,owner,petrol,diesel,cng,lpg,electric
0,Maruti 800 AC,2007,60000,70000,Petrol,Individual,Manual,First Owner,1,0,0,0,0
1,Maruti Wagon R LXI Minor,2007,135000,50000,Petrol,Individual,Manual,First Owner,1,0,0,0,0
2,Hyundai Verna 1.6 SX,2012,600000,100000,Diesel,Individual,Manual,First Owner,0,1,0,0,0
3,Datsun RediGO T Option,2017,250000,46000,Petrol,Individual,Manual,First Owner,1,0,0,0,0
4,Honda Amaze VX i-DTEC,2014,450000,141000,Diesel,Individual,Manual,Second Owner,0,1,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
4335,Hyundai i20 Magna 1.4 CRDi (Diesel),2014,409999,80000,Diesel,Individual,Manual,Second Owner,0,1,0,0,0
4336,Hyundai i20 Magna 1.4 CRDi,2014,409999,80000,Diesel,Individual,Manual,Second Owner,0,1,0,0,0
4337,Maruti 800 AC BSIII,2009,110000,83000,Petrol,Individual,Manual,Second Owner,1,0,0,0,0
4338,Hyundai Creta 1.6 CRDi SX Option,2016,865000,90000,Diesel,Individual,Manual,First Owner,0,1,0,0,0


In [29]:
# Show the distinct seller categories; each one gets its own indicator column in the next cell.
data_frame['seller_type'].unique()

array(['Individual', 'Dealer', 'Trustmark Dealer'], dtype=object)

In [30]:

# One indicator column per seller category.
# Keys are the new column names; values are the exact labels stored in `seller_type`.
# NOTE(review): 'indivisual' is a misspelling of "individual". The name is kept
# unchanged here because other cells may reference the column by this spelling.
seller_indicator_labels = {
    'indivisual': 'Individual',
    'dealer': 'Dealer',
    't_dealer': 'Trustmark Dealer',
}

for indicator_name, seller_label in seller_indicator_labels.items():
    # 1 where the row's seller type equals this label, 0 otherwise.
    data_frame[indicator_name] = data_frame['seller_type'].eq(seller_label).astype(int)

data_frame

Unnamed: 0,name,year,selling_price,km_driven,fuel,seller_type,transmission,owner,petrol,diesel,cng,lpg,electric,indivisual,dealer,t_dealer
0,Maruti 800 AC,2007,60000,70000,Petrol,Individual,Manual,First Owner,1,0,0,0,0,1,0,0
1,Maruti Wagon R LXI Minor,2007,135000,50000,Petrol,Individual,Manual,First Owner,1,0,0,0,0,1,0,0
2,Hyundai Verna 1.6 SX,2012,600000,100000,Diesel,Individual,Manual,First Owner,0,1,0,0,0,1,0,0
3,Datsun RediGO T Option,2017,250000,46000,Petrol,Individual,Manual,First Owner,1,0,0,0,0,1,0,0
4,Honda Amaze VX i-DTEC,2014,450000,141000,Diesel,Individual,Manual,Second Owner,0,1,0,0,0,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4335,Hyundai i20 Magna 1.4 CRDi (Diesel),2014,409999,80000,Diesel,Individual,Manual,Second Owner,0,1,0,0,0,1,0,0
4336,Hyundai i20 Magna 1.4 CRDi,2014,409999,80000,Diesel,Individual,Manual,Second Owner,0,1,0,0,0,1,0,0
4337,Maruti 800 AC BSIII,2009,110000,83000,Petrol,Individual,Manual,Second Owner,1,0,0,0,0,1,0,0
4338,Hyundai Creta 1.6 CRDi SX Option,2016,865000,90000,Diesel,Individual,Manual,First Owner,0,1,0,0,0,1,0,0


In [31]:
# Encode `transmission` in place as a 0/1 flag: 1 for 'Manual', 0 for any other value.
#
# The comparison is only meaningful while the column still holds the raw labels.
# Once the column is numeric, `== 'Manual'` is False for every row, so running the
# cell a second time would silently overwrite the flags with zeros. The dtype guard
# turns a re-run into a no-op.
#
# NOTE(review): the output recorded for this cell shows an extra `gear` column and
# `transmission` still holding 'Manual', which this code does not produce.
# Confirm which of the two layouts the rest of the notebook expects.
if not pd.api.types.is_numeric_dtype(data_frame['transmission']):
    data_frame['transmission'] = (data_frame['transmission'] == 'Manual').astype(int)
data_frame

Unnamed: 0,name,year,selling_price,km_driven,fuel,seller_type,transmission,owner,petrol,diesel,cng,lpg,electric,indivisual,dealer,t_dealer,gear
0,Maruti 800 AC,2007,60000,70000,Petrol,Individual,Manual,First Owner,1,0,0,0,0,1,0,0,1
1,Maruti Wagon R LXI Minor,2007,135000,50000,Petrol,Individual,Manual,First Owner,1,0,0,0,0,1,0,0,1
2,Hyundai Verna 1.6 SX,2012,600000,100000,Diesel,Individual,Manual,First Owner,0,1,0,0,0,1,0,0,1
3,Datsun RediGO T Option,2017,250000,46000,Petrol,Individual,Manual,First Owner,1,0,0,0,0,1,0,0,1
4,Honda Amaze VX i-DTEC,2014,450000,141000,Diesel,Individual,Manual,Second Owner,0,1,0,0,0,1,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4335,Hyundai i20 Magna 1.4 CRDi (Diesel),2014,409999,80000,Diesel,Individual,Manual,Second Owner,0,1,0,0,0,1,0,0,1
4336,Hyundai i20 Magna 1.4 CRDi,2014,409999,80000,Diesel,Individual,Manual,Second Owner,0,1,0,0,0,1,0,0,1
4337,Maruti 800 AC BSIII,2009,110000,83000,Petrol,Individual,Manual,Second Owner,1,0,0,0,0,1,0,0,1
4338,Hyundai Creta 1.6 CRDi SX Option,2016,865000,90000,Diesel,Individual,Manual,First Owner,0,1,0,0,0,1,0,0,1


In [32]:
# Show the distinct ownership labels; the next cell maps each of them to an integer.
data_frame['owner'].unique()

array(['First Owner', 'Second Owner', 'Fourth & Above Owner',
       'Third Owner', 'Test Drive Car'], dtype=object)

In [33]:
# Integer code for each ownership label present in the `owner` column.
owner_map = {
    'Test Drive Car': 0,
    'First Owner': 1,
    'Second Owner': 2,
    'Third Owner': 3,
    'Fourth & Above Owner': 4,
}

# Replace the labels with their integer codes, in place.
# `Series.map` yields NaN for any value that is not a key of the mapping, so:
#   * the dtype guard skips the conversion when the column is already numeric
#     (a second run would otherwise turn every code into NaN);
#   * a label missing from `owner_map` is reported instead of silently becoming NaN.
if not pd.api.types.is_numeric_dtype(data_frame['owner']):
    owner_codes = data_frame['owner'].map(owner_map)
    # Rows that had a label but received no code (rows that were already null are ignored).
    unmapped_labels = data_frame.loc[
        data_frame['owner'].notna() & owner_codes.isna(), 'owner'
    ]
    if not unmapped_labels.empty:
        raise ValueError(
            f"owner labels missing from owner_map: {list(unmapped_labels.unique())}"
        )
    data_frame['owner'] = owner_codes
data_frame

Unnamed: 0,name,year,selling_price,km_driven,fuel,seller_type,transmission,owner,petrol,diesel,cng,lpg,electric,indivisual,dealer,t_dealer,gear
0,Maruti 800 AC,2007,60000,70000,Petrol,Individual,Manual,1,1,0,0,0,0,1,0,0,1
1,Maruti Wagon R LXI Minor,2007,135000,50000,Petrol,Individual,Manual,1,1,0,0,0,0,1,0,0,1
2,Hyundai Verna 1.6 SX,2012,600000,100000,Diesel,Individual,Manual,1,0,1,0,0,0,1,0,0,1
3,Datsun RediGO T Option,2017,250000,46000,Petrol,Individual,Manual,1,1,0,0,0,0,1,0,0,1
4,Honda Amaze VX i-DTEC,2014,450000,141000,Diesel,Individual,Manual,2,0,1,0,0,0,1,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4335,Hyundai i20 Magna 1.4 CRDi (Diesel),2014,409999,80000,Diesel,Individual,Manual,2,0,1,0,0,0,1,0,0,1
4336,Hyundai i20 Magna 1.4 CRDi,2014,409999,80000,Diesel,Individual,Manual,2,0,1,0,0,0,1,0,0,1
4337,Maruti 800 AC BSIII,2009,110000,83000,Petrol,Individual,Manual,2,1,0,0,0,0,1,0,0,1
4338,Hyundai Creta 1.6 CRDi SX Option,2016,865000,90000,Diesel,Individual,Manual,1,0,1,0,0,0,1,0,0,1
