<a href="https://colab.research.google.com/github/sarahcodebyte/CarPricePrediction/blob/main/CarPricePrediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Importing dependencies**

In [33]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import scale
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import accuracy_score
from sklearn.metrics import r2_score

# **Data preprocessing and analysis**

In [2]:
dataset = pd.read_csv('/content/CarPrice_dataset.csv')

In [3]:
dataset.head()

Unnamed: 0,car_ID,symboling,CarName,fueltype,aspiration,doornumber,carbody,drivewheel,enginelocation,wheelbase,...,enginesize,fuelsystem,boreratio,stroke,compressionratio,horsepower,peakrpm,citympg,highwaympg,price
0,1,3,alfa-romero giulia,gas,std,two,convertible,rwd,front,88.6,...,130,mpfi,3.47,2.68,9.0,111,5000,21,27,13495.0
1,2,3,alfa-romero stelvio,gas,std,two,convertible,rwd,front,88.6,...,130,mpfi,3.47,2.68,9.0,111,5000,21,27,16500.0
2,3,1,alfa-romero Quadrifoglio,gas,std,two,hatchback,rwd,front,94.5,...,152,mpfi,2.68,3.47,9.0,154,5000,19,26,16500.0
3,4,2,audi 100 ls,gas,std,four,sedan,fwd,front,99.8,...,109,mpfi,3.19,3.4,10.0,102,5500,24,30,13950.0
4,5,2,audi 100ls,gas,std,four,sedan,4wd,front,99.4,...,136,mpfi,3.19,3.4,8.0,115,5500,18,22,17450.0


In [4]:
dataset.shape

(205, 26)

In [5]:
dataset = dataset.drop(['car_ID', 'CarName', 'fuelsystem', 'enginetype', 'cylindernumber'], axis = 1)
dataset.head()

Unnamed: 0,symboling,fueltype,aspiration,doornumber,carbody,drivewheel,enginelocation,wheelbase,carlength,carwidth,...,curbweight,enginesize,boreratio,stroke,compressionratio,horsepower,peakrpm,citympg,highwaympg,price
0,3,gas,std,two,convertible,rwd,front,88.6,168.8,64.1,...,2548,130,3.47,2.68,9.0,111,5000,21,27,13495.0
1,3,gas,std,two,convertible,rwd,front,88.6,168.8,64.1,...,2548,130,3.47,2.68,9.0,111,5000,21,27,16500.0
2,1,gas,std,two,hatchback,rwd,front,94.5,171.2,65.5,...,2823,152,2.68,3.47,9.0,154,5000,19,26,16500.0
3,2,gas,std,four,sedan,fwd,front,99.8,176.6,66.2,...,2337,109,3.19,3.4,10.0,102,5500,24,30,13950.0
4,2,gas,std,four,sedan,4wd,front,99.4,176.6,66.4,...,2824,136,3.19,3.4,8.0,115,5500,18,22,17450.0




*   *fueltype: gas-1, disel-0*

*   *aspiration: std-1, turbo-0*

*   *doornumber: as it is*

*   *carbody: convertible-0, hatchback-1, sedan-2, wagon-3, hardtop-4*

*   *drivewheel: rwd-0, fwd-1, 4wd-1*
*   *enginelocation: front-0, rear-1*






In [6]:
dataset.replace({'fueltype': {'gas':1, 'disel':0, 'diesel': 0}}, inplace=True) 
dataset.replace({'aspiration': {'std':1, 'turbo':0}}, inplace=True)
dataset.replace({'doornumber': {'two': 2, 'four': 4, 'six': 6, 'eight': 8}}, inplace=True)
dataset.replace({'carbody': {'convertible': 0, 'hatchback': 1, 'sedan': 2, 'wagon': 3, 'hardtop': 4}}, inplace=True) 
dataset.replace({'drivewheel': {'rwd': 0, 'fwd': 1, '4wd': 1}}, inplace=True) 
dataset.replace({'enginelocation': {'front': 0, 'rear': 1}}, inplace=True)


In [7]:
dataset.head()

Unnamed: 0,symboling,fueltype,aspiration,doornumber,carbody,drivewheel,enginelocation,wheelbase,carlength,carwidth,...,curbweight,enginesize,boreratio,stroke,compressionratio,horsepower,peakrpm,citympg,highwaympg,price
0,3,1,1,2,0,0,0,88.6,168.8,64.1,...,2548,130,3.47,2.68,9.0,111,5000,21,27,13495.0
1,3,1,1,2,0,0,0,88.6,168.8,64.1,...,2548,130,3.47,2.68,9.0,111,5000,21,27,16500.0
2,1,1,1,2,1,0,0,94.5,171.2,65.5,...,2823,152,2.68,3.47,9.0,154,5000,19,26,16500.0
3,2,1,1,4,2,1,0,99.8,176.6,66.2,...,2337,109,3.19,3.4,10.0,102,5500,24,30,13950.0
4,2,1,1,4,2,1,0,99.4,176.6,66.4,...,2824,136,3.19,3.4,8.0,115,5500,18,22,17450.0


# **Data splitting into features and target**

In [8]:
X = dataset.drop('price', axis =1)
Y = dataset['price']

In [9]:
X

Unnamed: 0,symboling,fueltype,aspiration,doornumber,carbody,drivewheel,enginelocation,wheelbase,carlength,carwidth,carheight,curbweight,enginesize,boreratio,stroke,compressionratio,horsepower,peakrpm,citympg,highwaympg
0,3,1,1,2,0,0,0,88.6,168.8,64.1,48.8,2548,130,3.47,2.68,9.0,111,5000,21,27
1,3,1,1,2,0,0,0,88.6,168.8,64.1,48.8,2548,130,3.47,2.68,9.0,111,5000,21,27
2,1,1,1,2,1,0,0,94.5,171.2,65.5,52.4,2823,152,2.68,3.47,9.0,154,5000,19,26
3,2,1,1,4,2,1,0,99.8,176.6,66.2,54.3,2337,109,3.19,3.40,10.0,102,5500,24,30
4,2,1,1,4,2,1,0,99.4,176.6,66.4,54.3,2824,136,3.19,3.40,8.0,115,5500,18,22
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
200,-1,1,1,4,2,0,0,109.1,188.8,68.9,55.5,2952,141,3.78,3.15,9.5,114,5400,23,28
201,-1,1,0,4,2,0,0,109.1,188.8,68.8,55.5,3049,141,3.78,3.15,8.7,160,5300,19,25
202,-1,1,1,4,2,0,0,109.1,188.8,68.9,55.5,3012,173,3.58,2.87,8.8,134,5500,18,23
203,-1,0,0,4,2,0,0,109.1,188.8,68.9,55.5,3217,145,3.01,3.40,23.0,106,4800,26,27


In [10]:
Y

0      13495.0
1      16500.0
2      16500.0
3      13950.0
4      17450.0
        ...   
200    16845.0
201    19045.0
202    21485.0
203    22470.0
204    22625.0
Name: price, Length: 205, dtype: float64

# **Scaling the features**

In [11]:
cols = X.columns
X = pd.DataFrame(scale(X))
X.columns = cols
X

Unnamed: 0,symboling,fueltype,aspiration,doornumber,carbody,drivewheel,enginelocation,wheelbase,carlength,carwidth,carheight,curbweight,enginesize,boreratio,stroke,compressionratio,horsepower,peakrpm,citympg,highwaympg
0,1.743470,0.328798,0.469295,-1.130388,-2.156678,-1.302831,-0.121867,-1.690772,-0.426521,-0.844782,-2.020417,-0.014566,0.074449,0.519071,-1.839377,-0.288349,0.174483,-0.262960,-0.646553,-0.546059
1,1.743470,0.328798,0.469295,-1.130388,-2.156678,-1.302831,-0.121867,-1.690772,-0.426521,-0.844782,-2.020417,-0.014566,0.074449,0.519071,-1.839377,-0.288349,0.174483,-0.262960,-0.646553,-0.546059
2,0.133509,0.328798,0.469295,-1.130388,-0.958524,-1.302831,-0.121867,-0.708596,-0.231513,-0.190566,-0.543527,0.514882,0.604046,-2.404880,0.685946,-0.288349,1.264536,-0.262960,-0.953012,-0.691627
3,0.938490,0.328798,0.469295,0.884652,0.239631,0.767559,-0.121867,0.173698,0.207256,0.136542,0.235942,-0.420797,-0.431076,-0.517266,0.462183,-0.035973,-0.053668,0.787855,-0.186865,-0.109354
4,0.938490,0.328798,0.469295,0.884652,0.239631,0.767559,-0.121867,0.107110,0.207256,0.230001,0.235942,0.516807,0.218885,-0.517266,0.462183,-0.540725,0.275883,0.787855,-1.106241,-1.273900
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
200,-1.476452,0.328798,0.469295,0.884652,0.239631,-1.302831,-0.121867,1.721873,1.198549,1.398245,0.728239,0.763241,0.339248,1.666445,-0.336970,-0.162161,0.250533,0.577692,-0.340094,-0.400490
201,-1.476452,0.328798,-2.130854,0.884652,0.239631,-1.302831,-0.121867,1.721873,1.198549,1.351515,0.728239,0.949992,0.339248,1.666445,-0.336970,-0.364062,1.416637,0.367529,-0.953012,-0.837195
202,-1.476452,0.328798,0.469295,0.884652,0.239631,-1.302831,-0.121867,1.721873,1.198549,1.398245,0.728239,0.878757,1.109571,0.926204,-1.232021,-0.338824,0.757535,0.787855,-1.106241,-1.128332
203,-1.476452,-3.041381,-2.130854,0.884652,0.239631,-1.302831,-0.121867,1.721873,1.198549,1.398245,0.728239,1.273437,0.435538,-1.183483,0.462183,3.244916,0.047732,-0.683286,0.119594,-0.546059


# **Splitting dataset into train and test data**

In [52]:
  X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.33, random_state=1)

# **Training the model**

In [53]:
model = RandomForestRegressor()
model.fit(X_train, Y_train)

RandomForestRegressor()

In [54]:
X_test_prediction = model.predict(X_test)

In [55]:
r2score = r2_score(Y_test, X_test_prediction)
print(r2score*100)

92.70553315300232


# **Predictive System**

In [56]:
newCar = (3, 1,	1, 2,	0, 0,	0, 88.6, 168.8,	64.1,	48.8,	2548,	130, 3.47, 2.68, 9.0, 111, 5000, 21, 27)

newCar_numpy_array = np.asarray(newCar)
newCar_reshaped = newCar_numpy_array.reshape(1, -1)

result = model.predict(newCar_reshaped)
print(result)

[34524.18]


  "X does not have valid feature names, but"
