In [1]:
import pandas as pd 
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error

In [2]:
focus=pd.read_csv("../input/used-car-dataset-ford-and-mercedes/focus.csv")
focus.head()

Unnamed: 0,model,year,price,transmission,mileage,fuelType,engineSize
0,Focus,2016,8000,Manual,38852,Petrol,1.0
1,Focus,2019,13400,Manual,11952,Petrol,1.0
2,Focus,2019,14600,Manual,22142,Petrol,1.5
3,Focus,2016,9450,Manual,14549,Diesel,1.6
4,Focus,2015,9999,Manual,7010,Diesel,1.6


In [3]:
focus.shape

(5454, 7)

In [4]:
focus.isnull().sum()

model           0
year            0
price           0
transmission    0
mileage         0
fuelType        0
engineSize      0
dtype: int64

In [5]:
focus.describe

<bound method NDFrame.describe of        model  year  price transmission  mileage fuelType  engineSize
0      Focus  2016   8000       Manual    38852   Petrol         1.0
1      Focus  2019  13400       Manual    11952   Petrol         1.0
2      Focus  2019  14600       Manual    22142   Petrol         1.5
3      Focus  2016   9450       Manual    14549   Diesel         1.6
4      Focus  2015   9999       Manual     7010   Diesel         1.6
...      ...   ...    ...          ...      ...      ...         ...
5449   Focus  2019  18745       Manual     7855   Diesel         2.0
5450   Focus  2019  16350       Manual    13891   Petrol         1.0
5451   Focus  2019  16850       Manual    13452   Petrol         1.0
5452   Focus  2019  17310    Automatic    13376   Petrol         1.0
5453   Focus  2019  17610    Automatic     7886   Petrol         1.0

[5454 rows x 7 columns]>

In [6]:
focus.info

<bound method DataFrame.info of        model  year  price transmission  mileage fuelType  engineSize
0      Focus  2016   8000       Manual    38852   Petrol         1.0
1      Focus  2019  13400       Manual    11952   Petrol         1.0
2      Focus  2019  14600       Manual    22142   Petrol         1.5
3      Focus  2016   9450       Manual    14549   Diesel         1.6
4      Focus  2015   9999       Manual     7010   Diesel         1.6
...      ...   ...    ...          ...      ...      ...         ...
5449   Focus  2019  18745       Manual     7855   Diesel         2.0
5450   Focus  2019  16350       Manual    13891   Petrol         1.0
5451   Focus  2019  16850       Manual    13452   Petrol         1.0
5452   Focus  2019  17310    Automatic    13376   Petrol         1.0
5453   Focus  2019  17610    Automatic     7886   Petrol         1.0

[5454 rows x 7 columns]>

In [7]:
encoder = LabelEncoder()
focus['model'] = encoder.fit_transform(focus['model'])
model_mapping = {index : label for index, label in enumerate(encoder.classes_)}
model_mapping

{0: ' Focus'}

In [8]:
focus['transmission'] = encoder.fit_transform(focus['transmission'])
transmission_mapping = {index : label for index, label in enumerate(encoder.classes_)}
transmission_mapping

{0: 'Automatic', 1: 'Manual', 2: 'Semi-Auto'}

In [9]:
focus['fuelType'] = encoder.fit_transform(focus['fuelType'])
fuelType_mapping = {index : label for index, label in enumerate(encoder.classes_)}
fuelType_mapping

{0: 'Diesel', 1: 'Petrol'}

In [10]:
x = focus.drop('price', axis=1)
y = focus['price']

In [11]:
scaler = MinMaxScaler(copy=True, feature_range=(0, 1))
X = scaler.fit_transform(x)

#showing data
print('x \n' , X[:10])
print('y \n' , y[:10])

x 
 [[0.         0.77777778 0.5        0.21870268 1.         0.4       ]
 [0.         0.94444444 0.5        0.06727538 1.         0.4       ]
 [0.         0.94444444 0.5        0.12463762 1.         0.6       ]
 [0.         0.77777778 0.5        0.08189459 0.         0.64      ]
 [0.         0.72222222 0.5        0.03945554 0.         0.64      ]
 [0.         0.77777778 0.         0.15083623 1.         0.4       ]
 [0.         0.88888889 0.5        0.0907832  1.         0.4       ]
 [0.         1.         0.5        0.02757778 1.         0.92      ]
 [0.         0.88888889 0.5        0.17423709 1.         0.6       ]
 [0.         0.94444444 0.5        0.01688217 0.         0.6       ]]
y 
 0     8000
1    13400
2    14600
3     9450
4     9999
5    10699
6    12849
7    24999
8    11999
9    19500
Name: price, dtype: int64


In [12]:
x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=30,random_state=0)

In [13]:
print("x_train shape is:",x_train.shape)

x_train shape is: (5424, 6)


In [14]:
print("y_train shape is:",y_train.shape)

y_train shape is: (5424,)


In [15]:
print("y_test shape is:",y_test.shape)

y_test shape is: (30,)


In [16]:
RandomForestRegressorModel = RandomForestRegressor(n_estimators=100,max_depth=9, random_state=33)
RandomForestRegressorModel.fit(x_train, y_train)

print('Random Forest Regressor Train Score is : ' , RandomForestRegressorModel.score(x_train, y_train))
print('Random Forest Regressor Test Score is : ' , RandomForestRegressorModel.score(x_test, y_test))
print('Random Forest Regressor No. of features are : ' , RandomForestRegressorModel.n_features_)
print('----------------------------------------------------')

y_pred = RandomForestRegressorModel.predict(x_test)
print('Predicted Value for Random Forest Regressor is : ' , y_pred[:10])

Random Forest Regressor Train Score is :  0.9525534602529591
Random Forest Regressor Test Score is :  0.9424995315094009
Random Forest Regressor No. of features are :  6
----------------------------------------------------
Predicted Value for Random Forest Regressor is :  [ 5871.50204692 11165.66837116 19072.42143662  7820.6855406
  8975.55261616 15979.18326558 13663.69157543 16947.66426253
 13673.4227777  13341.85826242]


In [17]:
LinearRegressionModel = LinearRegression(fit_intercept=True, normalize=True,copy_X=True,n_jobs=-1)
LinearRegressionModel.fit(x_train, y_train)

print('Linear Regression Train Score is : ' , LinearRegressionModel.score(x_train, y_train))
print('Linear Regression Test Score is : ' , LinearRegressionModel.score(x_test, y_test))
print('Linear Regression Coef is : ' , LinearRegressionModel.coef_)
print('Linear Regression intercept is : ' , LinearRegressionModel.intercept_)
print('----------------------------------------------------')

y_pred = LinearRegressionModel.predict(x_test)
print('Predicted Value for Linear Regression is : ' , y_pred[:10])

Linear Regression Train Score is :  0.8350124865214883
Linear Regression Test Score is :  0.8762527377681347
Linear Regression Coef is :  [ 0.00000000e+00  1.32007844e+03 -9.44820300e+02 -6.30306905e-02
  2.35115188e+03  6.18754043e+03]
Linear Regression intercept is :  -2656823.279772705
----------------------------------------------------
Predicted Value for Linear Regression is :  [ 4474.86103652 14208.40986223 17484.24044203  7966.69826094
  9381.58412118 17391.65697067 14034.37398086 18346.59862071
 14180.4160908  14153.55575521]


In [18]:
DecisionTreeRegressorModel = DecisionTreeRegressor( max_depth=9,random_state=33)
DecisionTreeRegressorModel.fit(x_train, y_train)

print('DecisionTreeRegressor Train Score is : ' , DecisionTreeRegressorModel.score(x_train, y_train))
print('DecisionTreeRegressor Test Score is : ' , DecisionTreeRegressorModel.score(x_test, y_test))
print('----------------------------------------------------')

y_pred = DecisionTreeRegressorModel.predict(x_test)
print('Predicted Value for DecisionTreeRegressorModel is : ' , y_pred[:10])

DecisionTreeRegressor Train Score is :  0.9500282103916018
DecisionTreeRegressor Test Score is :  0.937178683958171
----------------------------------------------------
Predicted Value for DecisionTreeRegressorModel is :  [ 5760.66666667 10966.10067114 19056.84210526  7870.1
  8868.41666667 15920.26666667 13809.50625    17092.0952381
 13809.50625    13851.95238095]


In [19]:
MSEValue = mean_squared_error(y_test, y_pred, multioutput='uniform_average') # it can be raw_values
print('Mean Squared Error Value is : ', MSEValue)

Mean Squared Error Value is :  1231071.098046991
