In [1]:
import pandas as pd
import numpy as np 
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error

In [2]:
hyundi=pd.read_csv("../input/used-car-dataset-ford-and-mercedes/hyundi.csv")
hyundi.head()

Unnamed: 0,model,year,price,transmission,mileage,fuelType,tax(£),mpg,engineSize
0,I20,2017,7999,Manual,17307,Petrol,145,58.9,1.2
1,Tucson,2016,14499,Automatic,25233,Diesel,235,43.5,2.0
2,Tucson,2016,11399,Manual,37877,Diesel,30,61.7,1.7
3,I10,2016,6499,Manual,23789,Petrol,20,60.1,1.0
4,IX35,2015,10199,Manual,33177,Diesel,160,51.4,2.0


In [3]:
hyundi.shape

(4860, 9)

In [4]:
hyundi.dtypes

model            object
year              int64
price             int64
transmission     object
mileage           int64
fuelType         object
tax(£)            int64
mpg             float64
engineSize      float64
dtype: object

In [5]:
hyundi.isnull().sum()

model           0
year            0
price           0
transmission    0
mileage         0
fuelType        0
tax(£)          0
mpg             0
engineSize      0
dtype: int64

In [6]:
hyundi.describe

<bound method NDFrame.describe of         model  year  price transmission  mileage fuelType  tax(£)   mpg  \
0         I20  2017   7999       Manual    17307   Petrol     145  58.9   
1      Tucson  2016  14499    Automatic    25233   Diesel     235  43.5   
2      Tucson  2016  11399       Manual    37877   Diesel      30  61.7   
3         I10  2016   6499       Manual    23789   Petrol      20  60.1   
4        IX35  2015  10199       Manual    33177   Diesel     160  51.4   
...       ...   ...    ...          ...      ...      ...     ...   ...   
4855      I30  2016   8680       Manual    25906   Diesel       0  78.4   
4856      I40  2015   7830       Manual    59508   Diesel      30  65.7   
4857      I10  2017   6830       Manual    13810   Petrol      20  60.1   
4858   Tucson  2018  13994       Manual    23313   Petrol     145  44.8   
4859   Tucson  2016  15999    Automatic    11472   Diesel     125  57.6   

      engineSize  
0            1.2  
1            2.0  
2       

In [7]:
hyundi.info


<bound method DataFrame.info of         model  year  price transmission  mileage fuelType  tax(£)   mpg  \
0         I20  2017   7999       Manual    17307   Petrol     145  58.9   
1      Tucson  2016  14499    Automatic    25233   Diesel     235  43.5   
2      Tucson  2016  11399       Manual    37877   Diesel      30  61.7   
3         I10  2016   6499       Manual    23789   Petrol      20  60.1   
4        IX35  2015  10199       Manual    33177   Diesel     160  51.4   
...       ...   ...    ...          ...      ...      ...     ...   ...   
4855      I30  2016   8680       Manual    25906   Diesel       0  78.4   
4856      I40  2015   7830       Manual    59508   Diesel      30  65.7   
4857      I10  2017   6830       Manual    13810   Petrol      20  60.1   
4858   Tucson  2018  13994       Manual    23313   Petrol     145  44.8   
4859   Tucson  2016  15999    Automatic    11472   Diesel     125  57.6   

      engineSize  
0            1.2  
1            2.0  
2         

In [8]:
encoder = LabelEncoder()
hyundi['model'] = encoder.fit_transform(hyundi['model'])
model_mapping = {index : label for index, label in enumerate(encoder.classes_)}
model_mapping

{0: ' Accent',
 1: ' Amica',
 2: ' Getz',
 3: ' I10',
 4: ' I20',
 5: ' I30',
 6: ' I40',
 7: ' I800',
 8: ' IX20',
 9: ' IX35',
 10: ' Ioniq',
 11: ' Kona',
 12: ' Santa Fe',
 13: ' Terracan',
 14: ' Tucson',
 15: ' Veloster'}

In [9]:
hyundi['transmission'] = encoder.fit_transform(hyundi['transmission'])
transmission_mapping = {index : label for index, label in enumerate(encoder.classes_)}
transmission_mapping

{0: 'Automatic', 1: 'Manual', 2: 'Other', 3: 'Semi-Auto'}

In [10]:
hyundi['fuelType'] = encoder.fit_transform(hyundi['fuelType'])
fuelType_mapping = {index : label for index, label in enumerate(encoder.classes_)}
fuelType_mapping

{0: 'Diesel', 1: 'Hybrid', 2: 'Other', 3: 'Petrol'}

In [11]:
x = hyundi.drop('price', axis=1)
y = hyundi['price']

In [12]:
scaler = MinMaxScaler(copy=True, feature_range=(0, 1))
X = scaler.fit_transform(x)

#showing data
print('x \n' , X[:10])
print('y \n' , y[:10])

x 
 [[0.26666667 0.85       0.33333333 0.12540671 1.         0.26126126
  0.22604615 0.4137931 ]
 [0.93333333 0.8        0.         0.1828419  0.         0.42342342
  0.16581932 0.68965517]
 [0.93333333 0.8        0.33333333 0.27446576 0.         0.05405405
  0.23699648 0.5862069 ]
 [0.2        0.8        0.33333333 0.17237806 1.         0.03603604
  0.23073915 0.34482759]
 [0.6        0.75       0.33333333 0.24040754 0.         0.28828829
  0.1967149  0.68965517]
 [0.2        0.75       0.33333333 0.30180653 1.         0.03603604
  0.23073915 0.34482759]
 [0.26666667 0.75       0.33333333 0.28656729 1.         0.05405405
  0.21235823 0.4137931 ]
 [0.93333333 0.8        0.         0.21955956 0.         0.22522523
  0.22096206 0.5862069 ]
 [0.33333333 0.9        0.         0.27595128 1.         0.26126126
  0.1967149  0.48275862]
 [0.93333333 0.8        0.33333333 0.15226922 0.         0.05405405
  0.23699648 0.5862069 ]]
y 
 0     7999
1    14499
2    11399
3     6499
4    10199
5     

In [13]:
x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=30,random_state=0)

In [14]:
print("x_train shape is:",x_train.shape)

x_train shape is: (4830, 8)


In [15]:
print("x_test shape is:",x_test.shape)

x_test shape is: (30, 8)


In [16]:
print("y_train shape is:",y_train.shape)

y_train shape is: (4830,)


In [17]:
print("y_test shape is:",y_test.shape)

y_test shape is: (30,)


In [18]:
RandomForestRegressorModel = RandomForestRegressor(n_estimators=100,max_depth=8, random_state=33)
RandomForestRegressorModel.fit(x_train, y_train)

print('Random Forest Regressor Train Score is : ' , RandomForestRegressorModel.score(x_train, y_train))
print('Random Forest Regressor Test Score is : ' , RandomForestRegressorModel.score(x_test, y_test))
print('Random Forest Regressor No. of features are : ' , RandomForestRegressorModel.n_features_)
print('----------------------------------------------------')

y_pred = RandomForestRegressorModel.predict(x_test)
print('Predicted Value for Random Forest Regressor is : ' , y_pred[:10])

Random Forest Regressor Train Score is :  0.9595119115583844
Random Forest Regressor Test Score is :  0.934122911051392
Random Forest Regressor No. of features are :  8
----------------------------------------------------
Predicted Value for Random Forest Regressor is :  [ 8128.61994955  8754.08226734 11024.42026955  9926.40376711
 13969.43236807  6638.01033333  8665.74820345 19190.44551606
 10114.33703312 16237.12271682]


In [19]:
DecisionTreeRegressorModel = DecisionTreeRegressor( max_depth=7,random_state=33)
DecisionTreeRegressorModel.fit(x_train, y_train)

print('DecisionTreeRegressor Train Score is : ' , DecisionTreeRegressorModel.score(x_train, y_train))
print('DecisionTreeRegressor Test Score is : ' , DecisionTreeRegressorModel.score(x_test, y_test))
print('----------------------------------------------------')

y_pred = DecisionTreeRegressorModel.predict(x_test)
print('Predicted Value for DecisionTreeRegressorModel is : ' , y_pred[:10])

DecisionTreeRegressor Train Score is :  0.9193212545413738
DecisionTreeRegressor Test Score is :  0.9033943889743087
----------------------------------------------------
Predicted Value for DecisionTreeRegressorModel is :  [ 8090.225       8501.53191489 11432.82352941 10757.81132075
 13643.29473684  6495.          8501.53191489 19880.69230769
 10757.81132075 15362.55769231]


In [20]:
MSEValue = mean_squared_error(y_test, y_pred, multioutput='uniform_average') # it can be raw_values
print('Mean Squared Error Value is : ', MSEValue)

Mean Squared Error Value is :  3233614.51169215
