# Random Forest Algorithm

In [149]:
import pandas as pd
data = pd.read_csv('car_evaluation.csv')
data.head()

Unnamed: 0,buying,maint,doors,persons,lug_boot,safety,outcome
0,vhigh,vhigh,2,2,small,low,unacc
1,vhigh,vhigh,2,2,small,med,unacc
2,vhigh,vhigh,2,2,small,high,unacc
3,vhigh,vhigh,2,2,med,low,unacc
4,vhigh,vhigh,2,2,med,med,unacc


In [150]:
# Count the each data come in the column
from collections import Counter
Counter(data.outcome)

Counter({'unacc': 1210, 'acc': 384, 'vgood': 65, 'good': 69})

In [151]:
Counter(data.safety)

Counter({'low': 576, 'med': 576, 'high': 576})

In [152]:
Counter(data.buying)

Counter({'vhigh': 432, 'high': 432, 'med': 432, 'low': 432})

In [153]:
# when we have very less category we can use replace else we use  LabelEncoder

#data.buying.replace('low',1)
#data.buying.replace('vhigh',2)

In [154]:
# when we have lot of category we use LabelEncoder
# Automatic it will label for diffrent category.

from sklearn.preprocessing import LabelEncoder
enc = LabelEncoder()
print(enc.fit_transform(data.buying))

data.buying = enc.fit_transform(data.buying)
data.maint = enc.fit_transform(data.maint)
data.lug_boot = enc.fit_transform(data.lug_boot)
data.safety = enc.fit_transform(data.safety)
data.head()

[3 3 3 ... 1 1 1]


Unnamed: 0,buying,maint,doors,persons,lug_boot,safety,outcome
0,3,3,2,2,2,1,unacc
1,3,3,2,2,2,2,unacc
2,3,3,2,2,2,0,unacc
3,3,3,2,2,1,1,unacc
4,3,3,2,2,1,2,unacc


In [155]:
X = data.iloc[:, :-1]
y = data.outcome

In [156]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3, random_state = 10)

print(X_train.shape)
print(X_test.shape)
print(y_train.shape)
print(y_test.shape)

(1209, 6)
(519, 6)
(1209,)
(519,)


In [157]:
from sklearn.ensemble import RandomForestClassifier
model = RandomForestClassifier(max_depth=13, random_state=5)
model.fit(X_train, y_train)

RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=13, max_features='auto', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=1,
            oob_score=False, random_state=5, verbose=0, warm_start=False)

In [158]:
y_predict = model.predict(X_test)

In [159]:
from sklearn.metrics import accuracy_score, classification_report
print(accuracy_score(y_test, y_predict))
pd.crosstab(y_test, y_predict)

0.9595375722543352


col_0,acc,good,unacc,vgood
outcome,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
acc,95,3,4,0
good,2,19,0,0
unacc,9,0,362,0
vgood,3,0,0,22


# GridSearchCV 
### This provide us the best possible parameter to get best score

In [160]:
from sklearn.model_selection import GridSearchCV
#parameters1 = { 'max_depth':[3,4,5,6,7,10,15,20],
#                'random_state': [0,2,4,5,6,7,9,10,15],
#                'criterion':['gini','entropy']
#            }

parameters = { 'max_depth':range(1,20),
               'random_state': range(1,20),
               'criterion':['gini','entropy']
              }
grid = GridSearchCV(RandomForestClassifier(), parameters)

grid.fit(X_train, y_train)

GridSearchCV(cv=None, error_score='raise',
       estimator=RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=1,
            oob_score=False, random_state=None, verbose=0,
            warm_start=False),
       fit_params=None, iid=True, n_jobs=1,
       param_grid={'max_depth': range(1, 20), 'random_state': range(1, 20), 'criterion': ['gini', 'entropy']},
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring=None, verbose=0)

In [161]:
print(grid.best_params_)
print(grid.best_score_)

{'criterion': 'entropy', 'max_depth': 12, 'random_state': 19}
0.9429280397022333
