In [1]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import RandomForestClassifier

In [2]:
iris = load_iris()

In [3]:
df= pd.DataFrame(data= np.c_[iris['data'], iris['target']],
                 columns= iris['feature_names'] + ['target'])

df['species'] = pd.Categorical.from_codes(iris.target, iris.target_names)

In [4]:
df.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),target,species
0,5.1,3.5,1.4,0.2,0.0,setosa
1,4.9,3.0,1.4,0.2,0.0,setosa
2,4.7,3.2,1.3,0.2,0.0,setosa
3,4.6,3.1,1.5,0.2,0.0,setosa
4,5.0,3.6,1.4,0.2,0.0,setosa


In [5]:
x = df.iloc[:,0:4]
y = df.iloc[:,4]

In [6]:
x_tr, x_ts, y_tr, y_ts = train_test_split(x,y, test_size = 0.2, random_state = 0)

In [7]:
rf_classifier = RandomForestClassifier(n_estimators = 50, max_depth = 3,
                                         max_leaf_nodes = 3)

In [8]:
score = cross_val_score(rf_classifier,x_tr,y_tr, cv= 5)
print("평균 정확도 :{:.2f}".format(score.mean()))

평균 정확도 :0.96


In [9]:
n_x_tr, n_x_ts, n_y_tr, n_y_ts = train_test_split(x,y, test_size = 0.2, 
                                          random_state = 0, stratify = y)

In [12]:
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestClassifier

In [22]:
rf_classifier = RandomForestClassifier(random_state = 0)

In [36]:
rf_classifier.fit(x_tr,y_tr)



RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
                       max_depth=None, max_features='auto', max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=10,
                       n_jobs=None, oob_score=False, random_state=0, verbose=0,
                       warm_start=False)

In [46]:
rf_param_grid = {'n_estimators': [10,20,30,50,100],
              'max_depth': [2,3,4],
              'max_features' : [2,3],
              'min_samples_leaf': [1,2,3,4,5] }

In [47]:
rf_classifier_grid = GridSearchCV(rf_classifier, param_grid = rf_param_grid, 
                       scoring ='accuracy',n_jobs= -1, cv= 5, verbose = 1)

In [48]:
rf_classifier_grid.fit(x_tr, y_tr)

Fitting 5 folds for each of 150 candidates, totalling 750 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:    8.0s
[Parallel(n_jobs=-1)]: Done 512 tasks      | elapsed:   14.5s
[Parallel(n_jobs=-1)]: Done 750 out of 750 | elapsed:   17.8s finished


GridSearchCV(cv=5, error_score='raise-deprecating',
             estimator=RandomForestClassifier(bootstrap=True, class_weight=None,
                                              criterion='gini', max_depth=None,
                                              max_features='auto',
                                              max_leaf_nodes=None,
                                              min_impurity_decrease=0.0,
                                              min_impurity_split=None,
                                              min_samples_leaf=1,
                                              min_samples_split=2,
                                              min_weight_fraction_leaf=0.0,
                                              n_estimators=10, n_jobs=None,
                                              oob_score=False, random_state=0,
                                              verbose=0, warm_start=False),
             iid='warn', n_jobs=-1,
             param_grid={'max_dep

In [52]:
print("가장 높은 정확도 : {0:.2f}".format(rf_classifier_grid.best_score_))
print("최적의 hyperparamter :",rf_classifier_grid.best_params_)

가장 높은 정확도 : 0.96
최적의 hyperparamter : {'max_depth': 4, 'max_features': 3, 'min_samples_leaf': 2, 'n_estimators': 100}
