## 손글씨 정확도 예측
- 의사결정트리, 로지스틱, 랜덤포레스트 등을 사용하여 정확도를 추정하자.

In [7]:
from matplotlib.streamplot import Grid
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression

cancer = load_digits()
cancer_data = cancer['data']
cancer_target = cancer['target']
X_train, X_test, y_train, y_test = train_test_split(cancer_data, cancer_target, test_size=0.3, random_state=11)

model = LogisticRegression()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

print("로지스틱 회귀분석-------")
print("훈련셋 : ", model.score(X_train, y_train))
print("테스트셋 : ", model.score(X_test, y_test))

from sklearn.tree import DecisionTreeClassifier
model = DecisionTreeClassifier()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

print("디시젼트리 회귀분석-------")
print("훈련셋 : ", model.score(X_train, y_train))
print("테스트셋 : ", model.score(X_test, y_test))

from sklearn.ensemble import RandomForestClassifier
model = RandomForestClassifier(random_state=0, max_depth=5, n_estimators=100)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

print("랜덤포레스트 회귀분석-------")
print("훈련셋 : ", model.score(X_train, y_train))
print("테스트셋 : ", model.score(X_test, y_test))


# 랜덤포레스트의 하이퍼 파라미터 튜닝
from sklearn.model_selection import GridSearchCV
grid_params = {
    'max_depth':[8,16,24],
    'min_samples_leaf':[1,6,12],
    'min_samples_split':[2,8,16],
    'n_estimators':[10, 50, 100]
}
# n_jobs : 시스템 내의 프로세서
model = RandomForestClassifier(random_state=0, n_jobs=-1)
grid_cv = GridSearchCV(model, param_grid = grid_params, cv=2, n_jobs=-1)
grid_cv.fit(X_train, y_train)

print("최적의 파라미터", grid_cv.best_params_)
print("최고의 예측 정확도", grid_cv.best_score_)

# 그라디언트 부스팅
from sklearn.ensemble import GradientBoostingClassifier

model = GradientBoostingClassifier(random_state=0, max_depth=1)
model.fit(X_train, y_train)
print("그라디언트 부스팅-------")
print("훈련셋 : ", model.score(X_train, y_train))
print("테스트셋 : ", model.score(X_test, y_test))

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


로지스틱 회귀분석-------
훈련셋 :  1.0
테스트셋 :  0.9574074074074074
디시젼트리 회귀분석-------
훈련셋 :  1.0
테스트셋 :  0.8574074074074074
랜덤포레스트 회귀분석-------
훈련셋 :  0.9713603818615751
테스트셋 :  0.9425925925925925
최적의 파라미터 {'max_depth': 16, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 100}
최고의 예측 정확도 0.9626201229329742
그라디언트 부스팅-------
훈련셋 :  0.9801113762927606
테스트셋 :  0.9333333333333333
