# 1. 모듈 import

In [97]:
# 모듈 import
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

import pandas as pd

# 2. 데이터 준비

In [98]:
# 손글씨 데이터 가져오기
digits = load_digits()
digits.keys()

# 데이터 형상확인
digits_data = digits.data
digits_data.shape # = (1797, 64)
# 1797개의 데이터와 64개의 숫자로 이루어진것을 확인

# 1번째의 데이터확인
digits_data[0]

array([ 0.,  0.,  5., 13.,  9.,  1.,  0.,  0.,  0.,  0., 13., 15., 10.,
       15.,  5.,  0.,  0.,  3., 15.,  2.,  0., 11.,  8.,  0.,  0.,  4.,
       12.,  0.,  0.,  8.,  8.,  0.,  0.,  5.,  8.,  0.,  0.,  9.,  8.,
        0.,  0.,  4., 11.,  0.,  1., 12.,  7.,  0.,  0.,  2., 14.,  5.,
       10., 12.,  0.,  0.,  0.,  0.,  6., 13., 10.,  0.,  0.,  0.])

# 3. 데이터 이해하기

In [99]:
# 3-1 Feature data 지정
digits_df = pd.DataFrame(data=digits_data, columns=digits.feature_names)
digits_df

digits_df["label"] = digits.target
digits_df

# 3-2 Label data 지정
digits_label = digits.target
print(digits_label.shape)
digits_label

# 3-3 Target names 출력 = array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]) 0부터 9까지의 숫자 이미지
digits.target_names

# 3-4 데이터 describe
# print(digits.DESCR)


(1797,)


array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

# 4. 데이터 분리하기

In [100]:
X_train, X_test, y_train, y_test = train_test_split(digits_data, 
                                                    digits_label, 
                                                    test_size=0.2,  # test 셋의 비율 
                                                    random_state=7) # 데이터분리시 랜덤적용

print('X_train 개수: ', len(X_train),', X_test 개수: ', len(X_test))
print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)


X_train 개수:  1437 , X_test 개수:  360
(1437, 64) (1437,)
(360, 64) (360,)


# 5. 다양한 모델로 학습시키기
### Decision Tree 사용해 보기
### Random Forest 사용해 보기
### SVM 사용해 보기
### SGD Classifier 사용해 보기
### Logistic Regression 사용해 보기

In [107]:
# Decision Tree
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score # 정확도 모듈

decision_tree = DecisionTreeClassifier(random_state=32)
decision_tree.fit(X_train, y_train)
y_pred = decision_tree.predict(X_test)

print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.91      0.94      0.92        32
           1       0.93      0.72      0.81        36
           2       0.96      0.73      0.83        30
           3       0.74      0.83      0.78        41
           4       0.80      0.88      0.84        32
           5       0.90      0.93      0.91        46
           6       0.93      0.88      0.90        32
           7       0.90      0.93      0.91        40
           8       0.77      0.81      0.79        42
           9       0.69      0.76      0.72        29

    accuracy                           0.84       360
   macro avg       0.85      0.84      0.84       360
weighted avg       0.85      0.84      0.84       360



In [102]:
# Random Forest
from sklearn.ensemble import RandomForestClassifier

X_train, X_test, y_train, y_test = train_test_split(digits_data, 
                                                    digits_label, 
                                                    test_size=0.2, 
                                                    random_state=21)

random_forest = RandomForestClassifier(random_state=32)
random_forest.fit(X_train, y_train)
y_pred = random_forest.predict(X_test)

print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.97      0.94      0.95        32
           1       0.97      1.00      0.99        36
           2       1.00      1.00      1.00        30
           3       0.98      0.98      0.98        41
           4       0.94      0.97      0.95        32
           5       1.00      0.98      0.99        46
           6       1.00      0.97      0.98        32
           7       0.98      1.00      0.99        40
           8       0.93      0.98      0.95        42
           9       1.00      0.93      0.96        29

    accuracy                           0.97       360
   macro avg       0.98      0.97      0.97       360
weighted avg       0.98      0.97      0.98       360



In [94]:
# SVM
from sklearn import svm
svm_model = svm.SVC()

print(svm_model._estimator_type)

svm_model.fit(X_train, y_train)
y_pred = svm_model.predict(X_test)

print(classification_report(y_test, y_pred))

classifier
              precision    recall  f1-score   support

           0       1.00      0.97      0.98        32
           1       0.95      1.00      0.97        36
           2       1.00      1.00      1.00        30
           3       1.00      1.00      1.00        41
           4       0.97      1.00      0.98        32
           5       0.98      1.00      0.99        46
           6       1.00      1.00      1.00        32
           7       1.00      1.00      1.00        40
           8       0.98      0.95      0.96        42
           9       1.00      0.93      0.96        29

    accuracy                           0.99       360
   macro avg       0.99      0.99      0.99       360
weighted avg       0.99      0.99      0.99       360



In [95]:
# SGD
from sklearn.linear_model import SGDClassifier
sgd_model = SGDClassifier()

print(sgd_model._estimator_type)

sgd_model.fit(X_train, y_train)
y_pred = sgd_model.predict(X_test)

print(classification_report(y_test, y_pred))

classifier
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        32
           1       0.87      0.92      0.89        36
           2       0.97      0.93      0.95        30
           3       0.77      1.00      0.87        41
           4       1.00      0.94      0.97        32
           5       0.98      0.87      0.92        46
           6       0.97      1.00      0.98        32
           7       0.98      1.00      0.99        40
           8       0.95      0.90      0.93        42
           9       1.00      0.79      0.88        29

    accuracy                           0.94       360
   macro avg       0.95      0.94      0.94       360
weighted avg       0.94      0.94      0.94       360



In [96]:
# Logistic Regression
from sklearn.linear_model import LogisticRegression
logistic_model = LogisticRegression()

print(logistic_model._estimator_type)
logistic_model.fit(X_train, y_train)
y_pred = logistic_model.predict(X_test)

print(classification_report(y_test, y_pred))

classifier
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        32
           1       0.95      0.97      0.96        36
           2       1.00      1.00      1.00        30
           3       0.98      1.00      0.99        41
           4       0.94      0.97      0.95        32
           5       0.98      0.98      0.98        46
           6       1.00      1.00      1.00        32
           7       0.97      0.97      0.97        40
           8       1.00      0.95      0.98        42
           9       0.96      0.93      0.95        29

    accuracy                           0.98       360
   macro avg       0.98      0.98      0.98       360
weighted avg       0.98      0.98      0.98       360



STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
