In [1]:
import sklearn

In [3]:
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

In [4]:
wine = load_wine()
wine_data = wine.data
wine_label = wine.target

In [5]:
print(wine.keys())

dict_keys(['data', 'target', 'frame', 'target_names', 'DESCR', 'feature_names'])


In [6]:
print(wine_data.shape)

(178, 13)


In [8]:
print(wine.target_names)

['class_0' 'class_1' 'class_2']


In [9]:
print(wine.target)

[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2]


In [12]:
print(wine.feature_names)

['alcohol', 'malic_acid', 'ash', 'alcalinity_of_ash', 'magnesium', 'total_phenols', 'flavanoids', 'nonflavanoid_phenols', 'proanthocyanins', 'color_intensity', 'hue', 'od280/od315_of_diluted_wines', 'proline']


In [11]:
print(wine.DESCR)

.. _wine_dataset:

Wine recognition dataset
------------------------

**Data Set Characteristics:**

    :Number of Instances: 178 (50 in each of three classes)
    :Number of Attributes: 13 numeric, predictive attributes and the class
    :Attribute Information:
 		- Alcohol
 		- Malic acid
 		- Ash
		- Alcalinity of ash  
 		- Magnesium
		- Total phenols
 		- Flavanoids
 		- Nonflavanoid phenols
 		- Proanthocyanins
		- Color intensity
 		- Hue
 		- OD280/OD315 of diluted wines
 		- Proline

    - class:
            - class_0
            - class_1
            - class_2
		
    :Summary Statistics:
    
                                   Min   Max   Mean     SD
    Alcohol:                      11.0  14.8    13.0   0.8
    Malic Acid:                   0.74  5.80    2.34  1.12
    Ash:                          1.36  3.23    2.36  0.27
    Alcalinity of Ash:            10.6  30.0    19.5   3.3
    Magnesium:                    70.0 162.0    99.7  14.3
    Total Phenols:                0

In [13]:
X_train, X_test, y_train, y_test = train_test_split(wine_data,
                                                    wine_label,
                                                    test_size = 0.2,
                                                    random_state = 9 )

# Decision Tree

In [14]:
from sklearn.tree import DecisionTreeClassifier

In [16]:
decision_tree = DecisionTreeClassifier(random_state = 11)
decision_tree.fit(X_train, y_train)
y_pred = decision_tree.predict(X_test)
DT_report = classification_report(y_test, y_pred)
print(DT_report)

              precision    recall  f1-score   support

           0       1.00      0.94      0.97        17
           1       0.92      1.00      0.96        11
           2       1.00      1.00      1.00         8

    accuracy                           0.97        36
   macro avg       0.97      0.98      0.98        36
weighted avg       0.97      0.97      0.97        36



# Random Forest

In [17]:
from sklearn.ensemble import RandomForestClassifier

In [18]:
random_forest = RandomForestClassifier(random_state = 11)
random_forest.fit(X_train, y_train)
y_pred = random_forest.predict(X_test)
RF_report = classification_report(y_test, y_pred)
print(RF_report)

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        17
           1       1.00      1.00      1.00        11
           2       1.00      1.00      1.00         8

    accuracy                           1.00        36
   macro avg       1.00      1.00      1.00        36
weighted avg       1.00      1.00      1.00        36



# SVM

In [19]:
from sklearn import svm

In [20]:
svm_model = svm.SVC()
svm_model.fit(X_train, y_train)
y_pred = svm_model.predict(X_test)
SVM_report = classification_report(y_test, y_pred)
print(SVM_report)

              precision    recall  f1-score   support

           0       0.94      0.94      0.94        17
           1       0.67      0.73      0.70        11
           2       0.43      0.38      0.40         8

    accuracy                           0.75        36
   macro avg       0.68      0.68      0.68        36
weighted avg       0.74      0.75      0.75        36



# SGD

In [21]:
from sklearn.linear_model import SGDClassifier

In [22]:
sgd_model = SGDClassifier()
sgd_model.fit(X_train, y_train)
y_pred = sgd_model.predict(X_test)
SGD_report = classification_report(y_test, y_pred)
print(SGD_report)

              precision    recall  f1-score   support

           0       0.80      0.94      0.86        17
           1       0.62      0.91      0.74        11
           2       0.00      0.00      0.00         8

    accuracy                           0.72        36
   macro avg       0.48      0.62      0.54        36
weighted avg       0.57      0.72      0.63        36



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


# Logistic Regression

In [23]:
from sklearn.linear_model import LogisticRegression

In [24]:
logistic_model = LogisticRegression()
logistic_model.fit(X_train, y_train)
y_pred = logistic_model.predict(X_test)
LR_report = classification_report(y_test, y_pred)
print(LR_report)

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        17
           1       1.00      1.00      1.00        11
           2       1.00      1.00      1.00         8

    accuracy                           1.00        36
   macro avg       1.00      1.00      1.00        36
weighted avg       1.00      1.00      1.00        36



STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


# 모델 평가:
  'precision'과 'recall'은 양성과 음성, 즉 두 가지 데이터를 예측하고 어디에 중점 두어 문제를 판단할 때 참고하기 좋은 것 같으며, 와인 등급(0~2)을 예측하는 데에 아주 적합하다고 생각들지 않는다. 따라서 단순하게 와인의 등급을 얼마나 정확하게 예측하는가가 중요하다고 생각하고 정확도가 1로 완벽하게 예측한 'Random Forest' 모델과 'Logistic Regression' 모델이 학습 잘 된 모델이라고 생각합니다.
  