In [1]:
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

In [2]:
wine = load_wine() 
print(wine.data.shape)
print(wine.target.shape)

(178, 13)
(178,)


In [None]:
import pandas as pd
wine_df = pd.DataFrame(data=wine_data, columns = wine.feature_name)

In [4]:
wine.keys()

dict_keys(['data', 'target', 'frame', 'target_names', 'DESCR', 'feature_names'])

In [5]:
print(wine.DESCR)

.. _wine_dataset:

Wine recognition dataset
------------------------

**Data Set Characteristics:**

    :Number of Instances: 178 (50 in each of three classes)
    :Number of Attributes: 13 numeric, predictive attributes and the class
    :Attribute Information:
 		- Alcohol
 		- Malic acid
 		- Ash
		- Alcalinity of ash  
 		- Magnesium
		- Total phenols
 		- Flavanoids
 		- Nonflavanoid phenols
 		- Proanthocyanins
		- Color intensity
 		- Hue
 		- OD280/OD315 of diluted wines
 		- Proline

    - class:
            - class_0
            - class_1
            - class_2
		
    :Summary Statistics:
    
                                   Min   Max   Mean     SD
    Alcohol:                      11.0  14.8    13.0   0.8
    Malic Acid:                   0.74  5.80    2.34  1.12
    Ash:                          1.36  3.23    2.36  0.27
    Alcalinity of Ash:            10.6  30.0    19.5   3.3
    Magnesium:                    70.0 162.0    99.7  14.3
    Total Phenols:                0

# 데이터 정규화

In [22]:
# (1) 필요한 모듈 import
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report
from sklearn import preprocessing

mm_scaler = preprocessing.MinMaxScaler()


# (2) 데이터 준비
wine = load_wine()
wine_data = wine.data
wine_label = wine.target

# (3) train, test 데이터 분리
X_train, X_test, y_train, y_test = train_test_split(wine_data, 
                                                    wine_label, 
                                                    test_size=0.2, 
                                                    random_state=7)

X_train_minmax = mm_scaler.fit_transform(X_train)

##### 이코드 실행하면은 에러가 나는데, 왜 그럴가? y_train은 별도로 FIT을 했는데.....
#y_train = mm_scaler.transform(y_train)

X_test_minmax = mm_scaler.fit_transform(X_test)
#y_test_minmax = mm_scaler.transform(y_test)



# Decision Tree 사용해 보기

In [24]:
# (4) 모델 학습
decision_tree = DecisionTreeClassifier(random_state=32)
decision_tree.fit(X_train_minmax, y_train)

# (5) 예측하기
y_pred = decision_tree.predict(X_test_minmax)



# (6) 평가
print(classification_report(y_pred, y_test))

              precision    recall  f1-score   support

           0       1.00      0.88      0.93         8
           1       0.94      0.94      0.94        17
           2       0.92      1.00      0.96        11

    accuracy                           0.94        36
   macro avg       0.95      0.94      0.94        36
weighted avg       0.95      0.94      0.94        36



# Random Forest 사용해 보기

In [26]:
from sklearn.ensemble import RandomForestClassifier


# (4) 모델 학습
RandomForest = RandomForestClassifier(random_state=32)
RandomForest.fit(X_train_minmax, y_train)

# (5) 예측하기
y_pred = RandomForest.predict(X_test_minmax)



# (6) 평가
print(classification_report(y_pred, y_test))



              precision    recall  f1-score   support

           0       1.00      1.00      1.00         7
           1       1.00      1.00      1.00        17
           2       1.00      1.00      1.00        12

    accuracy                           1.00        36
   macro avg       1.00      1.00      1.00        36
weighted avg       1.00      1.00      1.00        36



# SVM 사용해 보기

In [19]:
from sklearn import svm

# (4) 모델 학습
#clf = svm.SVC(decision_function_shape='ovo')
clf = svm.SVC()
clf.fit(X_train_minmax, y_train)

# (5) 예측하기
y_pred = clf.predict(X_test)


# (6) 평가
print(classification_report(y_pred, y_test))

              precision    recall  f1-score   support

           0       0.00      0.00      0.00         0
           1       1.00      0.47      0.64        36
           2       0.00      0.00      0.00         0

    accuracy                           0.47        36
   macro avg       0.33      0.16      0.21        36
weighted avg       1.00      0.47      0.64        36



  _warn_prf(average, modifier, msg_start, len(result))


# SGD Classifier 사용해 보기

In [11]:
from sklearn.linear_model import SGDClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline

# (4) 모델 학습
clf = make_pipeline(StandardScaler(), SGDClassifier(max_iter=1000, tol=1e-3))
clf.fit(X_train, y_train)

# (5) 예측하기
y_pred = clf.predict(X_test)



# 평가
print(classification_report(y_test, y_pred))


              precision    recall  f1-score   support

           0       1.00      1.00      1.00         7
           1       0.94      0.94      0.94        17
           2       0.92      0.92      0.92        12

    accuracy                           0.94        36
   macro avg       0.95      0.95      0.95        36
weighted avg       0.94      0.94      0.94        36



# Logistic Regression 사용해 보기

In [27]:
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline

# (4) 모델 학습
clf = LogisticRegression(random_state=32).fit(X_train_minmax, y_train)
clf.fit(X_train_minmax, y_train)

# (5) 예측하기
y_pred = clf.predict(X_test_minmax)



# 평가
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.88      1.00      0.93         7
           1       1.00      0.88      0.94        17
           2       0.92      1.00      0.96        12

    accuracy                           0.94        36
   macro avg       0.93      0.96      0.94        36
weighted avg       0.95      0.94      0.94        36

