# 필요한 모듈 설정

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from matplotlib import patches
%matplotlib inline

# 데이터 로드

In [None]:
# Read the wine-quality dataset and discard the 'Id' column, which is not needed.
wine = pd.read_csv('WineQT.csv').drop(columns=["Id"])
wine.head()

## 데이터 크기, 결측치 확인

In [None]:
# Show the dataset dimensions as (rows, columns).
wine.shape

In [None]:
# Count the missing values in each column.
wine.isnull().sum()

# Modeling 

## Oversampling, Scaling, Normalization

In [None]:
from sklearn import preprocessing
from imblearn.over_sampling import SMOTE 

# Balance the quality classes by oversampling with SMOTE. The seed is fixed so
# that the resampled data, and every score derived from it, is reproducible,
# in line with the random_state=42 used for the split and the models.
# NOTE(review): oversampling and scaling both run before the train/test split,
# so rows that end up in the test set influence the synthetic samples and the
# scaler statistics. Reported scores are likely optimistic; consider splitting
# first and fitting SMOTE and the scaler on the training part only.
oversample = SMOTE(random_state=42)
features, labels = oversample.fit_resample(
    wine.drop(["quality"], axis=1),
    wine["quality"],
)

# Rescale every feature column with min-max scaling.
scaler = preprocessing.MinMaxScaler()
scaled_data = pd.DataFrame(scaler.fit_transform(features), columns=features.columns)
scaled_data

In [None]:
# Scale each sample (row) to unit L2 norm. norm="l2" and axis=1 are the
# defaults of normalize(); they are spelled out here to make that explicit.
normalized_arr = preprocessing.normalize(scaled_data, norm="l2", axis=1)
normalized_data = pd.DataFrame(data=normalized_arr, columns=features.columns)
normalized_data

In [None]:
from numpy import log
import warnings
# Suppress every warning from this point on.
warnings.filterwarnings("ignore")

# Independent (deep) copy of the normalized data; no further transform is
# applied to it here.
unskew_data = normalized_data.copy()

## train / test split

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples for evaluation; the fixed seed keeps the split
# reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    unskew_data,
    labels,
    test_size=0.2,
    random_state=42,
)

## Classifier Models

In [None]:
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.metrics import classification_report
from sklearn import metrics

In [None]:
# Candidate classifiers, created in the order they are evaluated. Seeded
# estimators are pinned to 42 so runs are repeatable; the k-nearest-neighbours
# model is built with its defaults.
rnd_clf = RandomForestClassifier(random_state=42)
dt_clf = DecisionTreeClassifier(random_state=42)
kn_clf = KNeighborsClassifier()
svm_clf = SVC(random_state=42)
log_clf = LogisticRegression(random_state=42)

# The order of this list fixes the order of every per-model result list.
models = [rnd_clf, dt_clf, kn_clf, svm_clf, log_clf]

## fit

In [None]:
# Train each candidate classifier on the same training split.
for classifier in models:
    classifier.fit(X_train, y_train)

## predict

In [None]:
# Per-model scores on the test split, stored in the same order as `models`.
accuracy = []
precision = []
recall = []
F1 = []

for m in models:
    y_pred = m.predict(X_test)

    # Compute each score once and reuse it for both the printed report and the
    # stored lists. Precision, recall and F1 use macro averaging.
    acc = accuracy_score(y_test, y_pred)
    pcs = precision_score(y_test, y_pred, average="macro")
    rec = recall_score(y_test, y_pred, average="macro")
    f1 = f1_score(y_test, y_pred, average="macro")

    model_name = m.__class__.__name__
    print(model_name, 'Acc: ', acc)
    print(model_name, 'Precision: ', pcs)
    print(model_name, 'Recall: ', rec)
    print(model_name, 'F1: ', f1)

    # Two blank lines separate consecutive model reports.
    print("")
    print("")

    accuracy.append(acc)
    precision.append(pcs)
    recall.append(rec)
    F1.append(f1)

# Metrics Score

In [None]:
import math

# NOTE: this list rebinds the name `metrics`, shadowing the `sklearn.metrics`
# module imported earlier under the same name. The name is kept so that any
# later code reading this list keeps working.
metrics = [accuracy, precision, recall, F1]
total = []

# zip(*metrics) yields one tuple of scores per model, so the loop follows the
# number of evaluated models and metrics instead of assuming exactly 5 and 4.
for scores in zip(*metrics):
    avg = sum(scores) / len(scores)
    # Round the percentage itself to the nearest integer. Truncating
    # round(avg, 2) * 100 loses a point whenever the product falls just below
    # an integer (0.57 * 100 == 56.99999999999999 would give 56).
    avg = int(round(avg * 100))
    total.append(avg)

print(total)