In [None]:
def preprocessing(df, test_size=None, random_state=0):
    """Split a feature/target table into train/test sets and standardize the features.

    Every column except the last one is treated as a feature; the last column
    is the target. The scaler is fitted on the training features only and is
    then applied to both splits, so test-set statistics never influence the
    scaling.

    Args:
        df: pandas DataFrame laid out as ``feature columns..., target column``.
        test_size: Forwarded to ``train_test_split``. ``None`` (the default)
            keeps scikit-learn's default hold-out fraction of 0.25.
        random_state: Seed forwarded to ``train_test_split``. Defaults to 0,
            the value that used to be hard-coded here.

    Returns:
        Tuple ``(X_tn_std, X_te_std, y_tn, y_te)``: the scaled training and
        test features (as returned by ``StandardScaler.transform``) followed
        by the matching training and test targets.

    Raises:
        ValueError: If ``df`` has fewer than two columns, i.e. there is no
            feature column left once the target is taken out.
    """
    if df.shape[1] < 2:
        raise ValueError(
            'preprocessing() needs at least one feature column and one target '
            f'column, got {df.shape[1]} column(s)'
        )

    # Imported lazily so the helper only needs scikit-learn when it is called.
    from sklearn.model_selection import train_test_split
    from sklearn.preprocessing import StandardScaler

    X = df.iloc[:, :-1]
    y = df.iloc[:, -1]

    X_tn, X_te, y_tn, y_te = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )

    # Fit on the training split only; reuse those statistics for the test split.
    scaler = StandardScaler()
    scaler.fit(X_tn)
    X_tn_std = scaler.transform(X_tn)
    X_te_std = scaler.transform(X_te)

    return X_tn_std, X_te_std, y_tn, y_te

def print_result_classifier(y, pred):
    """Print labelled classification metrics for predictions against true labels.

    Prints accuracy, macro-averaged precision / recall / F1 (each with six
    decimals, matching ``print_result``), then the confusion matrix and
    scikit-learn's text classification report. Every value is prefixed with
    its metric name so the output can be read without knowing the call order.

    Args:
        y: True class labels.
        pred: Predicted class labels, aligned with ``y``.

    Returns:
        None. The metrics are written to standard output.
    """
    # Imported lazily so the helper only needs scikit-learn when it is called.
    from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
    from sklearn.metrics import confusion_matrix, classification_report

    print(f'accuracy: {accuracy_score(y, pred):.6f}')
    print(f"precision (macro): {precision_score(y, pred, average='macro'):.6f}")
    print(f"recall (macro): {recall_score(y, pred, average='macro'):.6f}")
    print(f"f1 (macro): {f1_score(y, pred, average='macro'):.6f}")
    print('confusion matrix:')
    print(confusion_matrix(y, pred))
    print('classification report:')
    print(classification_report(y, pred))
    
def print_result(y, pred):
    """Print the mean squared error and R^2 score of regression predictions.

    Args:
        y: True target values.
        pred: Predicted target values, aligned with ``y``.

    Returns:
        None. A single line ``mse: <value>, r2_score: <value>`` (six decimals
        each) is written to standard output.
    """
    # Imported lazily so the helper only needs scikit-learn when it is called.
    from sklearn.metrics import mean_squared_error, r2_score

    mse = mean_squared_error(y, pred)
    r2 = r2_score(y, pred)
    print('mse: {:.6f}, r2_score: {:.6f}'.format(mse, r2))

In [None]:
from pathlib import Path

import pandas as pd

# Single place to repoint the notebook at a different dataset location.
DATA_DIR = Path('./data')

# preprocessing() slices these frames by position: every column but the last
# is a feature and the last column is the target.
df_iris = pd.read_csv(DATA_DIR / 'iris.csv')
df_house = pd.read_csv(DATA_DIR / 'house_prices.csv')
df_wine = pd.read_csv(DATA_DIR / 'wine_data.csv')

| 분류                  | 분류기                  | 회귀기                                     |
|-----------------------|------------------------|--------------------------------------------|
| KNeighbors            | KNeighborsClassifier   | KNeighborsRegressor                        |
| Regression            | LogisticRegression     | LinearRegression, Ridge, Lasso, ElasticNet |
| GaussianNB            | GaussianNB             |                                            |
| DecisionTree          | DecisionTreeClassifier | DecisionTreeRegressor                      |
| SVM                   | SVC                    | SVR                                        |
| Voting                | VotingClassifier       | VotingRegressor                            |
| RandomForest          | RandomForestClassifier | RandomForestRegressor                      |
| Bagging               | BaggingClassifier      | BaggingRegressor                           |


분류

In [None]:
# KNN classification (iris)
from sklearn.neighbors import KNeighborsClassifier

# Scaled train/test split of the iris table.
X_tn_std, X_te_std, y_tn, y_te = preprocessing(df_iris)

# fit() returns the estimator itself, so construction and training can be chained.
clf = KNeighborsClassifier(n_neighbors=2).fit(X_tn_std, y_tn)
pred = clf.predict(X_te_std)
print_result_classifier(y=y_te, pred=pred)

In [None]:
# Logistic regression classification (wine)
from sklearn.linear_model import LogisticRegression

# Scaled train/test split of the wine table.
X_tn_std, X_te_std, y_tn, y_te = preprocessing(df_wine)

# L2-penalized logistic regression; fit() returns the estimator itself.
clf = LogisticRegression(penalty='l2').fit(X_tn_std, y_tn)
# For inspection: clf.coef_ and clf.intercept_ hold the fitted parameters,
# and clf.predict_proba(X_te_std) gives the per-class probabilities.
print_result_classifier(y=y_te, pred=clf.predict(X_te_std))

In [None]:
# Naive Bayes classification (wine)
from sklearn.naive_bayes import GaussianNB

# Scaled train/test split of the wine table.
X_tn_std, X_te_std, y_tn, y_te = preprocessing(df_wine)

# Gaussian naive Bayes with default settings; fit() returns the estimator itself.
clf = GaussianNB().fit(X_tn_std, y_tn)
print_result_classifier(y=y_te, pred=clf.predict(X_te_std))

In [None]:
# Decision tree classification (wine)
from sklearn.tree import DecisionTreeClassifier

# Scaled train/test split of the wine table.
X_tn_std, X_te_std, y_tn, y_te = preprocessing(df_wine)

# Seeded so the fitted tree is the same on every run.
clf = DecisionTreeClassifier(random_state=0).fit(X_tn_std, y_tn)
print_result_classifier(y=y_te, pred=clf.predict(X_te_std))

In [None]:
# Support vector machine classification (wine)
from sklearn.svm import SVC

# Scaled train/test split of the wine table.
X_tn_std, X_te_std, y_tn, y_te = preprocessing(df_wine)

# Linear-kernel SVM; fit() returns the estimator itself.
clf = SVC(kernel='linear').fit(X_tn_std, y_tn)
print_result_classifier(y=y_te, pred=clf.predict(X_te_std))

In [None]:
# Voting classification (wine)
from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC

# Scaled train/test split of the wine table.
X_tn_std, X_te_std, y_tn, y_te = preprocessing(df_wine)

# Hard (majority) vote over three equally weighted base classifiers.
clf = VotingClassifier(
    voting='hard',
    weights=[1, 1, 1],
    estimators=[
        ('logistic_regression', LogisticRegression()),
        ('svc', SVC(kernel='linear')),
        ('gaussianNB', GaussianNB()),
    ],
).fit(X_tn_std, y_tn)
print_result_classifier(y=y_te, pred=clf.predict(X_te_std))

In [None]:
# Random forest classification (wine)
from sklearn.ensemble import RandomForestClassifier

# Scaled train/test split of the wine table.
X_tn_std, X_te_std, y_tn, y_te = preprocessing(df_wine)

# Shallow (depth-2) trees, seeded so the forest is the same on every run.
clf = RandomForestClassifier(max_depth=2, random_state=0).fit(X_tn_std, y_tn)
print_result_classifier(y=y_te, pred=clf.predict(X_te_std))

In [None]:
# Bagging classification (wine)
from sklearn.ensemble import BaggingClassifier
from sklearn.naive_bayes import GaussianNB

# Scaled train/test split of the wine table.
X_tn_std, X_te_std, y_tn, y_te = preprocessing(df_wine)

# Ten bagged Gaussian naive Bayes models; seeded for repeatable resampling.
clf = BaggingClassifier(
    estimator=GaussianNB(),
    n_estimators=10,
    random_state=0,
).fit(X_tn_std, y_tn)
print_result_classifier(y=y_te, pred=clf.predict(X_te_std))

회귀

In [None]:
# KNN regression (house prices)
from sklearn.neighbors import KNeighborsRegressor

# Scaled train/test split of the house-price table.
X_tn_std, X_te_std, y_tn, y_te = preprocessing(df_house)

# fit() returns the estimator itself, so construction and training can be chained.
clf = KNeighborsRegressor(n_neighbors=2).fit(X_tn_std, y_tn)
pred = clf.predict(X_te_std)
print_result(y=y_te, pred=pred)

In [None]:
# Linear regression family (house prices): plain, Ridge, Lasso, ElasticNet
from sklearn.linear_model import ElasticNet, Lasso, LinearRegression, Ridge

# Scaled train/test split of the house-price table.
X_tn_std, X_te_std, y_tn, y_te = preprocessing(df_house)

# Train all four models on the same split; fit() returns the estimator itself.
clf_linear = LinearRegression().fit(X_tn_std, y_tn)
clf_ridge = Ridge(alpha=1).fit(X_tn_std, y_tn)
clf_lasso = Lasso(alpha=0.01).fit(X_tn_std, y_tn)
clf_elastic = ElasticNet(alpha=0.01, l1_ratio=0.01).fit(X_tn_std, y_tn)

# One result line per model, in the order: linear, ridge, lasso, elastic net.
print_result(y=y_te, pred=clf_linear.predict(X_te_std))
print_result(y=y_te, pred=clf_ridge.predict(X_te_std))
print_result(y=y_te, pred=clf_lasso.predict(X_te_std))
print_result(y=y_te, pred=clf_elastic.predict(X_te_std))

In [None]:
# Decision tree regression (house prices)
from sklearn.tree import DecisionTreeRegressor

# Scaled train/test split of the house-price table.
X_tn_std, X_te_std, y_tn, y_te = preprocessing(df_house)

# Seeded so the fitted tree is the same on every run.
clf = DecisionTreeRegressor(random_state=0).fit(X_tn_std, y_tn)
pred = clf.predict(X_te_std)
print_result(y=y_te, pred=pred)

In [None]:
# Support vector machine regression (house prices)
from sklearn.svm import SVR

# Scaled train/test split of the house-price table.
X_tn_std, X_te_std, y_tn, y_te = preprocessing(df_house)

# Linear-kernel support vector regressor; fit() returns the estimator itself.
clf = SVR(kernel='linear').fit(X_tn_std, y_tn)
pred = clf.predict(X_te_std)
print_result(y=y_te, pred=pred)

In [None]:
# Voting regression (house prices)
X_tn_std, X_te_std, y_tn, y_te = preprocessing(df_house)

from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import VotingRegressor

# Equally weighted average of three base regressors.
clf = VotingRegressor(
    estimators=[
        ('linear_regression', LinearRegression()),
        ('svm', SVR(kernel='linear')),
        # Seeded like the other tree-based models in this notebook; without a
        # seed the tree can differ between runs, so the ensemble score would
        # not be reproducible.
        ('tree', DecisionTreeRegressor(random_state=0))
    ],
    weights=[1, 1, 1]
)
clf.fit(X_tn_std, y_tn)
pred = clf.predict(X_te_std)
print_result(y_te, pred)

In [None]:
# Random forest regression (house prices)
from sklearn.ensemble import RandomForestRegressor

# Scaled train/test split of the house-price table.
X_tn_std, X_te_std, y_tn, y_te = preprocessing(df_house)

# Shallow (depth-2) trees, seeded so the forest is the same on every run.
clf = RandomForestRegressor(max_depth=2, random_state=0).fit(X_tn_std, y_tn)
pred = clf.predict(X_te_std)
print_result(y=y_te, pred=pred)

In [None]:
# Bagging regression (house prices)
from sklearn.ensemble import BaggingRegressor
from sklearn.svm import SVR

# Scaled train/test split of the house-price table.
X_tn_std, X_te_std, y_tn, y_te = preprocessing(df_house)

# Ten bagged default-configured SVR models; seeded for repeatable resampling.
clf = BaggingRegressor(
    estimator=SVR(),
    n_estimators=10,
    random_state=0,
).fit(X_tn_std, y_tn)
pred = clf.predict(X_te_std)
print_result(y=y_te, pred=pred)