<a href="https://colab.research.google.com/github/ok-yuto/CA_Tech_Lounge/blob/main/ca_tech_lounge_task1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import pandas as pd
from sklearn.datasets import fetch_covtype
from sklearn.model_selection import train_test_split, cross_val_score, cross_validate, KFold, StratifiedKFold, RepeatedStratifiedKFold, GroupKFold, GridSearchCV
from sklearn.metrics import accuracy_score, f1_score,  precision_recall_curve, auc, roc_curve
from sklearn.preprocessing import StandardScaler, MinMaxScaler, RobustScaler
from sklearn.decomposition import PCA
# from sklearn.manifold import TSNE, MDS
from sklearn import svm
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import MultinomialNB
from sklearn.ensemble import RandomForestClassifier
# import xgboost as xgb
import lightgbm as lgb
import matplotlib.pyplot as plt
import seaborn as sns
# import pandas_profiling as pdp
# from mlxtend.plotting import plot_decision_regions
# !pip install optuna
# import optuna
# from optuna.integration import lightgbm as lgb
import functools


def objective_svc(X_train_std, X_test_std, y_train, y_test, trial):
    """Optuna-style objective: error rate of an SVC with sampled hyperparameters.

    Samples ``kernel``, ``C`` and ``gamma`` from ``trial``, fits ``svm.SVC``
    on the training data and scores it on the test data.

    Args:
        X_train_std: Training features passed to ``SVC.fit`` (the name
            suggests they are already standardized -- confirm with caller).
        X_test_std: Features the fitted model predicts on.
        y_train: Training labels.
        y_test: Ground-truth labels for ``X_test_std``.
        trial: Object providing ``suggest_categorical`` and ``suggest_float``
            (e.g. an ``optuna.trial.Trial``). It is the last parameter,
            presumably so the data arguments can be pre-bound with
            ``functools.partial`` -- verify against the call site.

    Returns:
        ``1.0 - accuracy`` on the test data, i.e. a value to minimize.
    """
    params = {
        'kernel': trial.suggest_categorical('kernel', ['linear', 'rbf', 'sigmoid']),
        # ``suggest_loguniform`` is deprecated in Optuna;
        # ``suggest_float(..., log=True)`` samples the same distribution.
        'C': trial.suggest_float('C', 1e+0, 1e+2 / 2, log=True),
        # NOTE: gamma is sampled for every kernel, including 'linear'.
        'gamma': trial.suggest_float('gamma', 1e-3, 3.0, log=True),
    }
    mdl = svm.SVC(**params)
    mdl.fit(X_train_std, y_train)
    pred_test = mdl.predict(X_test_std)
    accuracy_test = accuracy_score(y_test, pred_test)
    return 1.0 - accuracy_test


def _fold_accuracy(model, x_fit, y_fit, x_val, y_val, label):
    """Fit *model* on one fold, print *label* and the accuracy, return the accuracy."""
    model.fit(x_fit, y_fit)
    score = accuracy_score(y_val, model.predict(x_val))
    print(label)
    print(score)
    return score


if __name__ == "__main__":
    # Compare LightGBM, kNN, SVC and random forest on the covertype dataset
    # using repeated stratified k-fold cross-validation on the training split.

    # Read Dataset
    covtype = fetch_covtype()
    x = covtype.data
    y = covtype.target
    df = pd.DataFrame(x, columns=covtype.feature_names).assign(Cover_Type=y)
    print(df)

    # Cross Validation
    # The 20% test split is held out; every model below is scored on the
    # validation part of each fold so that the reported numbers are comparable.
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=0)
    rskf = RepeatedStratifiedKFold(n_splits=4, n_repeats=5, random_state=0)

    # LightGBM Parameter
    num_round = 50
    early_rounds = 25  # NOTE: not wired into lgb.train below; no early stopping is performed.
    # Labels are passed to LightGBM as-is (not shifted to start at 0), so
    # num_class has to be at least max(label) + 1. Derive it from the data
    # instead of hard-coding a larger value.
    num_class = int(np.max(y)) + 1
    lgb_params = {'objective': 'multiclass', 'num_class': num_class, 'metric': 'multi_logloss',
                  'seed': 0, 'verbose': 0, 'force_col_wise': 'true'}
    results_lgb = []

    # Other Machine Learning Model
    ss = StandardScaler()
    # pca = PCA(n_components=10)
    knn = KNeighborsClassifier(n_neighbors=5)
    # mlr = LogisticRegression(multi_class='multinomial', random_state=0)
    # mnb = MultinomialNB()
    svc = svm.SVC(kernel='rbf', max_iter=50)
    rfc = RandomForestClassifier(max_depth=5, random_state=0)
    results_knn = []
    # results_mlr = []
    # results_mnb = []
    results_svc = []
    results_rfc = []

    for train, valid in rskf.split(x_train, y_train):
        # Slice the fold once; fancy indexing copies the arrays.
        x_fit, y_fit = x_train[train], y_train[train]
        x_val, y_val = x_train[valid], y_train[valid]

        # LightGBM
        lgb_train = lgb.Dataset(x_fit, y_fit, feature_name=covtype.feature_names)
        lgb_eval = lgb.Dataset(x_val, y_val, feature_name=covtype.feature_names, reference=lgb_train)
        cla_lgb = lgb.train(params=lgb_params, train_set=lgb_train, num_boost_round=num_round,
                            valid_names=['train', 'valid'], valid_sets=[lgb_train, lgb_eval])
        preds = cla_lgb.predict(x_val, num_iteration=cla_lgb.best_iteration)
        # Column index of the highest probability is the predicted label.
        y_pred_lgb = np.argmax(preds, axis=1)
        result = accuracy_score(y_val, y_pred_lgb)
        print("LGB")
        print(result)
        lgb.plot_importance(cla_lgb)
        results_lgb.append(result)

        # Feature Scaling (fit on the training part of the fold only)
        x_fit_std = ss.fit_transform(x_fit)
        x_val_std = ss.transform(x_val)

        # k-Nearest Neighbor
        results_knn.append(_fold_accuracy(knn, x_fit_std, y_fit, x_val_std, y_val, "kNN:"))

        # Disabled baselines, kept for reference:
        # Multinomial Logistic Regression
        # results_mlr.append(_fold_accuracy(mlr, x_fit_std, y_fit, x_val_std, y_val, "MLR:"))
        # Multinomial Naive Bayes
        # results_mnb.append(_fold_accuracy(mnb, x_fit_std, y_fit, x_val_std, y_val, "MNB:"))

        # Support Vector Machine (RBF)
        results_svc.append(_fold_accuracy(svc, x_fit_std, y_fit, x_val_std, y_val, "SVC:"))

        # Random Forest Classifier (trained on the unscaled features)
        results_rfc.append(_fold_accuracy(rfc, x_fit, y_fit, x_val, y_val, "RFC:"))

    # Keep the per-fold scores as NumPy arrays.
    results_lgb = np.asarray(results_lgb)
    results_knn = np.asarray(results_knn)
    results_svc = np.asarray(results_svc)
    results_rfc = np.asarray(results_rfc)

    # Output Results
    print("LGB:", np.mean(results_lgb), "±", np.std(results_lgb))
    print("kNN:", np.mean(results_knn), "±", np.std(results_knn))
    # print("MLR:", np.mean(results_mlr), "±", np.std(results_mlr))
    # print("MNB:", np.mean(results_mnb), "±", np.std(results_mnb))
    print("SVC:", np.mean(results_svc), "±", np.std(results_svc))
    print("RFC:", np.mean(results_rfc), "±", np.std(results_rfc))


KeyboardInterrupt: ignored