In [55]:
import numpy as np
from numpy import mean
from numpy import std
from sklearn.model_selection import cross_val_score, RepeatedStratifiedKFold
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.ensemble import StackingClassifier, RandomForestClassifier
from matplotlib import pyplot
from sklearn.metrics import classification_report, accuracy_score
from sklearn.multiclass import OneVsRestClassifier
import pandas as pd
import warnings
# Install a blanket 'ignore' filter: no category, module or message restriction is
# given, so warnings raised by any library are hidden for the rest of the session.
# NOTE(review): this also hides convergence/deprecation warnings from the models
# below - consider narrowing it to specific categories.
warnings.filterwarnings(action='ignore')

In [56]:
def get_dataset(path='./data/forest/forest.pkl'):
    """Load the pickled train/test split.

    Parameters
    ----------
    path : str or path-like, optional
        Pickle file holding a 4-item sequence. Defaults to
        './data/forest/forest.pkl', the location that used to be hard-coded,
        so existing zero-argument calls behave exactly as before.

    Returns
    -------
    tuple
        ``(x_train, x_test, y_train, y_test)`` in the order stored in the file.

    Raises
    ------
    ValueError
        If the unpickled object does not unpack into exactly four items.
    """
    # SECURITY: unpickling can execute arbitrary code - only load files that
    # come from a trusted source.
    x_train, x_test, y_train, y_test = pd.read_pickle(path)
    return x_train, x_test, y_train, y_test

In [57]:
def get_stacking(cv=5, random_state=None):
    """Build the one-vs-rest stacking ensemble used in the comparison.

    Level 0 is a random forest ('RF') plus an RBF-default ``SVC(C=10, gamma=0.1)``
    ('SVM'); level 1 is a ``LogisticRegression``. The stacked model is wrapped
    in ``OneVsRestClassifier`` and returned without being fitted.

    Parameters
    ----------
    cv : int or cross-validation splitter, optional
        Forwarded unchanged to ``StackingClassifier(cv=...)``. Defaults to 5,
        the value that used to be hard-coded.
    random_state : int, RandomState instance or None, optional
        Forwarded to the level-0 ``RandomForestClassifier``. ``None`` (the
        default) keeps the original unseeded behaviour; pass an int to make
        repeated runs comparable.

    Returns
    -------
    OneVsRestClassifier
        Wrapper around the configured ``StackingClassifier``.
    """
    level0 = [
        # Only the forest receives the seed; the SVC and the meta-learner are
        # constructed exactly as before.
        ('RF', RandomForestClassifier(random_state=random_state)),
        ('SVM', SVC(C=10, gamma=0.1)),
    ]
    level1 = LogisticRegression()

    ensemble = StackingClassifier(estimators=level0, final_estimator=level1, cv=cv)
    return OneVsRestClassifier(ensemble)

In [58]:
def get_models():
    """Return the candidate classifiers keyed by their display name.

    The dict preserves insertion order: 'RF' (random forest with default
    settings), 'SVM' (``SVC(C=10, gamma=0.1)``) and 'STACKING' (whatever
    ``get_stacking()`` builds). Nothing is fitted here.
    """
    return {
        'RF': RandomForestClassifier(),
        'SVM': SVC(C=10, gamma=0.1),
        'STACKING': get_stacking(),
    }

In [59]:
def evaluate_model(model):
    """Fit ``model`` on the training split, then print and return test accuracy.

    The data is not passed in: the function reads the notebook-level names
    ``x_train``, ``y_train``, ``x_test`` and ``y_test``, so those must be
    defined (e.g. by unpacking ``get_dataset()``) before this is called.

    Parameters
    ----------
    model : estimator
        Any object exposing ``fit(X, y)`` and ``predict(X)``. It is fitted in
        place.

    Returns
    -------
    score
        The value of ``accuracy_score(y_test, y_pred)``. Earlier versions
        printed the score and returned ``None``; returning it lets callers
        collect results while the printed line stays the same.
    """
    model.fit(x_train, y_train)
    y_pred = model.predict(x_test)
    score = accuracy_score(y_test, y_pred)
    print('Accuracy: {:.3f}'.format(score))
    return score

In [60]:
# Load the split once; evaluate_model() reads these notebook-level names directly.
x_train, x_test, y_train, y_test = get_dataset()

models = get_models()
# Placeholders for collected scores / labels; nothing in this cell fills them.
results, names = [], []
for name in models:
    model = models[name]
    # Header line: the model's key followed by twenty dashes.
    print('{}{}'.format(name, '--' * 10))
    evaluate_model(model)
    print('')

RF--------------------
Accuracy: 0.893

SVM--------------------
Accuracy: 0.880

STACKING--------------------
Accuracy: 0.905



In [61]:
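# NOTE(review): disabled SVM hyperparameter search, kept for reference only.
# As written it fits GridSearchCV on x_train and x_test concatenated, so any
# C/gamma chosen from it has already seen the test split; re-run it on x_train
# alone before trusting the test accuracies reported above.
# Presumably this is where C=10, gamma=0.1 came from - confirm.
# from sklearn.model_selection import GridSearchCV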

# x_train, x_test, y_train, y_test = get_dataset()
# X = np.concatenate([x_train, x_test])
# y = np.concatenate([y_train, y_test])

# svm_rbf = SVC(kernel = 'rbf',random_state=100)
# params = {'C': [0.001, 0.01, 0.1, 1, 10, 25, 50, 100],
#              'gamma':[0.001, 0.01, 0.1, 1, 10, 25, 50, 100]}

# grid_svm = GridSearchCV(svm_rbf, param_grid = params, cv = 5)
# grid_svm.fit(X, y)

# result = pd.DataFrame(grid_svm.cv_results_['params'])
# result['mean_test_score'] = grid_svm.cv_results_['mean_test_score']
# result.sort_values(by='mean_test_score', ascending=False)