In [1]:
from numpy import hstack
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.preprocessing import LabelEncoder
import xgboost
import pandas as pd
from sklearn.ensemble import (RandomForestClassifier, AdaBoostClassifier,
                              GradientBoostingClassifier, ExtraTreesClassifier)


In [2]:
def get_models(random_state=42):
    """Build the unfitted base learners used by the blending ensemble.

    Args:
        random_state: Seed handed to every estimator so that repeated runs
            construct identically seeded models. Pass ``None`` to get
            unseeded estimators.

    Returns:
        A list of ``(name, estimator)`` tuples, in this order: AdaBoost
        ('abc'), gradient boosting ('gbc'), random forest ('rfc') and
        extra trees ('etc').
    """
    # Other candidates (LogisticRegression, KNeighborsClassifier,
    # DecisionTreeClassifier, SVC, GaussianNB) were listed here but disabled.
    return [
        ('abc', AdaBoostClassifier(random_state=random_state)),
        ('gbc', GradientBoostingClassifier(random_state=random_state)),
        ('rfc', RandomForestClassifier(random_state=random_state)),
        ('etc', ExtraTreesClassifier(random_state=random_state)),
    ]


In [3]:
# Read the training table and the second table, then strip the columns that
# are not features: 'Unnamed: 0' from both (presumably a saved index — confirm)
# and 'Class' from the second table.
data = pd.read_csv('dataset/augmented.csv')
data2 = pd.read_csv('test.csv')
data2 = data2.drop(columns=['Class', 'Unnamed: 0'])
data = data.drop(columns=['Unnamed: 0'])
data


Unnamed: 0,T,RH,LW,WS,GR,Class
0,26.6,67.0,31,1,27,1
1,28.9,62.0,22,5,38,1
2,25.1,93.0,24,1,52,1
3,27.6,72.0,27,2,53,1
4,21.7,71.0,38,2,37,1
...,...,...,...,...,...,...
995,26.1,77.0,37,2,44,1
996,28.5,79.0,24,1,48,1
997,20.3,80.0,29,1,56,1
998,21.0,71.0,33,2,45,1


In [4]:
# Preview the second table after its 'Class' and 'Unnamed: 0' columns were dropped.
data2

Unnamed: 0,T,RH,LW,WS,GR
0,32.300000,47.000000,24.000000,3.000000,51.000000
1,31.600000,45.000000,23.000000,2.000000,49.000000
2,30.400000,43.000000,21.000000,4.000000,46.000000
3,31.700000,46.000000,24.000000,3.000000,50.000000
4,32.500000,50.000000,27.000000,1.000000,53.000000
...,...,...,...,...,...
251,22.596744,70.543683,23.108311,2.193107,32.690875
252,22.762959,70.534441,23.340512,2.201344,33.070252
253,22.917562,70.511004,23.564296,2.210169,33.429773
254,23.066089,70.495807,23.785557,2.217117,33.777363


In [5]:
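# NOTE(review): label encoding is disabled. 'Class' already displays as
# integers in the frame above — confirm before re-enabling this block.
# target = data[['Class']]
# target.head()
# le = LabelEncoder()
# t = le.fit_transform(target)
# print(t)
# data['Class'] = t
# print(data.head(25))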


In [6]:
# Display the training frame again; the label-encoding code above is
# commented out, so the frame is the same one shown after loading.
data

Unnamed: 0,T,RH,LW,WS,GR,Class
0,26.6,67.0,31,1,27,1
1,28.9,62.0,22,5,38,1
2,25.1,93.0,24,1,52,1
3,27.6,72.0,27,2,53,1
4,21.7,71.0,38,2,37,1
...,...,...,...,...,...,...
995,26.1,77.0,37,2,44,1
996,28.5,79.0,24,1,48,1
997,20.3,80.0,29,1,56,1
998,21.0,71.0,33,2,45,1


In [7]:
# Training features and target come from `data`.
X = data.loc[:, ['T', 'RH', 'LW', 'WS', 'GR']]
y = data.loc[:, 'Class']

# Same feature columns from the second table (not used in the cells shown
# here; presumably kept for later inference — confirm).
predicated_X = data2.loc[:, ['T', 'RH', 'LW', 'WS', 'GR']]

# Hold out 20% of the rows as the test split; fixed seed for repeatability.
(X_train, X_test,
 y_train, y_test) = train_test_split(X, y, test_size=0.2, random_state=42)

In [8]:
# Carve a validation split (20%) out of the training partition.
#
# The training partition is re-derived from X / y with the same seed used for
# the train/test split, instead of reading the current X_train / y_train.
# This keeps the cell idempotent: splitting X_train from itself would shrink
# the training set again every time the cell is re-run.
X_trainval, _, y_trainval, _ = train_test_split(
    X, y, test_size=0.2, random_state=42)
X_train, X_val, y_train, y_val = train_test_split(
    X_trainval, y_trainval, test_size=0.2, random_state=42)

In [9]:
def _stack_predictions(models, X):
    """Return base-model predictions on X as one column per model."""
    if not models:
        raise ValueError('models must contain at least one (name, model) pair')
    columns = []
    for _, model in models:
        yhat = model.predict(X)
        # Each model contributes a single column of per-sample predictions.
        columns.append(yhat.reshape(len(yhat), 1))
    # Columns follow the order in which the models were given.
    return hstack(columns)


def fit_ensemble(models, X_train, X_val, y_train, y_val, blender=None):
    """Fit the base models and a blending meta-model on their predictions.

    Every base model is fitted (in place) on the training split. Their
    predictions on the validation split are stacked into a matrix with one
    column per model, and the blender is fitted on that matrix against
    ``y_val``.

    Args:
        models: Sequence of ``(name, model)`` pairs; each model must provide
            ``fit(X, y)`` and ``predict(X)``.
        X_train: Features used to fit the base models.
        X_val: Hold-out features used to produce the blender's inputs.
        y_train: Targets matching ``X_train``.
        y_val: Targets matching ``X_val``.
        blender: Optional meta-model providing ``fit``/``predict``. Defaults
            to a new ``xgboost.XGBClassifier()``.

    Returns:
        The fitted blender.

    Raises:
        ValueError: If ``models`` is empty.
    """
    if not models:
        raise ValueError('models must contain at least one (name, model) pair')
    for _, model in models:
        model.fit(X_train, y_train)
    meta_X = _stack_predictions(models, X_val)
    if blender is None:
        blender = xgboost.XGBClassifier()
    blender.fit(meta_X, y_val)
    return blender


def predict_ensemble(models, blender, X_test):
    """Predict with the blended ensemble.

    Args:
        models: The fitted ``(name, model)`` pairs, in the same order that
            was used when the blender was fitted.
        blender: The fitted meta-model returned by ``fit_ensemble``.
        X_test: Features to predict on.

    Returns:
        Whatever ``blender.predict`` returns for the stacked base-model
        predictions.

    Raises:
        ValueError: If ``models`` is empty.
    """
    return blender.predict(_stack_predictions(models, X_test))

# Build the base learners, then fit them together with the blending
# meta-model (base models on the training split, blender on the validation split).
models = get_models()
blender = fit_ensemble(
    models, X_train=X_train, X_val=X_val, y_train=y_train, y_val=y_val)

# Score the blended predictions on the held-out test split; the value is
# printed as a percentage.
yhat = predict_ensemble(models, blender, X_test=X_test)
score = accuracy_score(y_true=y_test, y_pred=yhat)
print('Blending Accuracy: %.3f' % (score*100))

Blending Accuracy: 98.000


In [10]:
# Standalone instances of the four ensemble classifiers, used for the
# per-model baseline. They are seeded so the baseline accuracies are
# repeatable across kernel restarts.
abc = AdaBoostClassifier(random_state=42)
gbc = GradientBoostingClassifier(random_state=42)
rfc = RandomForestClassifier(random_state=42)
etc = ExtraTreesClassifier(random_state=42)

In [11]:
# Baseline: fit each classifier on its own and print its test accuracy
# (as a fraction, four decimals).
# NOTE(review): this rebinds `models` from a list of (name, model) pairs to a
# plain list of estimators; predict_ensemble() expects the pair form.
models = [abc, gbc, rfc, etc]
for model in models:
    model.fit(X_train, y_train)
    predicted = model.predict(X_test)
    # accuracy_score takes (y_true, y_pred); ground truth goes first.
    accuracy = accuracy_score(y_test, predicted)
    model_name = type(model).__name__
    print(f'{model_name}: {accuracy:.4f}')


AdaBoostClassifier: 0.9750
GradientBoostingClassifier: 0.9750
RandomForestClassifier: 0.9800
ExtraTreesClassifier: 0.9800
