In [28]:
from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
import pandas as pd
from sklearn.ensemble import (RandomForestClassifier, AdaBoostClassifier,
                              GradientBoostingClassifier, ExtraTreesClassifier)


In [29]:
data = pd.read_csv('dataset/augmented.csv')
data = data.drop(['Unnamed: 0'], axis=1)
# data = data.drop(['date'], axis=1)
data

Unnamed: 0,T,RH,LW,WS,GR,Class
0,26.6,67.0,31,1,27,1
1,28.9,62.0,22,5,38,1
2,25.1,93.0,24,1,52,1
3,27.6,72.0,27,2,53,1
4,21.7,71.0,38,2,37,1
...,...,...,...,...,...,...
995,26.1,77.0,37,2,44,1
996,28.5,79.0,24,1,48,1
997,20.3,80.0,29,1,56,1
998,21.0,71.0,33,2,45,1


In [30]:
# Split the data into training and testing sets
X = data[['T', 'RH', 'LW', 'WS', 'GR']]
y = data['Class']
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42)

In [31]:
# Define the base models
# dt = DecisionTreeClassifier()
# knn = KNeighborsClassifier()
# lr = LogisticRegression()
abc = AdaBoostClassifier()
gbc = GradientBoostingClassifier()
rfc = RandomForestClassifier()
etc = ExtraTreesClassifier()

In [32]:
voting_ensemble = VotingClassifier(
    estimators=[('abc', abc),
     ('gbc', gbc), ('rfc', rfc), ('etc', etc)], voting='hard')

In [33]:
voting_ensemble.fit(X_train, y_train)

VotingClassifier(estimators=[('abc',
                              AdaBoostClassifier(algorithm='SAMME.R',
                                                 base_estimator=None,
                                                 learning_rate=1.0,
                                                 n_estimators=50,
                                                 random_state=None)),
                             ('gbc',
                              GradientBoostingClassifier(ccp_alpha=0.0,
                                                         criterion='friedman_mse',
                                                         init=None,
                                                         learning_rate=0.1,
                                                         loss='deviance',
                                                         max_depth=3,
                                                         max_features=None,
                                                         max_le

In [34]:
y_pred = voting_ensemble.predict(X_test)

In [35]:
from sklearn.metrics import accuracy_score
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy: {:.2f}%".format(accuracy*100))

Accuracy: 98.00%


In [36]:
models = [abc, gbc, rfc, etc]
for model in models:
    model.fit(X_train, y_train)
    predicted = model.predict(X_test)
    accuracy = accuracy_score(predicted, y_test)
    model_name = model.__class__.__name__
    print(f'{model_name}: {accuracy:.4f}')


AdaBoostClassifier: 0.9800
GradientBoostingClassifier: 0.9700
RandomForestClassifier: 0.9800
ExtraTreesClassifier: 0.9750
