In [91]:
import pandas as pd
import numpy as np

from sklearn.ensemble import VotingClassifier
from sklearn.metrics import accuracy_score

import joblib

In [92]:
import warnings
warnings.filterwarnings("ignore", message="X has feature names")

In [93]:
# Feature matrices are read as DataFrames (already scaled upstream, judging by
# the file names - confirm against the preprocessing notebook).
X_train, X_test = (
    pd.read_csv(csv_path) for csv_path in ("X_train_scaled.csv", "X_test_scaled.csv")
)

# Each label file is read as a DataFrame, squeezed, and converted to a NumPy
# array; the printed shapes below show the result is 1-D.
y_train = pd.read_csv("y_train.csv").squeeze().to_numpy()
y_test = pd.read_csv("y_test.csv").squeeze().to_numpy()

# Sanity check: report the shape of every loaded object, features first.
for dataset in (X_train, X_test, y_train, y_test):
    print(dataset.shape)

(5625, 40)
(1407, 40)
(5625,)
(1407,)


In [94]:
# Restore the four base estimators saved by earlier work.
# NOTE(review): joblib.load unpickles its input, which can execute arbitrary
# code - only load model files from a trusted source.
# The spelling "neigbors" is kept on purpose: it has to match the file on disk.
log_reg, dec_tree, rand_for, knn = [
    joblib.load(filename=model_file)
    for model_file in (
        "logistic_regression_model.pkl",
        "decision_tree_model.pkl",
        "random_forest_model.pkl",
        "knearest_neigbors_model.pkl",
    )
]

In [95]:
# (label, estimator) pairs that make up the ensemble.
base_estimators = [
    ("lr", log_reg),
    ("dt", dec_tree),
    ("rf", rand_for),
    ("knn", knn),
]

# Soft voting combines the members' predicted class probabilities rather than
# their hard votes, so every member must expose predict_proba.
voting_classifier = VotingClassifier(estimators=base_estimators, voting="soft")

# Per the scikit-learn docs, fit() trains clones of the listed estimators; the
# objects bound to log_reg, dec_tree, rand_for and knn are left as loaded.
voting_classifier.fit(X_train, y_train)

# Score the fitted ensemble on the held-out split.
y_pred = voting_classifier.predict(X_test)
acc = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f"Ensemble Accuracy: {acc}")

Ensemble Accuracy: 0.7910447761194029


In [96]:
# Display name -> loaded base estimator, in reporting order.
named_models = {
    "Logistic Regression": log_reg,
    "Decision Tree": dec_tree,
    "Random Forest": rand_for,
    "KNN": knn,
}
models = list(named_models.values())
model_names = list(named_models)

# Report the test accuracy of each base model on its own.
# Note: this rebinds y_pred and acc, so after the loop they hold the values
# of the last model listed, not the ensemble's.
for name, model in named_models.items():
    y_pred = model.predict(X_test)
    acc = accuracy_score(y_true=y_test, y_pred=y_pred)
    print(f"{name} Accuracy: {acc:.5f}")

Logistic Regression Accuracy: 0.79033
Decision Tree Accuracy: 0.77328
Random Forest Accuracy: 0.79318
KNN Accuracy: 0.75551


In [104]:
# Persist the fitted ensemble; left as the last bare expression so the
# notebook displays the list of written file names returned by joblib.dump.
joblib.dump(value=voting_classifier, filename="voting_classifier.pkl")

['voting_classifier.pkl']