In [1]:
import numpy as np
import pandas as pd
import warnings
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from hpelm import ELM
from mlxtend.classifier import StackingClassifier
from mlxtend.feature_selection import ColumnSelector
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix, auc, accuracy_score
warnings.filterwarnings("ignore")

In [2]:
data = pd.read_csv("Group_14_data_cleaned.csv", index_col=0)
# Split the data into training and testing sets
X = data.drop(['Class', 'Bidder_ID'], axis=1)
y = data['Class']

In [14]:
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define the base models
rf = RandomForestClassifier(random_state=42)
svm = make_pipeline(StandardScaler(), SVC(kernel='rbf', C=10, gamma=1, random_state=42))
lr = LogisticRegression(random_state=42)
knn = make_pipeline(StandardScaler(), KNeighborsClassifier(n_neighbors=7))
mlp = MLPClassifier(hidden_layer_sizes=(64,64), random_state=42)

# Define the meta-model
# meta_model = RandomForestClassifier(n_estimators=100, random_state=42)

# Define the meta-model
meta_model = LogisticRegression(C=0.1, penalty='l1', solver='liblinear', random_state=42)

# Define the stacking model
stacking = StackingClassifier(classifiers=[rf, svm, lr, knn, mlp],
                              meta_classifier=meta_model)

# Fit the stacking model
stacking.fit(X_train, y_train)

# Evaluate the stacking model
score = stacking.score(X_test, y_test)
print("Accuracy:", score)


Accuracy: 0.9960474308300395


In [15]:
# Use the trained classifier to predict the class labels for the test data
y_pred = stacking.predict(X_test)

# Evaluate the classifier's performance on the test data
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))


[[1131    2]
 [   3  129]]
              precision    recall  f1-score   support

           0       1.00      1.00      1.00      1133
           1       0.98      0.98      0.98       132

    accuracy                           1.00      1265
   macro avg       0.99      0.99      0.99      1265
weighted avg       1.00      1.00      1.00      1265

