In [5]:
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split




In [6]:
# Load MNIST dataset
mnist = fetch_openml('mnist_784', version=1)
X, y = mnist["data"], mnist["target"]

# Split the data into training, validation, and test sets
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.2, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

In [7]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

# Initialize classifiers
rf_classifier = RandomForestClassifier(n_estimators=100, random_state=42)
mlp_classifier = MLPClassifier(hidden_layer_sizes=(100,), max_iter=10, random_state=42)
svm_classifier = SVC(probability=True, random_state=42)

# Train classifiers
rf_classifier.fit(X_train, y_train)
mlp_classifier.fit(X_train, y_train)
svm_classifier.fit(X_train, y_train)

# Evaluate on validation set
rf_val_preds = rf_classifier.predict(X_val)
mlp_val_preds = mlp_classifier.predict(X_val)
svm_val_preds = svm_classifier.predict(X_val)

# Print accuracy for each classifier
print("Random Forest Accuracy:", accuracy_score(y_val, rf_val_preds))
print("MLP Accuracy:", accuracy_score(y_val, mlp_val_preds))
print("SVM Accuracy:", accuracy_score(y_val, svm_val_preds))




Random Forest Accuracy: 0.9648571428571429
MLP Accuracy: 0.9401428571428572
SVM Accuracy: 0.975


In [8]:
from sklearn.ensemble import VotingClassifier

# Create a voting ensemble (soft or hard voting)
voting_classifier = VotingClassifier(estimators=[
    ('rf', rf_classifier),
    ('mlp', mlp_classifier),
    ('svm', svm_classifier)],
    voting='soft')  # Use 'hard' for hard voting

# Train the voting ensemble
voting_classifier.fit(X_train, y_train)

# Evaluate on the test set
voting_test_preds = voting_classifier.predict(X_test)

# Print accuracy for the ensemble
print("Voting Ensemble Accuracy:", accuracy_score(y_test, voting_test_preds))




Voting Ensemble Accuracy: 0.9738571428571429


In [10]:
import numpy as np
# Get predictions on validation set
rf_val_preds = rf_classifier.predict(X_val)
mlp_val_preds = mlp_classifier.predict(X_val)
svm_val_preds = svm_classifier.predict(X_val)

# Create a new training set for the blender
blender_X_train = np.column_stack((rf_val_preds, mlp_val_preds, svm_val_preds))
blender_y_train = y_val


In [12]:
from sklearn.ensemble import StackingClassifier
from sklearn.linear_model import LogisticRegression

# Create a blender classifier (you can use any classifier, e.g., Logistic Regression)
blender_classifier = LogisticRegression()

# Train the blender
blender_classifier.fit(blender_X_train, blender_y_train)

# Get predictions from individual classifiers on the test set
rf_test_preds = rf_classifier.predict(X_test)
mlp_test_preds = mlp_classifier.predict(X_test)
svm_test_preds = svm_classifier.predict(X_test)

# Create input for blender from test set predictions
blender_X_test = np.column_stack((rf_test_preds, mlp_test_preds, svm_test_preds))

# Get predictions from the blender on the test set
blender_test_preds = blender_classifier.predict(blender_X_test)

# Print accuracy for the stacking ensemble
print("Stacking Ensemble Accuracy:", accuracy_score(y_test, blender_test_preds))


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Stacking Ensemble Accuracy: 0.9601428571428572
