In [1]:
from sklearn.datasets import fetch_openml
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import BaggingClassifier, RandomForestClassifier, GradientBoostingClassifier
from sklearn.metrics import accuracy_score
from pprint import pprint

In [2]:
# Download the 'mnist_784' dataset from OpenML as a (features, target) pair.
X, y = fetch_openml('mnist_784', version=1, return_X_y=True, parser='auto')

# Rescale features by 255 (presumably 0-255 pixel intensities -- confirm) and
# replace the categorical target with its integer category codes.
X, y = X / 255.0, y.cat.codes

# Positional split: the first 60000 rows are used for training, the rest for testing.
X_train = X.iloc[:60000]
X_test = X.iloc[60000:]
y_train = y.iloc[:60000]
y_test = y.iloc[60000:]

In [3]:
def test_classifier(classifier, parameters=None, random_state=None):
    """Fit one classifier on the training split and report its test accuracy.

    The estimator is placed behind a ``StandardScaler`` in a pipeline, fitted
    on the module-level ``X_train``/``y_train`` and scored on the module-level
    ``X_test``/``y_test``. The estimator's parameters and the resulting
    accuracy are printed.

    Parameters
    ----------
    classifier : callable
        Estimator class (not an instance), e.g. ``RandomForestClassifier``.
    parameters : mapping, optional
        Keyword arguments forwarded to ``classifier``. The mapping is copied
        and never modified. ``None`` (default) builds the estimator with its
        own defaults.
    random_state : int, optional
        Seed passed to the estimator as ``random_state`` so that repeated runs
        give the same result. Ignored when ``None`` (default), and never
        overrides a ``'random_state'`` entry already present in ``parameters``.

    Returns
    -------
    float
        The value returned by ``accuracy_score`` for the test split (it is
        multiplied by 100 only for the printed percentage).
    """
    # Work on a copy so the caller's mapping is left untouched.
    estimator_kwargs = dict(parameters) if parameters is not None else {}
    if random_state is not None:
        # An explicit entry in `parameters` takes precedence over the seed.
        estimator_kwargs.setdefault("random_state", random_state)

    estimator = classifier(**estimator_kwargs)
    pipeline = make_pipeline(StandardScaler(), estimator)

    # Show the effective hyperparameters before training.
    pprint(estimator.get_params())

    pipeline.fit(X=X_train, y=y_train)
    y_pred = pipeline.predict(X_test)
    accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)

    print("Accuracy of {} classifier for the MNIST dataset is {}%.".format(estimator.__class__.__name__, accuracy*100))
    print()
    return accuracy

In [4]:
# Single decision tree, built with its default hyperparameters.
dt_acc = test_classifier(classifier=DecisionTreeClassifier)

{'ccp_alpha': 0.0,
 'class_weight': None,
 'criterion': 'gini',
 'max_depth': None,
 'max_features': None,
 'max_leaf_nodes': None,
 'min_impurity_decrease': 0.0,
 'min_samples_leaf': 1,
 'min_samples_split': 2,
 'min_weight_fraction_leaf': 0.0,
 'random_state': None,
 'splitter': 'best'}
Accuracy of DecisionTreeClassifier classifier for the MNIST dataset is 87.97%.



In [5]:
# Bagging ensemble, built with its default hyperparameters.
bag_acc = test_classifier(classifier=BaggingClassifier)

{'base_estimator': 'deprecated',
 'bootstrap': True,
 'bootstrap_features': False,
 'estimator': None,
 'max_features': 1.0,
 'max_samples': 1.0,
 'n_estimators': 10,
 'n_jobs': None,
 'oob_score': False,
 'random_state': None,
 'verbose': 0,
 'warm_start': False}
Accuracy of BaggingClassifier classifier for the MNIST dataset is 94.22%.



In [6]:
# Random forest, built with its default hyperparameters.
rf_acc = test_classifier(classifier=RandomForestClassifier)

{'bootstrap': True,
 'ccp_alpha': 0.0,
 'class_weight': None,
 'criterion': 'gini',
 'max_depth': None,
 'max_features': 'sqrt',
 'max_leaf_nodes': None,
 'max_samples': None,
 'min_impurity_decrease': 0.0,
 'min_samples_leaf': 1,
 'min_samples_split': 2,
 'min_weight_fraction_leaf': 0.0,
 'n_estimators': 100,
 'n_jobs': None,
 'oob_score': False,
 'random_state': None,
 'verbose': 0,
 'warm_start': False}
Accuracy of RandomForestClassifier classifier for the MNIST dataset is 96.94%.



In [7]:
# Gradient boosting, built with its default hyperparameters.
gb_acc = test_classifier(classifier=GradientBoostingClassifier)

{'ccp_alpha': 0.0,
 'criterion': 'friedman_mse',
 'init': None,
 'learning_rate': 0.1,
 'loss': 'log_loss',
 'max_depth': 3,
 'max_features': None,
 'max_leaf_nodes': None,
 'min_impurity_decrease': 0.0,
 'min_samples_leaf': 1,
 'min_samples_split': 2,
 'min_weight_fraction_leaf': 0.0,
 'n_estimators': 100,
 'n_iter_no_change': None,
 'random_state': None,
 'subsample': 1.0,
 'tol': 0.0001,
 'validation_fraction': 0.1,
 'verbose': 0,
 'warm_start': False}
Accuracy of GradientBoostingClassifier classifier for the MNIST dataset is 94.59%.

