In [None]:
import csv
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import confusion_matrix, accuracy_score
from sklearn.metrics import f1_score
from sklearn.utils import shuffle
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, BaggingClassifier
from sklearn.datasets import fetch_openml
# Fetch the 'mnist_784' data set (version 1) from OpenML as a
# (features, labels) pair. Source page: https://www.openml.org/d/554
X, y = fetch_openml('mnist_784', version=1, return_X_y=True)

# Normalize: divide every feature value by 255
# (presumably maps 0-255 pixel intensities into [0, 1] -- confirm).
X = X / 255.

# Positional split: the first 60,000 rows form the training set (60K)
# and the remaining rows the test set (10K).
X_train, y_train = X[:60000], y[:60000]
X_test, y_test = X[60000:], y[60000:]


In [None]:
def evaluate_model(dt_classifier, x, y, l):
    """Print the accuracy and the confusion matrix of a classifier.

    Parameters
    ----------
    dt_classifier
        Object exposing ``predict``; it is expected to be fitted already.
    x
        Samples handed to ``dt_classifier.predict``.
    y
        True labels the predictions are compared against.
    l
        Label identifying the model in the printed accuracy line.

    Returns
    -------
    None
        Results are only written to standard output.
    """
    predictions = dt_classifier.predict(x)
    accuracy = accuracy_score(y, predictions)
    print("Test Accuracy for", l, "is", accuracy)

    print("Test Confusion Matrix:")
    matrix = confusion_matrix(y, predictions)
    print(matrix)

# Nothing in this cell assigns or reads `tree_model`; the binding is kept
# in case other cells rely on it.
tree_model = None

# One unfitted decision tree shared by the 'tree' search (tuned directly) and
# the 'bagging' search (used as the base learner). GridSearchCV clones its
# estimator before fitting, so this instance itself is never fitted.
dt = DecisionTreeClassifier(random_state=42)

# BaggingClassifier's base-learner argument is called `estimator` from
# scikit-learn 1.2 on and `base_estimator` before that (removed in 1.4).
# Use whichever name the installed version exposes so that both the
# constructor call and the nested grid keys stay valid.
bagging_base_kw = (
    'estimator'
    if 'estimator' in BaggingClassifier().get_params(deep=False)
    else 'base_estimator'
)

# For every model family: build its hyper-parameter grid, run a
# cross-validated grid search on the training split, then report the best
# estimator's accuracy and confusion matrix on the held-out test split.
for l in ['tree', 'bagging', 'randomForest', 'gradientBoosting']:

    if l == 'tree':
        params = {
            'max_depth': [5, 10, 20],
            'min_samples_leaf': [5, 10, 20],
            'max_features': [2, 30, 80],
            'splitter': ["best", "random"]
        }
        grid_search = GridSearchCV(estimator=dt, param_grid=params, cv=4, n_jobs=-1, scoring="accuracy")

    elif l == 'bagging':
        params = {
            'bootstrap': [True, False],
            'n_estimators': [10, 30],
            # `<base>__<param>` keys tune the wrapped decision tree.
            bagging_base_kw + '__max_depth': [2, 3, 4],
            bagging_base_kw + '__splitter': ["best", "random"]
        }
        bagging = BaggingClassifier(random_state=42, **{bagging_base_kw: dt})
        grid_search = GridSearchCV(bagging, param_grid=params, cv=5, scoring="accuracy")

    elif l == 'randomForest':
        params = {
            'bootstrap': [True],
            'max_features': [3, 50, 300],
            'n_estimators': [100, 200]
        }
        rf = RandomForestClassifier(random_state=42)
        grid_search = GridSearchCV(estimator=rf, param_grid=params, cv=5, scoring="accuracy")

    elif l == 'gradientBoosting':
        # Seeded like the other three models so reruns are reproducible.
        gbc = GradientBoostingClassifier(random_state=42)
        # NOTE(review): learning rates of 1 and 10 are unusually large for
        # gradient boosting -- confirm they are intended.
        params = {
            "n_estimators": [20, 30, 40],
            "max_depth": [5, 7, 9],
            "learning_rate": [0.01, 0.1, 1, 10]
        }
        grid_search = GridSearchCV(estimator=gbc, param_grid=params, cv=5, scoring="accuracy")

    else:
        # Guard against a name added to the list without a matching branch,
        # which would otherwise silently refit the previous search.
        raise ValueError("Unknown model name: " + repr(l))

    grid_search.fit(X_train, y_train)
    dt_best = grid_search.best_estimator_
    evaluate_model(dt_best, X_test, y_test, l)
