In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import get_scorer_names
from sklearn.metrics import accuracy_score

# Import Random Forest and XGBoost Classifiers

In [2]:
from sklearn.ensemble import RandomForestClassifier
import xgboost as xgb

# Import Pytorch CNN

In [3]:
from torchvision import transforms
from torchvision import datasets
import torch
from torch.utils.data import DataLoader
from torch import nn
import torch.optim as optim
from tqdm import tqdm

# Select Datasets

In [4]:
from sklearn.datasets import load_digits
digits_X, digits_y = load_digits(return_X_y=True)

#downloaded from Kaggle
fashion_train = pd.read_csv('fashion-mnist_train.csv', index_col = 0)
fashion_test = pd.read_csv('fashion-mnist_test.csv', index_col = 0)

# Train a RandomForestClassifier

### RandomForest on MNIST dataset

In [5]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(digits_X, digits_y, 
                                                    test_size=0.3, random_state=42)

rf = RandomForestClassifier()
param_grid = {'n_estimators': [50,100,150,200],'max_depth' : [1,4,5,10], "max_features": [2,5,10]}
rf_gs = GridSearchCV(rf, param_grid, scoring = 'accuracy', n_jobs = -1)
rf_gs.fit(X_train, y_train)
params = rf_gs.best_params_

params

{'max_depth': 10, 'max_features': 5, 'n_estimators': 200}

In [6]:
optimal_rf = RandomForestClassifier(n_estimators=150, max_depth=10, max_features=10)
optimal_rf.fit(X_train, y_train)

In [7]:
y_pred = optimal_rf.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy of Random Forest Classifier for Digits Datset: {accuracy}')

Accuracy of Random Forest Classifier for Digits Datset: 0.9740740740740741


### RandomForest on Fashion Dataset

In [8]:
rf = RandomForestClassifier()
train_X, train_y = fashion_train.values, fashion_train.index.values
test_X, test_y = fashion_test.values, fashion_test.index.values

In [9]:
param_grid = {'n_estimators': [50,100,150,200],'max_depth' : [1,4,5,10], "max_features": [2,5,10]}
rf_gs = GridSearchCV(rf, param_grid, scoring = 'accuracy', n_jobs = -1)
rf_gs.fit(train_X, train_y)
params = rf_gs.best_params_

params

{'max_depth': 10, 'max_features': 10, 'n_estimators': 200}

In [10]:
optimal_rf = RandomForestClassifier(n_estimators = 200, max_depth = 10, max_features = 10)
optimal_rf.fit(train_X, train_y)

In [11]:
pred_y = optimal_rf.predict(test_X)
accuracy = accuracy_score(test_y, pred_y)
print(f'Accuracy of Random Forest Classifier for Fashion Datset: {accuracy}')

Accuracy of Random Forest Classifier for Fashion Datset: 0.8414


# XGBoost Classifier

### XGBoost on MNIST Dataset

In [None]:
X_train, X_test, y_train, y_test = train_test_split(digits_X, digits_y, 
                                                    test_size = 0.3, random_state = 35)

xgbc = xgb.XGBClassifier(use_label_encoder = False, eval_metric = 'mlogloss')

# Define the parameter grid
param_grid = {
    'n_estimators': [50, 75, 100, 150, 200],
    'max_depth': [3, 4, 5,10,15],
    'learning_rate': [0.001, 0.01, 0.1, 0.2,0.3],
    'subsample': [0.5, 0.7, 0.8, 0.9]
}
xgb_gs = GridSearchCV(xgbc, param_grid, scoring = 'accuracy', n_jobs = -1)
xgb_gs.fit(X_train, y_train)
params = xgb_gs.best_params_

params

In [None]:
optimal_xgb = xgb.XGBClassifier(use_label_encoder = False, n_estimators = , max_depth = ,
                                learning_rate = , subsample = )
optimal_xgb.fit(X_train, y_train)

In [None]:
pred_y = optimal_xgb.predict(X_test)
accuracy = accuracy_score(y_test, pred_y)
print(f'Accuracy of XGBoost Classifier for Digits Dataset')

### XGBoost on Fashion Dataset

In [None]:
train_X, train_y = fashion_train.values, fashion_train.index.values
test_X, test_y = fashion_test.values, fashion_test.index.values

In [None]:
XGBoost = xgb.XGBClassifier(use_label_encoder = False, n_estimators = , max_depth = ,
                                learning_rate = , subsample = )
XGBoost.fit(train_X, train_y)

In [None]:
y_pred = XGBoost.predict(test_X)
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy of XGBoost Classifier for Fashion Dataset')