In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import get_scorer_names
from sklearn.metrics import accuracy_score

# Import Random Forest and XGBoost Classifiers

In [None]:
from sklearn.ensemble import RandomForestClassifier
import xgboost as xgb

# Import PyTorch CNN dependencies

In [None]:
from torchvision import transforms
from torchvision import datasets
import torch
from torch.utils.data import DataLoader
from torch import nn
import torch.optim as optim
from tqdm import tqdm

# Select Datasets

In [None]:
from sklearn.datasets import load_digits

# scikit-learn's bundled digits data, unpacked directly as (features, labels).
digits_X, digits_y = load_digits(return_X_y=True)

# Fashion-MNIST CSV exports downloaded from Kaggle. The first column is promoted
# to the DataFrame index; later cells read the class labels back from that index.
fashion_train, fashion_test = (
    pd.read_csv(csv_path, index_col=0)
    for csv_path in ('fashion-mnist_train.csv', 'fashion-mnist_test.csv')
)

# Train a RandomForestClassifier

### RandomForest on Digits dataset

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the digits data for the final evaluation; the split is seeded
# so the same rows land in train/test on every run.
X_train, X_test, y_train, y_test = train_test_split(
    digits_X, digits_y, test_size=0.3, random_state=42
)

# Seed the forest as well: with an unseeded estimator the bootstrap samples and
# feature subsets change on every run, so the CV scores (and therefore the
# selected hyperparameters) are not reproducible even though the split is.
rf = RandomForestClassifier(random_state=42)
param_grid = {
    'n_estimators': [50, 100, 150, 200],
    'max_depth': [1, 4, 5, 10],
    'max_features': [2, 5, 10],
}

# Exhaustive search over the grid, scored by accuracy, using all CPU cores.
# No `cv` is passed, so GridSearchCV's default cross-validation is used.
rf_gs = GridSearchCV(rf, param_grid, scoring='accuracy', n_jobs=-1)
rf_gs.fit(X_train, y_train)
params = rf_gs.best_params_

# Last expression of the cell: display the winning hyperparameter combination.
params

In [None]:
# Build the final digits model from the hyperparameters the grid search actually
# selected (`params`) instead of re-typing them by hand, so this cell cannot
# drift out of sync with the search result. The seed makes the fit repeatable.
optimal_rf = RandomForestClassifier(**params, random_state=42)
optimal_rf.fit(X_train, y_train)

In [None]:
# Score the tuned forest on the held-out digits split and report plain accuracy.
y_pred = optimal_rf.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f'Accuracy of Random Forest Classifier for Digits Datset: {accuracy}')

### RandomForest on Fashion Dataset

In [None]:
# Fresh base estimator for the Fashion grid search. It is seeded so that the
# cross-validation scores, and hence the selected hyperparameters, are repeatable.
rf = RandomForestClassifier(random_state=42)

# The CSVs were loaded with column 0 as the index, so the index holds the values
# used as class labels here and the remaining columns form the feature matrix.
# `.to_numpy()` is the pandas-recommended replacement for `.values`.
train_X, train_y = fashion_train.to_numpy(), fashion_train.index.to_numpy()
test_X, test_y = fashion_test.to_numpy(), fashion_test.index.to_numpy()

In [None]:
# Candidate hyperparameters for the Fashion forest (4 x 4 x 3 = 48 combinations).
param_grid = dict(
    n_estimators=[50, 100, 150, 200],
    max_depth=[1, 4, 5, 10],
    max_features=[2, 5, 10],
)

# Accuracy-scored exhaustive search over the grid, parallelised across all cores.
rf_gs = GridSearchCV(estimator=rf, param_grid=param_grid, scoring='accuracy', n_jobs=-1)
rf_gs.fit(train_X, train_y)
params = rf_gs.best_params_

# Display the best combination found as the cell's output.
params

In [None]:
# Build the final Fashion model from the hyperparameters the grid search actually
# selected (`params`) rather than hand-copied literals, so the model always
# matches the search result. The seed makes the fit repeatable.
optimal_rf = RandomForestClassifier(**params, random_state=42)
optimal_rf.fit(train_X, train_y)

In [None]:
# Score the tuned forest on the Fashion test file and report plain accuracy.
pred_y = optimal_rf.predict(test_X)
accuracy = accuracy_score(y_true=test_y, y_pred=pred_y)
print(f'Accuracy of Random Forest Classifier for Fashion Datset: {accuracy}')