In [3]:
from sklearn.datasets import load_iris, fetch_openml

iris = load_iris()
X = iris.data
Y = iris.target
X_mnist, Y_mnist = fetch_openml('mnist_784', version=1, return_X_y=True, as_frame=False)
Y_mnist = Y_mnist.astype(int)

In [7]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
X_mnist_scaled = scaler.fit_transform(X_mnist)

In [9]:
from sklearn.model_selection import train_test_split, GridSearchCV

X_train, X_test, Y_train, Y_test = train_test_split(X_scaled, Y, test_size=0.2, random_state=42)
X_mnist_train, X_mnist_test, Y_mnist_train, Y_mnist_test = train_test_split(X_mnist_scaled, Y_mnist, test_size=0.2, random_state=42)

In [12]:
from sklearn.ensemble import RandomForestClassifier

random_forest_iris = RandomForestClassifier(n_estimators= 100, max_depth=5, random_state= 42)
random_forest_iris.fit(X_train, Y_train)
Y_iris = random_forest_iris.predict(X_test)

In [16]:
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

acc_iris = accuracy_score(Y_iris, Y_test)
conf_iris = confusion_matrix(Y_iris, Y_test)

print(f"Accuracy for the Random Forest on the Iris dataset is: {acc_iris: .2f}")
print("Confusion Matrix of the Random Forest model on the Iris dataset is: ", conf_iris)
print("Classification report of the Random Forest model on the Iris dataset is: ", classification_report(Y_iris, Y_test))

Accuracy for the Random Forest on the Iris dataset is:  1.00
Confusion Matrix of the Random Forest model on the Iris dataset is:  [[10  0  0]
 [ 0  9  0]
 [ 0  0 11]]
Classification report of the Random Forest model on the Iris dataset is:                precision    recall  f1-score   support

           0       1.00      1.00      1.00        10
           1       1.00      1.00      1.00         9
           2       1.00      1.00      1.00        11

    accuracy                           1.00        30
   macro avg       1.00      1.00      1.00        30
weighted avg       1.00      1.00      1.00        30



In [20]:
param_grid = {
    'n_estimators': [50, 100, 200],
    'max_depth': [5, 10, 15],
    'min_samples_split': [2, 5]
}

rf = RandomForestClassifier(random_state=42)
rf_mnist = GridSearchCV(rf, param_grid, cv=3, scoring='accuracy', n_jobs= -1)
rf_mnist.fit(X_mnist_train, Y_mnist_train)
best_rf_mnist = rf_mnist.best_estimator_

print("The best params for the Rondom Forest model is: ", rf_mnist.best_params_)

The best params for the Rondom Forest model is:  {'max_depth': 15, 'min_samples_split': 2, 'n_estimators': 200}


In [22]:
Y_mnist_pred = best_rf_mnist.predict(X_mnist_test)

acc_grid = accuracy_score(Y_mnist_pred, Y_mnist_test)
conf_grid = confusion_matrix(Y_mnist_pred, Y_mnist_test)

print(f"Accuracy for the optimal Random Forest on the Iris dataset is: {acc_grid: .2f}")
print("Confusion Matrix of the optimal Random Forest model on the Iris dataset is: ", conf_grid)
print("Classification report of the optimal Random Forest model on the Iris dataset is: ", classification_report(Y_mnist_pred, Y_mnist_test))

Accuracy for the optimal Random Forest on the Iris dataset is:  0.96
Confusion Matrix of the optimal Random Forest model on the Iris dataset is:  [[1319    0    3    1    3    3    5    2    0    6]
 [   0 1576    3    0    0    4    2    8   10    7]
 [   5    7 1337   24    3    3    0   19   11    2]
 [   0    7    2 1355    0   17    0    0   13   19]
 [   2    2    5    1 1244    3    5    6    6   18]
 [   2    0    1   10    0 1219    7    0    7    3]
 [   5    0    9    1    4    8 1373    0    7    1]
 [   1    6    9   16    4    3    0 1444    5   15]
 [   8    2    9   15    2   11    4    2 1287    7]
 [   1    0    2   10   35    2    0   22   11 1342]]
Classification report of the optimal Random Forest model on the Iris dataset is:                precision    recall  f1-score   support

           0       0.98      0.98      0.98      1342
           1       0.98      0.98      0.98      1610
           2       0.97      0.95      0.96      1411
           3       0.95 