# Interactive Random Forest Classifier on MNIST Dataset

This notebook demonstrates the use of a Random Forest Classifier on the MNIST dataset. It allows interactive tuning of hyperparameters to understand their impact on model performance.

In [None]:
# Import necessary libraries
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.preprocessing import StandardScaler
import seaborn as sns
from ipywidgets import interact, IntSlider, Dropdown
import warnings
warnings.filterwarnings('ignore')

## Load and Preprocess the MNIST Dataset

In [None]:
# Load the MNIST dataset from OpenML as arrays (as_frame=False) and cast the
# labels to a compact integer dtype.
mnist = fetch_openml('mnist_784', version=1, as_frame=False)
X, y = mnist.data, mnist.target.astype(np.int8)

# Split BEFORE scaling so that no test-set statistics leak into preprocessing.
# test_size and random_state are unchanged, so the same samples end up in each split.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit the scaler on the training split only, then reuse its statistics for the test split.
# NOTE: tree ensembles do not depend on feature scale; the scaling step is kept
# so the notebook's preprocessing pipeline stays the same.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Full dataset scaled with the training-set statistics; kept so that any cell
# that still refers to X_scaled continues to work.
X_scaled = scaler.transform(X)

print(f"Training set size: {X_train.shape[0]} samples")
print(f"Test set size: {X_test.shape[0]} samples")

## Define Function to Train and Evaluate the Model

In [None]:
def train_evaluate_rf(n_estimators, max_depth, max_features):
    """
    Train a Random Forest classifier and report its performance on the test set.

    The function reads the notebook-level ``X_train``, ``y_train``, ``X_test``
    and ``y_test`` arrays. It prints the test accuracy, shows a confusion-matrix
    heatmap and prints the classification report.

    Parameters
    ----------
    n_estimators : int
        Number of trees in the forest.
    max_depth : int or None
        Maximum tree depth. ``None``, ``0`` or a negative value means the
        depth is not limited.
    max_features : str
        Passed to ``RandomForestClassifier(max_features=...)``. The legacy
        value ``'auto'`` is translated to ``'sqrt'``.

    Returns
    -------
    None
    """
    # Treat None / 0 / negative as "no depth limit"; comparing None with 0
    # directly would raise a TypeError.
    depth_limit = max_depth if (max_depth is not None and max_depth > 0) else None

    # 'auto' was deprecated in scikit-learn 1.1 and removed in 1.3; for
    # classifiers it meant 'sqrt', so map it to keep old call sites working.
    if max_features == 'auto':
        max_features = 'sqrt'

    # Initialize the model (fixed seed for reproducibility, all CPU cores)
    rf = RandomForestClassifier(
        n_estimators=n_estimators,
        max_depth=depth_limit,
        max_features=max_features,
        random_state=42,
        n_jobs=-1
    )

    # Train the model
    rf.fit(X_train, y_train)

    # Predict on the test set
    y_pred = rf.predict(X_test)

    # Calculate accuracy
    accuracy = accuracy_score(y_test, y_pred)
    print(f"Accuracy: {accuracy * 100:.2f}%")

    # Display confusion matrix (rows: actual labels, columns: predicted labels)
    cm = confusion_matrix(y_test, y_pred)
    fig, ax = plt.subplots(figsize=(10, 7))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', cbar=False, ax=ax)
    ax.set_xlabel('Predicted')
    ax.set_ylabel('Actual')
    ax.set_title('Confusion Matrix')
    plt.show()

    # Display classification report
    print("Classification Report:")
    print(classification_report(y_test, y_pred))

## Interactive Widget for Hyperparameter Tuning

In [None]:
# Define sliders and dropdowns for hyperparameters.
# continuous_update=False makes a slider report its value only when it is
# released, so a full forest is not retrained for every intermediate value
# passed while dragging.
n_estimators_slider = IntSlider(
    value=100,
    min=10,
    max=500,
    step=10,
    description='n_estimators',
    continuous_update=False
)

# A value of 0 is interpreted by train_evaluate_rf as "no depth limit".
max_depth_slider = IntSlider(
    value=10,
    min=0,
    max=50,
    step=1,
    description='max_depth',
    continuous_update=False
)

# 'auto' is no longer accepted by scikit-learn >= 1.3; for classifiers it was
# an alias of 'sqrt', which is therefore the default here.
max_features_dropdown = Dropdown(
    options=['sqrt', 'log2'],
    value='sqrt',
    description='max_features'
)

# Create interactive widget
interact(
    train_evaluate_rf,
    n_estimators=n_estimators_slider,
    max_depth=max_depth_slider,
    max_features=max_features_dropdown
);