# Model Tuning

## Manual Tuning

In [1]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split, cross_val_score
import itertools
import numpy as np

# Load the dataset
iris = load_iris()
X = iris.data
y = iris.target

# Split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Define the parameter grid
param_grid = {
    'n_estimators': [100, 200, 300],
    'max_depth': [None, 5, 10],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4],
    'max_features': ['sqrt', 'log2']
}

# Generate all possible combinations of parameters
param_combinations = list(itertools.product(
    param_grid['n_estimators'],
    param_grid['max_depth'],
    param_grid['min_samples_split'],
    param_grid['min_samples_leaf'],
    param_grid['max_features']
))

best_score = -np.inf
best_params = None

# Manual grid search
for (n_estimators, max_depth, min_samples_split, min_samples_leaf, max_features) in param_combinations:
    rf = RandomForestClassifier(
        n_estimators=n_estimators,
        max_depth=max_depth,
        min_samples_split=min_samples_split,
        min_samples_leaf=min_samples_leaf,
        max_features=max_features,
        random_state=42
    )
    
    # Cross-validation score (5-fold)
    scores = cross_val_score(rf, X_train, y_train, cv=5)
    mean_score = np.mean(scores)
    
    if mean_score > best_score:
        best_score = mean_score
        best_params = {
            'n_estimators': n_estimators,
            'max_depth': max_depth,
            'min_samples_split': min_samples_split,
            'min_samples_leaf': min_samples_leaf,
            'max_features': max_features
        }

print("Best Hyperparameters (manual search):", best_params)
print("Best CV Score:", best_score)

# Train the best model on the full training set
best_rf = RandomForestClassifier(**best_params, random_state=42)
best_rf.fit(X_train, y_train)
print("Test Accuracy:", best_rf.score(X_test, y_test))


Best Hyperparameters (manual search): {'n_estimators': 200, 'max_depth': None, 'min_samples_split': 2, 'min_samples_leaf': 2, 'max_features': 'sqrt'}
Best CV Score: 0.9583333333333334
Test Accuracy: 1.0


## GridSearchCV - RandomForestClassifier

In [2]:
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

# Load the dataset
iris = load_iris()
X = iris.data
y = iris.target

# Split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define the parameter grid
param_grid = {'n_estimators': [100, 200, 300],
              'max_depth': [None, 5, 10],
              'min_samples_split': [2, 5, 10],
              'min_samples_leaf': [1, 2, 4],
              'max_features': ['sqrt', 'log2']}

# Create a Random Forest classifier
rf = RandomForestClassifier()

# Perform grid search with cross-validation
grid_search = GridSearchCV(estimator=rf, param_grid=param_grid, cv=5)
grid_search.fit(X_train, y_train)

# Print the best hyperparameters and score
print("Best Hyperparameters: ", grid_search.best_params_)
print("Best Score: ", grid_search.best_score_)


Best Hyperparameters:  {'max_depth': 5, 'max_features': 'sqrt', 'min_samples_leaf': 2, 'min_samples_split': 10, 'n_estimators': 100}
Best Score:  0.9666666666666666


In [3]:
print(grid_search.cv_results_)

{'mean_fit_time': array([0.06892042, 0.13724327, 0.20701966, 0.06916361, 0.13627687,
       0.20750451, 0.07025647, 0.13680148, 0.20490017, 0.06737251,
       0.14019628, 0.20547562, 0.06829505, 0.13734155, 0.20676241,
       0.0682868 , 0.13485279, 0.20367103, 0.06938634, 0.13749657,
       0.20311308, 0.0664268 , 0.13397679, 0.21167607, 0.06723623,
       0.13590746, 0.20627971, 0.07137971, 0.14024029, 0.20979395,
       0.06787753, 0.13483548, 0.20403614, 0.06949596, 0.13742895,
       0.20758348, 0.07007699, 0.13639731, 0.20699358, 0.06796627,
       0.14065194, 0.20994534, 0.07129331, 0.13535886, 0.20576382,
       0.06864195, 0.13835373, 0.20485959, 0.07062364, 0.13952651,
       0.20502887, 0.06974936, 0.14038801, 0.20845027, 0.07131314,
       0.13738108, 0.20568805, 0.07216611, 0.1341198 , 0.20731025,
       0.06628714, 0.1335609 , 0.20116405, 0.06695166, 0.13283291,
       0.20332909, 0.06637149, 0.13402319, 0.2017365 , 0.06920238,
       0.13640299, 0.20387459, 0.07073288, 0