<a href="https://colab.research.google.com/github/mithun-martin/MACHINE-LEARNING/blob/main/HyperParametreTuning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score

# Load the Iris dataset as a feature matrix X and a label vector y.
data = load_iris()
X, y = data.data, data.target

# Hold out 20% of the samples as a test set; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Base estimator whose hyperparameters are tuned below.
rf = RandomForestClassifier(random_state=42)

# Parameter grid: 3 hyperparameters x 3 values each = 27 combinations.
param_grid = {
    # Number of decision trees in the random forest.
    'n_estimators': [50, 100, 150],
    # Maximum depth of each tree (None = no explicit depth limit).
    'max_depth': [None, 5, 10],
    # Minimum number of samples required to split a node in the tree.
    # Lower value (2)  -> a node can split even with very few samples
    #                     -> more complex model, higher chance of overfitting.
    # Higher value (6) -> more samples are needed before a split
    #                     -> simpler model, less overfitting.
    'min_samples_split': [2, 4, 6],
}

# Grid search with 3-fold cross-validation (cv=3):
#   - The training data is split into 3 folds.
#   - The model is trained on 2 folds and validated on the remaining one.
#   - This is repeated 3 times, each time holding out a different fold.
# scoring='accuracy' is the metric maximized when choosing the best parameters.
grid_search = GridSearchCV(rf, param_grid, cv=3, scoring='accuracy')
grid_search.fit(X_train, y_train)

print("Best Parameters:", grid_search.best_params_)

# Evaluate the best parameter combination on the held-out test set.
best_rf = grid_search.best_estimator_
y_pred = best_rf.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
# The target is multiclass, so precision/recall need an explicit averaging
# strategy (the default average='binary' raises for multiclass labels).
# 'macro' gives every class equal weight.
precision = precision_score(y_test, y_pred, average='macro')
recall = recall_score(y_test, y_pred, average='macro')

print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)



Best Parameters: {'max_depth': None, 'min_samples_split': 2, 'n_estimators': 50}
Accuracy: 1.0
Precision: 1.0
Recall: 1.0


In [10]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score
import pandas as pd

# Load data
data = load_iris()
X, y = data.data, data.target

# Train/test split (stratify=y keeps the class balance the same in both parts)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Three hand-picked hyperparameter sets to compare, one training run per set.
param_sets = [
    {'n_estimators': 50,  'max_depth': None, 'min_samples_split': 2},
    {'n_estimators': 100, 'max_depth': 5,    'min_samples_split': 4},
    {'n_estimators': 150, 'max_depth': 15,   'min_samples_split': 12},
]

# One record per run; turned into a comparison table after the loop.
results = []

for i, params in enumerate(param_sets, start=1):
    # i is the 1-based run number; params is a dict such as
    # {'n_estimators': 50, 'max_depth': None, 'min_samples_split': 2}.
    print(f"\n--- Run {i}: params = {params} ---")

    # The dict keys match the estimator's keyword arguments, so unpack them directly.
    rf = RandomForestClassifier(**params, random_state=42)
    rf.fit(X_train, y_train)
    y_pred = rf.predict(X_test)

    acc = accuracy_score(y_test, y_pred)
    print(f"Accuracy:  {acc:.4f}")

    results.append({
        'run': i,
        'n_estimators': params['n_estimators'],
        # Stored as text so that None shows up as 'None' in the table.
        'max_depth': str(params['max_depth']),
        'min_samples_split': params['min_samples_split'],
        'accuracy': acc,
    })

# Side-by-side comparison of all runs (built once from the collected records).
results_df = pd.DataFrame(results)
results_df




--- Run 1: params = {'n_estimators': 50, 'max_depth': None, 'min_samples_split': 2} ---
Accuracy:  0.9000

--- Run 2: params = {'n_estimators': 100, 'max_depth': 5, 'min_samples_split': 4} ---
Accuracy:  0.9667

--- Run 3: params = {'n_estimators': 150, 'max_depth': 15, 'min_samples_split': 12} ---
Accuracy:  0.9667
