In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.model_selection import KFold, cross_val_score
from sklearn.ensemble import RandomForestClassifier 

In [2]:
iris = load_iris()
X, y = iris.data, iris.target

In [3]:
model = RandomForestClassifier()

In [4]:
k = 10
kf = KFold(n_splits=k, shuffle=True, random_state=42)

scores = cross_val_score(model, X, y, cv=kf)

print("Cross validation scores:", scores)
print("Mean cross validation score:", scores.mean())

Cross validation scores: [1.         1.         1.         0.93333333 1.         0.86666667
 0.86666667 1.         0.93333333 0.93333333]
Mean cross validation score: 0.9533333333333334


In [None]:
from sklearn.model_selection import StratifiedKFold

skf = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)

scores = cross_val_score(model, X, y, cv=skf, scoring='accuracy')
print(scores)
print(scores.mean())

[1.         1.         1.         0.93333333 0.86666667 0.93333333
 1.         1.         0.93333333 0.86666667]
0.9533333333333334


In [6]:
from sklearn.model_selection import LeaveOneOut

loo = LeaveOneOut()
scores = cross_val_score(model, X, y, cv=loo)

print(scores)
print(scores.mean())

[1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0. 1.
 1. 1. 1. 1. 1. 0. 1. 1. 1. 1. 1. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1.]
0.9533333333333334


In [7]:
from sklearn.model_selection import LeavePOut

lpo = LeavePOut(p = 2)
scores = cross_val_score(model, X, y, cv=lpo, n_jobs=-1)
print(scores)
print(scores.mean())

[1. 1. 1. ... 1. 1. 1.]
0.9539149888143177


In [8]:
from sklearn.metrics import accuracy_score

# Hyperparameter tuning using manual method


parameters = [
    {"n_estimators": 100, "max_depth": 3},
    {"n_estimators": 100, "max_depth": 5},
    {"n_estimators": 100, "max_depth": 7},
]

for param in parameters:
    model.set_params(**param)
    model.fit(X, y)

    y_pred = model.predict(X)

    accuracy = accuracy_score(y, y_pred)
    print(f"Accuracy: {accuracy}, parameters: {param}")

Accuracy: 0.98, parameters: {'n_estimators': 100, 'max_depth': 3}
Accuracy: 1.0, parameters: {'n_estimators': 100, 'max_depth': 5}
Accuracy: 1.0, parameters: {'n_estimators': 100, 'max_depth': 7}


In [9]:
from sklearn.model_selection import GridSearchCV
# Hyperparameter tuning using Grid Search

param_grid = {
    "n_estimators": [50, 100, 150],
    "max_depth": [2, 3, 4],
    'criterion': ['gini', 'entropy'],
    'max_depth': range(5, 15, 5),
    'n_estimators': range(10, 100, 10)
}

grid_search = GridSearchCV(model, param_grid=param_grid, cv=kf, n_jobs=-1)
grid_search.fit(X, y)

print("Best parameters: ", grid_search.best_params_)
print("Best score: ", grid_search.best_score_)

Best parameters:  {'criterion': 'entropy', 'max_depth': 5, 'n_estimators': 10}
Best score:  0.9666666666666668


  _data = np.array(data, dtype=dtype, copy=copy,


In [10]:
# Hyperparameter tuning with random Randomized search technique

from sklearn.model_selection import RandomizedSearchCV

parameter_distribution = {
    'criterion': ['gini', 'entropy'],
    'max_depth': range(5, 50, 5),
    'n_estimators': range(10, 500, 10)
}

randomized_search = RandomizedSearchCV(model, param_distributions=parameter_distribution, n_iter=10, cv=5, verbose=2, random_state=0)
randomized_search.fit(X, y)

print(randomized_search.best_params_)
print(randomized_search.best_score_)

Fitting 5 folds for each of 10 candidates, totalling 50 fits
[CV] END ......criterion=gini, max_depth=20, n_estimators=40; total time=   0.0s
[CV] END ......criterion=gini, max_depth=20, n_estimators=40; total time=   0.0s
[CV] END ......criterion=gini, max_depth=20, n_estimators=40; total time=   0.0s
[CV] END ......criterion=gini, max_depth=20, n_estimators=40; total time=   0.0s
[CV] END ......criterion=gini, max_depth=20, n_estimators=40; total time=   0.0s
[CV] END .....criterion=gini, max_depth=45, n_estimators=150; total time=   0.2s
[CV] END .....criterion=gini, max_depth=45, n_estimators=150; total time=   0.3s
[CV] END .....criterion=gini, max_depth=45, n_estimators=150; total time=   0.2s
[CV] END .....criterion=gini, max_depth=45, n_estimators=150; total time=   0.2s
[CV] END .....criterion=gini, max_depth=45, n_estimators=150; total time=   0.1s
[CV] END ..criterion=entropy, max_depth=10, n_estimators=240; total time=   0.2s
[CV] END ..criterion=entropy, max_depth=10, n_es

### Cross validation