In [12]:
from sklearn.datasets import load_iris
from sklearn.model_selection import KFold, cross_val_score, LeaveOneOut
from sklearn.ensemble import RandomForestClassifier

In [3]:
# Load the Iris dataset and pull out the feature array (X) and the target array (y).
data = load_iris()
X, y = data.data, data.target

In [5]:
# 5-fold splitter. shuffle=True randomizes the sample order before splitting,
# and the fixed random_state makes that shuffle repeatable across runs.
kfolds = KFold(n_splits=5, shuffle=True, random_state=42)

In [None]:
# Random forest of 100 trees; the fixed seed keeps the results repeatable.
rfModel = RandomForestClassifier(n_estimators=100, random_state=42)

# NOTE(review): this full-data fit does not influence the scores computed below,
# because cross_val_score fits a fresh clone of the estimator on every training
# split. It only leaves rfModel itself in a fitted state after this cell.
rfModel.fit(X, y)

# 'cv' takes the cross-validation strategy to use; here it is the shuffled
# 5-fold splitter held in kfolds.
kfScores = cross_val_score(estimator=rfModel, X=X, y=y, cv=kfolds)

# kfScores holds one score per fold and shows how well the model performs on
# each held-out part of the data. The higher the score, the better the model.

In [10]:
# Show the individual score obtained on each of the five folds.
print(
    "Scores for each fold: ",
    kfScores,
)

Scores for each fold:  [1.         0.96666667 0.93333333 0.93333333 0.96666667]


In [11]:
# Summarize the K-Fold run with the mean of the per-fold scores.
print(
    "Average cross-validation score: ",
    kfScores.mean(),
)

Average cross-validation score:  0.9600000000000002


In [None]:
"""
Why don't we pass any parameters to LeaveOneOut()?
Because Leave-One-Out cross-validation (LOO-CV) inherently defines its own splitting strategy: it holds out one sample at a time for testing and trains on all the rest, so the number of splits always equals the number of samples.
Therefore, it does not need any additional parameters for the number of splits or for shuffling, unlike K-Fold cross-validation.
"""
# Leave-One-Out splitter: every sample is used exactly once as the test set.
loo = LeaveOneOut()

# Score the same estimator under LOO; this yields one score per sample.
looScores = cross_val_score(estimator=rfModel, X=X, y=y, cv=loo)

# Report the mean over all the single-sample test sets.
print(
    "Leave-One-Out cross-validation score: ",
    looScores.mean(),
)

Leave-One-Out cross-validation score:  0.9533333333333334
