Using K-Neighbors-Classifier

In [97]:
from sklearn.model_selection import cross_val_score
from sklearn.datasets import load_iris
from sklearn.neighbors import KNeighborsClassifier

In [98]:
# Load the iris dataset object; its .data / .target attributes are unpacked in the next cell.
iris = load_iris()

In [99]:
# Split the dataset object into the inputs (X) and the labels (Y)
# that every cross-validation run below is scored on.
X = iris.data
Y = iris.target

In [100]:
# K-nearest-neighbours classifier configured with n_neighbors=3.
knn = KNeighborsClassifier(n_neighbors=3)

In [101]:
# Cross-validate the KNN model on (X, Y); cv=5 yields one score per fold,
# so cv_scores ends up with five entries.
cv_scores = cross_val_score(estimator=knn, X=X, y=Y, cv=5)

In [102]:
# Report the per-fold scores followed by their mean.
# NOTE(review): the label "Mean CV Section" looks like a typo for "Mean CV Score";
# it is left unchanged so the code still matches the recorded output.
print(
    f"\nCross Validation Scores: {cv_scores}",
    f"\nMean CV Section: {cv_scores.mean()}\n",
    sep="\n",
)


Cross Validation Scores: [0.96666667 0.96666667 0.93333333 0.96666667 1.        ]

Mean CV Section: 0.9666666666666668



Using K-Fold and Decision Tree Classifier

In [103]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import KFold

In [104]:
# Decision tree shared by the K-Fold, Stratified K-Fold and Leave-One-Out runs below;
# random_state is pinned to 42 so repeated runs use the same seed.
clf = DecisionTreeClassifier(random_state=42)

In [105]:
# Plain 5-way K-Fold splitter, then score the decision tree once per fold.
# NOTE(review): no shuffle= is passed, so fold membership follows the row order of X/Y.
# If the samples are stored grouped by class (verify for load_iris), the folds will be
# class-skewed -- consider KFold(n_splits=5, shuffle=True, random_state=<seed>) and
# re-running the cell to refresh the recorded scores.
kfolds = KFold(n_splits=5)
kfold_cv_scores = cross_val_score(estimator=clf, X=X, y=Y, cv=kfolds)

In [106]:
# Show the K-Fold scores, their average, and how many folds went into that average.
print(
    f"Cross Validation Scores by using K-Fold: {kfold_cv_scores}",
    f"\nAverage CV Scores: {kfold_cv_scores.mean()}",
    f"\nNumber of CV Scores used in Average: {len(kfold_cv_scores)}\n",
    sep="\n",
)

Cross Validation Scores by using K-Fold: [1.         1.         0.83333333 0.93333333 0.8       ]

Average CV Scores: 0.9133333333333333

Number of CV Scores used in Average: 5



Using Stratified K-Fold

In cases where classes are imbalanced, we need a way to account for the imbalance in both the train and validation sets. To do so, we can stratify on the target classes, meaning that both sets will contain each class in the same proportion as the full data set.

In [107]:
from sklearn.model_selection import StratifiedKFold

In [108]:
# Stratified splitter with 5 folds, used as the cv strategy in the next cell.
sk_folds = StratifiedKFold(n_splits=5)

In [109]:
# Score the same decision tree (clf) using the stratified folds; one score per fold.
sk_folds_cv_scores = cross_val_score(estimator=clf, X=X, y=Y, cv=sk_folds)

In [110]:
# Show the Stratified K-Fold scores, their average, and the number of folds averaged.
print(
    f"Cross Validation Scores by using SK-Fold: {sk_folds_cv_scores}",
    f"\nAverage CV Scores: {sk_folds_cv_scores.mean()}",
    f"\nNumber of CV Scores used in Average: {len(sk_folds_cv_scores)}\n",
    sep="\n",
)

Cross Validation Scores by using SK-Fold: [0.96666667 0.96666667 0.9        0.93333333 1.        ]

Average CV Scores: 0.9533333333333334

Number of CV Scores used in Average: 5



Using Leave-One-Out (LOO)

Instead of selecting the number of splits in the training data set as in k-fold, LeaveOneOut uses 1 observation to validate and n-1 observations to train. This method is an exhaustive technique.

In [111]:
from sklearn.model_selection import LeaveOneOut

In [112]:
# Leave-One-Out splitter; it takes no fold count.
loo = LeaveOneOut()

In [113]:
# Score the decision tree with the Leave-One-Out splitter; the result holds
# one score per held-out sample (150 entries in the recorded run).
loo_cv_score = cross_val_score(estimator=clf, X=X, y=Y, cv=loo)

In [114]:
# Show the Leave-One-Out scores, their average, and how many scores were averaged.
# NOTE(review): this prints every per-sample score; the mean and the count
# are probably enough for the narrative.
print(
    f"Cross Validation Scores by using Leave-One-Out: {loo_cv_score}",
    f"\nAverage CV Scores: {loo_cv_score.mean()}",
    f"\nNumber of CV Scores used in Average: {len(loo_cv_score)}\n",
    sep="\n",
)

Cross Validation Scores by using Leave-One-Out: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0. 1.
 1. 1. 1. 1. 1. 0. 1. 1. 1. 1. 1. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 0. 1. 1. 1. 0. 1. 1. 1. 1. 0. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1.]

Average CV Scores: 0.94

Number of CV Scores used in Average: 150

