# Cross Validation Example
https://www.w3schools.com/python/python_ml_cross_validation.asp

https://scikit-learn.org/stable/modules/cross_validation.html

In [1]:
# Dataset: iris
from sklearn import datasets
X, y = datasets.load_iris(return_X_y=True)

In [2]:
# In this example use decision tree classifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import KFold, cross_val_score

In [3]:
clf = DecisionTreeClassifier(random_state=42)

In [4]:
k_fold = KFold(n_splits=5)
scores = cross_val_score(clf, X, y, cv = k_fold)

In [5]:
print("Cross Validation scores: ", scores)
print("Average CV Score: ", scores.mean)
print("Number of CV Scores used in Averages: ", len(scores))

Cross Validation scores:  [1.         1.         0.83333333 0.93333333 0.8       ]
Average CV Score:  <built-in method mean of numpy.ndarray object at 0x7fccd9de9530>
Number of CV Scores used in Averages:  5


In [8]:
# For imbalanced data use stratified K-fold:
from sklearn.model_selection import StratifiedKFold
sk_fold = StratifiedKFold(n_splits = 5)
sk_scores = cross_val_score(clf, X, y, cv = sk_fold)
print("Cross Validation scores: ", sk_scores)
print("Average CV Score: ", sk_scores.mean)
print("Number of CV Scores used in Averages: ", len(sk_scores))

Cross Validation scores:  [0.96666667 0.96666667 0.9        0.93333333 1.        ]
Average CV Score:  <built-in method mean of numpy.ndarray object at 0x7fccd8cf7c90>
Number of CV Scores used in Averages:  5


### Note.  the average CV increases from the basic k-fold 

### Note. There are several other CV methods: 
Leave-One-Out (LOO)

Leave-P-Out (LPO)

Shuffle Split

### Prediction Using Cross Validation:

In [14]:
from sklearn import linear_model
from sklearn.model_selection import cross_val_predict
lasso = linear_model.Lasso()
y_pred = cross_val_predict(lasso, X, y, cv =3)
print(y_pred)

[1.5        1.5        1.5        1.5        1.5        1.5
 1.5        1.5        1.5        1.5        1.5        1.5
 1.5        1.5        1.5        1.5        1.5        1.5
 1.5        1.5        1.5        1.5        1.5        1.5
 1.5        1.5        1.5        1.5        1.5        1.5
 1.5        1.5        1.5        1.5        1.5        1.5
 1.5        1.5        1.5        1.5        1.5        1.5
 1.5        1.5        1.5        1.5        1.5        1.5
 1.5        1.5        1.28685466 1.23876503 1.3349443  1.11854095
 1.26280985 1.23876503 1.28685466 0.95022723 1.26280985 1.09449613
 0.99831686 1.16663058 1.11854095 1.28685466 1.02236168 1.21472021
 1.23876503 1.14258576 1.23876503 1.09449613 1.31089948 1.11854095
 1.3349443  1.28685466 1.1906754  1.21472021 1.31089948 1.35898911
 1.23876503 0.99831686 1.07045131 1.0464065  1.09449613 1.38303393
 1.23876503 1.23876503 1.28685466 1.21472021 1.14258576 1.11854095
 1.21472021 1.26280985 1.11854095 0.95022723 1.1666