In [6]:
# Import libraries
import numpy as np

In [7]:
# Load the digits data
from sklearn import datasets

# Request all 10 classes (digits 0-9), which is also the default
digits = datasets.load_digits(n_class=10)

# Pull the feature matrix and the target array out of the loaded dataset
features, target = digits.data, digits.target

# Report the dimensions of both, plus the distinct labels found in the target
print('The shape of the feature matrix: ', features.shape)
print('The shape of the target array: ', target.shape)
print('The unique classes in the target: ', np.unique(target))

The shape of the feature matrix:  (1797, 64)
The shape of the target array:  (1797,)
The unique classes in the target:  [0 1 2 3 4 5 6 7 8 9]


In [8]:
# Everything this cell needs: scaler, classifier, pipeline builder, CV tools
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import KFold, cross_val_score
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

# Instantiate the standardizer
standardizer = StandardScaler()

# Instantiate the classifier (solver capped at 150 iterations)
logreg = LogisticRegression(max_iter=150)

# Create the pipeline: standardize first, then classify
pipeline = make_pipeline(standardizer, logreg)

# Instantiate the k-fold cross-validation splitter:
# 5 shuffled folds, with a fixed seed so the split is repeatable
kfold_cv = KFold(n_splits=5, shuffle=True, random_state=11)

In [9]:
# Score the pipeline with k-fold cross-validation,
# collecting one accuracy value per fold
cv_scores = cross_val_score(
    estimator=pipeline,
    X=features,
    y=target,
    scoring='accuracy',
    cv=kfold_cv,
)

In [10]:
# Print the score obtained on each individual fold
print('All cv scores: ', cv_scores)

# Print the score averaged over all folds
print('Mean of all cv scores: ', np.mean(cv_scores))

All cv scores:  [0.97222222 0.96944444 0.95543175 0.97493036 0.98050139]
Mean of all cv scores:  0.9705060352831941


# Sources
- Jake VanderPlas, [Python Data Science Handbook, Chapter 5.3: Hyperparameters and Model Validation](https://jakevdp.github.io/PythonDataScienceHandbook/05.03-hyperparameters-and-model-validation.html)
- Peter Worcester, [A Comparison of Grid Search and Randomized Search Using Scikit Learn](https://blog.usejournal.com/a-comparison-of-grid-search-and-randomized-search-using-scikit-learn-29823179bc85)
- Ron Zacharski, [A Programmer’s Guide to Data Mining, Chapter 5](http://guidetodatamining.com/chapter5/) (the first 10 pages give a great explanation of cross-validation, with examples and pictures)
- Sebastian Raschka, [Model Evaluation](https://sebastianraschka.com/blog/2018/model-evaluation-selection-part4.html)
- [Scikit-Learn User Guide, 3.1 Cross-validation](https://scikit-learn.org/stable/modules/cross_validation.html)
- [Scikit-Learn User Guide, 3.2 Tuning the hyper-parameters of an estimator](https://scikit-learn.org/stable/modules/grid_search.html)
- [sklearn.model_selection.cross_val_score](https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.cross_val_score.html)
- [sklearn.model_selection.RandomizedSearchCV](https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.RandomizedSearchCV.html)
- [xgboost, Notes on Parameter Tuning](https://xgboost.readthedocs.io/en/latest/tutorials/param_tuning.html)