FIX all the examples to use the new cv classes
raghavrv committed Aug 12, 2015
1 parent d341dac commit 0d7d738
Showing 37 changed files with 96 additions and 79 deletions.
4 changes: 2 additions & 2 deletions examples/applications/face_recognition.py
@@ -31,9 +31,9 @@
 import logging
 import matplotlib.pyplot as plt
 
-from sklearn.cross_validation import train_test_split
+from sklearn.model_selection import train_test_split
+from sklearn.model_selection import GridSearchCV
 from sklearn.datasets import fetch_lfw_people
-from sklearn.grid_search import GridSearchCV
 from sklearn.metrics import classification_report
 from sklearn.metrics import confusion_matrix
 from sklearn.decomposition import RandomizedPCA
2 changes: 1 addition & 1 deletion examples/calibration/plot_calibration.py
@@ -36,7 +36,7 @@
 from sklearn.naive_bayes import GaussianNB
 from sklearn.metrics import brier_score_loss
 from sklearn.calibration import CalibratedClassifierCV
-from sklearn.cross_validation import train_test_split
+from sklearn.model_selection import train_test_split
 
 
 n_samples = 50000
2 changes: 1 addition & 1 deletion examples/calibration/plot_calibration_curve.py
@@ -56,7 +56,7 @@
 from sklearn.metrics import (brier_score_loss, precision_score, recall_score,
                              f1_score)
 from sklearn.calibration import CalibratedClassifierCV, calibration_curve
-from sklearn.cross_validation import train_test_split
+from sklearn.model_selection import train_test_split
 
 
 # Create dataset of classification task with many redundant and few
2 changes: 1 addition & 1 deletion examples/classification/plot_classifier_comparison.py
@@ -31,7 +31,7 @@
 import numpy as np
 import matplotlib.pyplot as plt
 from matplotlib.colors import ListedColormap
-from sklearn.cross_validation import train_test_split
+from sklearn.model_selection import train_test_split
 from sklearn.preprocessing import StandardScaler
 from sklearn.datasets import make_moons, make_circles, make_classification
 from sklearn.neighbors import KNeighborsClassifier
6 changes: 3 additions & 3 deletions examples/cluster/plot_feature_agglomeration_vs_univariate_selection.py
@@ -30,9 +30,9 @@
 from sklearn.cluster import FeatureAgglomeration
 from sklearn.linear_model import BayesianRidge
 from sklearn.pipeline import Pipeline
-from sklearn.grid_search import GridSearchCV
 from sklearn.externals.joblib import Memory
-from sklearn.cross_validation import KFold
+from sklearn.model_selection import GridSearchCV
+from sklearn.model_selection import KFold
 
 ###############################################################################
 # Generate data
@@ -60,7 +60,7 @@
 
 ###############################################################################
 # Compute the coefs of a Bayesian Ridge with GridSearch
-cv = KFold(len(y), 2)  # cross-validation generator for model selection
+cv = KFold(2)  # cross-validation generator for model selection
 ridge = BayesianRidge()
 cachedir = tempfile.mkdtemp()
 mem = Memory(cachedir=cachedir, verbose=1)
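The hunk above is the heart of the new cv classes: a splitter is now constructed from its parameters alone, and the data is handed to split() afterwards. A minimal sketch of the two idioms (illustrative, not part of the commit; the fold count is passed positionally to sidestep the n_folds keyword, which was later renamed n_splits):

    import numpy as np
    from sklearn.model_selection import KFold

    X = np.arange(20).reshape(10, 2)

    # old API: cv = KFold(len(X), 2) bound the splitter to one dataset size
    cv = KFold(2)                    # new API: data-independent splitter
    for train, test in cv.split(X):  # indices are produced by split()
        print(train, test)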
2 changes: 1 addition & 1 deletion examples/covariance/plot_covariance_estimation.py
@@ -49,7 +49,7 @@
 
 from sklearn.covariance import LedoitWolf, OAS, ShrunkCovariance, \
     log_likelihood, empirical_covariance
-from sklearn.grid_search import GridSearchCV
+from sklearn.model_selection import GridSearchCV
 
 
 ###############################################################################
4 changes: 2 additions & 2 deletions examples/decomposition/plot_pca_vs_fa_model_selection.py
@@ -35,8 +35,8 @@
 
 from sklearn.decomposition import PCA, FactorAnalysis
 from sklearn.covariance import ShrunkCovariance, LedoitWolf
-from sklearn.cross_validation import cross_val_score
-from sklearn.grid_search import GridSearchCV
+from sklearn.model_selection import cross_val_score
+from sklearn.model_selection import GridSearchCV
 
 ###############################################################################
 # Create the data
8 changes: 4 additions & 4 deletions examples/ensemble/plot_gradient_boosting_oob.py
@@ -33,8 +33,8 @@
 import matplotlib.pyplot as plt
 
 from sklearn import ensemble
-from sklearn.cross_validation import KFold
-from sklearn.cross_validation import train_test_split
+from sklearn.model_selection import KFold
+from sklearn.model_selection import train_test_split
 
 
 # Generate data (adapted from G. Ridgeway's gbm example)
@@ -75,10 +75,10 @@ def heldout_score(clf, X_test, y_test):
 
 
 def cv_estimate(n_folds=3):
-    cv = KFold(n=X_train.shape[0], n_folds=n_folds)
+    cv = KFold(n_folds=n_folds)
     cv_clf = ensemble.GradientBoostingClassifier(**params)
     val_scores = np.zeros((n_estimators,), dtype=np.float64)
-    for train, test in cv:
+    for train, test in cv.split(X_train, y_train):
        cv_clf.fit(X_train[train], y_train[train])
        val_scores += heldout_score(cv_clf, X_train[test], y_train[test])
     val_scores /= n_folds
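As the rewritten cv_estimate shows, the splitter itself is no longer iterable; split(X, y) yields the (train, test) index pairs instead. A hedged sketch of the idiom in isolation (variable names are illustrative, not from the example):

    import numpy as np
    from sklearn.model_selection import KFold

    rng = np.random.RandomState(0)
    X_train = rng.rand(12, 3)
    y_train = rng.randint(0, 2, 12)

    cv = KFold(3)
    for train, test in cv.split(X_train, y_train):
        # train/test are integer index arrays, ready for fancy indexing
        X_tr, y_tr = X_train[train], y_train[train]
        X_te, y_te = X_train[test], y_train[test]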
2 changes: 1 addition & 1 deletion examples/ensemble/plot_partial_dependence.py
@@ -51,7 +51,7 @@
 
 from mpl_toolkits.mplot3d import Axes3D
 
-from sklearn.cross_validation import train_test_split
+from sklearn.model_selection import train_test_split
 from sklearn.ensemble import GradientBoostingRegressor
 from sklearn.ensemble.partial_dependence import plot_partial_dependence
 from sklearn.ensemble.partial_dependence import partial_dependence
16 changes: 10 additions & 6 deletions examples/exercises/plot_cv_diabetes.py
@@ -14,21 +14,25 @@
 import numpy as np
 import matplotlib.pyplot as plt
 
-from sklearn import cross_validation, datasets, linear_model
+from sklearn import datasets
+from sklearn.linear_model import LassoCV
+from sklearn.linear_model import Lasso
+from sklearn.model_selection import KFold
+from sklearn.model_selection import cross_val_score
 
 diabetes = datasets.load_diabetes()
 X = diabetes.data[:150]
 y = diabetes.target[:150]
 
-lasso = linear_model.Lasso()
+lasso = Lasso()
 alphas = np.logspace(-4, -.5, 30)
 
 scores = list()
 scores_std = list()
 
 for alpha in alphas:
     lasso.alpha = alpha
-    this_scores = cross_validation.cross_val_score(lasso, X, y, n_jobs=1)
+    this_scores = cross_val_score(lasso, X, y, n_jobs=1)
     scores.append(np.mean(this_scores))
     scores_std.append(np.std(this_scores))
@@ -51,15 +55,15 @@
 # performs cross-validation on the training data it receives).
 # We use external cross-validation to see how much the automatically obtained
 # alphas differ across different cross-validation folds.
-lasso_cv = linear_model.LassoCV(alphas=alphas)
-k_fold = cross_validation.KFold(len(X), 3)
+lasso_cv = LassoCV(alphas=alphas)
+k_fold = KFold(3)
 
 print("Answer to the bonus question:",
       "how much can you trust the selection of alpha?")
 print()
 print("Alpha parameters maximising the generalization score on different")
 print("subsets of the data:")
-for k, (train, test) in enumerate(k_fold):
+for k, (train, test) in enumerate(k_fold.split(X, y)):
     lasso_cv.fit(X[train], y[train])
     print("[fold {0}] alpha: {1:.5f}, score: {2:.5f}".
           format(k, lasso_cv.alpha_, lasso_cv.score(X[test], y[test])))
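The same KFold(3) splitter could equally be passed as the cv argument rather than looped over by hand; a short sketch under the same API assumptions (this variant is not in the example itself):

    import numpy as np
    from sklearn import datasets
    from sklearn.linear_model import LassoCV
    from sklearn.model_selection import KFold, cross_val_score

    diabetes = datasets.load_diabetes()
    X, y = diabetes.data[:150], diabetes.target[:150]

    alphas = np.logspace(-4, -.5, 30)
    # cross_val_score accepts the data-independent splitter directly
    scores = cross_val_score(LassoCV(alphas=alphas), X, y, cv=KFold(3))
    print(scores.mean())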
5 changes: 3 additions & 2 deletions examples/exercises/plot_cv_digits.py
@@ -12,7 +12,8 @@
 
 
 import numpy as np
-from sklearn import cross_validation, datasets, svm
+from sklearn.model_selection import cross_val_score
+from sklearn import datasets, svm
 
 digits = datasets.load_digits()
 X = digits.data
@@ -25,7 +26,7 @@
 scores_std = list()
 for C in C_s:
     svc.C = C
-    this_scores = cross_validation.cross_val_score(svc, X, y, n_jobs=1)
+    this_scores = cross_val_score(svc, X, y, n_jobs=1)
     scores.append(np.mean(this_scores))
     scores_std.append(np.std(this_scores))
 
@@ -20,7 +20,8 @@
 import matplotlib.pyplot as plt
 
 from sklearn.svm import SVC
-from sklearn.cross_validation import StratifiedKFold, permutation_test_score
+from sklearn.model_selection import StratifiedKFold
+from sklearn.model_selection import permutation_test_score
 from sklearn import datasets
 
@@ -39,7 +40,7 @@
 X = np.c_[X, E]
 
 svm = SVC(kernel='linear')
-cv = StratifiedKFold(y, 2)
+cv = StratifiedKFold(2)
 
 score, permutation_scores, pvalue = permutation_test_score(
     svm, X, y, scoring="accuracy", cv=cv, n_permutations=100, n_jobs=1)
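For readers trying the hunk above in isolation, a self-contained sketch under the new namespace (plain iris stands in for the example's own X and y, which it builds with extra noise features):

    from sklearn import datasets
    from sklearn.model_selection import StratifiedKFold, permutation_test_score
    from sklearn.svm import SVC

    iris = datasets.load_iris()
    X, y = iris.data, iris.target

    # StratifiedKFold(2) no longer takes y up front; split() receives it later
    score, permutation_scores, pvalue = permutation_test_score(
        SVC(kernel='linear'), X, y, scoring="accuracy",
        cv=StratifiedKFold(2), n_permutations=100, n_jobs=1)
    print("score = %.3f, p-value = %.4f" % (score, pvalue))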
4 changes: 2 additions & 2 deletions examples/feature_selection/plot_rfe_with_cross_validation.py
@@ -10,7 +10,7 @@
 
 import matplotlib.pyplot as plt
 from sklearn.svm import SVC
-from sklearn.cross_validation import StratifiedKFold
+from sklearn.model_selection import StratifiedKFold
 from sklearn.feature_selection import RFECV
 from sklearn.datasets import make_classification
 
@@ -23,7 +23,7 @@
 svc = SVC(kernel="linear")
 # The "accuracy" scoring is proportional to the number of correct
 # classifications
-rfecv = RFECV(estimator=svc, step=1, cv=StratifiedKFold(y, 2),
+rfecv = RFECV(estimator=svc, step=1, cv=StratifiedKFold(2),
               scoring='accuracy')
 rfecv.fit(X, y)
 
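Because RFECV now receives a data-free splitter, the same cv object can be reused across datasets; a brief standalone sketch of the changed call (the make_classification arguments here are illustrative, not the example's exact setup):

    from sklearn.datasets import make_classification
    from sklearn.feature_selection import RFECV
    from sklearn.model_selection import StratifiedKFold
    from sklearn.svm import SVC

    X, y = make_classification(n_samples=100, n_features=10, random_state=0)
    cv = StratifiedKFold(2)          # y is supplied by RFECV.fit, not here
    rfecv = RFECV(estimator=SVC(kernel="linear"), step=1, cv=cv,
                  scoring='accuracy')
    rfecv.fit(X, y)
    print("Optimal number of features : %d" % rfecv.n_features_)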
2 changes: 1 addition & 1 deletion examples/feature_stacker.py
@@ -20,7 +20,7 @@
 # License: BSD 3 clause
 
 from sklearn.pipeline import Pipeline, FeatureUnion
-from sklearn.grid_search import GridSearchCV
+from sklearn.model_selection import GridSearchCV
 from sklearn.svm import SVC
 from sklearn.datasets import load_iris
 from sklearn.decomposition import PCA
9 changes: 5 additions & 4 deletions examples/gaussian_process/gp_diabetes_dataset.py
@@ -25,7 +25,8 @@
 
 from sklearn import datasets
 from sklearn.gaussian_process import GaussianProcess
-from sklearn.cross_validation import cross_val_score, KFold
+from sklearn.model_selection import cross_val_score
+from sklearn.model_selection import KFold
 
 # Load the dataset from scikit's data sets
 diabetes = datasets.load_diabetes()
@@ -43,9 +44,9 @@
 gp.theta0 = gp.theta_  # Given correlation parameter = MLE
 gp.thetaL, gp.thetaU = None, None  # None bounds deactivate MLE
 
-# Perform a cross-validation estimate of the coefficient of determination using
-# the cross_validation module using all CPUs available on the machine
+# Perform a cross-validated estimate of the coefficient of determination using
+# the model_selection.cross_val_score helper, with all CPUs available on the machine
 K = 20  # folds
-R2 = cross_val_score(gp, X, y=y, cv=KFold(y.size, K), n_jobs=1).mean()
+R2 = cross_val_score(gp, X, y=y, cv=KFold(K), n_jobs=1).mean()
 print("The %d-Folds estimate of the coefficient of determination is R2 = %s"
       % (K, R2))
2 changes: 1 addition & 1 deletion examples/linear_model/plot_sgd_comparison.py
@@ -14,7 +14,7 @@
 import matplotlib.pyplot as plt
 from sklearn import datasets
 
-from sklearn.cross_validation import train_test_split
+from sklearn.model_selection import train_test_split
 from sklearn.linear_model import SGDClassifier, Perceptron
 from sklearn.linear_model import PassiveAggressiveClassifier
 
2 changes: 1 addition & 1 deletion examples/missing_values.py
@@ -28,7 +28,7 @@
 from sklearn.ensemble import RandomForestRegressor
 from sklearn.pipeline import Pipeline
 from sklearn.preprocessing import Imputer
-from sklearn.cross_validation import cross_val_score
+from sklearn.model_selection import cross_val_score
 
 rng = np.random.RandomState(0)
 
6 changes: 3 additions & 3 deletions examples/mixture/plot_gmm_classifier.py
@@ -33,7 +33,7 @@
 import numpy as np
 
 from sklearn import datasets
-from sklearn.cross_validation import StratifiedKFold
+from sklearn.model_selection import StratifiedKFold
 from sklearn.externals.six.moves import xrange
 from sklearn.mixture import GMM
 
@@ -55,9 +55,9 @@ def make_ellipses(gmm, ax):
 
 # Break up the dataset into non-overlapping training (75%) and testing
 # (25%) sets.
-skf = StratifiedKFold(iris.target, n_folds=4)
+skf = StratifiedKFold(n_folds=4)
 # Only take the first fold.
-train_index, test_index = next(iter(skf))
+train_index, test_index = next(iter(skf.split(iris.data, iris.target)))
 
 
 X_train = iris.data[train_index]
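A note on the rewritten line: skf.split() already returns a generator, so next(skf.split(...)) would suffice without iter(). A small standalone sketch of grabbing one stratified fold (mirroring the example; the fold count is positional because this branch spells the keyword n_folds, later renamed n_splits):

    from sklearn import datasets
    from sklearn.model_selection import StratifiedKFold

    iris = datasets.load_iris()
    skf = StratifiedKFold(4)
    # take only the first of the four stratified folds
    train_index, test_index = next(skf.split(iris.data, iris.target))
    X_train, y_train = iris.data[train_index], iris.target[train_index]
    X_test, y_test = iris.data[test_index], iris.target[test_index]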
3 changes: 1 addition & 2 deletions examples/model_selection/README.txt
@@ -3,5 +3,4 @@
 Model Selection
 -----------------------
 
-Examples concerning model selection, mostly contained in the
-:mod:`sklearn.grid_search` and :mod:`sklearn.cross_validation` modules.
+Examples related to the :mod:`sklearn.model_selection` module.
4 changes: 2 additions & 2 deletions examples/model_selection/grid_search_digits.py
@@ -19,8 +19,8 @@
 from __future__ import print_function
 
 from sklearn import datasets
-from sklearn.cross_validation import train_test_split
-from sklearn.grid_search import GridSearchCV
+from sklearn.model_selection import train_test_split
+from sklearn.model_selection import GridSearchCV
 from sklearn.metrics import classification_report
 from sklearn.svm import SVC
 
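GridSearchCV itself is unchanged by this commit; only its import moves to model_selection. A minimal sketch of the relocated class (the hyper-parameter grid here is illustrative, not the example's full grid):

    from sklearn import datasets
    from sklearn.model_selection import GridSearchCV, train_test_split
    from sklearn.svm import SVC

    digits = datasets.load_digits()
    X_train, X_test, y_train, y_test = train_test_split(
        digits.data, digits.target, test_size=0.5, random_state=0)

    param_grid = {'kernel': ['rbf'], 'C': [1, 10], 'gamma': [1e-3, 1e-4]}
    clf = GridSearchCV(SVC(), param_grid)
    clf.fit(X_train, y_train)
    print(clf.best_params_, clf.score(X_test, y_test))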
3 changes: 2 additions & 1 deletion examples/model_selection/grid_search_text_feature_extraction.py
@@ -1,3 +1,4 @@
+
 """
 ==========================================================
 Sample pipeline for text feature extraction and evaluation
@@ -56,7 +57,7 @@
 from sklearn.feature_extraction.text import CountVectorizer
 from sklearn.feature_extraction.text import TfidfTransformer
 from sklearn.linear_model import SGDClassifier
-from sklearn.grid_search import GridSearchCV
+from sklearn.model_selection import GridSearchCV
 from sklearn.pipeline import Pipeline
 
 print(__doc__)
2 changes: 1 addition & 1 deletion examples/model_selection/plot_confusion_matrix.py
@@ -30,7 +30,7 @@
 import matplotlib.pyplot as plt
 
 from sklearn import svm, datasets
-from sklearn.cross_validation import train_test_split
+from sklearn.model_selection import train_test_split
 from sklearn.metrics import confusion_matrix
 
 # import some data to play with
28 changes: 18 additions & 10 deletions examples/model_selection/plot_learning_curve.py
@@ -17,11 +17,11 @@
 
 import numpy as np
 import matplotlib.pyplot as plt
-from sklearn import cross_validation
 from sklearn.naive_bayes import GaussianNB
 from sklearn.svm import SVC
 from sklearn.datasets import load_digits
-from sklearn.learning_curve import learning_curve
+from sklearn.model_selection import learning_curve
+from sklearn.model_selection import ShuffleSplit
 
 
 def plot_learning_curve(estimator, title, X, y, ylim=None, cv=None,
@@ -48,10 +48,20 @@ def plot_learning_curve(estimator, title, X, y, ylim=None, cv=None,
     ylim : tuple, shape (ymin, ymax), optional
         Defines minimum and maximum yvalues plotted.
 
-    cv : integer, cross-validation generator, optional
-        If an integer is passed, it is the number of folds (defaults to 3).
-        Specific cross-validation objects can be passed, see
-        sklearn.cross_validation module for the list of possible objects
+    cv : int, cross-validation generator or an iterable, optional
+        Determines the cross-validation splitting strategy.
+        Possible inputs for cv are:
+          - None, to use the default 3-fold cross-validation,
+          - integer, to specify the number of folds,
+          - an object to be used as a cross-validation generator,
+          - an iterable yielding train/test splits.
+
+        For integer/None inputs, if ``y`` is binary or multiclass,
+        :class:`StratifiedKFold` is used. If the estimator is not a classifier
+        or if ``y`` is neither binary nor multiclass, :class:`KFold` is used.
+
+        Refer to the :ref:`User Guide <cross_validation>` for the various
+        cross-validators that can be used here.
 
     n_jobs : integer, optional
         Number of jobs to run in parallel (default 1).
@@ -91,16 +101,14 @@ def plot_learning_curve(estimator, title, X, y, ylim=None, cv=None,
 title = "Learning Curves (Naive Bayes)"
 # Cross validation with 100 iterations to get smoother mean test and train
 # score curves, each time with 20% data randomly selected as a validation set.
-cv = cross_validation.ShuffleSplit(digits.data.shape[0], n_iter=100,
-                                   test_size=0.2, random_state=0)
+cv = ShuffleSplit(n_iter=100, test_size=0.2, random_state=0)
 
 estimator = GaussianNB()
 plot_learning_curve(estimator, title, X, y, ylim=(0.7, 1.01), cv=cv, n_jobs=4)
 
 title = "Learning Curves (SVM, RBF kernel, $\gamma=0.001$)"
 # SVC is more expensive so we do a lower number of CV iterations:
-cv = cross_validation.ShuffleSplit(digits.data.shape[0], n_iter=10,
-                                   test_size=0.2, random_state=0)
+cv = ShuffleSplit(n_iter=10, test_size=0.2, random_state=0)
 estimator = SVC(gamma=0.001)
 plot_learning_curve(estimator, title, X, y, (0.7, 1.01), cv=cv, n_jobs=4)
 
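The ShuffleSplit calls above drop their dataset-size argument for the same reason as KFold. A hedged sketch of the new call feeding learning_curve directly (the iteration count is passed positionally because this branch spells the keyword n_iter, later renamed n_splits):

    from sklearn.datasets import load_digits
    from sklearn.model_selection import ShuffleSplit, learning_curve
    from sklearn.naive_bayes import GaussianNB

    digits = load_digits()
    X, y = digits.data, digits.target

    # 100 randomized 80/20 splits, no dataset size required up front
    cv = ShuffleSplit(100, test_size=0.2, random_state=0)
    train_sizes, train_scores, test_scores = learning_curve(
        GaussianNB(), X, y, cv=cv, n_jobs=1)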
2 changes: 1 addition & 1 deletion examples/model_selection/plot_precision_recall.py
@@ -78,7 +78,7 @@
 from sklearn import svm, datasets
 from sklearn.metrics import precision_recall_curve
 from sklearn.metrics import average_precision_score
-from sklearn.cross_validation import train_test_split
+from sklearn.model_selection import train_test_split
 from sklearn.preprocessing import label_binarize
 from sklearn.multiclass import OneVsRestClassifier
 
