EXA Improve example plot_svm_anova.py (#11731)

qinhanmin2014 authored and jnothman committed Jan 28, 2019
1 parent 59a1ef7 commit 1deb95a13e774050a0fb7ff1d896465e2bf937c1
Showing with 16 additions and 20 deletions.
+16 −20 examples/svm/plot_svm_anova.py
@@ -4,37 +4,35 @@
 =================================================
 This example shows how to perform univariate feature selection before running a
-SVC (support vector classifier) to improve the classification scores.
+SVC (support vector classifier) to improve the classification scores. We use
+the iris dataset (4 features) and add 36 non-informative features. We can find
+that our model achieves best performance when we select around 10% of features.
 """
 print(__doc__)
 
 import numpy as np
 import matplotlib.pyplot as plt
-from sklearn.datasets import load_digits
+from sklearn.datasets import load_iris
 from sklearn.feature_selection import SelectPercentile, chi2
 from sklearn.model_selection import cross_val_score
 from sklearn.pipeline import Pipeline
+from sklearn.preprocessing import StandardScaler
 from sklearn.svm import SVC
 
 
 # #############################################################################
 # Import some data to play with
-X, y = load_digits(return_X_y=True)
-# Throw away data, to be in the curse of dimension settings
-X = X[:200]
-y = y[:200]
-n_samples = len(y)
-X = X.reshape((n_samples, -1))
-
-# add 200 non-informative features
-X = np.hstack((X, 2 * np.random.random((n_samples, 200))))
+X, y = load_iris(return_X_y=True)
+# Add non-informative features
+np.random.seed(0)
+X = np.hstack((X, 2 * np.random.random((X.shape[0], 36))))
 
 # #############################################################################
-# Create a feature-selection transform and an instance of SVM that we
+# Create a feature-selection transform, a scaler and an instance of SVM that we
 # combine together to have an full-blown estimator
 
-transform = SelectPercentile(chi2)
-
-clf = Pipeline([('anova', transform), ('svc', SVC(gamma="auto"))])
+clf = Pipeline([('anova', SelectPercentile(chi2)),
+                ('scaler', StandardScaler()),
+                ('svc', SVC(gamma="auto"))])
 
 # #############################################################################
 # Plot the cross-validation score as a function of percentile of features
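
The first hunk replaces the truncated digits data with iris plus 36 noise features and adds a StandardScaler step between selection and the classifier. Below is a minimal sketch of the resulting pipeline, using only names that appear in the diff; note that chi2 requires non-negative features, which holds here because selection runs before scaling.

from sklearn.feature_selection import SelectPercentile, chi2
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

# Selection first (chi2 needs non-negative inputs), then scaling, then the SVC.
clf = Pipeline([('anova', SelectPercentile(chi2)),
                ('scaler', StandardScaler()),
                ('svc', SVC(gamma="auto"))])

# Pipeline step names prefix their parameters, so the sweep in the next hunk
# can reach the selection percentile as 'anova__percentile'.
clf.set_params(anova__percentile=10)
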
@@ -44,17 +42,15 @@
 
 for percentile in percentiles:
     clf.set_params(anova__percentile=percentile)
-    # Compute cross-validation score using 1 CPU
-    this_scores = cross_val_score(clf, X, y, cv=5, n_jobs=1)
+    this_scores = cross_val_score(clf, X, y, cv=5)
     score_means.append(this_scores.mean())
     score_stds.append(this_scores.std())
 
 plt.errorbar(percentiles, score_means, np.array(score_stds))
-
 plt.title(
     'Performance of the SVM-Anova varying the percentile of features selected')
+plt.xticks(np.linspace(0, 100, 11, endpoint=True))
 plt.xlabel('Percentile')
-plt.ylabel('Prediction rate')
-
+plt.ylabel('Accuracy Score')
 plt.axis('tight')
 plt.show()
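
Assembled from both hunks, the updated example runs end to end; the sketch below reproduces it under one assumption: the percentile grid is defined in unchanged lines outside this diff, so the tuple used here is illustrative. Dropping n_jobs=1 from cross_val_score is cosmetic, since a single worker is already the default.

import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.feature_selection import SelectPercentile, chi2
from sklearn.model_selection import cross_val_score
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

# Iris has 4 informative features; 36 noise features are appended, as in the diff.
X, y = load_iris(return_X_y=True)
np.random.seed(0)
X = np.hstack((X, 2 * np.random.random((X.shape[0], 36))))

clf = Pipeline([('anova', SelectPercentile(chi2)),
                ('scaler', StandardScaler()),
                ('svc', SVC(gamma="auto"))])

score_means, score_stds = [], []
# Illustrative percentile grid; the actual values sit in unchanged lines
# outside this diff.
percentiles = (1, 3, 6, 10, 15, 20, 30, 40, 60, 80, 100)

for percentile in percentiles:
    clf.set_params(anova__percentile=percentile)
    # Five-fold cross-validation; n_jobs defaults to a single worker.
    this_scores = cross_val_score(clf, X, y, cv=5)
    score_means.append(this_scores.mean())
    score_stds.append(this_scores.std())

plt.errorbar(percentiles, score_means, np.array(score_stds))
plt.title(
    'Performance of the SVM-Anova varying the percentile of features selected')
plt.xticks(np.linspace(0, 100, 11, endpoint=True))
plt.xlabel('Percentile')
plt.ylabel('Accuracy Score')
plt.axis('tight')
plt.show()
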
