In [1]:
import numpy as np
import matplotlib.pyplot as plt
from svm_source import *
from sklearn import svm
from sklearn import datasets
from sklearn.utils import shuffle
from sklearn.preprocessing import StandardScaler
from sklearn.cross_validation import cross_val_score, train_test_split
from sklearn.grid_search import GridSearchCV
scaler = StandardScaler()

%matplotlib notebook
plt.style.use('ggplot')



# SVM on iris dataset

In [2]:
# Regardez le contenu des données: 
# http://scikit-learn.org/stable/auto_examples/datasets/plot_iris_dataset.html
# https://fr.wikipedia.org/wiki/Iris_%28jeu_de_donn%C3%A9es%29

iris = datasets.load_iris()
X = iris.data
X = scaler.fit_transform(X)

y = iris.target  # 3 classes: (Setosa, Versicolour, and Virginica)
X = X[:, :2]  # only sepal width and sepal length
# y = y[y != 0]

clf_linear = svm.SVC(C=1, kernel="linear")
clf_linear.fit(X, y)

X, y = shuffle(X, y)
# split into a training and testing set
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

###############################################################################
# fit the model with linear vs polynomial kernel
###############################################################################

# Les differents Kernel et leurs paramètres
# http://scikit-learn.org/stable/modules/svm.html#svm-kernels

parameters = {'kernel': ['linear'], 'C': list(np.logspace(-3, 3, 5))}
svr = svm.SVC()
clf_linear = GridSearchCV(svr, parameters)
clf_linear.fit(X_train, y_train)
clf_linear.score(X_test, y_test)

print('Generalization score for linear kernel: %s, %s' %
      (clf_linear.score(X_train, y_train),
       clf_linear.score(X_test, y_test)))

Cs = list(np.logspace(-3, 3, 5))
gammas = 10. ** np.arange(1, 2)
degrees = np.r_[2, 3]
parameters = {'kernel': ['poly'], 'C': Cs, 'gamma': gammas, 'degree': degrees}

svr = svm.SVC()
clf_poly = GridSearchCV(svr, parameters)
clf_poly.fit(X_train, y_train)
clf_poly.score(X_test, y_test)
clf_poly.best_params_
print('Generalization score for polynomial kernel: %s, %s' %
      (clf_poly.score(X_train, y_train),
       clf_poly.score(X_test, y_test)))

plt.figure(figsize=(12, 5))
plt.subplot(131)
plot_2d(X, y)
plt.title("iris dataset")

plt.subplot(132)
plot_2d(X, y)
frontiere(clf_linear.predict, X, y)
plt.title("linear kernel")

plt.subplot(133)
plot_2d(X, y)
frontiere(clf_poly.predict, X, y)
plt.title("polynomial kernel")
plt.tight_layout()
plt.show()

Generalization score for linear kernel: 0.8125, 0.815789473684
Generalization score for polynomial kernel: 0.785714285714, 0.657894736842


<IPython.core.display.Javascript object>

In [63]:
###############################################################################
#               SVM GUI
###############################################################################

# please open a terminal and run python2 svm_gui.py
# Then, play with the applet : generate various datasets and observe the
# different classifiers you can obtain by varying the kernel

###############################################################################
#               Face Recognition Task
###############################################################################
"""
The dataset used in this example is a preprocessed excerpt
of the "Labeled Faces in the Wild", aka LFW_:

  http://vis-www.cs.umass.edu/lfw/lfw-funneled.tgz (233MB)

  _LFW: http://vis-www.cs.umass.edu/lfw/

"""

from time import time
import pylab as pl

from sklearn.cross_validation import train_test_split
from sklearn.datasets import fetch_lfw_people


####################################################################
# Download the data (if not already on disk); load it as numpy arrays
lfw_people = fetch_lfw_people(min_faces_per_person=70, resize=0.4,
                              color=True, funneled=False, slice_=None,
                              download_if_missing=True)
# data_home='.'

# introspect the images arrays to find the shapes (for plotting)
images = lfw_people.images / 255.
n_samples, h, w, n_colors = images.shape

# the label to predict is the id of the person
target_names = lfw_people.target_names.tolist()

####################################################################
# Pick a pair to classify such as
names = ['Tony Blair', 'Colin Powell']
# names = ['Donald Rumsfeld', 'Colin Powell']

idx0 = (lfw_people.target == target_names.index(names[0]))
idx1 = (lfw_people.target == target_names.index(names[1]))
images = np.r_[images[idx0], images[idx1]]
n_samples = images.shape[0]
y = np.r_[np.zeros(np.sum(idx0)), np.ones(np.sum(idx1))].astype(np.int)

####################################################################
# Extract features

# features using only illuminations
X = (np.mean(images, axis=3)).reshape(n_samples, -1)

# # or compute features using colors (3 times more features)
# X = images.copy().reshape(n_samples, -1)

# Scale features
X -= np.mean(X, axis=0)
X /= np.std(X, axis=0)

####################################################################
# Split data into a half training and half test set
# X_train, X_test, y_train, y_test, images_train, images_test = \
#    train_test_split(X, y, images, test_size=0.5, random_state=0)
# X_train, X_test, y_train, y_test = \
#    train_test_split(X, y, test_size=0.5, random_state=0)

indices = np.random.permutation(X.shape[0])
train_idx, test_idx = indices[:X.shape[0] / 2], indices[X.shape[0] / 2:]
X_train, X_test = X[train_idx, :], X[test_idx, :]
y_train, y_test = y[train_idx], y[test_idx]
images_train, images_test = images[train_idx, :, :, :], images[test_idx, :, :, :]

In [31]:
####################################################################
# Quantitative evaluation of the model quality on the test set
print "Fitting the classifier to the training set"
t0 = time()

Cs = 10. ** np.arange(-5, 6)
scores = []
for C in Cs:
    clf = svm.SVC(kernel="linear", C=C)
    clf.fit(X_train, y_train)
    scores += [clf.score(X_test, y_test)]
ind = np.argmax(scores)
print "Best C: {}".format(Cs[ind])

plt.figure()
plt.plot(Cs, scores)
plt.xlabel("Parametres de regularisation C")
plt.ylabel("Scores d'apprentissage")
plt.xscale("log")
plt.tight_layout()
plt.show()
print "Best score: {}".format(np.max(scores))

Fitting the classifier to the training set
Best C: 0.01


<IPython.core.display.Javascript object>

Best score: 0.910526315789


In [6]:
print "Predicting the people names on the testing set"
t0 = time()

# predict labels for the X_test images
clf = svm.SVC(kernel="linear", C=Cs[np.argmax(scores)])
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)

print "done in %0.3fs" % (time() - t0)
print "Chance level : %s" % max(np.mean(y), 1. - np.mean(y))
print "Accuracy : %s" % clf.score(X_test, y_test)

Predicting the people names on the testing set
done in 0.777s
Chance level : 0.621052631579
Accuracy : 0.857894736842


In [7]:
####################################################################
# Qualitative evaluation of the predictions using matplotlib

prediction_titles = [title(y_pred[i], y_test[i], names)
                     for i in range(y_pred.shape[0])]

plot_gallery(images_test, prediction_titles)
pl.show()

<IPython.core.display.Javascript object>

In [7]:
def run_svm_cv(X, y):
    
    indices = np.random.permutation(X.shape[0])
    train_idx, test_idx = indices[:X.shape[0] / 2], indices[X.shape[0] / 2:]
    X_train, X_test = X[train_idx, :], X[test_idx, :]
    y_train, y_test = y[train_idx], y[test_idx]

    parameters = {'kernel': ['linear'], 'C': list(np.logspace(-3, 3, 5))}
    svr = svm.SVC()
    clf_linear = GridSearchCV(svr, parameters)
    clf_linear.fit(X_train, y_train)

    print('Generalization score for linear kernel: %s, %s \n' %
          (clf_linear.score(X_train, y_train), clf_linear.score(X_test, y_test)))

In [65]:
print "Score sans variable de nuisance"
run_svm_cv(X, y)

print "Score avec variable de nuisance"
n_features = X.shape[1]
# On rajoute des variables de nuisances
noise = np.random.rand(n_samples, 10000)
X_noisy = np.concatenate((X, noise), axis=1)
shake = np.random.permutation(X.shape[0])
X_noisy = X_noisy[shake]

# something weird (pca.transform return an array of shape (n_samples, min(n_components, n_samples, n_features)))
# y = y[shake]
run_svm_cv(X_noisy, y)


print "Score apres reduction de dimension"
from sklearn.decomposition import PCA

n_components = 8000  # jouer avec ce parametre
pca = PCA(n_components=n_components, whiten=True)
pca.fit(X_noisy)
X_noisy_pca = pca.transform(X_noisy)
run_svm_cv(X_noisy_pca, y)

Score sans variable de nuisance
Generalization score for linear kernel: 1.0, 0.905263157895 

Score avec variable de nuisance
Generalization score for linear kernel: 1.0, 0.505263157895 

Score apres reduction de dimension
Generalization score for linear kernel: 0.636842105263, 0.605263157895 



In [5]:
# Primal/Dual

C = 0.01
y = 2 * y - 1  # Pour avoir des classes 1 et -1

def primal_dual(tol):
    
    clf = svm.SVC(kernel='linear', C=C, tol=tol)
    clf.fit(X, y)
    loss = 1 - y * clf.decision_function(X)
    primal = 0.5 * np.linalg.norm(clf.coef_) ** 2 + C * np.sum(loss[loss > 0])

    # linear Kernel
    K = np.dot(X[clf.support_], X[clf.support_].T)
    dual_coef = clf.dual_coef_.ravel()  # dual_coef_ return a vector of y_i * alpha_i
    dual = np.linalg.norm(dual_coef, ord=1) - 0.5 * np.dot(dual_coef, np.dot(K, dual_coef))

    return primal, dual, primal - dual

tols = 10. ** (-np.arange(3, 16, 2))
dual_gaps = [primal_dual(tol)[2] for tol in tols]

plt.figure()
plt.plot(tols, -np.log10(dual_gaps))
plt.ylabel("duality gaps")
plt.xlabel("tolerances")
plt.show()

# Par forte dualité (les conditions de Slater sont vérifiées), le saut de dualité converge vers 0.
# A l'optimum, les valeurs des objectifs primal et dual coincident.

<IPython.core.display.Javascript object>