# Feature Selection

In [None]:
%matplotlib inline
%load_ext autoreload
%autoreload 2
import ipywidgets
from ipywidgets import interact, interactive, interact_manual
import IPython
import pylab
pylab.rcParams['figure.figsize'] = (10, 5)

import numpy as np
import matplotlib.pyplot as plt
from utilities.util import gradient_descent
from utilities.load_data import linear_separable_data, circular_separable_data
from utilities import plot_helpers 
from utilities.classifiers import Perceptron, SVM, Logistic
from utilities.regularizers import L1Regularizer, L2Regularizer

In [None]:
# Settings for the synthetic two-class dataset.
num_points = 100  # Number of points per class
noise = 0.2  # Noise Level (needed for data generation).
# NOTE: despite its name, this value is consumed further down as the share of
# samples that goes into the *training* split.
TEST_FRACTION = .80
np.random.seed(42)  # Fixed seed so the generated data is reproducible.
X, Y = linear_separable_data(num_points, noise=noise, dim=2)

# Row indices of the samples labelled +1 and -1 respectively.
positive_rows = np.where(Y == 1)[0]
negative_rows = np.where(Y == -1)[0]

fig = plt.subplot(111)
opt = {'marker': 'ro', 'label': '+', 'size': 8}
plot_helpers.plot_data(X[positive_rows, 0], X[positive_rows, 1], fig=fig, options=opt)
# Axis labels and the legend are requested only on the last call.
opt = {'marker': 'bs', 'label': '-', 'x_label': '$x$', 'y_label': '$y$', 'size': 8, 'legend': True}
plot_helpers.plot_data(X[negative_rows, 0], X[negative_rows, 1], fig=fig, options=opt)


In [None]:
# Separate into train and test sets!
# Shuffle every sample index, then cut the permutation at num_train:
# the head becomes the training split, the tail the test split.
indexes = np.arange(2 * num_points)
np.random.shuffle(indexes)
num_train = int(np.ceil(2 * TEST_FRACTION * num_points))

train_rows, test_rows = indexes[:num_train], indexes[num_train:]
X_train, Y_train = X[train_rows], Y[train_rows]
X_test, Y_test = X[test_rows], Y[test_rows]

# Per-split row indices of each class.
train_pos = np.where(Y_train == 1)[0]
train_neg = np.where(Y_train == -1)[0]
test_pos = np.where(Y_test == 1)[0]
test_neg = np.where(Y_test == -1)[0]

fig = plt.subplot(111)

# Training points use filled markers ...
opt = {'marker': 'ro', 'fillstyle': 'full', 'label': '+ Train', 'size': 8}
plot_helpers.plot_data(X_train[train_pos, 0], X_train[train_pos, 1], fig=fig, options=opt)
opt = {'marker': 'bs', 'fillstyle': 'full', 'label': '- Train', 'size': 8}
plot_helpers.plot_data(X_train[train_neg, 0], X_train[train_neg, 1], fig=fig, options=opt)

# ... test points use hollow markers; the final call also sets labels and legend.
opt = {'marker': 'ro', 'fillstyle': 'none', 'label': '+ Test', 'size': 8}
plot_helpers.plot_data(X_test[test_pos, 0], X_test[test_pos, 1], fig=fig, options=opt)
opt = {'marker': 'bs', 'fillstyle': 'none', 'label': '- Test', 'size': 8,
       'x_label': '$x$', 'y_label': '$y$', 'legend': True}
plot_helpers.plot_data(X_test[test_neg, 0], X_test[test_neg, 1], fig=fig, options=opt)


# L1 Regularization

The L-1 regularization method uses a regularizer of the form $R(w) = ||w||_1$, which is non-differentiable. However, the subgradient exists and is:

$$\partial(||w||_1) = \left\{\begin{array}{cc} \text{sign} (w)& \text{if } w \neq 0 \\ [-1, 1]  & \text{if } w = 0 \end{array} \right. $$

This regularization method penalizes weights and induces sparsity in the solutions. That is, most of the entries of the solution $w^\star$ will be zero. 

In [None]:
num_positive = 50  # Number of points requested for the positive class
num_negative = 50  # Number of points requested for the negative class

noise = 0.3  # Noise Level (needed for data generation).

# NOTE(review): the meaning of `offset` is defined by linear_separable_data;
# presumably it shifts the generated classes -- confirm in utilities.load_data.
X, Y = linear_separable_data(num_positive, num_negative, offset=np.array([1, .2]), noise=noise, dim=2)
# Center the features by subtracting the per-column mean.
X = X - np.mean(X, axis=0)

from sklearn.linear_model import SGDClassifier

def regularization(regularizer, reg):
    """Fit a perceptron-loss SGD classifier on the global (X, Y) and plot it.

    Parameters
    ----------
    regularizer : str
        Penalty name forwarded to ``SGDClassifier(penalty=...)``.
    reg : float
        Base-10 exponent of the regularization strength: ``alpha = 10 ** reg``.
    """
    np.random.seed(42)
    strength = np.power(10., reg)
    classifier = SGDClassifier(loss='perceptron', penalty=regularizer, alpha=strength, random_state=1)
    # Only the first two feature columns are used for training.
    classifier.fit(X[:, :2], Y)

    # Evaluation grid: bounding box of the data padded by `pad`, sampled every `h`.
    pad = 1
    h = .02
    first_col = X[:, 0]
    second_col = X[:, 1]
    x_min, x_max = first_col.min() - pad, first_col.max() + pad
    y_min, y_max = second_col.min() - pad, second_col.max() + pad
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))

    fig = plt.subplot(111)
    contour = plot_helpers.plot_contours(fig, classifier, xx, yy, cmap=plt.cm.jet, alpha=0.3)
    plt.colorbar(contour)

    # Overlay the samples, one call per class; the legend is requested on the last one.
    positive_rows = np.where(Y == 1)[0]
    negative_rows = np.where(Y == -1)[0]
    opt = {'marker': 'r*', 'label': '+'}
    plot_helpers.plot_data(X[positive_rows, 0], X[positive_rows, 1], fig=fig, options=opt)
    opt = {'marker': 'bs', 'label': '-', 'legend': True}
    plot_helpers.plot_data(X[negative_rows, 0], X[negative_rows, 1], fig=fig, options=opt)
    
# Controls for `regularization`: a penalty selector and a slider holding the
# base-10 exponent of the regularization strength.
penalty_selector = ipywidgets.RadioButtons(
    options=['l1', 'l2'],
    value='l1',
    description='Algorithm:',
    style={'description_width': 'initial'},
)
strength_slider = ipywidgets.FloatSlider(
    value=-3,
    min=-3,
    max=0,
    step=0.5,
    description='Regularizer 10^:',
    style={'description_width': 'initial'},
    continuous_update=False,
)
# The trailing semicolon keeps the notebook from echoing the return value.
interact(regularization, regularizer=penalty_selector, reg=strength_slider);

In [None]:
from sklearn import svm
from sklearn import datasets
import sklearn

def laplacian_kernel(X, Y, bw):
    """Return the Laplacian kernel matrix between the rows of X and the rows of Y.

    Entry ``[i, j]`` equals ``exp(-bw * ||X[i] - Y[j]||_1)``.

    Parameters
    ----------
    X : ndarray of shape (n, d)
    Y : ndarray of shape (m, d)
    bw : float
        Multiplier applied to the L1 distance before exponentiation.

    Returns
    -------
    ndarray of shape (n, m)
    """
    gram = np.zeros((X.shape[0], Y.shape[0]))
    # One column per row of Y; looping keeps the temporaries at (n, d) size.
    for j, center in enumerate(Y):
        l1_dist = np.abs(X - center).sum(axis=1)
        gram[:, j] = np.exp(-(bw * l1_dist))
    return gram

# Our dataset and targets
n_samples = 200  # Total number of generated samples (all classes together), used by svm_features
tol = 1e-1  # Tolerance forwarded to svm.SVC(tol=...) in svm_features

def svm_features(dataset, features, reg, bw, deg, noise):
    """Fit a kernel SVM on a synthetic 2-D dataset and plot its decision regions.

    Parameters
    ----------
    dataset : str
        One of 'blobs', 'circles', 'moons', 'xor' or 'periodic'.
    features : str
        Kernel to use: 'poly', 'sigmoid', 'rbf' or 'laplacian'.
    reg : float
        Regularization exponent; the SVM is fitted with ``C = 10 ** (-reg)``.
    bw : float
        Bandwidth exponent; ``gamma = 10 ** (-bw)`` for 'rbf' and 'laplacian',
        ``gamma = 10 ** bw`` for 'sigmoid'. Ignored for 'poly' (gamma is 1).
    deg : int
        Forwarded to ``svm.SVC(degree=...)``.
    noise : float
        Noise level of the generated data (scaled by 10 for 'blobs').

    Raises
    ------
    ValueError
        If `dataset` or `features` is not one of the supported names.
    """
    # Names are compared with `==`: `is` on a string literal tests object
    # identity, which only works by accident of interning.
    if dataset == 'blobs':
        X, Y = datasets.make_blobs(n_samples=n_samples, centers=2, random_state=3, cluster_std=10*noise)
    elif dataset == 'circles':
        X, Y = datasets.make_circles(n_samples=n_samples, factor=.5, noise=noise, random_state=42)
    elif dataset == 'moons':
        X, Y = datasets.make_moons(n_samples=n_samples, noise=noise, random_state=42)
    elif dataset in ('xor', 'periodic'):
        # Four Gaussian clusters of equal size; (center, label) pairs are listed
        # in generation order so the random draws stay reproducible.
        if dataset == 'xor':
            clusters = [((0, 0), 1), ((1, 1), 1), ((0, 1), -1), ((1, 0), -1)]
        else:
            clusters = [((0, 0), 1), ((0, 2), 1), ((0, 1), -1), ((0, 3), -1)]

        np.random.seed(42)
        step = n_samples // 4

        X = np.zeros((n_samples, 2))
        Y = np.zeros(n_samples)
        for k, (center, label) in enumerate(clusters):
            rows = slice(k * step, (k + 1) * step)
            X[rows, :] = np.array(center) + noise * np.random.randn(step, 2)
            Y[rows] = label
    else:
        raise ValueError("Unknown dataset: {!r}".format(dataset))

    # Keep only labels <= 1 and recode label 0 as -1 so targets are in {-1, +1}.
    keep = Y <= 1
    X = X[keep, :]
    Y = Y[keep]
    Y[Y == 0] = -1

    # Add the 1 feature (a constant column appended to the two coordinates).
    X = np.concatenate((X, np.ones((X.shape[0], 1))), axis=1)

    plot_support = True
    kernel = features
    coef0 = 0
    if features == 'poly':
        gamma = 1
    elif features == 'sigmoid':
        gamma = np.power(10., bw)
    elif features in ('rbf', 'laplacian'):
        gamma = np.power(10., -bw)
        if features == 'laplacian':
            # Not a built-in SVC kernel: pass a callable computing the Gram matrix.
            def kernel(A, B):
                return laplacian_kernel(A, B, gamma)
            # NOTE(review): support vectors are not drawn for the callable
            # kernel, presumably because SVC does not expose usable
            # `support_vectors_` in that case -- confirm.
            plot_support = False
    else:
        raise ValueError("Unknown kernel: {!r}".format(features))

    classifier = svm.SVC(kernel=kernel, C=np.power(10., -reg), gamma=gamma, degree=deg, coef0=coef0, tol=tol)
    classifier.fit(X, Y)

    # plot the line, the points, and the nearest vectors to the plane
    plt.figure()
    plt.clf()
    fig = plt.axes()
    positive_rows = np.where(Y == 1)[0]
    negative_rows = np.where(Y == -1)[0]
    opt = {'marker': 'r*', 'label': '+'}
    plot_helpers.plot_data(X[positive_rows, 0], X[positive_rows, 1], fig=fig, options=opt)
    opt = {'marker': 'bs', 'label': '-'}
    plot_helpers.plot_data(X[negative_rows, 0], X[negative_rows, 1], fig=fig, options=opt)

    if plot_support:
        plt.scatter(classifier.support_vectors_[:, 0], classifier.support_vectors_[:, 1], s=80,
                    facecolors='none', edgecolors='k')

    # Plot window: bounding box of the two coordinates padded by 1.
    mins = np.min(X, 0)
    maxs = np.max(X, 0)
    x_min = mins[0] - 1
    x_max = maxs[0] + 1
    y_min = mins[1] - 1
    y_max = maxs[1] + 1

    # 200 x 200 evaluation grid, with the same constant column as the training data.
    XX, YY = np.mgrid[x_min:x_max:200j, y_min:y_max:200j]
    Xtest = np.c_[XX.ravel(), YY.ravel(), np.ones_like(XX.ravel())]
    Z = classifier.decision_function(Xtest)

    # Put the result into a color plot
    Z = Z.reshape(XX.shape)
    plt.contourf(XX, YY, Z > 0, cmap=plt.cm.jet, alpha=0.3)
    # Solid line: decision boundary; dashed lines: decision values of +/-0.99.
    plt.contour(XX, YY, Z, colors=['k', 'k', 'k'], linestyles=['--', '-', '--'], levels=[-.99, 0, .99])

    plt.xlim(x_min, x_max)
    plt.ylim(y_min, y_max)


# Controls for `svm_features`. The sliders are built first, in the order they
# are displayed; the two plain lists are turned into selection widgets by `interact`.
reg_slider = ipywidgets.FloatSlider(
    value=-3,
    min=-3,
    max=3,
    step=0.5,
    readout_format='.1f',
    description='Regularization 10^:',
    style={'description_width': 'initial'},
    continuous_update=False,
)
bw_slider = ipywidgets.FloatSlider(
    value=-1,
    min=-3,
    max=3,
    step=0.1,
    readout_format='.1f',
    description='Bandwidth 10^:',
    style={'description_width': 'initial'},
    continuous_update=False,
)
deg_slider = ipywidgets.IntSlider(
    value=1,
    min=1,
    max=10,
    step=1,
    description='Degree of Poly:',
    style={'description_width': 'initial'},
)
noise_slider = ipywidgets.FloatSlider(
    value=0.05,
    min=0.01,
    max=0.3,
    step=0.01,
    readout_format='.2f',
    description='Noise level:',
    style={'description_width': 'initial'},
    continuous_update=False,
)
# The trailing semicolon keeps the notebook from echoing the return value.
interact(svm_features,
         dataset=['blobs', 'circles', 'moons', 'xor', 'periodic'],
         features=['poly', 'rbf', 'laplacian'],
         reg=reg_slider,
         bw=bw_slider,
         deg=deg_slider,
         noise=noise_slider,
         );