#### Reference
http://occam.olin.edu/sites/default/files/DataScienceMaterials/machine_learning_lecture_2/Machine%20Learning%20Lecture%202.html

In [10]:
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import LogisticRegression
## GridSearchCV is imported from sklearn.model_selection: the old
## sklearn.grid_search module was deprecated in scikit-learn 0.18 and removed in 0.20
from sklearn.model_selection import GridSearchCV, train_test_split

In [2]:
## Load the breast cancer dataset provided by sklearn.datasets
data = load_breast_cancer()

In [3]:
## Inspect the container type returned by load_breast_cancer()
type(data)

sklearn.utils.Bunch

In [4]:
## Inspect the type of the array stored under data.data
type(data.data)

numpy.ndarray

In [5]:
## Feature matrix used as model input (Bunch entries are also reachable by key)
X = data['data']
X.shape

(569, 30)

In [6]:
## Target labels, one per row of X (Bunch entries are also reachable by key)
y = data['target']
y.shape

(569,)

In [7]:
## Values to be tried for the hyperparameter 'C' (inverse of the regularization strength)
tuned_params = [{'C': [0.0001, 0.001, 0.01, 0.1, 1, 10, 100, 1000, 10000]}]
## Hold out 30% of the samples for testing. A fixed random_state makes the split
## identical on every run, so the scores computed from it are reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3, random_state = 42)

In [8]:
## Using Grid Search: cross-validate every candidate 'C' on the training split and
## keep the one with the best accuracy (GridSearchCV refits it on all of X_train).
## The solver is pinned to 'liblinear' so the search behaves the same regardless of
## which default solver the installed scikit-learn version uses.
model = GridSearchCV(LogisticRegression(solver = 'liblinear'), tuned_params, scoring = 'accuracy')
model.fit(X_train, y_train)

GridSearchCV(cv=None, error_score='raise',
       estimator=LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
          penalty='l2', random_state=None, solver='liblinear', tol=0.0001,
          verbose=0, warm_start=False),
       fit_params={}, iid=True, n_jobs=1,
       param_grid=[{'C': [0.0001, 0.001, 0.01, 0.1, 1, 10, 100, 1000, 10000]}],
       pre_dispatch='2*n_jobs', refit=True, scoring='accuracy', verbose=0)

In [9]:
## Show the winning estimator, then its accuracy on the held-out test split
best_clf = model.best_estimator_
test_accuracy = model.score(X_test, y_test)
print(best_clf)
print(test_accuracy)

LogisticRegression(C=1, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
          penalty='l2', random_state=None, solver='liblinear', tol=0.0001,
          verbose=0, warm_start=False)
0.959064327485


#### L1 regularization and Sparsity

In [11]:
## L1-penalized logistic regression with C=1. The 'liblinear' solver is requested
## explicitly because it supports penalty='l1'; newer scikit-learn releases default
## to 'lbfgs', which rejects an L1 penalty with a ValueError.
clf = LogisticRegression(C=1, penalty = 'l1', solver = 'liblinear')
clf.fit(X_train, y_train)
w = clf.coef_    ## weight vector
## Number of weights the L1 penalty did not drive to exactly zero
print(np.count_nonzero(w))

11


#### This means that only 11 of the 30 feature weights are non-zero

#### As 'C' decreases, lambda (the regularization strength, inversely proportional to 'C') increases, so sparsity increases as well and the model tends to underfit

In [12]:
## Same L1 model with C=0.1. 'liblinear' is requested explicitly because it supports
## penalty='l1'; the 'lbfgs' default of newer scikit-learn releases does not.
clf = LogisticRegression(C=0.1, penalty = 'l1', solver = 'liblinear')
clf.fit(X_train, y_train)
w = clf.coef_    ## weight vector
## Number of weights the L1 penalty did not drive to exactly zero
print(np.count_nonzero(w))

7


In [13]:
## Same L1 model with C=0.01. 'liblinear' is requested explicitly because it supports
## penalty='l1'; the 'lbfgs' default of newer scikit-learn releases does not.
clf = LogisticRegression(C=0.01, penalty = 'l1', solver = 'liblinear')
clf.fit(X_train, y_train)
w = clf.coef_    ## weight vector
## Number of weights the L1 penalty did not drive to exactly zero
print(np.count_nonzero(w))

4


In [14]:
## Same L1 model with C=0.001. 'liblinear' is requested explicitly because it supports
## penalty='l1'; the 'lbfgs' default of newer scikit-learn releases does not.
clf = LogisticRegression(C=0.001, penalty = 'l1', solver = 'liblinear')
clf.fit(X_train, y_train)
w = clf.coef_    ## weight vector
## Number of weights the L1 penalty did not drive to exactly zero
print(np.count_nonzero(w))

3


#### Conversely, increasing 'C' decreases the sparsity (more weights become non-zero)

In [15]:
## Same L1 model with C=10. 'liblinear' is requested explicitly because it supports
## penalty='l1'; the 'lbfgs' default of newer scikit-learn releases does not.
clf = LogisticRegression(C=10, penalty = 'l1', solver = 'liblinear')
clf.fit(X_train, y_train)
w = clf.coef_    ## weight vector
## Number of weights the L1 penalty did not drive to exactly zero
print(np.count_nonzero(w))

16
