In [1]:
import warnings
# NOTE(review): this silences *every* warning for the rest of the session,
# including any emitted by the model fits below. Consider narrowing the
# filter to a specific category once the notebook is stable.
warnings.filterwarnings('ignore')

import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
# NOTE(review): the wildcard import is what brings `load_breast_cancer` into
# scope; an explicit `from sklearn.datasets import load_breast_cancer` would
# make that dependency visible.
from sklearn.datasets import *
from sklearn.linear_model import LogisticRegression

In [2]:
# Load the breast-cancer dataset; `data.data` and `data.target` are used
# later as the feature matrix and the labels.
data = load_breast_cancer()
# Bare last expression so the notebook displays the container type.
type(data)

sklearn.utils.Bunch

In [3]:
# Hold out 10% of the samples for testing. The split is shuffled, so a fixed
# random_state is required for the scores and coefficients printed below to be
# reproducible across "Restart & Run All".
X_train, X_test, y_train, y_test = train_test_split(
    data.data, data.target, train_size=.9, random_state=42
)

In [4]:
# Candidate values for C (inverse regularisation strength), from 1e-4 to 1e4.
tuned_parameters = [{'C': [10**-4, 10**-2, 10**0, 10, 10**2, 10**4]}]

# 5-fold cross-validated search over C, ranked by F1. `model` is bound to the
# search object itself, which delegates to the refitted best estimator.
model = GridSearchCV(
    estimator=LogisticRegression(),
    param_grid=tuned_parameters,
    scoring='f1',
    cv=5,
)
model.fit(X_train, y_train)

# Show the winning estimator and its score on the held-out split
# (the search's `score` uses the scorer given via `scoring`).
print(model.best_estimator_)
print(model.score(X_test, y_test))

LogisticRegression(C=10000)
0.9315068493150684


In [5]:
# L1-penalised logistic regression: a larger lambda (i.e. a smaller C) gives a
# sparser solution, meaning fewer non-zero entries in the weight vector W*.

# Fit with C=0.1 and inspect the learned weights.
clf = LogisticRegression(penalty='l1', solver='liblinear', C=0.1).fit(X_train, y_train)
w = clf.coef_
print(w)
# Number of features that kept a non-zero weight.
print(np.count_nonzero(w))

[[ 0.08645624  0.          0.28669311 -0.00249299  0.          0.
   0.          0.          0.          0.          0.          0.
   0.         -0.05483957  0.          0.          0.          0.
   0.          0.          0.76505657 -0.12514444 -0.14590411 -0.02187929
   0.          0.          0.          0.          0.          0.        ]]
8


In [6]:
# L1-penalised fit with C=0.01; print the weights and how many are non-zero.
clf = LogisticRegression(penalty='l1', solver='liblinear', C=0.01).fit(X_train, y_train)
w = clf.coef_
print(w)
print(np.count_nonzero(w))

[[ 0.          0.          0.15270301  0.00643301  0.          0.
   0.          0.          0.          0.          0.          0.
   0.         -0.01791942  0.          0.          0.          0.
   0.          0.          0.          0.          0.         -0.02137624
   0.          0.          0.          0.          0.          0.        ]]
4


In [7]:
# L1-penalised fit with C=0.001; print the weights and how many are non-zero.
clf = LogisticRegression(penalty='l1', solver='liblinear', C=0.001).fit(X_train, y_train)
w = clf.coef_
print(w)
print(np.count_nonzero(w))

[[ 0.          0.          0.06361274  0.00510925  0.          0.
   0.          0.          0.          0.          0.          0.
   0.          0.          0.          0.          0.          0.
   0.          0.          0.          0.          0.         -0.01068095
   0.          0.          0.          0.          0.          0.        ]]
3


In [8]:
# L1-penalised fit with a weak penalty (C=10); print the weights and how many
# are non-zero.
clf = LogisticRegression(penalty='l1', solver='liblinear', C=10).fit(X_train, y_train)
w = clf.coef_
print(w)
print(np.count_nonzero(w))

[[ 2.99834779e+00  1.98645837e-01 -8.75707196e-02 -2.11689371e-02
   0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
   0.00000000e+00  0.00000000e+00  0.00000000e+00  1.68913300e+00
   8.37142599e-01 -2.17718083e-01  0.00000000e+00  0.00000000e+00
   8.69894242e-01  0.00000000e+00  0.00000000e+00  0.00000000e+00
   1.08738659e+00 -4.90755408e-01 -9.41728726e-02 -1.47926985e-02
   0.00000000e+00  0.00000000e+00 -4.22562462e-01 -3.80628813e+01
  -6.83270815e+00  0.00000000e+00]]
15
