# Regularized logistic regression

Here, we'll explore the effect of L2 regularization.

In [1]:
# # Train and validation errors initialized as empty lists
# train_errs = list()
# valid_errs = list()

# # Loop over values of C_value (C_values is reused below as the plot's x-axis)
# C_values = [0.001, 0.01, 0.1, 1, 10, 100, 1000]
# for C_value in C_values:
#     # Create LogisticRegression object and fit
#     lr = LogisticRegression(C=C_value)
#     lr.fit(X_train, y_train)
    
#     # Evaluate error rates and append to lists
#     train_errs.append( 1.0 - lr.score(X_train, y_train) )
#     valid_errs.append( 1.0 - lr.score(X_valid, y_valid) )
    
# # Plot results
# plt.semilogx(C_values, train_errs, C_values, valid_errs)
# plt.legend(("train", "validation"))
# plt.show()

# Logistic regression and feature selection

We'll perform feature selection on the movie review sentiment data set using L1 regularization. The features and targets are already loaded for you in `X_train` and `y_train`.

In [2]:
# from sklearn.model_selection import GridSearchCV
# from sklearn.linear_model import LogisticRegression
# import numpy as np
# # Specify L1 regularization
# lr = LogisticRegression(solver='liblinear', penalty='l1')

# # Instantiate the GridSearchCV object and run the search
# searcher = GridSearchCV(lr, {'C':[0.001, 0.01, 0.1, 1, 10]})
# searcher.fit(X_train, y_train)

# # Report the best parameters
# print("Best CV params", searcher.best_params_)

# # Find the number of nonzero coefficients (selected features)
# best_lr = searcher.best_estimator_
# coefs = best_lr.coef_
# print("Total number of features:", coefs.size)
# print("Number of selected features:", np.count_nonzero(coefs))

# Identifying the most positive and negative words

We'll try to interpret the coefficients of a logistic regression fit on the movie review sentiment dataset. The model object is already instantiated and fit for you in the variable `lr`.

In [3]:
# # Get the indices of the sorted coefficients
# inds_ascending = np.argsort(lr.coef_.flatten()) 
# inds_descending = inds_ascending[::-1]

# # Print the most positive words
# print("Most positive words: ", end="")
# for i in range(5):
#     print(vocab[inds_descending][i], end=", ")
# print("\n")

# # Print most negative words
# print("Most negative words: ", end="")
# for i in range(5):
#     print(vocab[inds_ascending][i], end=", ")
# print("\n")

# Getting class probabilities

Which of the following transformations would make sense for transforming the raw model output of a linear classifier into a class probability?

<center><img src="images/03.06.png"  style="width: 400px; height: 300px;"/></center>

- 3

# Regularization and probabilities

You will observe the effects of changing the regularization strength on the predicted probabilities.

In [4]:
# # Set the regularization strength
# model = LogisticRegression(C=1)

# # Fit and plot
# model.fit(X,y)
# plot_classifier(X,y,model,proba=True)

# # Predict probabilities on training points
# prob = model.predict_proba(X)
# print("Maximum predicted probability", np.max(prob))

<center><img src="images/03.07.svg"  style="width: 400px; height: 300px;"/></center>


In [5]:
# # Set the regularization strength
# model = LogisticRegression(C=0.1)

# # Fit and plot
# model.fit(X,y)
# plot_classifier(X,y,model,proba=True)

# # Predict probabilities on training points
# prob = model.predict_proba(X)
# print("Maximum predicted probability", np.max(prob))

<center><img src="images/03.08.svg"  style="width: 400px; height: 300px;"/></center>


# Visualizing easy and difficult examples

You'll visualize the examples that the logistic regression model is most and least confident about by looking at the largest and smallest predicted probabilities.

In [6]:
# lr = LogisticRegression()
# lr.fit(X,y)

# # Get predicted probabilities
# proba = lr.predict_proba(X)

# # Sort the example indices by their maximum probability
# proba_inds = np.argsort(np.max(proba,axis=1))

# # Show the most confident (least ambiguous) digit
# show_digit(proba_inds[-1], lr)

# # Show the least confident (most ambiguous) digit
# show_digit(proba_inds[0], lr)

# Counting the coefficients

If you fit a logistic regression model on a classification problem with 3 classes and 100 features, how many coefficients would you have, including intercepts?

- 303 (3 classes × 100 feature coefficients + 3 intercepts)

# Fitting multi-class logistic regression

You'll fit the two types of multi-class logistic regression, one-vs-rest and softmax/multinomial, on the handwritten digits data set and compare the results.

In [7]:
# # Fit one-vs-rest logistic regression classifier
# lr_ovr = LogisticRegression(multi_class='ovr', solver='liblinear', random_state=42)
# lr_ovr.fit(X_train, y_train)

# print("OVR training accuracy:", lr_ovr.score(X_train, y_train))
# print("OVR test accuracy    :", lr_ovr.score(X_test, y_test))

# # Fit softmax classifier
# lr_mn = LogisticRegression(multi_class='multinomial', solver='lbfgs', random_state=42)
# lr_mn.fit(X_train, y_train)

# print("Softmax training accuracy:", lr_mn.score(X_train, y_train))
# print("Softmax test accuracy    :", lr_mn.score(X_test, y_test))

# Visualizing multi-class logistic regression

We'll continue with the two types of multi-class logistic regression, but on a toy 2D data set specifically designed to break the one-vs-rest scheme.

In [8]:
# # Print training accuracies
# print("Softmax     training accuracy:", lr_mn.score(X_train, y_train))
# print("One-vs-rest training accuracy:", lr_ovr.score(X_train, y_train))

# # Create the binary classifier (class 1 vs. rest)
# lr_class_1 = LogisticRegression(multi_class='ovr', solver='liblinear', C=100)
# lr_class_1.fit(X_train, y_train==1)

# # Plot the binary classifier (class 1 vs. rest)
# plot_classifier(X_train, y_train==1, lr_class_1)

# One-vs-rest SVM

As motivation for the next and final chapter on support vector machines, we'll repeat the previous exercise with a non-linear SVM.

In [None]:
# # We'll use SVC instead of LinearSVC from now on
# from sklearn.svm import SVC

# # Create/plot the binary classifier (class 1 vs. rest)
# svm_class_1 = SVC()
# svm_class_1.fit(X_train, y_train==1)
# plot_classifier(X_train, y_train==1, svm_class_1)