16.1 Training a Binary Classifier

In [1]:
# Load libraries
from sklearn.linear_model import LogisticRegression
from sklearn import datasets
from sklearn.preprocessing import StandardScaler

# Load iris and keep only the first 100 observations (two classes)
iris = datasets.load_iris()
features, target = iris.data[:100], iris.target[:100]

# Rescale the features before fitting
scaler = StandardScaler()
features_standardized = scaler.fit_transform(features)

# Build the logistic regression estimator with a fixed seed
logistic_regression = LogisticRegression(random_state=0)

# Fit the estimator on the standardized features
model = logistic_regression.fit(features_standardized, target)

16.2 Training a Multiclass Classifier

In [2]:
# Load libraries
from sklearn.linear_model import LogisticRegression
from sklearn.multiclass import OneVsRestClassifier
from sklearn import datasets
from sklearn.preprocessing import StandardScaler

# Load data
iris = datasets.load_iris()
features = iris.data
target = iris.target

# Standardize features
scaler = StandardScaler()
features_standardized = scaler.fit_transform(features)

# Create one-vs-rest logistic regression object.
# LogisticRegression's multi_class="ovr" argument is deprecated (scikit-learn
# 1.5+) and scheduled for removal, so the one-vs-rest strategy is expressed
# with the OneVsRestClassifier wrapper instead: it trains one binary
# LogisticRegression per class. The fitted per-class estimators are exposed
# through the wrapper's `estimators_` attribute.
logistic_regression = OneVsRestClassifier(LogisticRegression(random_state=0))

# Train model
model = logistic_regression.fit(features_standardized, target)

16.3 Reducing Variance Through Regularization

In [3]:
# Load libraries
from sklearn.linear_model import LogisticRegressionCV
from sklearn import datasets
from sklearn.preprocessing import StandardScaler

# Load the iris features and class labels
iris = datasets.load_iris()
features, target = iris.data, iris.target

# Rescale the features before fitting
scaler = StandardScaler()
features_standardized = scaler.fit_transform(features)

# Create a cross-validated logistic regression object that tunes C
logistic_regression = LogisticRegressionCV(
    penalty='l2',    # L2 regularization
    Cs=10,           # integer: number of candidate C values to try
    random_state=0,
    n_jobs=-1,
)

# Fit the estimator on the standardized features
model = logistic_regression.fit(features_standardized, target)

In [4]:
# Regularization is a method of penalizing complex models to reduce their variance. Specifically, a penalty term is 
# added to the loss function we are trying to minimize, typically the L1 and L2 penalties. In the L1 penalty:

# $\alpha \sum_{j=1}^{p} \left| \hat{\beta}_j \right|$
# where $\hat{\beta}_j$ is the parameter of the jth of p features being learned and α is a hyperparameter denoting the
# regularization strength. With the L2 penalty:

# $\alpha \sum_{j=1}^{p} \hat{\beta}_j^{2}$
# Higher values of α increase the penalty for larger parameter values (i.e., more complex models). scikit-learn 
# follows the common method of using C instead of α where C is the inverse of the regularization strength: $C = \frac{1}{\alpha}$.
#     To reduce variance while using logistic regression, we can treat C as a hyperparameter to be tuned to find the 
#     value of C that creates the best model. In scikit-learn we can use the LogisticRegressionCV class to efficiently 
#     tune C. LogisticRegressionCV’s parameter, Cs, can either accept a range of values for C to search over 
#     (if a list of floats is supplied as an argument) or if supplied an integer, will generate a list of that 
#     many candidate values drawn from a logarithmic scale between 1e-4 and 1e4 (0.0001 and 10,000).

16.4 Training a Classifier on Very Large Data

In [5]:
# Load libraries
from sklearn.linear_model import LogisticRegression
from sklearn import datasets
from sklearn.preprocessing import StandardScaler

# Load the iris features and class labels
iris = datasets.load_iris()
features, target = iris.data, iris.target

# Rescale the features before fitting
scaler = StandardScaler()
features_standardized = scaler.fit_transform(features)

# Build a logistic regression estimator that uses the "sag" solver
logistic_regression = LogisticRegression(
    random_state=0,
    solver="sag",
)

# Fit the estimator on the standardized features
model = logistic_regression.fit(features_standardized, target)

16.5 Handling Imbalanced Classes

In [6]:
# Load libraries
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn import datasets
from sklearn.preprocessing import StandardScaler

# Load the iris data set
iris = datasets.load_iris()

# Drop the first 40 observations so the classes become highly imbalanced
features = iris.data[40:]
target = iris.target[40:]

# Collapse the labels to binary: 0 stays 0, every other class becomes 1
target = np.where(target == 0, 0, 1)

# Rescale the features before fitting
scaler = StandardScaler()
features_standardized = scaler.fit_transform(features)

# Create a logistic regression object with balanced class weights
logistic_regression = LogisticRegression(
    random_state=0,
    class_weight="balanced",
)

# Fit the estimator on the standardized features
model = logistic_regression.fit(features_standardized, target)