# LOGISTIC REGRESSION

Logistic regression is a supervised machine learning algorithm used for classification. It predicts the probability that an observation belongs to a certain class using the sigmoid function. In its basic form it is used for binary classification.

In [1]:
# Logistic Regression
from sklearn.linear_model import LogisticRegression
from sklearn import datasets
from sklearn.preprocessing import StandardScaler
import numpy as np 

# Load iris and keep only the first 100 samples and their labels.
iris = datasets.load_iris()
X, y = iris.data[:100], iris.target[:100]

# Standardize the features before fitting.
scaler = StandardScaler()
X_std = scaler.fit_transform(X)

# Fit a logistic regression classifier on the standardized features.
lr = LogisticRegression(random_state=0, solver='lbfgs')
model = lr.fit(X_std, y)

# NOTE(review): the observation is fed to the model without going through
# `scaler`; presumably it is already expressed in standardized units - confirm.
observation = [[0.5, 0.5, 0.5, 0.5]]
predicted_class = model.predict(observation)
class_probabilities = model.predict_proba(observation)

# Report the predicted label together with the largest class probability.
print("The new observation class is", predicted_class,
      "with probibility", class_probabilities.max())

The new observation class is [1] with probibility 0.8226157648578827


# Multi-class classification
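To apply logistic regression to problems with more than two classes, a one-vs-rest ("ovr") strategy can be used: one binary classifier is trained per class, and the class whose classifier gives the highest score is predicted.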

In [2]:
# One versus Rest Logistic Regression == multi class classifier

from sklearn.linear_model import LogisticRegression
from sklearn.multiclass import OneVsRestClassifier
from sklearn import datasets
from sklearn.preprocessing import StandardScaler
import numpy as np 

# Use the full iris data set (all classes).
iris = datasets.load_iris()
X = iris.data
y = iris.target

# Standardize the features before fitting.
scaler = StandardScaler()
X_std = scaler.fit_transform(X)

# `LogisticRegression(multi_class="ovr")` is deprecated since scikit-learn 1.5
# and scheduled for removal. Wrapping a binary LogisticRegression in
# OneVsRestClassifier is the documented replacement: it fits one binary
# classifier per class and normalizes the per-class probabilities.
lr = OneVsRestClassifier(LogisticRegression(solver='lbfgs', random_state=0))

model = lr.fit(X_std, y)

# NOTE(review): the observation is fed to the model without going through
# `scaler`; presumably it is already expressed in standardized units - confirm.
observation = [[0.5, 0.5, 0.5, 0.5]]

# Report the predicted label together with the largest class probability.
print("The new observation class is", model.predict(observation), 
      "with probibility", np.max(model.predict_proba(observation)))

The new observation class is [2] with probibility 0.5545472259424323


# Regularization 
Regularization can be used to reduce the variance of the model. There are two common ways of regularizing logistic regression: the L1 and L2 penalties.

In [4]:
# regularized Logistic Regression 

from sklearn.linear_model import LogisticRegressionCV
from sklearn import datasets
from sklearn.preprocessing import StandardScaler
import numpy as np 

# Use the full iris data set (all classes).
iris = datasets.load_iris()
X = iris.data
y = iris.target

# Standardize the features before fitting.
scaler = StandardScaler()
X_std = scaler.fit_transform(X)

# L2-penalized logistic regression; Cs=10 candidate regularization strengths
# are compared with 5-fold cross-validation, using all available cores.
# `multi_class` is intentionally not passed: the parameter is deprecated since
# scikit-learn 1.5, and 'auto' has been the default since 0.22, so leaving it
# out keeps the same behavior without the deprecation warning.
lr = LogisticRegressionCV(penalty='l2', Cs=10, cv=5, random_state=0, n_jobs=-1)

model = lr.fit(X_std, y)

# NOTE(review): the observation is fed to the model without going through
# `scaler`; presumably it is already expressed in standardized units - confirm.
observation = [[0.5, 0.5, 0.5, 0.5]]

# Report the predicted label together with the largest class probability.
print("The new observation class is", model.predict(observation), 
      "with probibility", np.max(model.predict_proba(observation)))

The new observation class is [1] with probibility 0.9701403201280634


In [40]:
# Logistic Regression for large data using Stochastic Average Gradient SAG

from sklearn.linear_model import LogisticRegression
from sklearn.multiclass import OneVsRestClassifier
from sklearn import datasets
from sklearn.preprocessing import StandardScaler
import numpy as np 

# Use the full iris data set (all classes).
iris = datasets.load_iris()
X = iris.data
y = iris.target

# Standardize the features before fitting; the scikit-learn docs note that
# SAG's fast convergence is only guaranteed on features with similar scale.
scaler = StandardScaler()
X_std = scaler.fit_transform(X)

# `LogisticRegression(multi_class="ovr")` is deprecated since scikit-learn 1.5
# and scheduled for removal. Wrapping a binary SAG-solved LogisticRegression in
# OneVsRestClassifier is the documented replacement and fits one binary
# classifier per class (results agree up to solver tolerance).
lr = OneVsRestClassifier(LogisticRegression(solver='sag', random_state=0))

model = lr.fit(X_std, y)

# NOTE(review): the observation is fed to the model without going through
# `scaler`; presumably it is already expressed in standardized units - confirm.
observation = [[0.5, 0.5, 0.5, 0.5]]

# Report the predicted label together with the largest class probability.
print("The new observation class is", model.predict(observation), 
      "with probibility", np.max(model.predict_proba(observation)))

The new observation class is [2] with probibility 0.5545717001308477


In [43]:
# Imbalanced Classes

from sklearn.linear_model import LogisticRegression
from sklearn import datasets
from sklearn.preprocessing import StandardScaler
import numpy as np 

iris = datasets.load_iris()

# Make the data imbalanced by discarding the first 40 samples.
X = iris.data[40:]
y = iris.target[40:]

# Collapse the labels to binary: class 0 stays 0, every other class becomes 1.
y = np.where(y != 0, 1, 0)

# Standardize the features before fitting.
scaler = StandardScaler()
X_std = scaler.fit_transform(X)

# class_weight="balanced" asks scikit-learn to reweight the classes to
# compensate for the imbalance created above.
lr = LogisticRegression(class_weight="balanced", random_state=0, solver='lbfgs')
model = lr.fit(X_std, y)

# NOTE(review): the observation is fed to the model without going through
# `scaler`; presumably it is already expressed in standardized units - confirm.
observation = [[0.5, 0.5, 0.5, 0.5]]
predicted_class = model.predict(observation)
class_probabilities = model.predict_proba(observation)

# Report the predicted label together with the largest class probability.
print("The new observation class is", predicted_class,
      "with probibility", class_probabilities.max())

The new observation class is [1] with probibility 0.9983927886292678
