# Logistic Regression

Logistic Regression is a statistical method for predicting binary outcomes from data.

Examples of this are "yes" vs "no" or "high credit risk" vs "low credit risk".

These categories translate to the probability of an outcome being a 0 or a 1.

We can build a logistic regression model by applying an activation function (the sigmoid function) as the final step of our linear model.

This converts the linear regression output to a probability.



In [None]:
import matplotlib.pyplot as plt
import pandas as pd

# Generate some data

In [None]:
from sklearn.datasets import make_blobs

# Draw a synthetic dataset made of two clusters; the fixed seed keeps the
# sample identical from run to run.
X, y = make_blobs(random_state=42, centers=2)

# Preview the first ten labels followed by the first ten feature rows.
print(f"Labels: {y[:10]}", f"Data: {X[:10]}", sep="\n")

In [None]:
# Plot the first feature column against the second, colouring each point by
# its class label so both classes are visible.
plt.scatter(x=X[:, 0], y=X[:, 1], c=y)

# Split our data into training and testing

In [None]:
from sklearn.model_selection import train_test_split

# Hold out part of the data for testing. Stratifying on y keeps the class
# proportions the same in both partitions; the fixed seed makes the split
# repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, stratify=y, random_state=1
)

# Create a Logistic Regression Model

In [None]:
from sklearn.linear_model import LogisticRegression

# Build an unfitted logistic-regression estimator that uses the L-BFGS solver.
classifier = LogisticRegression(random_state=1, solver='lbfgs')

# Left as the cell's last expression so the notebook displays the estimator.
classifier

# Fit (train) our model using the training data

In [None]:
# Train the model: fit the classifier to the training features and labels.
classifier.fit(X=X_train, y=y_train)

# Make predictions

In [None]:
# Predict a class label for every sample in the held-out test set.
predictions = classifier.predict(X_test)

# Show predicted and true labels side by side for a quick visual comparison.
pd.DataFrame(data={"Prediction": predictions, "Actual": y_test})

# Validate the model using the test data

In [None]:
from sklearn.metrics import accuracy_score

# Score the test-set predictions against the true test labels.
accuracy_score(y_true=y_test, y_pred=predictions)

In [None]:
import numpy as np

# A single unseen observation with two features, stored as a 1x2 array.
new_data = np.array([[-2, 6]])

# Redraw the labelled samples, then overlay the new point as a large red
# circle so its position relative to both clusters is visible.
plt.scatter(x=X[:, 0], y=X[:, 1], c=y)
plt.scatter(x=new_data[0, 0], y=new_data[0, 1], s=100, c="r", marker="o")
plt.show()

In [None]:
# Classify the new data point (purple or yellow cluster).
# Note: this rebinds `predictions`, which previously held the test-set output.
predictions = classifier.predict(new_data)
print(
    "Classes are either 0 (purple) or 1 (yellow)",
    f"The new point was classified as: {predictions}",
    sep="\n",
)

In [None]:
# Script from 17.3.1
# Practice Logistic Regression
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.datasets import make_blobs

# Generate a reproducible two-cluster dataset (features in X, labels in y).
X, y = make_blobs(random_state=42, centers=2)

# Preview the first ten labels followed by the first ten feature rows.
print(f"Labels: {y[:10]}", f"Data: {X[:10]}", sep="\n")

In [None]:
# Scatter the two feature columns, coloured by class label.
plt.scatter(x=X[:, 0], y=X[:, 1], c=y)

In [None]:
# Split the dataset into train and test sets.
from sklearn.model_selection import train_test_split

# stratify=y preserves the class proportions in both partitions;
# random_state=1 makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    stratify=y,
    random_state=1,
)

In [None]:
# Instantiate a logistic regression model (not yet trained).
from sklearn.linear_model import LogisticRegression

classifier = LogisticRegression(random_state=1, solver='lbfgs')

# Last expression of the cell: the notebook echoes the estimator's settings.
classifier

In [None]:
# Reference copy of the estimator settings echoed by the previous cell, with
# every hyperparameter spelled out explicitly. The instance built here is not
# assigned to anything; `classifier` from the previous cell is the model that
# gets trained.
#
# The penalty is 'l2' (lowercase letter L followed by 2). It was previously
# mistyped as '12', which is not a valid penalty name.
#
# NOTE(review): multi_class='warn' looks like the placeholder default printed
# by an older scikit-learn release; newer releases may not accept it --
# confirm against the installed version.
LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
   intercept_scaling=1, l1_ratio=None, max_iter=100,
   multi_class='warn', n_jobs=None, penalty='l2',
   random_state=1, solver='lbfgs', tol=0.0001, verbose=0,
   warm_start=False)

In [None]:
# Train the logistic regression model on the training features and labels.
classifier.fit(X=X_train, y=y_train)

In [None]:
# Validate the logistic regression model: predict labels for the test set...
predictions = classifier.predict(X_test)

# ...and tabulate them next to the true labels for inspection.
pd.DataFrame(data={"Prediction": predictions, "Actual": y_test})

In [None]:
from sklearn.metrics import accuracy_score

# Compare the predicted test labels with the true test labels.
accuracy_score(y_true=y_test, y_pred=predictions)

In [None]:
import numpy as np

# New, unlabelled observation: one row with two feature values.
new_data = np.array([[-2, 6]])

# Plot the existing samples coloured by label, then mark the new point with a
# large red circle.
plt.scatter(x=X[:, 0], y=X[:, 1], c=y)
plt.scatter(x=new_data[0, 0], y=new_data[0, 1], s=100, c="r", marker="o")
plt.show()

In [None]:
# Ask the trained classifier which class the new point belongs to, then
# report the result. `predictions` is rebound to the new point's label here.
predictions = classifier.predict(new_data)
print(
    "Classes are either 0 (purple) or 1 (yellow)",
    f"The new point was classified as: {predictions}",
    sep="\n",
)

In [None]:
# Let's summarize the steps we took to use a logistic regression model:

# Create a model with LogisticRegression().
# Train the model with model.fit().
# Make predictions with model.predict().
# Validate the model with accuracy_score().