 # Logistic Regression

 Logistic Regression is a statistical method for predicting binary outcomes from data.

 Examples of this are "yes" vs "no" or "high credit risk" vs "low credit risk".

 These categories translate to the probability of the outcome being a 0 or a 1.

 We can calculate logistic regression by adding an activation function as the final step to our linear model.

 This converts the linear regression output to a probability.

In [1]:
#import the dependencies
import pandas as pd
from path import Path
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import LabelEncoder
import matplotlib.pyplot as plt

 # Load the data

In [3]:
# Read the CSV, restricting the frame to the columns listed in usecols.
file_path = Path("Resources/beauty_habits_clean.csv")
df = pd.read_csv(
    file_path,
    usecols=[
        "id",
        "brand",
        "name",
        "category",
        "rating",
        "number_of_reviews",
        "love",
        "price",
        "online_only",
        "exclusive",
        "limited_edition",
        "good_rating",
    ],
)
# Show the loaded frame as the cell output.
df

FileNotFoundError: [Errno 2] No such file or directory: 'Resources/beauty_habits_clean.csv'

In [None]:
# Replace the "good_rating" column with integer class codes so it can be used
# as a numeric target (intended mapping per the original note: True == 1).
le = LabelEncoder()
le.fit(df["good_rating"])
df["good_rating"] = le.transform(df["good_rating"])

In [None]:
# Start X as an independent copy of df, so reshaping X leaves df untouched.
X = df.copy(deep=True)
X

In [None]:
# Build the feature set by removing the identifier/text columns, the raw
# "rating" column, and the target column "good_rating".
X = X.drop(columns=["id", "category", "brand", "name", "rating", "good_rating"])
# Preview the first rows of the remaining features.
X.head()

In [None]:
# Define the target set as a 1-D NumPy array of the encoded "good_rating" values.
# Series.ravel() is deprecated in recent pandas releases; to_numpy() yields the
# same array of values without the deprecation warning.
y = df["good_rating"].to_numpy()
# Preview the first five labels.
y[:5]

In [None]:
# Visualizing both classes
# y is a 1-D array of labels, so it cannot be indexed as y[:, 0] / y[:, 1];
# plot two of the feature columns instead and use y only to colour the points.
plt.scatter(X["price"], X["number_of_reviews"], c=y)
plt.xlabel("price")
plt.ylabel("number_of_reviews")

 # Split our data into training and testing

In [None]:
from sklearn.model_selection import train_test_split

# Hold out a test set. Stratifying on y keeps the class proportions the same in
# both splits, and the fixed random_state makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, stratify=y, random_state=1
)

 # Create a Logistic Regression Model

In [None]:
from sklearn.linear_model import LogisticRegression

# Configure the (not yet fitted) logistic regression model with the lbfgs
# solver and a fixed seed, then show it as the cell output.
classifier = LogisticRegression(
    random_state=1,
    solver="lbfgs",
)
classifier

In [None]:
# LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
#    intercept_scaling=1, l1_ratio=None, max_iter=100,
#    multi_class='warn', n_jobs=None, penalty='l2',
#    random_state=1, solver='lbfgs', tol=0.0001, verbose=0,
#    warm_start=False)


 # Fit (train) our model using the training data

In [None]:
# Fit the classifier to the training features and their labels.
classifier.fit(X=X_train, y=y_train)

 # Make predictions

In [None]:
# Predict a label for every row of the test set, then show the predictions
# side by side with the true labels.
predictions = classifier.predict(X=X_test)
pd.DataFrame(dict(Prediction=predictions, Actual=y_test))

# Validate the model using the test data

In [None]:
from sklearn.metrics import accuracy_score

# Score the test-set predictions against the true test labels.
accuracy_score(y_true=y_test, y_pred=predictions)

In [None]:
# Generate a new data point (the red circle)
import numpy as np

# The classifier is trained on every column of X, so the new point needs one
# value per feature (a two-value array cannot be passed to predict). Each
# feature's median is used here as a representative value.
new_data = pd.DataFrame(
    np.atleast_2d(X.astype(float).median().to_numpy()),
    columns=X.columns,
)

# X is a DataFrame, so columns are selected by label; X[:, 0] is not valid.
plt.scatter(X["price"], X["number_of_reviews"], c=y)
plt.scatter(
    new_data["price"],
    new_data["number_of_reviews"],
    c="r",
    marker="o",
    s=100,
)
plt.xlabel("price")
plt.ylabel("number_of_reviews")
plt.show()

In [None]:
# Classify the new data point (purple or yellow). Note that this rebinds
# `predictions` to the result for the single new point.
predictions = classifier.predict(X=new_data)
print(
    "Classes are either 0 (purple) or 1 (yellow)",
    f"The new point was classified as: {predictions}",
    sep="\n",
)