#### Logistic regression
- It is a model used to predict the likelihood of a specific class or occurrence

Conditions for using logistic regression:
- If the information is binary
- If you require probabilistic outcomes
- When a linear decision boundary is required

Applications:
- To forecast survival in wounded patient in the medical profession
- To estimate the likelihood of a person suffering from a heart attack
- Predicting the likelihood of a procedure or a product failing
- Predicting a homeowner's possibility of defaulting on a lender

Steps to implementing Logistic regression
- Data preprocessing like filtering...
- Fitting KNN to the training set
- Predict the test results
- Test accuracy of the results





In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix

data = pd.read_csv('Data/user_data.csv')
data.head()

Unnamed: 0,User ID,Gender,Age,EstimatedSalary,Purchased
0,15624510,Male,19,19000,0
1,15810944,Male,35,20000,0
2,15668575,Female,26,43000,0
3,15603246,Female,27,57000,0
4,15804002,Male,19,76000,0


In [3]:
X = data.iloc[:, [2,4]].values # select the 3rd and 5th columns. .values converts the results to Numpy array
y = data.iloc[:, 4].values
X

array([[19,  0],
       [35,  0],
       [26,  0],
       [27,  0],
       [19,  0],
       [27,  0],
       [27,  0],
       [32,  1],
       [25,  0],
       [35,  0],
       [26,  0],
       [26,  0],
       [20,  0],
       [32,  0],
       [18,  0],
       [29,  0],
       [47,  1],
       [45,  1],
       [46,  1],
       [48,  1],
       [45,  1],
       [47,  1],
       [48,  1],
       [45,  1],
       [46,  1],
       [47,  1],
       [49,  1],
       [47,  1],
       [29,  0],
       [31,  0],
       [31,  0],
       [27,  1],
       [21,  0],
       [28,  0],
       [27,  0],
       [35,  0],
       [33,  0],
       [30,  0],
       [26,  0],
       [27,  0],
       [27,  0],
       [33,  0],
       [35,  0],
       [30,  0],
       [28,  0],
       [23,  0],
       [25,  0],
       [27,  0],
       [30,  1],
       [31,  0],
       [24,  0],
       [18,  0],
       [29,  0],
       [35,  0],
       [27,  0],
       [24,  0],
       [23,  0],
       [28,  0],
       [22,  0

In [4]:
y

array([0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1,
       0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0,
       1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0,
       1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1,
       0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1,

In [5]:
# split data into train and test set
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=0)

In [None]:
# Feature scaling
# standardizes (normalizes) the training and test sets using StandardScaler
# This makes all features follow a standard normal distribution (mean 0, standard deviation 1).
from sklearn.preprocessing import StandardScaler
sc_X = StandardScaler()
X_train = sc_X.fit_transform(X_train)
X_test = sc_X.fit_transform(X_test)
X_train

array([[ 0.58164944, -0.76635604],
       [-0.60673761,  1.30487651],
       [-0.01254409, -0.76635604],
       [-0.60673761,  1.30487651],
       [ 1.37390747,  1.30487651],
       [ 1.47293972,  1.30487651],
       [ 0.08648817, -0.76635604],
       [-0.01254409, -0.76635604],
       [-0.21060859, -0.76635604],
       [-0.21060859, -0.76635604],
       [-0.30964085, -0.76635604],
       [-0.30964085, -0.76635604],
       [ 0.38358493,  1.30487651],
       [ 0.8787462 ,  1.30487651],
       [ 2.06713324,  1.30487651],
       [ 1.07681071, -0.76635604],
       [ 0.68068169,  1.30487651],
       [-0.70576986, -0.76635604],
       [ 0.77971394, -0.76635604],
       [ 0.8787462 ,  1.30487651],
       [-1.20093113, -0.76635604],
       [ 2.1661655 ,  1.30487651],
       [-0.01254409, -0.76635604],
       [ 0.18552042,  1.30487651],
       [ 0.38358493, -0.76635604],
       [-0.30964085, -0.76635604],
       [ 0.97777845,  1.30487651],
       [ 0.97777845,  1.30487651],
       [-0.01254409,

In [9]:
X_test

array([[-0.54748976, -0.68599434],
       [ 0.15442019, -0.68599434],
       [-0.10879604, -0.68599434],
       [-0.54748976, -0.68599434],
       [-0.10879604, -0.68599434],
       [-0.81070599, -0.68599434],
       [-0.45975102, -0.68599434],
       [-0.0210573 ,  1.45773797],
       [-1.60035469, -0.68599434],
       [ 0.94406888, -0.68599434],
       [-0.54748976, -0.68599434],
       [-0.72296725, -0.68599434],
       [ 0.06668145, -0.68599434],
       [ 0.24215893, -0.68599434],
       [-1.4248772 , -0.68599434],
       [-0.37201227, -0.68599434],
       [ 0.06668145, -0.68599434],
       [-1.51261594, -0.68599434],
       [ 1.64597884,  1.45773797],
       [-0.10879604, -0.68599434],
       [-0.10879604, -0.68599434],
       [ 0.94406888,  1.45773797],
       [ 0.41763642, -0.68599434],
       [ 0.94406888,  1.45773797],
       [-1.16166097, -0.68599434],
       [ 1.11954637,  1.45773797],
       [-0.72296725, -0.68599434],
       [-0.63522851, -0.68599434],
       [ 0.06668145,