### This notebook contains the algorithm for Logistics Regression.

### 1. Importing libraries

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

### 2. Importing Dataset

In [2]:
from sklearn import datasets
iris = datasets.load_iris()

In [3]:
iris.keys()

dict_keys(['data', 'target', 'frame', 'target_names', 'DESCR', 'feature_names', 'filename'])

### 3. Arranging the dataset into x and y

In [4]:
x = np.array(iris['data'])
y = np.array((iris['target']==2).astype(int))
y = y.reshape((len(y),1))

### 4. Splitting the dataset

In [5]:
from sklearn.model_selection import train_test_split

In [6]:
x_train,x_test,y_train,y_test = train_test_split(x,y,test_size=0.3)

### 5. Function:
1. Hypothesis
2. sigmoid
3. cost
4. gradient descent
5. predict

In [7]:
def hypo(theta,x):
    return np.dot(x,theta)

In [8]:
def sigmoid(z):
    
    s = 1/(1+np.exp(-z))
    return s

In [9]:
def cost(theta,x,y):
    m = len(y)
    z = sigmoid(hypo(theta,x))
    c = - np.dot(y.T,np.log(z)) - np.dot((1-y).T,np.log(1-z))
    return c/m   

In [60]:
def gra(x,y, iteration, alpha):
    m = len(y)
    x = np.c_[np.ones((x.shape[0],1)),x]
    theta = np.random.rand(x.shape[1],1)
        
    for i in range(iteration):
        
        gradient = np.dot(x.T,sigmoid(hypo(theta,x))-y )
        temp = theta - alpha*gradient
        theta = temp
    return theta

In [28]:
def predict_and_score(x,y, theta,threshold):
    x = np.c_[np.ones((x.shape[0],1)),x]
    h = hypo(theta,x)
    s = sigmoid(h)
    y_pred = (s>threshold).astype(int).reshape((len(y),1))
    
    count = 0
    for i in range(len(y_pred)):
        if y_pred[i] == y[i]:
            count+= 1
    score = count/len(y)
    
    return y_pred,score

### 6. Training the dataset

In [107]:
np.random.seed(42)
th= gra(x_train,y_train,100,0.1)
th

array([[-31.08660892],
       [-70.15726896],
       [-59.71108653],
       [ 92.87991761],
       [ 65.62992235]])

### 7.Testing the dataset

In [108]:
y_preds,score = predict_and_score(x_test,y_test,th,0.5)

### 8. Output

In [109]:
score

0.7111111111111111

## Using logistics regression from sklearn

In [84]:
from sklearn.linear_model import LogisticRegression

m = LogisticRegression()
m.fit(x_train,y_train.flatten())
y_p_m = m.predict(x_test)

In [85]:
m.score(x_test,y_test)

0.9777777777777777