# Logistic Regresion (Lecture: 03)

__Author__: Pablo César Rodríguez Aguayo

__Subject__: Machine Learning

__Date__: 12/03/2018

In [1]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn import datasets
from sklearn.metrics import mean_squared_error
from sklearn import linear_model

%matplotlib inline

 By doing this, we will be able to study the effect of each feature on the dependent variable 

In [2]:
data = pd.read_csv('heart.csv')
data.head()

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,63,1,3,145,233,1,0,150,0,2.3,0,0,1,1
1,37,1,2,130,250,0,1,187,0,3.5,0,0,2,1
2,41,0,1,130,204,0,0,172,0,1.4,2,0,2,1
3,56,1,1,120,236,0,1,178,0,0.8,2,0,2,1
4,57,0,0,120,354,0,1,163,1,0.6,2,0,2,1


In [3]:
data.tail()

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
298,57,0,0,140,241,0,1,123,1,0.2,1,0,3,0
299,45,1,3,110,264,0,1,132,0,1.2,1,0,3,0
300,68,1,0,144,193,1,1,141,0,3.4,1,2,3,0
301,57,1,0,130,131,0,1,115,1,1.2,1,1,3,0
302,57,0,1,130,236,0,0,174,0,0.0,1,1,2,0


In [4]:
normalize = lambda x: (x-x.mean())/x.std()

In [5]:
train, test = train_test_split(data, test_size=0.3)
m, n = train.shape
error_train = np.zeros(n)

nx_train = train.iloc[:,0:-1].values
ny_train = train.iloc[:,-1].values
# ---- 
nx_test = test.iloc[:,:-1].values
ny_test = test.iloc[:,-1].values

ny_train = ny_train.reshape(-1,1)
ny_test = ny_test.reshape(-1,1)

nx_train = normalize(nx_train)
nx_test = normalize(nx_test)

nx_train = np.append(np.ones(shape=(nx_train.shape[0],1)),nx_train,axis = 1)
nx_test = np.append(np.ones(shape=(nx_test.shape[0],1)),nx_test,axis = 1)

## Data Visualization

In [66]:
#infected = data.iloc[ny_train == 1]
#not_infected = data.iloc[ny_train == 0]

# plots
#plt.scatter(infected.iloc[:, 0], infected.iloc[:, 1], s=10, label='Infected')
#plt.scatter(not_infected.iloc[:, 0], not_infected.iloc[:, 1], s=10, label='Not infected')
#plt.legend()
#plt.show()

## Sigmoid

In [52]:
sigmoid = lambda z: 1 / (1 + np.exp(-z))

## Hyphotesis

In [48]:
def calculate_hyphotesis(x, theta):
    return sigmoid(np.dot(x, theta))

## Cost Function

In [53]:
def cost_function(theta, x, y):
    theta = theta.reshape(n,1)
    infunc = -y*(np.log(calculate_hyphotesis(x,theta))) - (1-y)*(np.log(1 - calculate_hyphotesis(x,theta)))
    return np.sum(infunc)/m

## Gradient Descent

In [54]:
def compute_gradient(theta,x,y):
    theta = theta.reshape(n,1)
    infunc2 = calculate_hyphotesis(x,theta)-y
    gradient =  np.dot(x.transpose(),infunc2)/m
    return gradient.flatten()

In [58]:
cost_temp = cost_function(np.zeros(n),nx_train,ny_train)
gradient_temp = compute_gradient(np.zeros(n),nx_train,ny_train)

In [61]:
import scipy.optimize as opt
theta = opt.minimize(fun = cost_function, x0 = theta, args = (nx_train, ny_train), method = 'TNC', jac = compute_gradient)
theta = theta.x

array([-120.53087085,   -0.6092372 , -117.76501969,   79.76914901,
         -2.15197311,   -0.18299497,  -11.20830361,   43.30823174,
          0.85534143,  -81.09405939,  -42.30415631,   55.84641516,
        -55.05525409,  -88.89527051])

## Predictions on Probabilities

In [77]:
def predict_probs(x, theta):
    return calculate_hyphotesis(x, theta)

def predict(x, theta, threshold=0.5):
    return 1*(predict_probs(x, theta) >= threshold)

In [79]:
%time y_hat = predict(nx_train, theta)
y_hat

CPU times: user 220 µs, sys: 48 µs, total: 268 µs
Wall time: 227 µs


array([0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1,
       0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1,
       0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1,
       1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0,
       1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1,
       0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0,
       1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0,
       1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0,
       0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1,
       1, 1, 1, 1, 0])