# Regressão Logística

$w$ é o vetor de parâmetros (pesos), $X$ vem da amostra e $b$ é o intercepto. Poderíamos também absorver o intercepto nos pesos, fazendo $w_{0} = b$ com $x_{0} = 1$.

$z = w^{T}x + b$

Sigmoid function

$f(z) = \dfrac{1}{1+e^{-z}}$

onde $\hat{y} = a = sigmoid(z)$

Loss function

$\mathcal{L}(\hat{y}, y) = -\left[y\log(\hat{y}) + (1-y)\log(1-\hat{y})\right]$

Cost function 

$J = \dfrac{1}{m} \sum\limits_{i=1}^m \mathcal{L}(\hat{y}^{(i)}, y^{(i)})$

Derivadas

Seja $A = sigmoid(w^{T}X + b)$

$\dfrac{\partial J}{\partial w} = \dfrac{1}{m} X(A - Y)^{T}$

$\dfrac{\partial J}{\partial b} = \dfrac{1}{m} \sum_{i=1}^{m} (a^{(i)} - y^{(i)})$

Atualização do parâmetro $\theta$

$\theta := \theta - \alpha d\theta$, onde $\alpha$ é o learning rate

<font color = 'red'>
Algoritmo:
    
- Chute inicial $\theta$ ou $w$ e $b$. Você irá utilizar a amostra de treino.
- Calcule o custo, e tente minimizar utilizando suas derivadas até os parâmetros "convergirem".
- Teste com sua amostra de teste.

In [2]:
import numpy as np
import pandas as pd
import sklearn as sk
import random
from sklearn.datasets import load_breast_cancer

In [3]:
# Load the breast-cancer dataset shipped with scikit-learn and pull the
# feature matrix and the label vector out of the returned object.
data = load_breast_cancer()
X, y = data.data, data.target

In [4]:
# Standardize every column of X: subtract the column mean and divide by the
# column standard deviation (both taken along axis 0).
X = (X - np.mean(X, axis=0)) / np.std(X, axis=0)
# Alternative scaling, left disabled: divide each column by its norm.
# X = X / np.linalg.norm(X, axis=0)

In [5]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples for testing; the fixed seed makes the split
# reproducible from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [6]:
# Reshape: transpose the feature matrices, and turn each label array into a
# 2-D array with its first axis moved to the columns (a single row when the
# labels arrive 1-D).
# NOTE: this cell is not idempotent -- running it twice transposes back.
X_train, X_test = np.transpose(X_train), np.transpose(X_test)

y_train = np.transpose(y_train.reshape(len(y_train), -1))
y_test = np.transpose(y_test.reshape(len(y_test), -1))

In [7]:
def sigmoid(z):
    """Return the logistic sigmoid ``1 / (1 + exp(-z))``, element-wise.

    Args:
        z: Scalar or array-like of real values.

    Returns:
        The sigmoid of ``z``, with the same shape as ``z``.
    """
    # np.logaddexp(0, -z) evaluates log(1 + exp(-z)) without forming exp(-z)
    # on its own, so large negative inputs do not overflow (the direct
    # formula emits a RuntimeWarning there).
    return np.exp(-np.logaddexp(0.0, np.negative(z)))

In [8]:
def initial(dim):
    """Build the starting parameters for gradient descent.

    Args:
        dim: Number of weights to create.

    Returns:
        A tuple ``(w, b)``: ``w`` is a ``(dim, 1)`` array of zeros and ``b``
        is the integer 0.
    """
    weights = np.zeros((dim, 1))
    bias = 0
    return weights, bias

In [9]:
def forward(w, b, X, y):
    """Compute the logistic-regression cost and its gradients.

    Args:
        w: Weight column vector, shape ``(n_features, 1)``.
        b: Scalar intercept.
        X: Feature matrix, shape ``(n_features, m)`` -- one sample per column.
        y: Labels (0 or 1), expected with shape ``(1, m)``.

    Returns:
        A tuple ``(grads, J)`` where ``grads`` is ``{"dw": ..., "db": ...}``
        (gradients of the cost with respect to ``w`` and ``b``) and ``J`` is
        the mean cross-entropy cost over the ``m`` samples.
    """
    m = X.shape[1]

    z = np.dot(w.T, X) + b

    # Work in log-space straight from the logits:
    #   log(sigmoid(z))     = -log(1 + exp(-z))
    #   log(1 - sigmoid(z)) = -log(1 + exp(z))
    # Taking np.log of a saturated activation (exactly 0 or 1) would yield
    # -inf and turn the cost into nan/inf; logaddexp keeps it finite.
    log_a = -np.logaddexp(0.0, -z)
    log_one_minus_a = -np.logaddexp(0.0, z)
    A = np.exp(log_a)  # same value as the sigmoid of z

    J = -(1/m) * np.sum(y * log_a + (1 - y) * log_one_minus_a)

    residual = A - y
    dw = (1/m) * np.dot(X, residual.T)
    db = (1/m) * np.sum(residual)

    grads = {"dw": dw,
             "db": db}

    return grads, J

In [10]:
def optimize(w, b, X, Y, num_iterations, learning_rate, print_cost=False):
    """Fit ``w`` and ``b`` by batch gradient descent.

    Args:
        w: Initial weights, passed to ``forward`` unchanged.
        b: Initial intercept.
        X: Training features, passed to ``forward``.
        Y: Training labels, passed to ``forward``.
        num_iterations: Number of gradient-descent steps to take.
        learning_rate: Step size multiplying each gradient.
        print_cost: When true, print the cost every 100 iterations.

    Returns:
        A tuple ``(params, grads, costs)``: ``params`` holds the final ``"w"``
        and ``"b"``; ``grads`` holds the ``"dw"`` and ``"db"`` from the last
        iteration (both ``None`` if no iteration ran); ``costs`` lists the
        cost recorded at iterations 0, 100, 200, ...
    """
    costs = []

    # Bound up front so that num_iterations == 0 returns cleanly instead of
    # raising UnboundLocalError when the gradients dict is built below.
    dw, db = None, None

    for i in range(num_iterations):
        grads, J = forward(w, b, X, Y)

        dw = grads["dw"]
        db = grads["db"]

        w = w - learning_rate * dw
        b = b - learning_rate * db

        # J was evaluated with the parameters in effect before this step.
        if i % 100 == 0:
            costs.append(J)
            if print_cost:
                print("Cost after iteration %i: %f" % (i, J))

    params = {"w": w,
              "b": b}

    grads = {"dw": dw,
             "db": db}

    return params, grads, costs

In [11]:
def predict(w, b, X):
    """Predict a 0/1 label for every column of ``X``.

    Args:
        w: Weights; reshaped internally to ``(n_features, 1)``, so a flat
            vector of length ``n_features`` is accepted as well.
        b: Scalar intercept.
        X: Feature matrix, shape ``(n_features, m)`` -- one sample per column.

    Returns:
        Integer array of shape ``(1, m)``: 1 where the linear score
        ``w^T x + b`` is strictly positive, 0 otherwise.
    """
    w = w.reshape(X.shape[0], 1)

    z = np.dot(w.T, X) + b

    # A probability above 0.5 corresponds to a positive score, so the
    # decision is made on z directly; this skips the exponential and its
    # rounding right at the boundary.
    return (z > 0).astype(int)

In [12]:
def model(
    X_train,
    Y_train,
    X_test,
    Y_test,
    num_iterations=2000,
    learning_rate=0.5,
    print_cost=False,
):
    """Train a logistic-regression classifier and report its accuracy.

    Starts from the parameters returned by ``initial``, fits them with
    ``optimize`` on the training data, predicts both splits with ``predict``
    and prints the train and test accuracy as percentages.

    Args:
        X_train: Training features; ``X_train.shape[0]`` sets the number of
            weights.
        Y_train: Training labels.
        X_test: Test features.
        Y_test: Test labels.
        num_iterations: Number of gradient-descent steps.
        learning_rate: Gradient-descent step size.
        print_cost: Forwarded to ``optimize``.

    Returns:
        A dict with the recorded costs, the test and train predictions, the
        fitted ``w`` and ``b``, and the two hyperparameters used.
    """
    n_weights = X_train.shape[0]
    w_start, b_start = initial(n_weights)

    fitted, _, cost_history = optimize(
        w_start, b_start, X_train, Y_train, num_iterations, learning_rate, print_cost
    )
    w, b = fitted["w"], fitted["b"]

    test_pred = predict(w, b, X_test)
    train_pred = predict(w, b, X_train)

    # Labels and predictions are 0/1, so the mean absolute difference is the
    # fraction of misclassified samples.
    train_error = np.mean(np.abs(train_pred - Y_train))
    test_error = np.mean(np.abs(test_pred - Y_test))
    print("train accuracy: {} %".format(100 - train_error * 100))
    print("test accuracy: {} %".format(100 - test_error * 100))

    return {
        "costs": cost_history,
        "Y_prediction_test": test_pred,
        "Y_prediction_train": train_pred,
        "w": w,
        "b": b,
        "learning_rate": learning_rate,
        "num_iterations": num_iterations,
    }

In [13]:
# Train and evaluate in one call: 2000 iterations, learning rate 0.5,
# no cost logging.
modelo_logistico = model(
    X_train,
    y_train,
    X_test,
    y_test,
    num_iterations=2000,
    learning_rate=0.5,
    print_cost=False,
)

train accuracy: 99.12087912087912 %
test accuracy: 98.24561403508773 %


In [14]:
# Hyperparameters for this run
num_iterations, learning_rate = 2000, 0.1

# Initial guess for the parameters
w, b = initial(X_train.shape[0])

# Gradient-descent optimization
parameters, grads, costs = optimize(
    w,
    b,
    X_train,
    y_train,
    num_iterations=num_iterations,
    learning_rate=learning_rate,
    print_cost=False,
)

# Parameters found by the optimization
w, b = parameters["w"], parameters["b"]

# Predictions on both splits
y_prediction_test = predict(w, b, X_test)
y_prediction_train = predict(w, b, X_train)

In [15]:
# To use the sklearn metrics the label matrices have to be reshaped: each one
# becomes a 1-D array whose length is its number of columns.
y_train_true, y_test_true, y_train_pred, y_test_pred = (
    np.reshape(arr, arr.shape[1])
    for arr in (y_train, y_test, y_prediction_train, y_prediction_test)
)

In [17]:
from sklearn.metrics import accuracy_score

# Accuracy on the training split, then on the test split.
print(accuracy_score(y_true=y_train_true, y_pred=y_train_pred))
print(accuracy_score(y_true=y_test_true, y_pred=y_test_pred))

0.9868131868131869
0.9912280701754386


In [18]:
from sklearn.metrics import recall_score

# Recall on the training split, then on the test split.
print(recall_score(y_true=y_train_true, y_pred=y_train_pred))
print(recall_score(y_true=y_test_true, y_pred=y_test_pred))

0.993006993006993
1.0
