## Implementação prática de um algoritmo de Regressão Logística
### Fazendo uso da função de ativação sigmoidal (logistic)

$$\operatorname{logistic}(z) = \frac{1}{1 + e^{-z}}$$

Em que $z$ é a combinação linear das features $x_i$ com os pesos $a_i$, somada ao viés $b$:

$$ z = (a_1 \cdot x_1 + a_2 \cdot x_2 + \dots + a_n \cdot x_n) + b $$

In [21]:
#Importação das bibliotecas necessárias
import numpy as np
import zipfile
import pandas as pd

### Definição do modelo e seus métodos:
* Precisão
* Acurácia
* Acurácia Balanceada
* Teste (Prediction)
* Treino (Fit)
* Sensibilidade
* Especificidade

In [22]:
class BinaryLogisticRegressor:
    """Binary logistic regression trained with batch gradient descent.

    The model computes ``sigmoid(X @ weights + bias)`` and labels a sample as
    class 1 when that probability is strictly greater than 0.5.

    Parameters
    ----------
    learning_rate : float, default 0.01
        Step size applied to the gradient on every iteration.
    n_iterations : int, default 1000
        Number of full-batch gradient descent steps performed by ``fit``.

    Attributes
    ----------
    weights : numpy.ndarray or None
        One coefficient per feature; ``None`` until ``fit`` is called.
    bias : float or None
        Intercept term; ``None`` until ``fit`` is called.
    """

    def __init__(self, learning_rate=0.01, n_iterations=1000):
        self.learning_rate = learning_rate
        self.n_iterations = n_iterations
        self.weights = None
        self.bias = None
        # Activation function, kept as an instance attribute as before.
        self._sigmoid = self._stable_sigmoid

    @staticmethod
    def _stable_sigmoid(x):
        """Logistic function that never evaluates ``exp`` on a positive value."""
        x = np.asarray(x, dtype=float)
        # exp(-|x|) is always in (0, 1], so it cannot overflow.
        decay = np.exp(-np.abs(x))
        return np.where(x >= 0, 1 / (1 + decay), decay / (1 + decay))

    @staticmethod
    def _as_feature_matrix(X):
        """Return ``X`` as a 2-D float array; a 1-D input becomes one column."""
        X = np.asarray(X, dtype=float)
        if X.ndim == 1:
            X = X.reshape(-1, 1)
        return X

    @staticmethod
    def _as_label_vectors(y_true, y_pred):
        """Return both label sequences as flat numpy arrays."""
        return np.asarray(y_true).ravel(), np.asarray(y_pred).ravel()

    def fit(self, X, y):
        """Learn ``weights`` and ``bias`` from the training data.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features) or (n_samples,)
            Training features. A 1-D input is treated as a single feature.
            Values are cast to float, so object-dtype arrays are accepted.
        y : array-like of shape (n_samples,)
            Binary targets encoded as 0 and 1.

        Raises
        ------
        ValueError
            If ``X`` and ``y`` do not contain the same number of samples.
        """
        X = self._as_feature_matrix(X)
        # Flatten y so that a column vector cannot broadcast the error to (n, n).
        y = np.asarray(y, dtype=float).ravel()
        n_samples, n_features = X.shape
        if y.shape[0] != n_samples:
            raise ValueError(
                f"X has {n_samples} samples but y has {y.shape[0]}"
            )

        # Start from the all-zero model.
        self.weights = np.zeros(n_features)
        self.bias = 0.0

        # Gradient descent
        for _ in range(self.n_iterations):
            linear_model = np.dot(X, self.weights) + self.bias
            error = self._sigmoid(linear_model) - y

            # Gradient of the mean log-loss with respect to weights and bias.
            dw = (1 / n_samples) * np.dot(X.T, error)
            db = (1 / n_samples) * np.sum(error)

            self.weights -= self.learning_rate * dw
            self.bias -= self.learning_rate * db

    def predict(self, X):
        """Predict the class (0 or 1) of every sample in ``X``.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features) or (n_samples,)
            Samples to classify, laid out like the data passed to ``fit``.

        Returns
        -------
        list of int
            1 where the predicted probability is greater than 0.5, else 0.
        """
        linear_model = np.dot(self._as_feature_matrix(X), self.weights) + self.bias
        probabilities = self._sigmoid(linear_model)
        return (probabilities > 0.5).astype(int).tolist()

    def accuracy(self, y_true, y_pred):
        """Fraction of samples whose predicted label equals the true label."""
        y_true, y_pred = self._as_label_vectors(y_true, y_pred)
        return np.sum(y_true == y_pred) / len(y_true)

    def precision(self, y_true, y_pred):
        """True positives divided by predicted positives.

        The result is ``nan`` when no sample is predicted positive.
        """
        y_true, y_pred = self._as_label_vectors(y_true, y_pred)
        true_positives = np.sum(y_true * y_pred)
        predicted_positives = np.sum(y_pred)
        return true_positives / predicted_positives

    def balanced_accuracy(self, y_true, y_pred):
        """Mean of the true-positive rate and the true-negative rate."""
        y_true, y_pred = self._as_label_vectors(y_true, y_pred)
        positive = np.sum(y_true)
        negative = len(y_true) - positive
        true_positives = np.sum(y_true * y_pred)
        true_negatives = np.sum((1 - y_true) * (1 - y_pred))
        return (true_positives / positive + true_negatives / negative) / 2

    def sensitivity(self, y_true, y_pred):
        """True positives divided by the number of actual positives."""
        y_true, y_pred = self._as_label_vectors(y_true, y_pred)
        true_positives = np.sum(y_true * y_pred)
        positive = np.sum(y_true)
        return true_positives / positive

    def specificity(self, y_true, y_pred):
        """True negatives divided by the number of actual negatives."""
        y_true, y_pred = self._as_label_vectors(y_true, y_pred)
        true_negatives = np.sum((1 - y_true) * (1 - y_pred))
        negative = len(y_true) - np.sum(y_true)
        return true_negatives / negative

In [23]:
# Feature scaler supporting the method names 'minmax', 'std' and 'normal'.
class Scaler:
    """Column-wise feature scaler.

    Parameters
    ----------
    method : str
        ``'minmax'`` rescales each column to ``(X - min) / (max - min)``.
        ``'std'`` and ``'normal'`` both standardise each column as
        ``(X - mean) / std``; the two names currently behave identically.
        The name is only validated when ``fit`` or ``transform`` is called.
    """

    def __init__(self, method):
        self.method = method

    @staticmethod
    def _nonzero(scale):
        """Replace zero divisors with 1 so constant columns map to 0, not NaN."""
        return np.where(scale == 0, 1, scale)

    def fit(self, X):
        """Compute the per-column statistics required by ``transform``.

        Raises
        ------
        ValueError
            If ``method`` is not one of the supported names.
        """
        if self.method == 'minmax':
            self.min = np.min(X, axis=0)
            self.max = np.max(X, axis=0)
        elif self.method in ('std', 'normal'):
            self.mean = np.mean(X, axis=0)
            self.std = np.std(X, axis=0)
        else:
            raise ValueError('Método inválido ou não implementado')

    def transform(self, X):
        """Scale ``X`` with the statistics learned by ``fit``.

        Columns that were constant during ``fit`` (zero range or zero standard
        deviation) are divided by 1 instead of 0, so they come out as zeros.

        Raises
        ------
        ValueError
            If ``method`` is not one of the supported names.
        """
        if self.method == 'minmax':
            return (X - self.min) / self._nonzero(self.max - self.min)
        elif self.method in ('std', 'normal'):
            return (X - self.mean) / self._nonzero(self.std)
        else:
            raise ValueError('Método inválido ou não implementado')


In [24]:
# Load the dataset straight from the zip archive. The context managers close both
# the archive and the member file handle once pandas has finished reading.
with zipfile.ZipFile('archive.zip') as zf, zf.open('data.csv') as csv_file:
    data = pd.read_csv(csv_file)

In [25]:
# Preview the first rows of the dataset
data.head()

Unnamed: 0,id,diagnosis,radius_mean,texture_mean,perimeter_mean,area_mean,smoothness_mean,compactness_mean,concavity_mean,concave points_mean,...,texture_worst,perimeter_worst,area_worst,smoothness_worst,compactness_worst,concavity_worst,concave points_worst,symmetry_worst,fractal_dimension_worst,Unnamed: 32
0,842302,M,17.99,10.38,122.8,1001.0,0.1184,0.2776,0.3001,0.1471,...,17.33,184.6,2019.0,0.1622,0.6656,0.7119,0.2654,0.4601,0.1189,
1,842517,M,20.57,17.77,132.9,1326.0,0.08474,0.07864,0.0869,0.07017,...,23.41,158.8,1956.0,0.1238,0.1866,0.2416,0.186,0.275,0.08902,
2,84300903,M,19.69,21.25,130.0,1203.0,0.1096,0.1599,0.1974,0.1279,...,25.53,152.5,1709.0,0.1444,0.4245,0.4504,0.243,0.3613,0.08758,
3,84348301,M,11.42,20.38,77.58,386.1,0.1425,0.2839,0.2414,0.1052,...,26.5,98.87,567.7,0.2098,0.8663,0.6869,0.2575,0.6638,0.173,
4,84358402,M,20.29,14.34,135.1,1297.0,0.1003,0.1328,0.198,0.1043,...,16.67,152.2,1575.0,0.1374,0.205,0.4,0.1625,0.2364,0.07678,


In [26]:
# List the column names available in the dataset
data.columns

Index(['id', 'diagnosis', 'radius_mean', 'texture_mean', 'perimeter_mean',
       'area_mean', 'smoothness_mean', 'compactness_mean', 'concavity_mean',
       'concave points_mean', 'symmetry_mean', 'fractal_dimension_mean',
       'radius_se', 'texture_se', 'perimeter_se', 'area_se', 'smoothness_se',
       'compactness_se', 'concavity_se', 'concave points_se', 'symmetry_se',
       'fractal_dimension_se', 'radius_worst', 'texture_worst',
       'perimeter_worst', 'area_worst', 'smoothness_worst',
       'compactness_worst', 'concavity_worst', 'concave points_worst',
       'symmetry_worst', 'fractal_dimension_worst', 'Unnamed: 32'],
      dtype='object')

In [27]:
# Dataset dimensions as (rows, columns)
data.shape

(569, 33)

In [28]:
# Select the model input and the target from the raw value matrix.
# Column 2 is `radius_mean` and column 1 is `diagnosis` (see `data.columns`).
# NOTE(review): `[:, 2]` takes a single column, so X is a 1-D array holding one
# feature only — confirm whether all feature columns were intended instead.
X = data.values[:, 2]
y = data.values[:, 1]

In [29]:
# Show the distinct class labels present in the dataset
np.unique(y)

array(['B', 'M'], dtype=object)

In [30]:
# Encode the diagnosis as a binary target: 'M' -> 1, anything else -> 0.
# The labels are read from `data` rather than from `y`, so re-running this cell
# gives the same result instead of turning every already-encoded label into 0.
y = (data['diagnosis'] == 'M').to_numpy().astype(int)

In [31]:
# Check the result of the label conversion
np.unique(y)

array([0, 1])

In [32]:
# Convert the data to numpy arrays.
# X comes from `data.values`, which is an object-dtype array because the frame mixes
# strings and numbers; casting to float lets numeric ufuncs such as np.exp work on it.
X = np.array(X, dtype=float)
y = np.array(y)

In [33]:
# Create the min-max feature scaler and the classifier with its default settings
sc = Scaler(method='minmax')
clf = BinaryLogisticRegressor()