# Regressão Logística para Classificação de Câncer de Mama

A seguinte análise objetiva o estudo e desenvolvimento de um algoritmo, desenvolvendo as funções de ativação, de treinamento e previsão utilizando apenas a biblioteca NumPy, enquanto solidificação dos conhecimentos em Regressão Logística, Cálculo e Linguagem de Programação.

### Parte 1. Importação de Bibliotecas e Base

In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split #Utilizado para separar o dataset em treinamento e teste
from sklearn import datasets
import matplotlib.pyplot as plt #Utilizado para a visualização dos dados

### Parte 2. Preparação dos dados

In [2]:
bc = datasets.load_breast_cancer() #Acessando o dataset
X, y = bc.data, bc.target #Separando o dataset em inputs (X) e outputs (y)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1234)

In [3]:
#Acessando o conteúdo do dataset
print(bc.keys())

dict_keys(['data', 'target', 'frame', 'target_names', 'DESCR', 'feature_names', 'filename', 'data_module'])


In [4]:
#Acessando o nome dos features que estão sendo usado para classificação
print(bc['feature_names'])

['mean radius' 'mean texture' 'mean perimeter' 'mean area'
 'mean smoothness' 'mean compactness' 'mean concavity'
 'mean concave points' 'mean symmetry' 'mean fractal dimension'
 'radius error' 'texture error' 'perimeter error' 'area error'
 'smoothness error' 'compactness error' 'concavity error'
 'concave points error' 'symmetry error' 'fractal dimension error'
 'worst radius' 'worst texture' 'worst perimeter' 'worst area'
 'worst smoothness' 'worst compactness' 'worst concavity'
 'worst concave points' 'worst symmetry' 'worst fractal dimension']


### Parte 3. Criação de Classes e funções necessárias para cálculo de pesos e bias

In [5]:
#Função de Ativação - Sigmoide
def sigmoid(x):
    return 1/(1+np.exp(-x))

class LogisticRegression():
    #Inicialização dos parâmetros necessários
    def __init__(self, lr=0.001, n_iters=1000):
        self.lr = lr
        self.n_iters = n_iters
        self.weights = None
        self.bias = None
    
    #Inicialização do processo de treinamento
    def fit(self, X, y):
        n_samples, n_features = X.shape
        self.weights = np.zeros(n_features)
        self.bias = 0
        
        #Laço de repetição utilizado para treinamento, considerando o número de iterações informado
        for _ in range(self.n_iters):
            linear_pred = np.dot(X, self.weights) + self.bias
            predictions = sigmoid(linear_pred)

            dw = (1/n_samples) * np.dot(X.T, (predictions - y))
            db = (1/n_samples) * np.sum(predictions-y)

            self.weights = self.weights - self.lr*dw
            self.bias = self.bias - self.lr*db
    
    def predict(self, X):
        linear_pred = np.dot(X, self.weights) + self.bias
        y_pred = sigmoid(linear_pred)
        class_pred = [0 if y<=0.5 else 1 for y in y_pred]
        return class_pred

#Inicialização de função de precisão
def accuracy(y_pred, y_test):
    return np.sum(y_pred==y_test)/len(y_test)

### Parte 4. Aplicação da Regressão Logística no dataset 

In [6]:
#Aplicação da Regressão Logística
clf = LogisticRegression(lr=0.01)
clf.fit(X_train,y_train)
y_pred = clf.predict(X_test)

#Cálculo da Precisão do Modelo Obtido
acc = accuracy(y_pred, y_test)
print(f"A precisão do modelo obtido é de: {acc.round(4)*100}%")

A precisão do modelo obtido é de: 92.11%


  return 1/(1+np.exp(-x))


In [8]:
features_data = pd.DataFrame(X_test, columns = ['mean radius', 'mean texture', 'mean perimeter', 'mean area',
 'mean smoothness', 'mean compactness', 'mean concavity',
 'mean concave points', 'mean symmetry', 'mean fractal dimension',
 'radius error', 'texture error', 'perimeter error' ,'area error',
 'smoothness error', 'compactness error', 'concavity error',
 'concave points error', 'symmetry error', 'fractal dimension error',
 'worst radius', 'worst texture', 'worst perimeter', 'worst area',
 'worst smoothness', 'worst compactness', 'worst concavity',
 'worst concave points', 'worst symmetry', 'worst fractal dimension'])
pred = pd.DataFrame(y_pred, columns=['Value_pred'])
result = pd.concat([features_data, pred], axis=1)

pred_class = []
for i in result.itertuples():
    if i.Value_pred == 1:
        pred_class.append("Benign")
    else: 
        pred_class.append("Malignant")
result["Cancer_Class"] = pred_class
result

Unnamed: 0,mean radius,mean texture,mean perimeter,mean area,mean smoothness,mean compactness,mean concavity,mean concave points,mean symmetry,mean fractal dimension,...,worst perimeter,worst area,worst smoothness,worst compactness,worst concavity,worst concave points,worst symmetry,worst fractal dimension,Value_pred,Cancer_Class
0,11.670,20.02,75.21,416.2,0.10160,0.09453,0.04200,0.02157,0.1859,0.06461,...,87.00,550.6,0.1550,0.2964,0.27580,0.08120,0.3206,0.08950,1,Benign
1,10.800,9.71,68.77,357.6,0.09594,0.05736,0.02531,0.01698,0.1381,0.06400,...,73.66,414.0,0.1436,0.1257,0.10470,0.04603,0.2090,0.07699,1,Benign
2,12.450,16.41,82.85,476.7,0.09514,0.15110,0.15440,0.04846,0.2082,0.07325,...,97.82,580.6,0.1175,0.4061,0.48960,0.13420,0.3231,0.10340,1,Benign
3,9.465,21.01,60.11,269.4,0.10440,0.07773,0.02172,0.01504,0.1717,0.06899,...,67.03,330.7,0.1548,0.1664,0.09412,0.06517,0.2878,0.09211,1,Benign
4,13.650,13.16,87.88,568.9,0.09646,0.08711,0.03888,0.02563,0.1360,0.06344,...,99.71,706.2,0.1311,0.2474,0.17590,0.08056,0.2380,0.08718,1,Benign
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
109,11.750,20.18,76.10,419.8,0.10890,0.11410,0.06843,0.03738,0.1993,0.06453,...,88.91,543.9,0.1358,0.1892,0.19560,0.07909,0.3168,0.07987,1,Benign
110,18.610,20.25,122.10,1094.0,0.09440,0.10660,0.14900,0.07731,0.1697,0.05699,...,139.90,1403.0,0.1338,0.2117,0.34460,0.14900,0.2341,0.07421,0,Malignant
111,13.270,14.76,84.74,551.7,0.07355,0.05055,0.03261,0.02648,0.1386,0.05318,...,104.50,830.6,0.1006,0.1238,0.13500,0.10010,0.2027,0.06206,0,Malignant
112,13.430,19.63,85.84,565.4,0.09048,0.06288,0.05858,0.03438,0.1598,0.05671,...,116.60,993.6,0.1401,0.1546,0.26440,0.11600,0.2884,0.07371,0,Malignant
