In [1]:
import numpy as np
import pandas as pd

In [6]:
# Fetch the Titanic passenger table straight from its public GitHub URL.
# This cell needs network access; every re-run downloads the CSV again.
titanic = pd.read_csv(
    filepath_or_buffer='https://raw.githubusercontent.com/datasciencedojo/datasets/refs/heads/master/titanic.csv'
)

# A bare trailing expression makes the notebook render the frame as rich output.
titanic

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.2500,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.9250,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1000,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.0500,,S
...,...,...,...,...,...,...,...,...,...,...,...,...
886,887,0,2,"Montvila, Rev. Juozas",male,27.0,0,0,211536,13.0000,,S
887,888,1,1,"Graham, Miss. Margaret Edith",female,19.0,0,0,112053,30.0000,B42,S
888,889,0,3,"Johnston, Miss. Catherine Helen ""Carrie""",female,,1,2,W./C. 6607,23.4500,,S
889,890,1,1,"Behr, Mr. Karl Howell",male,26.0,0,0,111369,30.0000,C148,C


In [7]:
class RegresionLogisticaBootcamp:
    """Binary logistic regression trained with full-batch gradient descent.

    Parameters
    ----------
    learning_rate : float, default 0.01
        Step size applied to every gradient update.
    max_iter : int, default 1000
        Number of gradient-descent iterations performed by ``fit``.

    Attributes
    ----------
    pesos : numpy.ndarray or None
        Learned weights, one per feature column; ``None`` until ``fit`` runs.
    sesgo : float or None
        Learned bias (intercept); ``None`` until ``fit`` runs.
    """

    def __init__(self, learning_rate = 0.01, max_iter = 1000):
        self.learning_rate = learning_rate
        self.max_iter = max_iter
        self.pesos = None
        self.sesgo = None

    def _sigmoid(self, z):
        """Return the logistic function ``1 / (1 + exp(-z))`` element-wise.

        The naive formula overflows in ``exp`` for large negative ``z``.
        Here ``exp`` is only ever evaluated on non-positive values, and the
        algebraically equivalent form ``exp(z) / (1 + exp(z))`` is used for
        the negative half, so no overflow can occur.
        """
        z = np.asarray(z, dtype=float)
        exp_neg_abs = np.exp(-np.abs(z))  # always in (0, 1]
        resultado = np.where(
            z >= 0,
            1 / (1 + exp_neg_abs),
            exp_neg_abs / (1 + exp_neg_abs),
        )
        # ``[()]`` turns a 0-d array back into a scalar and is a no-op otherwise.
        return resultado[()]

    def _binary_cross_entropy(self, y_true, y_pred):
        """Return the mean binary cross-entropy between labels and probabilities."""
        # Clip the probabilities away from 0 and 1 so log() never receives 0.
        epsilon = 1e-15
        y_pred = np.clip(y_pred, epsilon, 1 - epsilon)

        return -np.mean(y_true * np.log(y_pred) + (1 - y_true) * np.log(1 - y_pred))

    def fit(self, df, target, verbose = True):
        """Learn ``pesos`` and ``sesgo`` from a DataFrame.

        Parameters
        ----------
        df : pandas.DataFrame
            Training data; every column except ``target`` is used as a feature
            and must be convertible to float.
        target : str
            Name of the column holding the 0/1 labels.
        verbose : bool, default True
            When true, print the loss every 100 iterations.

        Raises
        ------
        ValueError
            If the frame has no rows, the target does not provide exactly one
            value per row, a column cannot be converted to float, or the data
            contain NaN / infinite values.
        """
        # Force a float matrix so non-numeric columns fail here with a clear
        # conversion error instead of deep inside np.dot.
        X = df.drop(columns=target).to_numpy(dtype=float)
        # ravel() guards against a (n, 1) label array, which would silently
        # broadcast (y_pred - y) into an (n, n) matrix.
        y = np.asarray(df[target], dtype=float).ravel()

        n_filas, n_columnas = X.shape
        if n_filas == 0:
            raise ValueError('No hay filas para entrenar')
        if y.shape[0] != n_filas:
            raise ValueError('La columna objetivo debe tener un valor por fila')
        # NaN/inf would propagate into every weight and yield a useless model
        # without any error, so reject them up front.
        if not (np.isfinite(X).all() and np.isfinite(y).all()):
            raise ValueError('Los datos contienen valores NaN o infinitos')

        self.pesos = np.zeros(n_columnas)
        self.sesgo = 0.0

        for i in range(self.max_iter):

            modelo_lineal = np.dot(X, self.pesos) + self.sesgo

            y_pred = self._sigmoid(modelo_lineal)

            # Gradients of the mean cross-entropy w.r.t. weights and bias.
            error = y_pred - y
            dw = (1 / n_filas) * np.dot(X.T, error)
            db = (1 / n_filas) * np.sum(error)

            self.pesos -= self.learning_rate * dw
            self.sesgo -= self.learning_rate * db

            if verbose and i % 100 == 0:
                # Loss of the predictions made *before* this iteration's update.
                loss = self._binary_cross_entropy(y, y_pred)
                print(f'Iteración {i}: Loss {loss}')

    def predict_proba(self, df):
        """Return the predicted probability of class 1 for each row of ``df``.

        ``df`` must hold the feature columns in the same order used by ``fit``.

        Raises
        ------
        RuntimeError
            If the model has not been fitted yet.
        """
        if self.pesos is None or self.sesgo is None:
            raise RuntimeError('El modelo no ha sido entrenado')

        # np.asarray accepts a DataFrame as well as a plain array-like.
        modelo_lineal = np.dot(np.asarray(df, dtype=float), self.pesos) + self.sesgo

        return self._sigmoid(modelo_lineal)

    def predict(self, df, threshold = 0.5):
        """Return 0/1 labels: 1 where the predicted probability is >= ``threshold``.

        Raises
        ------
        RuntimeError
            If the model has not been fitted yet (raised by ``predict_proba``).
        """
        probabilidad = self.predict_proba(df)

        return (probabilidad >= threshold).astype(int)


In [8]:
# Five-row toy dataset: two numeric features and a binary target.
data = dict(
    feature1=[2.5, 3.6, 1.8, 3.2, 4.5],
    feature2=[1.2, 2.8, 3.6, 4.0, 3.5],
    target=[0, 1, 0, 1, 1],
)
df = pd.DataFrame(data)

# Train the from-scratch model with its default hyper-parameters;
# fit() prints the loss every 100 iterations.
regresion_logistica = RegresionLogisticaBootcamp()
regresion_logistica.fit(df, target='target')

# Inspect the learned parameters.
print('Pesos: ', regresion_logistica.pesos)
print('sesgo: ', regresion_logistica.sesgo)

# Two unseen rows with the same feature columns, in the same order.
test_data = pd.DataFrame(dict(feature1=[3.0, 4.0], feature2=[2.5, 3.5]))

y_pred = regresion_logistica.predict(test_data)
print('Predicciones: ', y_pred)

Iteración 0: Loss 0.6931471805599453
Iteración 100: Loss 0.5995959165820869
Iteración 200: Loss 0.5856213762025468
Iteración 300: Loss 0.5733099931585794
Iteración 400: Loss 0.5622528336445726
Iteración 500: Loss 0.5521517274183739
Iteración 600: Loss 0.5427871980853484
Iteración 700: Loss 0.5339984736253952
Iteración 800: Loss 0.5256680207347084
Iteración 900: Loss 0.5177098989255803
Pesos:  [ 0.60898974 -0.08844442]
sesgo:  -0.818287838870773
Predicciones:  [1 1]


In [9]:
from sklearn.linear_model import LogisticRegression

# Reference fit with scikit-learn on the same toy frame, for comparison with
# the from-scratch model.
# NOTE(review): scikit-learn's estimator regularizes by default, so its
# coefficients are not expected to match the unregularized ones — confirm
# against the LogisticRegression docs.
logr = LogisticRegression(max_iter=1000).fit(
    df.loc[:, ['feature1', 'feature2']],
    df['target'],
)

print('Coeficientes: ', logr.coef_)
print('Intercepto: ', logr.intercept_)

# Predict the same two unseen rows scored by the from-scratch model.
y_pred_sk = logr.predict(test_data)
print('Predicciones (sklearn): ', y_pred_sk)

Coeficientes:  [[0.98027395 0.50174216]]
Intercepto:  [-4.0512824]
Predicciones (sklearn):  [1 1]
