In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split

In [2]:
def custom_sigmoid(z):
    """Logistic function ``1 / (1 + exp(-z))``, evaluated without overflow.

    Parameters
    ----------
    z : scalar or array-like
        Input value(s). Converted to a float NumPy array, so lists and
        object-dtype arrays holding plain numbers are accepted as well.

    Returns
    -------
    numpy.float64 or numpy.ndarray
        Element-wise sigmoid of ``z``: a NumPy scalar for scalar input,
        otherwise an array with the same shape as ``z``.
    """
    z = np.asarray(z, dtype=float)
    # exp(-|z|) always lies in (0, 1], so it cannot overflow the way
    # exp(-z) does for large negative z.
    decay = np.exp(-np.abs(z))
    # z >= 0: 1 / (1 + e^-z);  z < 0: e^z / (1 + e^z) -- the same function.
    result = np.where(z >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
    # Indexing with () turns a 0-d array into a scalar and leaves n-d arrays as-is.
    return result[()]

def custom_logistic_regression(X, y, alpha, num_iterations):
    """Fit logistic-regression weights with batch gradient descent.

    Parameters
    ----------
    X : array-like of shape (m, n)
        Feature matrix, one row per sample.
    y : array-like with m elements
        Binary targets. May be 1-D or a column vector; it is reshaped to
        ``(m, 1)`` internally.
    alpha : float
        Learning rate (step size of each update).
    num_iterations : int
        Number of gradient-descent steps to perform.

    Returns
    -------
    numpy.ndarray of shape (n, 1)
        The learned weights, starting from all zeros.

    Raises
    ------
    ValueError
        If ``X`` is not 2-D or ``y`` does not have one entry per row of ``X``.
    """
    X = np.asarray(X, dtype=float)
    # Force a column vector: with a 1-D y, (h - y) would broadcast (m, 1)
    # against (m,) into an (m, m) matrix and the weight update would fail.
    y = np.asarray(y, dtype=float).reshape(-1, 1)
    m, n = X.shape
    if y.shape[0] != m:
        raise ValueError(
            f"X has {m} rows but y has {y.shape[0]} elements"
        )
    weights = np.zeros((n, 1))
    for _ in range(num_iterations):
        h = custom_sigmoid(X @ weights)
        # Gradient of the mean log-loss with respect to the weights.
        gradient = X.T @ (h - y) / m
        weights -= alpha * gradient
    return weights

In [3]:
# Load the Titanic passenger records into a DataFrame
df = pd.read_csv(filepath_or_buffer="./data/Titanic.csv")

In [4]:
# Display the DataFrame as loaded from the CSV
df

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.2500,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.9250,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1000,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.0500,,S
...,...,...,...,...,...,...,...,...,...,...,...,...
706,886,0,3,"Rice, Mrs. William (Margaret Norton)",female,39.0,0,5,382652,29.1250,,Q
707,887,0,2,"Montvila, Rev. Juozas",male,27.0,0,0,211536,13.0000,,S
708,888,1,1,"Graham, Miss. Margaret Edith",female,19.0,0,0,112053,30.0000,B42,S
709,890,1,1,"Behr, Mr. Karl Howell",male,26.0,0,0,111369,30.0000,C148,C


In [5]:
# Basic cleaning: remove the columns the analysis does not use, then discard
# every row that still contains a missing value.
df = (
    df
    .drop(columns=['PassengerId', 'Name', 'Ticket', 'Cabin'])
    .dropna()
)

In [6]:
# One-hot encode the categorical columns.
# dtype=int keeps the indicators as 0/1 integers: pandas >= 2.0 emits bool
# columns by default, and a frame mixing bool with numeric columns converts to
# an object-dtype array, which np.exp in custom_sigmoid cannot process.
df = pd.get_dummies(df, columns=['Sex', 'Embarked'], dtype=int)

In [7]:
# Display the DataFrame after cleaning and dummy encoding
df

Unnamed: 0,Survived,Pclass,Age,SibSp,Parch,Fare,Sex_female,Sex_male,Embarked_C,Embarked_Q,Embarked_S
0,0,3,22.0,1,0,7.2500,0,1,0,0,1
1,1,1,38.0,1,0,71.2833,1,0,1,0,0
2,1,3,26.0,0,0,7.9250,1,0,0,0,1
3,1,1,35.0,1,0,53.1000,1,0,0,0,1
4,0,3,35.0,0,0,8.0500,0,1,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...
706,0,3,39.0,0,5,29.1250,1,0,0,1,0
707,0,2,27.0,0,0,13.0000,0,1,0,0,1
708,1,1,19.0,0,0,30.0000,1,0,0,0,1
709,1,1,26.0,0,0,30.0000,0,1,1,0,0


In [8]:
# Feature matrix: every column except the target
X = df.drop(columns='Survived')

In [9]:
# Display the feature matrix
X

Unnamed: 0,Pclass,Age,SibSp,Parch,Fare,Sex_female,Sex_male,Embarked_C,Embarked_Q,Embarked_S
0,3,22.0,1,0,7.2500,0,1,0,0,1
1,1,38.0,1,0,71.2833,1,0,1,0,0
2,3,26.0,0,0,7.9250,1,0,0,0,1
3,1,35.0,1,0,53.1000,1,0,0,0,1
4,3,35.0,0,0,8.0500,0,1,0,0,1
...,...,...,...,...,...,...,...,...,...,...
706,3,39.0,0,5,29.1250,1,0,0,1,0
707,2,27.0,0,0,13.0000,0,1,0,0,1
708,1,19.0,0,0,30.0000,1,0,0,0,1
709,1,26.0,0,0,30.0000,0,1,1,0,0


In [10]:
# Target vector: the 'Survived' column
y = df.loc[:, 'Survived']

In [11]:
# Display the target vector
y

0      0
1      1
2      1
3      1
4      0
      ..
706    0
707    0
708    1
709    1
710    0
Name: Survived, Length: 711, dtype: int64

In [12]:
# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=0,
    test_size=0.2,
)

In [13]:
# Display the training features produced by the split
X_train

Unnamed: 0,Pclass,Age,SibSp,Parch,Fare,Sex_female,Sex_male,Embarked_C,Embarked_Q,Embarked_S
691,3,4.0,1,1,11.1333,0,1,0,0,1
266,1,38.0,0,1,153.4625,0,1,0,0,1
17,2,35.0,0,0,26.0000,0,1,0,0,1
477,1,27.0,0,0,30.5000,0,1,0,0,1
302,3,19.0,0,0,7.7750,0,1,0,0,1
...,...,...,...,...,...,...,...,...,...,...
359,3,5.0,2,1,19.2583,1,0,1,0,0
192,2,33.0,0,0,12.2750,0,1,0,0,1
629,3,25.0,0,0,7.8958,0,1,0,0,1
559,2,45.0,0,0,13.5000,1,0,0,0,1


In [14]:
# Set-up for training the logistic regression with the custom sigmoid:
# learning rate, number of gradient steps, and one zero-initialised weight
# per feature column.
alpha, num_iters = 0.01, 10000
theta = np.zeros(X_train.shape[1])

In [15]:
# Batch gradient descent on the mean log-loss.
# Convert the pandas objects to float arrays once, outside the loop: this
# avoids repeating the conversion on every iteration and also copes with
# bool/object columns that np.exp cannot handle.
X_train_arr = X_train.to_numpy(dtype=float)
y_train_arr = y_train.to_numpy(dtype=float)

# Start from zero weights every time this cell runs, so re-running it does not
# silently continue training from the previous run's weights.
theta = np.zeros(X_train_arr.shape[1])

# NOTE(review): features are used unscaled and there is no explicit intercept
# column; consider standardising the inputs if convergence looks poor.
for _ in range(num_iters):
    h = custom_sigmoid(X_train_arr @ theta)
    gradient = X_train_arr.T @ (h - y_train_arr) / y_train_arr.size
    theta -= alpha * gradient

In [16]:
# Predict on the test set.
# The float conversion guards against bool/object columns in X_test.
test_proba = custom_sigmoid(X_test.to_numpy(dtype=float) @ theta)
# Class 1 when the modelled probability exceeds 0.5. For values in [0, 1] this
# matches np.round (which rounds exactly 0.5 down to 0) but yields integer
# labels, the same kind of label as y_test.
y_pred = (test_proba > 0.5).astype(int)

In [17]:
# Confusion matrix of true vs. predicted labels. Accuracy is the share of
# samples on its diagonal (a fraction between 0 and 1).
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
accuracy = cm.trace() / cm.sum()

In [18]:
# Report the evaluation results (accuracy is printed as a fraction)
print('Matriz de confusión:', cm, sep='\n')
print('Porcentaje de precisión:', accuracy)

Matriz de confusión:
[[76  4]
 [38 25]]
Porcentaje de precisión: 0.7062937062937062


# Decision regions of the fitted model in the Age/Fare plane.
# theta holds one weight per column of X, so a grid built from only two
# columns cannot be multiplied by it. Every other feature is fixed at its mean
# and enters the linear score as a constant offset, i.e. the plot is a 2-D
# slice through the model.
x_feature, y_feature = 'Age', 'Fare'
weights = pd.Series(theta, index=X.columns)
fixed_means = X.astype(float).mean().drop([x_feature, y_feature])
offset = (fixed_means * weights[fixed_means.index]).sum()

x_min, x_max = X[x_feature].min() - 1, X[x_feature].max() + 1
y_min, y_max = X[y_feature].min() - 1, X[y_feature].max() + 1
# A fixed number of grid points keeps the mesh size independent of the
# features' value ranges.
xx, yy = np.meshgrid(np.linspace(x_min, x_max, 300), np.linspace(y_min, y_max, 300))
Z = np.round(custom_sigmoid(offset + weights[x_feature] * xx + weights[y_feature] * yy))

fig, ax = plt.subplots()
# Explicit levels separate the two classes even when the whole grid falls
# into a single predicted class.
ax.contourf(xx, yy, Z, levels=[-0.5, 0.5, 1.5], alpha=0.4)
ax.scatter(X[x_feature], X[y_feature], c=y, s=20, edgecolor='k')
ax.set_xlabel(x_feature)
ax.set_ylabel(y_feature)
ax.set_title('Predicciones de supervivencia del Titanic')
plt.show()