In [277]:
import sys
import numpy as np
import pandas as pd

In [278]:
def null_fix(dataset, column, fill):
    for i in range(dataset.shape[0]):
        if not isinstance(dataset[i, column], str):
            dataset[i, column] = fill

In [279]:
def one_hot_encoding(data, column, encoding):
   one_hot = np.zeros((len(data), len(encoding)))
   for i in range(len(data)):
      one_hot[i, encoding[data[i, column]]] = 1
   encoded = np.concatenate((data, one_hot), axis=1)
   return np.delete(encoded, column, 1)

In [280]:
def sigmoid(z):
    return  1.0 / (1.0 + np.exp(-z))

In [281]:
def predict(X, w, b):
    z = np.dot(X, w) + b
    y_pred = sigmoid(z)
    return y_pred

In [282]:
def log_loss(y, y_hat):
    eps = 1e-15
    J = -np.mean(y * np.log(y_hat + eps) - (1 - y) * np.log(1 - y_hat + eps))
    return J

In [283]:
def gradients(X, y, y_hat):
      m = X.shape[0]
      dw = (1/m) * np.dot(X.T, (y_hat - y))
      db = (1/m) * np.sum(y_hat - y)
      return dw, db

In [284]:
def train(X, y, lr, epochs, bs=30):
    m, n = X.shape
    w = np.random.randn(n)
    b = 0
    loss_history = []
    X = X.astype(np.float64)
    for epoch in range(epochs):
        for i in range((m-1)//bs + 1):
            b_start = i * bs
            b_end = b_start + bs
            xb = X[b_start:b_end]
            yb = y[b_start:b_end]
            y_hat = predict(xb, w, b)
            dw, db = gradients(xb, yb, y_hat)
            w -= lr*dw
            b -= lr*db
        loss = log_loss(y, sigmoid(np.dot(X, w) + b))
        loss_history.append(loss)
    return loss_history
 

In [285]:
train_data = pd.read_csv('train.csv')
test_data = pd.read_csv('test.csv')
y_train = train_data['Survived'].copy()
train_data.drop(['PassengerId','Name', 'Survived','Age', 'Ticket', 'Fare', 'Cabin'],inplace=True, axis=1)

In [286]:
x_train_numpy = train_data.values
x_test_numpy = test_data.values
null_fix(x_train_numpy, 4, 'S')
null_fix(x_test_numpy, 10, 'S')

In [287]:
embarked_encoding = {'S': 0, 'C': 1, 'Q': 2}
sex_encoding = {'male': 1, 'female': 0}
x_train_encoded1 = one_hot_encoding(x_train_numpy, 4, embarked_encoding)
x_train_encoded2 = one_hot_encoding(x_train_encoded1, 1, sex_encoding)
y_train_numpy = y_train.values

In [288]:
result = train(x_train_encoded2, y_train_numpy, 0.01, 100, 30)
np.set_printoptions(linewidth=100, threshold=np.inf)
display(result)

[-2.7335469653913087,
 -1.8271867761382277,
 -1.0519017459351019,
 -0.4837373674552094,
 -0.1319095933277173,
 0.06395271922578451,
 0.16845538290113055,
 0.22276313595962977,
 0.24944110419753215,
 0.2606915073553127,
 0.2632681344085793,
 0.26100113042250644,
 0.2560983307876249,
 0.24984218375593764,
 0.24297763855401858,
 0.23593436511307678,
 0.22895642191759885,
 0.22217866443212475,
 0.21567193923217784,
 0.2094698106362622,
 0.20358432494852802,
 0.19801527097059698,
 0.19275559304647413,
 0.18779453758188394,
 0.1831194710058508,
 0.1787169232268022,
 0.17457318218127177,
 0.17067462973456146,
 0.16700792942315057,
 0.1635601297731338,
 0.16031871969193964,
 0.15727165667344886,
 0.15440737951347822,
 0.15171481209077922,
 0.14918336187624304,
 0.14680291522454747,
 0.14456383061757283,
 0.14245693054471348,
 0.14047349243821536,
 0.13860523892933588,
 0.13684432759796875,
 0.13518334032564158,
 0.13361527231578804,
 0.13213352081015806,
 0.13073187350430066,
 0.12940449664793