In [173]:
import numpy as np

class LogisticRegression_hand:
    def __init__(self, learning_rate=0.01, num_iterations=50000, fit_intercept=True):
        self.learning_rate = learning_rate
        self.num_iterations = num_iterations
        self.fit_intercept = fit_intercept

    def __intercept(self, X):
        intercept = np.ones((X.shape[0], 1))
        return np.concatenate((intercept, X), axis=1)
    
    def __sigmoid_function(self, z):
        return 1 / (1 + np.exp(-z))
    
    def __loss(self, yp, y):
        return (-y * np.log(yp) - (1 - y) * np.log(1 - yp)).mean()
    
    def fit(self, X, y):
        if self.fit_intercept:
            X = self.__intercept(X)
        self.W = np.zeros(X.shape[1])
        
        for i in range(self.num_iterations):
            z = np.dot(X, self.W)
            yp = self.__sigmoid_function(z)
            gradient = np.dot(X.T, (yp - y)) / y.size
            self.W -= self.learning_rate * gradient
            z = np.dot(X, self.W)
            yp = self.__sigmoid_function(z)
            loss = self.__loss(yp, y)
    
    def predict_prob(self, X):
        if self.fit_intercept:
            X = self.__intercept(X)
        return self.__sigmoid_function(np.dot(X, self.W))
    
    def predict(self, X):
        return self.predict_prob(X).round()



In [175]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
df = pd.read_csv("~/Stat/Python/titanic/train.csv")
df = df.iloc[:, [1,5]]
df = df.dropna()
x = train_df.iloc[:,1]
y = train_df.iloc[:,0]
x_train, x_test, y_train, y_test = train_test_split(y, x, test_size=0.2)

In [176]:
from sklearn.linear_model import LogisticRegression
model = LogisticRegression(fit_intercept=True)
model_hand = LogisticRegression_hand()

In [178]:
x_train = np.array(x_train).reshape(-1,1)
y_train = np.array(y_train).reshape(-1,1)

In [179]:
model.fit(x_train,y_train.ravel())
model_hand.fit(x_train,y_train.ravel())

  return (-y * np.log(yp) - (1 - y) * np.log(1 - yp)).mean()
  return (-y * np.log(yp) - (1 - y) * np.log(1 - yp)).mean()
  return 1 / (1 + np.exp(-z))


In [180]:
[model.intercept_, model.coef_]

[array([-0.36571061]), array([[-0.00022177]])]

In [183]:
model_hand.predict(np.array(x_test).reshape(-1,1))

  return 1 / (1 + np.exp(-z))


array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [185]:
model.predict(np.array(x_test).reshape(-1,1))

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0])

In [147]:
model.score(x_train, y_train)

0.6179775280898876

In [186]:
from sklearn.metrics import confusion_matrix
confusion_matrix(y_train, model.predict(x_train))

array([[437,   0],
       [275,   0]])

In [187]:
confusion_matrix(y_train, model_hand.predict(x_train))

  return 1 / (1 + np.exp(-z))


array([[436,   1],
       [275,   0]])

In [188]:
from sklearn.metrics import accuracy_score
accuracy_score(y_train, model_hand.predict(x_train))

  return 1 / (1 + np.exp(-z))


0.6123595505617978