In [0]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix

In [0]:
class Logistic_Regression:
  """Binary logistic regression trained with plain batch gradient descent.

  The class also bundles two thin helpers around scikit-learn's
  ``LogisticRegression`` (``fit_sklearn`` / ``predict_sklearn``) so both
  implementations can be compared side by side.

  Parameters
  ----------
  learning_rate : float
    Step size applied to the gradient on every iteration.
  max_iter : int
    Number of gradient-descent iterations; there is no early stopping.
  fit_intercept : bool, default True
    When True a column of ones is prepended to ``X`` so that ``theta[0]``
    acts as the bias term.

  Attributes
  ----------
  theta : numpy.ndarray
    Learned weights, set by ``fit``. Has one more entry than ``X`` has
    columns when ``fit_intercept`` is True.
  """

  def __init__(self, learning_rate, max_iter, fit_intercept=True ):
    self.learning_rate = learning_rate
    self.max_iter = max_iter
    self.fit_intercept = fit_intercept

  @staticmethod
  def _sigmoid(z):
    """Logistic function 1 / (1 + e^-z), applied element-wise."""
    # NOTE: for very negative z, np.exp(-z) can overflow to inf and emit a
    # RuntimeWarning; the quotient then evaluates to 0.0.
    return 1 / (1 + np.exp(-z))

  def _design_matrix(self, X):
    """Return X as a float array, with a leading ones column if requested."""
    # Casting up front keeps object-dtype input (e.g. a mixed bool/float
    # DataFrame's ``.values``) from reaching np.exp, which rejects it.
    X = np.asarray(X, dtype=float)
    if self.fit_intercept:
      intercept = np.ones((X.shape[0], 1))
      X = np.concatenate((intercept, X), axis=1)
    return X

  def fit(self, X, y):
    """Learn ``self.theta`` from features ``X`` and 0/1 labels ``y``.

    ``X`` is a 2-D array-like with one row per sample and ``y`` a 1-D
    array-like with one label per row. Weights start at zero and are
    updated ``max_iter`` times with the mean log-loss gradient.
    """
    X = self._design_matrix(X)
    y = np.asarray(y)

    # weights initialization
    self.theta = np.zeros(X.shape[1])

    for _ in range(self.max_iter):
      h = self._sigmoid(np.dot(X, self.theta))
      gradient = np.dot(X.T, (h - y)) / y.size
      self.theta = self.theta - self.learning_rate * gradient

  def predict(self, X):
    """Return 0/1 integer predictions for every row of ``X``.

    Probabilities are rounded with ``ndarray.round`` (round-half-to-even),
    so a probability of exactly 0.5 maps to class 0. ``fit`` must have been
    called first so that ``self.theta`` exists.
    """
    # The score is computed regardless of fit_intercept; previously it was
    # only computed inside the intercept branch, so fit_intercept=False
    # failed with an unbound local variable.
    X = self._design_matrix(X)
    sig = self._sigmoid(np.dot(X, self.theta))
    return (sig.round()).astype(int)

  @staticmethod
  def fit_sklearn(X_train,Y):
    """Fit scikit-learn's ``LogisticRegression`` with default settings.

    Declared static so it works both as ``Logistic_Regression.fit_sklearn(X, y)``
    and on an instance. Returns the fitted scikit-learn estimator.
    """
    # Imported locally: only this helper needs sklearn.linear_model.
    from sklearn.linear_model import LogisticRegression
    model1 = LogisticRegression()
    model1.fit(X_train, Y)
    return model1

  @staticmethod
  def predict_sklearn(X_test, model):
    """Return ``model.predict(X_test)`` for an already fitted estimator."""
    predictions_sklearn = model.predict(X_test)
    return predictions_sklearn


In [0]:
def _prepare_titanic(frame):
  """Split a raw passenger frame into (feature DataFrame, "Survived" Series).

  The frame must contain the columns referenced below (Cabin, Pclass, Sex,
  Embarked, PassengerId, Name, Ticket, Survived). The caller's frame is not
  modified.
  """
  # Cabin has lots of null values, so the column is removed first; only then
  # are rows that still contain a null value dropped.
  cleaned = frame.drop("Cabin", axis=1).dropna()

  pclass = pd.get_dummies(cleaned["Pclass"])                # one indicator column per Pclass value
  sex = pd.get_dummies(cleaned["Sex"], drop_first=True)     # first Sex column dropped: it is implied by the other
  embarked = pd.get_dummies(cleaned["Embarked"])            # one indicator column per Embarked value

  # Append the indicators, then remove the originals and the identifier/text columns.
  encoded = pd.concat([cleaned, pclass, sex, embarked], axis=1)
  encoded = encoded.drop(["PassengerId","Pclass","Name","Sex","Ticket","Embarked"], axis=1)

  return encoded.drop("Survived", axis=1), encoded["Survived"]


def run():
  """Train and evaluate both logistic-regression variants.

  Reads 'xy_train.csv', 'x_test.csv' and 'y_test.csv' from the working
  directory, fits the gradient-descent ``Logistic_Regression`` and the
  scikit-learn model on the training split, and prints the accuracy of each
  on the training and test data.

  Returns
  -------
  tuple
    ``(predictions, predictions_sklearn)``: test-set predictions of the
    gradient-descent model and of the scikit-learn model, in that order.
  """
  X, y = _prepare_titanic(pd.read_csv('xy_train.csv'))

  # Test labels live in a separate file; attach them so that rows removed by
  # dropna() take their label with them.
  test = pd.read_csv('x_test.csv')
  y1 = pd.read_csv('y_test.csv')
  test["Survived"] = y1.drop("PassengerId", axis=1)
  X_test, y_test = _prepare_titanic(test)

  # Train and test are dummy-encoded independently. Force the test frame into
  # the training column layout so each weight lines up with the same feature
  # even if a category is missing from one of the files.
  X_test = X_test.reindex(columns=X.columns, fill_value=0)

  # Explicit float cast: mixed bool/float frames would otherwise yield
  # object-dtype arrays that the numpy implementation cannot process.
  XX = X.values.astype(float)                               # numpy array of training data points
  yy = y.values                                             # numpy array containing 0 or 1 for each data point
  XX_test = X_test.values.astype(float)                     # numpy array of test data points
  yy_test = y_test.values                                   # numpy array containing 0 or 1 for each data point

  model = Logistic_Regression(learning_rate=0.004, max_iter=10000)
  model.fit(XX, yy)
  predictions_train = model.predict(XX)
  print("Accuracy on TRAINING data using fit method : ",accuracy_score(yy, predictions_train))
  predictions = model.predict(XX_test)
  print("Accuracy on TEST data using fit method : ",accuracy_score(yy_test, predictions))

  # The sklearn helpers take no instance, so they are called through the class.
  model1 = Logistic_Regression.fit_sklearn(XX,yy)
  predictions_sklearn_train = Logistic_Regression.predict_sklearn(XX, model1)
  print("Accuracy on TRAINING data using fit_sklearn method : ",accuracy_score(yy, predictions_sklearn_train))
  predictions_sklearn = Logistic_Regression.predict_sklearn(XX_test, model1)
  print("Accuracy on TEST data using fit_sklearn method : ",accuracy_score(yy_test, predictions_sklearn))

  return predictions, predictions_sklearn

In [0]:
# Run the whole pipeline. The accuracy lines are printed inside run(); the
# tuple it returns (custom-model and sklearn test predictions) is printed here.
print(run())

Accuracy on TRAINING data using fit method :  0.8047752808988764
Accuracy on TEST data using fit method :  0.8761329305135952
Accuracy on TRAINING data using fit_sklearn method :  0.8019662921348315
Accuracy on TEST data using fit_sklearn method :  0.9063444108761329
(array([0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0,
       1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0,
       1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1,
       0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0,
       0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0,
       0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0,
       1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1,
       1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0,
 

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
