<a href="https://colab.research.google.com/github/southeastwind13/deep_learning/blob/main/10_Manual_Logistic_Regression.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Import Lib
import numpy as np
import copy
import matplotlib.pyplot as plt

In [7]:
# Generate Logistics data
def generate_logisticdata(n:int = 200, theta = np.array([[4], [-2]])):
    np.random.seed(1)
    X = np.random.rand(n, 2)
    z = np.dot(X, theta)
    prob = 1 / (1 + np.exp(-z))
    y = np.random.binomial(1, prob.flatten())
    return (X, y)

In [8]:
X, y = generate_logisticdata()

In [9]:
# Split data
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

In [10]:
# Reshape data to the correct dimension
# X (Nx:Number of feature, m:Number of Samples)
# Y (1, m)

X_train = X_train.T
y_train = y_train.reshape(1, y_train.shape[0])
X_test = X_test.T
y_test = y_test.reshape(1, y_test.shape[0])

In [11]:
# Sigmoid function
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

In [12]:
# Initialize parameter function base on dimension
def initials_parameters(dim):
  w = np.zeros((dim, 1)) * 0.01
  b = 0

  return w, b

In [13]:
def propagate(w, b, X, Y):

  # -- Forward --

  # Number of features
  m = X.shape[1]

  # Find the A = (w.T * x) + b
  A = sigmoid(np.dot(w.T, X) + b)

  # Find total cost function for all samples (Log Loss)
  cost = (-1/m) * (np.dot(Y, np.log(A).T) + np.dot((1-Y), np.log(1-A).T))

  # -- Backward --

  dw = (1/m) * np.dot(X, (A-Y).T) # Gradient of cost with respect of w
  db = (1/m) * np.sum(A-Y) # Gradient of cost with respect of b

  cost = np.squeeze(np.array(cost))
  grads = {"dw": dw, "db": db}

  return grads, cost

In [14]:
def optimize(w, b, X, Y, num_iterations, learning_rate, print_cost = False):

  w = copy.deepcopy(w)
  b = copy.deepcopy(b)

  costs = []

  for i in range(num_iterations):

      # Propagate
      grads, cost = propagate(w, b, X, Y)

      # get gradient
      dw = grads["dw"]
      db = grads["db"]

      # update parameters
      w = w - learning_rate * dw  # need to broadcast
      b = b - learning_rate * db

      # Record the costs
      if i % 100 == 0:
        costs.append(cost)

      # Print the cost every 100 training examples
        if print_cost and i % 100 == 0:
          print ("Cost after iteration %i: %f" % (i, cost))

      params = {"w": w,
                "b": b}

      grads = {"dw": dw,
              "db": db}

  return params, grads, costs

In [15]:
def predict(w, b, X):

    m = X.shape[1]
    Y_prediction = np.zeros((1, m)) # Create temp memory space for y_prediction
    w = w.reshape(X.shape[0], 1)
    print(w)

    A = sigmoid(np.dot(w.T, X) + b)

    for i in range(A.shape[1]):

        if A[0, i] > 0.5 :
            Y_prediction[0,i] = 1
        else:
            Y_prediction[0,i] =  0

    return Y_prediction

In [16]:
def model(X_train, Y_train, X_test, Y_test, num_iterations=2000, learning_rate=0.5, print_cost=False):

  # 1. Initials parameters
  w, b = initials_parameters(X_train.shape[0])

  # 2. Optimize parameters
  parameters, grads, costs = optimize(w, b, X_train, Y_train, num_iterations, learning_rate, print_cost)

  # 3. Retrieve parameters w and b from dictionary "parameters"
  w = parameters["w"]
  b = parameters["b"]

  # 4. Prediction
  Y_prediction_test = predict(w, b, X_test)
  Y_prediction_train = predict(w, b, X_train)

  print("train accuracy: {} %".format(100 - np.mean(np.abs(Y_prediction_train - Y_train)) * 100))
  print("test accuracy: {} %".format(100 - np.mean(np.abs(Y_prediction_test - Y_test)) * 100))

  d = {"costs": costs,
         "Y_prediction_test": Y_prediction_test,
         "Y_prediction_train" : Y_prediction_train,
         "w" : w,
         "b" : b,
         "learning_rate" : learning_rate,
         "num_iterations": num_iterations}

  return d

In [17]:
d = model(X_train, y_train, X_test, y_test, num_iterations = 5000, learning_rate = 0.01, print_cost = False)

[[ 2.34625446]
 [-0.69253358]]
[[ 2.34625446]
 [-0.69253358]]
train accuracy: 73.75 %
test accuracy: 60.0 %


In [18]:
index = 7
print(f"y = {str(y_test[0, index])}, your predicted is a {d['Y_prediction_test'][0, index]}")

y = 1, your predicted is a 1.0
