# In [16]:
import numpy as np
import matplotlib.pyplot as plt

def draw(x1, x2):
    """Plot a line through the given coordinates with ``plt.plot``.

    x1 holds the horizontal coordinates and x2 the matching vertical ones;
    both are passed to ``plt.plot`` unchanged. Nothing is returned.
    """
    plt.plot(x1, x2)
    
def sigmoid(score):
    """Return the sigmoid activation 1 / (1 + e^(-score)), element-wise."""
    denominator = 1 + np.exp(-score)
    return 1 / denominator

def calculate_error(p, y, points):
    """Return the mean cross-entropy error of predictions p against labels y.

    Computes -(1/m) * SUM(y*ln(p) + (1-y)*ln(1-p)) over all m points.

    Args:
        p: (m, 1) column of predicted probabilities (ndarray or np.matrix).
        y: (m, 1) column of 0/1 labels.
        points: the data matrix; only its row count m is used.

    Returns:
        A (1, 1) array (an np.matrix when p is one) holding the error.

    Note: a probability of exactly 0 or 1 makes np.log return -inf, so the
    result can be inf or nan in that case.
    """
    m = points.shape[0]
    # `@` is an explicit matrix product, so the sum over points is taken for
    # plain ndarrays as well as np.matrix inputs; `*` would broadcast ndarrays
    # element-wise into an (m, m) array instead of summing.
    positive_term = np.log(p).T @ y
    negative_term = np.log(1 - p).T @ (1 - y)
    cross_entropy = -(1 / m) * (positive_term + negative_term)
    return cross_entropy

def gradient_descent(line_parameters, points, y, alpha):
    """Fit the line parameters by batch gradient descent and plot the line.

    Runs 2000 update steps on the cross-entropy error, then draws the line
    w1*x1 + w2*x2 + b = 0 for the final parameters via draw().

    Args:
        line_parameters: (3, 1) column of [w1, w2, b]. The caller in this
            file passes an np.matrix, which makes `*` below a matrix product.
        points: (m, 3) array whose columns are x1, x2 and the constant bias
            input.
        y: (m, 1) column of 0/1 labels.
        alpha: learning rate that scales every update step.

    Returns:
        None. The caller's line_parameters object is not modified; the
        fitted line is only plotted.
    """
    m = points.shape[0]
    for _ in range(2000):
        p = sigmoid(points * line_parameters)
        # Gradient of the mean cross-entropy is points.T * (p - y) / m; the
        # step is that gradient scaled by the learning rate (multiply, not
        # divide, by alpha / m).
        gradient = (points.T * (p - y)) * (alpha / m)
        line_parameters = line_parameters - gradient

    # Only the final parameters are plotted, so the line's end points are
    # computed once after the loop.
    w1 = line_parameters.item(0)
    w2 = line_parameters.item(1)
    b = line_parameters.item(2)

    # Span the line between the left-most and right-most points.
    x1 = np.array([points[:, 0].min(), points[:, 0].max()])
    # Solve w1*x1 + w2*x2 + b = 0 for x2.
    x2 = (-b - (w1 * x1)) / w2
    draw(x1, x2)

# Number of points sampled for each region (the data set holds 2 * n_pts points)
n_pts = 100

# Constant input of 1 for every point, so the bias b is handled like a weight
bias = np.ones(n_pts)
np.random.seed(0)

# Red points in the top right corner
top_region = np.array([np.random.normal(10, 2, n_pts), np.random.normal(12, 2, n_pts), bias]).T
# Blue points in the bottom left corner
bottom_region = np.array([np.random.normal(5, 2, n_pts), np.random.normal(6, 2, n_pts), bias]).T

# Matrix (2 * n_pts, 3) containing all the coordinates as well as the bias
all_points = np.vstack((top_region, bottom_region))

# Matrix (3, 1) containing the weights and the bias of the line
# w1*x1 + w2*x2 + b = 0, all starting at zero
line_parameters = np.matrix([np.zeros(3)]).T

# (2 * n_pts, 1) matrix which contains the linear combination of the points
linear_combination = all_points * line_parameters

# Probability matrix of (2 * n_pts, 1)
probabilities = sigmoid(linear_combination)

# Label matrix of (2 * n_pts, 1): n_pts zeros for the red points followed by
# n_pts ones for the blue points (sized from n_pts rather than a fixed 200)
y = np.array([np.zeros(n_pts), np.ones(n_pts)]).reshape(2 * n_pts, 1)

_, ax = plt.subplots(figsize=(4, 4))
ax.scatter(top_region[:, 0], top_region[:, 1], color='r')
ax.scatter(bottom_region[:, 0], bottom_region[:, 1], color='blue')
gradient_descent(line_parameters, all_points, y, 0.06)
plt.show()

# probabilities was computed from the initial all-zero parameters above, so
# this prints the cross-entropy error before any training step
print(calculate_error(probabilities, y, all_points))

# Comments 

# The error can be written as: -[ln(P(red)) + ln(P(blue))]

# general formula for cross-entropy
# -SUM (y*ln(p) + (1-y)*ln(1-p))

# Gradient Descent 
# pts = points
# p = probability
# y = label
# m = number of points
# Gradient of the error E with respect to the line parameters:
# grad E = (pts.T * (p - y)) / m






