In [None]:
# Libraries
import numpy as np
import matplotlib.pyplot as plt
from utils import *
import copy
import math

%matplotlib inline

In [None]:
# Load the first training set from data/ex2data1.txt.
# NOTE(review): load_data comes from the star-import of `utils`; presumably it
# returns a (features, labels) pair of NumPy arrays — confirm against utils.
X_train, y_train = load_data("data/ex2data1.txt")

# Show the first five feature rows and the container type of X_train.
print("First five elements in X_train are:\n", X_train[:5])
print("Type of X_train:",type(X_train))

In [None]:
# Show the first five labels and the container type of y_train.
print("First five elements in y_train are:\n", y_train[:5])
print("Type of y_train:",type(y_train))

In [None]:
# Plot the training examples, with separate legend labels for the two classes.
# NOTE(review): plot_data comes from `utils`; presumably it draws onto the
# current pyplot figure, which the plt.* calls below then decorate — confirm.
# y_train[:] passes a full slice of the labels.
plot_data(X_train, y_train[:], pos_label="Admitted", neg_label="Not admitted")

# Set the y-axis label
plt.ylabel('Exam 2 score') 
# Set the x-axis label
plt.xlabel('Exam 1 score') 
plt.legend(loc="upper right")
plt.show()

In [None]:
# Sigmoid (logistic) function
def sigmoid(z):
    """
    Compute the sigmoid (logistic) function 1 / (1 + e^(-z)).

    Args:
      z : scalar or ndarray; the function is applied element-wise

    Returns:
      Value(s) in the open interval (0, 1), same shape as z
    """
    exp_neg_z = np.exp(-z)
    denominator = 1 + exp_neg_z
    return 1 / denominator

# Spot-check on a scalar: 1 / (1 + exp(0)) is 0.5.
value = 0
print (f"sigmoid({value}) = {sigmoid(value)}")

# Spot-check on an array: sigmoid is applied element-wise.
print ("sigmoid([ -1, 0, 1, 2]) = " + str(sigmoid(np.array([-1, 0, 1, 2]))))

# NOTE(review): sigmoid_test comes from `utils`; presumably it runs a set of
# checks against the implementation — confirm what it validates.
sigmoid_test(sigmoid)

In [None]:
# Cost function for logistic regression (average per-example loss), minimized by gradient descent
def compute_cost(X, y, w, b, *argv):
    """
    Compute the average logistic (binary cross-entropy) cost over all examples.

    The per-example loss -y*log(f) - (1-y)*log(1-f), with f = sigmoid(w.x + b),
    is evaluated in its numerically stable form so that a saturated sigmoid
    (f rounding to exactly 0.0 or 1.0) does not produce 0 * log(0) = NaN.

    Args:
      X : (ndarray Shape (m,n)) data, m examples by n features
      y : (array-like Shape (m,)) target values (0 or 1)
      w : (array-like Shape (n,)) model weights
      b : (scalar)                model bias
      *argv : ignored; allows the function to be called with a trailing
              extra argument (e.g. a regularization strength)

    Returns:
      total_cost : (scalar) mean loss over the m examples

    Raises:
      ValueError: if X contains no examples (the mean is undefined)
    """
    m, n = X.shape
    if m == 0:
        raise ValueError("compute_cost requires at least one training example")

    # Flatten the labels so an (m,1) column cannot broadcast against (m,) logits.
    y = np.asarray(y, dtype=float).reshape(-1)

    # Linear part of the model for every example at once.
    z_wb = np.dot(X, w) + b

    # -log(sigmoid(z))     = log(1 + exp(-z)) = logaddexp(0, -z)
    # -log(1 - sigmoid(z)) = log(1 + exp( z)) = logaddexp(0,  z)
    loss = y * np.logaddexp(0, -z_wb) + (1 - y) * np.logaddexp(0, z_wb)

    total_cost = np.sum(loss) / m
    return total_cost


# Compute and display cost with w and b initialized to zeros
# m and n are bound at module level here; n is reused later by the
# gradient check (np.zeros(n)).
m, n = X_train.shape
initial_w = np.zeros(n)
initial_b = 0.
cost = compute_cost(X_train, y_train, initial_w, initial_b)
print('Cost at initial w and b (zeros): {:.3f}'.format(cost))

# Compute and display cost with non-zero w and b
# (test_w has two entries, so this assumes X_train has two feature columns.)
test_w = np.array([0.2, 0.2])
test_b = -24.
cost = compute_cost(X_train, y_train, test_w, test_b)
print('Cost at test w and b (non-zeros): {:.3f}'.format(cost))

In [None]:
# Gradient of the logistic cost with respect to w and b (used by gradient descent)
def compute_gradient(X, y, w, b, *argv): 
    """
    Compute the gradient of the (unregularized) logistic cost.

    Vectorized form of
        dj_db    = (1/m) * sum_i (f_i - y_i)
        dj_dw[j] = (1/m) * sum_i (f_i - y_i) * X[i, j]
    with f_i = sigmoid(w . X[i] + b). Replaces the per-element Python loops,
    which dominate run time when called once per descent iteration.

    Args:
      X : (ndarray Shape (m,n)) data, m examples by n features
      y : (array-like Shape (m,)) target values (0 or 1)
      w : (array-like Shape (n,)) model weights
      b : (scalar)                model bias
      *argv : ignored; allows the function to be called with a trailing
              extra argument (e.g. a regularization strength)

    Returns:
      dj_db : (scalar)              gradient of the cost w.r.t. b
      dj_dw : (ndarray Shape (n,))  gradient of the cost w.r.t. w

    Raises:
      ValueError: if X contains no examples (the mean is undefined)
    """
    m, n = X.shape
    if m == 0:
        raise ValueError("compute_gradient requires at least one training example")

    # Flatten the labels so an (m,1) column cannot broadcast against (m,) predictions.
    y = np.asarray(y, dtype=float).reshape(-1)

    # Prediction error for every example at once.
    f_wb = sigmoid(np.dot(X, w) + b)
    err = f_wb - y

    dj_dw = np.dot(X.T, err) / m
    dj_db = np.sum(err) / m
    return dj_db, dj_dw

# Compute and display gradient with w and b initialized to zeros
# (n is the module-level feature count bound earlier from X_train.shape.)
initial_w = np.zeros(n)
initial_b = 0.

dj_db, dj_dw = compute_gradient(X_train, y_train, initial_w, initial_b)
print(f'dj_db at initial w and b (zeros):{dj_db}' )
print(f'dj_dw at initial w and b (zeros):{dj_dw.tolist()}' )

# Compute and display gradient with non-zero w and b
test_w = np.array([ 0.2, -0.5])
test_b = -24
dj_db, dj_dw  = compute_gradient(X_train, y_train, test_w, test_b)

print('dj_db at test w and b:', dj_db)
print('dj_dw at test w and b:', dj_dw.tolist())


In [None]:
def gradient_descent(X, y, w_in, b_in, cost_function, gradient_function, alpha, num_iters, lambda_): 
    """
    Run batch gradient descent for a fixed number of iterations.

    Args:
      X, y              : training data, passed through unchanged to the callbacks
      w_in              : initial weights
      b_in              : initial bias
      cost_function     : callable(X, y, w, b, lambda_) -> cost
      gradient_function : callable(X, y, w, b, lambda_) -> (dj_db, dj_dw)
      alpha             : learning rate
      num_iters         : number of update steps to perform
      lambda_           : regularization strength forwarded to both callbacks

    Returns:
      w_in      : weights after the final update
      b_in      : bias after the final update
      J_history : cost after each of the first 100000 updates
      w_history : weights recorded at each progress report
    """
    # Cap on stored costs so very long runs do not grow the list without bound.
    max_history = 100000

    # Cost J per iteration and w at each report, primarily for graphing later
    J_history = []
    w_history = []

    # Report roughly ten times over the run. Loop-invariant, so computed once;
    # when num_iters <= 0 the loop body never runs and this value is unused.
    report_every = math.ceil(num_iters / 10)

    for i in range(num_iters):

        # Calculate the gradient and update the parameters
        dj_db, dj_dw = gradient_function(X, y, w_in, b_in, lambda_)

        w_in = w_in - alpha * dj_dw
        b_in = b_in - alpha * dj_db

        # Save cost J for the first max_history iterations
        cost = None
        if i < max_history:
            cost = cost_function(X, y, w_in, b_in, lambda_)
            J_history.append(cost)

        # Print cost at ~10 evenly spaced iterations and at the last one
        if i % report_every == 0 or i == (num_iters - 1):
            w_history.append(w_in)
            if cost is None:
                # Past the history cap: evaluate the cost now rather than
                # printing the stale last entry of J_history.
                cost = cost_function(X, y, w_in, b_in, lambda_)
            print(f"Iteration {i:4}: Cost {float(cost):8.2f}   ")

    return w_in, b_in, J_history, w_history 

# Seed NumPy's global RNG so the random initial weights are reproducible.
np.random.seed(1)
# Small random initial weights: rand() is in [0, 1), so these lie in [-0.005, 0.005).
initial_w = 0.01 * (np.random.rand(2) - 0.5)
initial_b = -8

# Some gradient descent settings
iterations = 10000
alpha = 0.001

# The final positional argument (0) is lambda_; compute_cost and
# compute_gradient accept it via *argv and ignore it.
w,b, J_history,_ = gradient_descent(X_train ,y_train, initial_w, initial_b, 
                                   compute_cost, compute_gradient, alpha, iterations, 0)

# NOTE(review): plot_decision_boundary comes from `utils`; presumably it draws
# the data and the learned boundary on the current figure — confirm.
plot_decision_boundary(w, b, X_train, y_train)
# Set the y-axis label
plt.ylabel('Exam 2 score') 
# Set the x-axis label
plt.xlabel('Exam 1 score') 
plt.legend(loc="upper right")
plt.show()

In [None]:
# to predict whether the label is either 0 or 1 for logistic regression, we can use the following function:
def predict(X, w, b): 
    """
    Predict a 0/1 label for every example using learned logistic
    regression parameters.

    Args:
      X : (ndarray Shape (m,n)) data, m examples by n features
      w : (ndarray Shape (n,))  model weights
      b : (scalar)              model bias

    Returns:
      p : (ndarray Shape (m,)) 1.0 where sigmoid(w.x + b) >= 0.5, else 0.0
    """
    num_examples, num_features = X.shape
    predictions = np.zeros(num_examples)

    for row in range(num_examples):
        # Accumulate the dot product w . X[row] one feature at a time.
        logit = 0
        for col in range(num_features):
            logit = logit + w[col]*X[row][col]

        probability = sigmoid(logit + b)

        # Threshold the probability at 0.5 (stored as 0.0 / 1.0).
        predictions[row] = probability >= 0.5

    return predictions

# Testing your predict code
# Seeded so the random weights and inputs (and hence the printed output) are reproducible.
np.random.seed(1)
tmp_w = np.random.randn(2)
tmp_b = 0.3    
# Four random examples with two features each, shifted by -0.5.
tmp_X = np.random.randn(4, 2) - 0.5

tmp_p = predict(tmp_X, tmp_w, tmp_b)
print(f'Output of predict: shape {tmp_p.shape}, value {tmp_p}')

In [None]:
# Load the dataset for regularized logistic regression.
# NOTE: this rebinds X_train and y_train, replacing the first dataset; every
# cell below this point operates on data/ex2data2.txt.
X_train, y_train = load_data("data/ex2data2.txt")

# Show the first five feature rows and the container type.
print("X_train:", X_train[:5])
print("Type of X_train:",type(X_train))

# Show the first five labels and the container type.
print("y_train:", y_train[:5])
print("Type of y_train:",type(y_train))
# Check the dimensions of the data
print ('The shape of X_train is: ' + str(X_train.shape))
print ('The shape of y_train is: ' + str(y_train.shape))
print ('We have m = %d training examples' % (len(y_train)))

In [None]:
# Visualize the data, with separate legend labels for the two classes.
# NOTE(review): plot_data comes from `utils`; presumably it draws onto the
# current pyplot figure — confirm.
plot_data(X_train, y_train[:], pos_label="Accepted", neg_label="Rejected")

# Label the axes, add the legend and render the figure.
plt.ylabel('Microchip Test 2') 
plt.xlabel('Microchip Test 1') 
plt.legend(loc="upper right")
plt.show()

In [None]:
# now applying regularized cost function
def compute_cost_reg(X, y, w, b, lambda_ = 1):
    """
    Compute the regularized logistic cost: the unregularized cost plus
    (lambda_ / (2*m)) * sum_j w[j]**2. The bias b is not penalized.

    Args:
      X : (ndarray Shape (m,n)) data, m examples by n features
      y : (ndarray Shape (m,))  target values
      w : (ndarray Shape (n,))  model weights
      b : (scalar)              model bias
      lambda_ : (scalar)        regularization strength

    Returns:
      total_cost : (scalar) regularized cost
    """
    num_examples, num_features = X.shape

    # Data-fit term, delegated to the unregularized cost.
    data_cost = compute_cost(X, y, w, b) 

    # Sum of squared weights over the first num_features entries of w.
    squared_weight_sum = 0.
    for idx in range(num_features):
        squared_weight_sum = squared_weight_sum + w[idx]**2

    penalty = ( lambda_/(2 * num_examples) ) * squared_weight_sum
    return data_cost + penalty

# Call compute_cost_reg once to check the implementation.
# NOTE(review): map_feature comes from `utils`; presumably it expands the two
# input columns into a larger set of features — confirm.
X_mapped = map_feature(X_train[:, 0], X_train[:, 1])
# Seeded so the random weights (and therefore the printed cost) are reproducible.
np.random.seed(1)
# One weight per mapped feature, drawn from [-0.5, 0.5).
initial_w = np.random.rand(X_mapped.shape[1]) - 0.5
initial_b = 0.5
lambda_ = 0.5
cost = compute_cost_reg(X_mapped, y_train, initial_w, initial_b, lambda_)

print("Regularized cost :", cost)

In [None]:

def compute_gradient_reg(X, y, w, b, lambda_ = 1):
    """
    Compute the gradient of the regularized logistic cost.

    Starts from the unregularized gradient and adds (lambda_ / m) * w[j]
    to each weight component. The bias gradient is returned unchanged.

    Args:
      X : (ndarray Shape (m,n)) data, m examples by n features
      y : (ndarray Shape (m,))  target values
      w : (ndarray Shape (n,))  model weights
      b : (scalar)              model bias
      lambda_ : (scalar)        regularization strength

    Returns:
      dj_db : (scalar)             gradient of the cost w.r.t. b
      dj_dw : (ndarray Shape (n,)) gradient of the regularized cost w.r.t. w
    """
    num_examples, num_features = X.shape

    dj_db, dj_dw = compute_gradient(X, y, w, b)

    # Same factor for every weight, so compute it once.
    reg_factor = lambda_/num_examples
    for idx in range(num_features):
        dj_dw[idx] += reg_factor * w[idx]

    return dj_db, dj_dw

# Check compute_gradient_reg on the mapped features with seeded random weights.
# NOTE(review): map_feature comes from `utils`; presumably it expands the two
# input columns into a larger set of features — confirm.
X_mapped = map_feature(X_train[:, 0], X_train[:, 1])
np.random.seed(1) 
# One weight per mapped feature, drawn from [-0.5, 0.5).
initial_w  = np.random.rand(X_mapped.shape[1]) - 0.5 
initial_b = 0.5

lambda_ = 0.5
dj_db, dj_dw = compute_gradient_reg(X_mapped, y_train, initial_w, initial_b, lambda_)

# Only the first four components of dj_dw are printed.
print(f"dj_db: {dj_db}", )
print(f"First few elements of regularized dj_dw:\n {dj_dw[:4].tolist()}", )


In [None]:
# Initialize fitting parameters
# Seeded so the random initial weights are reproducible; one weight per
# column of X_mapped, drawn from [-0.5, 0.5).
np.random.seed(1)
initial_w = np.random.rand(X_mapped.shape[1])-0.5
initial_b = 1.

# Set regularization parameter lambda_ (you can try varying this)
lambda_ = 0.01    

# Some gradient descent settings
iterations = 10000
alpha = 0.01

# Train with the regularized cost and gradient; lambda_ is forwarded to both.
# Rebinds w, b and J_history; the fourth return value (w_history) is discarded.
w,b, J_history,_ = gradient_descent(X_mapped, y_train, initial_w, initial_b, 
                                    compute_cost_reg, compute_gradient_reg, 
                                    alpha, iterations, lambda_)

In [None]:
# Plot the decision boundary for the parameters learned on the mapped features.
# NOTE(review): plot_decision_boundary comes from `utils`; its handling of
# mapped (more than two column) inputs is not visible here — confirm.
plot_decision_boundary(w, b, X_mapped, y_train)
plt.ylabel('Microchip Test 2') 
plt.xlabel('Microchip Test 1') 
plt.legend(loc="upper right")
plt.show()

In [None]:
# Compute accuracy on the training set
p = predict(X_mapped, w, b)

# Accuracy is the fraction of predictions equal to the labels, printed as a percentage.
print('Train Accuracy: %f'%(np.mean(p == y_train) * 100))