In [1]:
#Logistic regression ML example

In [2]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification
from sklearn.datasets import make_moons

In [3]:
# Synthetic binary-classification data: two features, both informative,
# none redundant, one cluster per class, with a fixed seed.
X, y = make_classification(
    n_features=2,
    n_informative=2,
    n_redundant=0,
    n_clusters_per_class=1,
    random_state=1,
)

In [4]:
def sigmoid(z):
    """Logistic function 1 / (1 + e^(-z)), applied element-wise by numpy.

    z -- scalar or numpy array of pre-activation values
    """
    denominator = 1 + np.exp(-z)
    return 1.0 / denominator

In [5]:
def plot_decision_boundary(X, w, b):
    """Scatter both classes and draw the line where w.X + b = 0.

    X -- inputs; column 0 is plotted as "Feature 1", column 1 as "Feature 2"
    w -- weights; only w[0] and w[1] are used
    b -- bias

    The class labels come from the module-level ``y`` (it is not a
    parameter). Axis limits are fixed: x in [-2, 2], y in [0, 2.2].
    """
    # Write the boundary as x2 = slope*x1 + intercept and equate it with
    # w[0]*x1 + w[1]*x2 + b = 0 to obtain slope and intercept.
    first_feature = X[:, 0]
    x1 = [min(first_feature), max(first_feature)]
    slope = -w[0]/w[1]
    intercept = -b/w[1]
    x2 = slope*x1 + intercept

    # Class 0 as green triangles, class 1 as blue squares
    plt.figure(figsize=(10,8))
    is_class0 = y==0
    is_class1 = y==1
    plt.plot(X[:, 0][is_class0], X[:, 1][is_class0], "g^")
    plt.plot(X[:, 0][is_class1], X[:, 1][is_class1], "bs")
    plt.xlim([-2,2])
    plt.ylim([0, 2.2])
    plt.xlabel("Feature 1")
    plt.ylabel("Feature 2")
    plt.title('Decision Boundary')

    # Boundary line in yellow, drawn between the extremes of feature 1
    plt.plot(x1, x2, 'y-')

In [6]:
def loss(y, y_hat):
    """Mean binary cross-entropy between targets and predicted probabilities.

    y     -- true/target values (0 or 1)
    y_hat -- predicted probabilities, same shape as y

    Returns -mean(y*log(y_hat) + (1 - y)*log(1 - y_hat)).
    """
    y = np.asarray(y, dtype=float)

    # Keep probabilities strictly inside (0, 1) so neither log() receives 0,
    # which would otherwise produce inf/nan for saturated predictions.
    eps = 1e-15
    y_hat = np.clip(np.asarray(y_hat, dtype=float), eps, 1 - eps)

    # Both terms are added: the first penalises positives predicted low,
    # the second penalises negatives predicted high.
    return -np.mean(y*np.log(y_hat) + (1 - y)*np.log(1 - y_hat))

In [7]:
def gradients(X, y, y_hat):
    """Gradients of the loss with respect to the weights and the bias.

    X     -- inputs, one row per training example
    y     -- true/target values
    y_hat -- hypothesis/predictions

    Returns (dw, db): the gradient w.r.t. the weights and w.r.t. the bias,
    both averaged over the rows of X.
    """
    # Number of training examples
    n_examples = X.shape[0]

    # Prediction error shared by both gradients
    error = y_hat - y

    grad_w = (1/n_examples)*np.dot(X.T, error)
    grad_b = (1/n_examples)*np.sum(error)

    return grad_w, grad_b

In [8]:
def normalize(X):
    """Standardise every feature (column) of X to zero mean and unit spread.

    X -- inputs, one row per example and one column per feature

    Returns a new array (X - mean) / std computed column-wise; X itself is
    not modified. A column whose std is 0 (a constant feature) is only
    centred, so it comes back as zeros instead of NaN from 0/0.
    """
    mean = X.mean(axis=0)
    std = X.std(axis=0)

    # Dividing by 1 leaves a constant (already centred) column at 0.
    std = np.where(std == 0, 1.0, std)

    # One vectorised pass covers all features; repeating it per feature
    # would only redo the same whole-matrix computation.
    return (X - mean) / std

In [9]:
def train(X, y, bs, epochs, lr):
    """Fit logistic-regression weights and bias with mini-batch gradient descent.

    X      -- inputs, shape (m, n); used exactly as given (no scaling here)
    y      -- true/target values, m entries (reshaped to (m, 1) internally)
    bs     -- batch size, a positive integer
    epochs -- number of passes over the data
    lr     -- learning rate

    Returns (w, b, losses): the weights with shape (n, 1), the bias, and a
    list holding the loss on the full data after every epoch.

    Raises ValueError if bs is not positive.
    """
    # bs == 0 would divide by zero and a negative bs would skip every batch.
    if bs <= 0:
        raise ValueError("bs (batch size) must be a positive integer")

    # m -> number of training examples, n -> number of features
    m, n = X.shape

    # Initialising weights and bias to zeros
    w = np.zeros((n, 1))
    b = 0

    # Column vector so it lines up with the (batch, 1) predictions
    y = y.reshape(m, 1)

    # Features are used unscaled. The model has to be trained and queried on
    # the same representation, so any normalisation must be applied by the
    # caller to both the training and the prediction inputs. (A normalised
    # copy used to be computed here and then discarded.)

    # Loss after each epoch
    losses = []

    for _ in range(epochs):
        # Consecutive slices of at most bs rows, in the given order
        for start_i in range(0, m, bs):
            end_i = start_i + bs
            xb = X[start_i:end_i]
            yb = y[start_i:end_i]

            # Hypothesis/prediction for the batch
            y_hat = sigmoid(np.dot(xb, w) + b)

            # Gradient of the loss w.r.t. the parameters
            dw, db = gradients(xb, yb, y_hat)

            # Gradient-descent step
            w -= lr*dw
            b -= lr*db

        # Track the loss on the whole data set once per epoch
        losses.append(loss(y, sigmoid(np.dot(X, w) + b)))

    return w, b, losses

In [10]:
def predict(X):
    """Predict a class label (0 or 1) for every row of X.

    X -- inputs, one row per example

    Uses the module-level ``w`` and ``b`` (they are not parameters), so
    those must already be assigned, e.g. from ``train``.

    Returns a 1-D integer numpy array with one label per row.

    NOTE: a later cell defines another ``predict`` taking
    (X, y, theta, alpha, epochs); once that cell runs it replaces this one.
    """
    # Features are used as given. (A normalised copy used to be computed
    # here and then discarded; the prediction always came from raw X.)
    preds = sigmoid(np.dot(X, w) + b)

    # y_hat strictly greater than 0.5 --> class 1, otherwise class 0
    return (preds > 0.5).astype(int).reshape(-1)

In [11]:
# Fit weights and bias on the current X, y; l holds the per-epoch losses
w, b, l = train(
    X, y,
    bs=100,
    epochs=1000,
    lr=0.01,
)

# Decision-boundary plot (currently disabled)
#plot_decision_boundary(X, w, b)

In [12]:
def accuracy(y, y_hat):
    """Fraction of predictions that equal the true labels.

    y     -- true labels
    y_hat -- predicted labels, one per entry of y

    Both inputs are flattened before comparing, so an (m, 1) column of
    labels and an (m,) vector of predictions are matched element by element
    instead of being broadcast into an (m, m) comparison.

    Raises ValueError if the two inputs do not hold the same number of
    labels.
    """
    y_true = np.ravel(y)
    y_pred = np.ravel(y_hat)

    if y_true.size != y_pred.size:
        raise ValueError(
            "y and y_hat must contain the same number of labels "
            "(got %d and %d)" % (y_true.size, y_pred.size)
        )

    return np.sum(y_true == y_pred) / y_true.size

# Accuracy on the first data set: predictions are compared with the labels y
# (the feature matrix X must not be passed as the "true" values).
accuracy(y, y_hat=predict(X))


# Second data set: two interleaving half-moons. Seeded so that re-running the
# notebook draws the same sample.
X, y = make_moons(n_samples=100, noise=0.24, random_state=1)

# Training
w, b, l = train(X, y, bs=100, epochs=1000, lr=0.01)


# Plotting Decision Boundary
#plot_decision_boundary(X, w, b)

accuracy(y, predict(X))



0.83

In [13]:
# Developing the hypothesis

def hypothesis(X, theta):
    '''
    Implementation of the sigmoid function
    Takes an input of X and theta

    Computes z = theta . X^T and returns 1 / (1 + exp(-z)), one value per
    row of X, limited to the open interval (0, 1).
    '''

    z = np.dot(theta, X.T)

    output = 1 / (1 + np.exp(-z))

    # The sigmoid saturates to exactly 0 or 1 in floating point, which makes
    # log(h) or log(1 - h) blow up downstream. Clip into
    # [eps, 1 - eps] to guard both ends; subtracting a constant instead
    # would push very small outputs below zero.
    eps = 0.000000000001
    return np.clip(output, eps, 1 - eps)

In [14]:
# Determining the cost function

def cost(X, y, theta):
    '''
    Cross-entropy cost of the hypothesis for parameters theta.

    X     -- inputs, one row per example
    y     -- target values (0 or 1), one per row of X
    theta -- parameters passed through to hypothesis

    Returns -(1/len(X)) * sum(y*log(h) + (1 - y)*log(1 - h)),
    where h = hypothesis(X, theta).
    '''

    h = hypothesis(X, theta)

    # Per-example log-likelihood of the observed labels
    log_likelihood = y * np.log(h) + (1 - y) * np.log(1 - h)

    scale = - (1 / len(X))
    return scale * np.sum(log_likelihood)

In [15]:
# Updating theta values using gradient descent

def gradient_descent(X, y, theta, alpha, epochs):
    '''
    Batch gradient descent on the cost function.

    X      -- inputs; must provide .columns and .iloc (column access by position)
    y      -- target values, one per row of X
    theta  -- parameters, one per column of X; updated IN PLACE
    alpha  -- learning rate
    epochs -- number of update rounds

    Returns (J, theta): J lists the cost before the first round and after
    every round (epochs + 1 entries); theta is the same object that was
    passed in, now holding the updated values.
    '''
    n_rows = len(X)
    history = [cost(X, y, theta)]
    for _ in range(0, epochs):
        # Predictions are taken once per round, so every coordinate is
        # updated from the same hypothesis values.
        predicted = hypothesis(X, theta)
        for col in range(0, len(X.columns)):
            theta[col] -= (alpha/n_rows) * np.sum((predicted-y) * X.iloc[:, col])
        history.append(cost(X, y, theta))
    return history, theta

In [16]:
def predict(X, y, theta, alpha, epochs):
    '''
    Fit theta with gradient descent, then score the fitted model on X, y.

    X      -- inputs, forwarded to gradient_descent and hypothesis
    y      -- target values (0 or 1), one per row of X
    theta  -- initial parameters
    alpha  -- learning rate
    epochs -- number of gradient-descent rounds

    Returns (J, acc): the cost history from gradient_descent and the
    fraction of rows whose thresholded hypothesis (>= 0.5 --> 1, else 0)
    equals y.

    NOTE: this shares its name with the earlier one-argument predict(X)
    and replaces it once this cell has run.
    '''
    J, th = gradient_descent(X, y, theta, alpha, epochs)

    # Score with the parameters the optimiser returned rather than relying
    # on the input theta having been modified in place.
    h = np.asarray(hypothesis(X, th))

    # Threshold the probabilities into class labels
    labels = (h >= 0.5).astype(int)

    y_true = np.asarray(list(y))
    acc = np.sum(y_true == labels) / len(y_true)

    # Return the computed accuracy value (not the accuracy function object).
    return J, acc