In [2]:
import numpy as np
from data_prep import features, targets, features_test, targets_test

def sigmoid(x):
    """
    Calculate sigmoid

    Evaluates 1 / (1 + exp(-x)) for a scalar or, element-wise, for a
    NumPy array.

    For very negative inputs exp(-x) overflows to inf. The quotient is
    then exactly 0.0, which is the correct limit, so NumPy's overflow
    warning is suppressed here instead of being passed on to the caller.
    """
    # The overflow is benign: 1 / (1 + inf) evaluates to 0.0.
    with np.errstate(over="ignore"):
        return 1 / (1 + np.exp(-x))

def update_weights(weights, features, targets, learnrate):
    """
    Complete a single epoch of gradient descent and return updated weights

    Args:
        weights: NumPy array of current weights. It is updated in place
            and the same array object is returned.
        features: The input records, one per row. May be a pandas
            DataFrame or anything np.asarray can convert to an array.
        targets: Iterable of target values, walked in step with the rows
            of features (extra items on either side are ignored by zip).
        learnrate: Factor applied to the averaged weight step.

    Returns:
        The weights array after the update. When features contains no
        records the weights are returned untouched.
    """
    # Accept DataFrames and plain arrays alike instead of requiring `.values`
    records = np.asarray(features)
    n_records = records.shape[0]
    if n_records == 0:
        # No records means no gradient; dividing by zero below would
        # otherwise overwrite the weights with NaN.
        return weights

    del_w = np.zeros(weights.shape)

    # Loop through all records, x is the input, y is the target
    for x, y in zip(records, targets):
        # Calculate the output of f(h) by passing h (the dot product
        # of x and weights) into the activation function (sigmoid).
        output = sigmoid(np.dot(x, weights))

        # Calculate the error by subtracting the network output
        # from the target (y).
        error = y - output

        # Calculate the error term by multiplying the error by the
        # gradient. The gradient of the sigmoid f(h) is f(h)*(1-f(h)),
        # so it can be computed from the output already at hand.
        error_term = error * (output * (1 - output))

        # Update the weight step by multiplying the error term by
        # the input (x) and adding this to the current weight step.
        # np.float64 converts the product to a float64 array before it
        # is accumulated.
        del_w += np.float64(x * error_term)

    # Update the weights by adding the learning rate times the
    # change in weights divided by the number of records.
    weights += learnrate * (del_w / n_records)
    return weights

def gradient_descent(features, targets, epochs=1000, learnrate=0.5):
    """
    Perform the complete gradient descent process on a given dataset

    Args:
        features: Training inputs. Must provide `.shape` and `.values`
            (presumably a pandas DataFrame - confirm against data_prep).
        targets: Training targets, one per record.
        epochs: Number of passes made with update_weights.
        learnrate: Learning rate forwarded to update_weights.

    Returns:
        The weight array after the final epoch.

    Side effects:
        Reseeds NumPy's global random generator with 42 and prints the
        training mean squared error about ten times during the run.
    """
    # Use the same seed to make debugging easier
    np.random.seed(42)
    # Initialize loss and weights
    last_loss = None
    n_features = features.shape[1]
    weights = np.random.normal(scale=1/n_features**.5, size=n_features)

    # Report about ten times per run. An integer interval (at least 1)
    # avoids testing a float remainder for equality, which behaves
    # erratically when epochs is not a multiple of 10.
    report_every = max(1, epochs // 10)

    # Repeatedly update the weights based on the number of epochs
    for e in range(epochs):
        weights = update_weights(weights, features, targets, learnrate)

        # Print the MSE on the training set once per reporting interval.
        if e % report_every == 0:
            out = sigmoid(np.float64(np.dot(features.values, weights)))
            loss = np.mean((out - targets) ** 2)
            # Compare with None explicitly: a previous loss of exactly 0.0
            # is falsy and would otherwise hide an increase.
            if last_loss is not None and last_loss < loss:
                print("Train loss: ", loss, "  WARNING - Loss Increasing")
            else:
                print("Train loss: ", loss)
            last_loss = loss

    return weights

# Train the weights, then score them against the test data
weights = gradient_descent(features, targets)
# First the weighted sum for each test record, then the sigmoid activation
tes_out = np.float64(np.dot(features_test, weights))
tes_out = sigmoid(tes_out)
# An activation above 0.5 counts as a positive prediction
predictions = tes_out > 0.5
# Accuracy is the mean of the element-wise matches with the test targets
accuracy = np.mean(predictions == targets_test)
print("Prediction accuracy: {:.3f}".format(accuracy))


Train loss:  0.2627609384996635
Train loss:  0.20928619409324875
Train loss:  0.20084292908073426
Train loss:  0.19862156475527873
Train loss:  0.1977985139668603
Train loss:  0.19742577912189863
Train loss:  0.1972350774624106
Train loss:  0.1971294562509248
Train loss:  0.19706766341315082
Train loss:  0.19703005801777368
Prediction accuracy: 0.725
