**Gradient descent**

In [1]:
# Importing the necessary libraries
import pandas as pd
import numpy as np
import json
import os

In [2]:
# Let pandas wrap the repr of wide DataFrames across several lines
pd.options.display.expand_frame_repr = True

In [3]:
# Reading the training inputs (X_train) and targets (t_train) from disk
X_train, t_train = (
    pd.read_csv(csv_path)
    for csv_path in ("dataset/X_train.csv", "dataset/t_train.csv")
)

In [4]:
# Swapping the pandas containers for plain NumPy arrays
X_train, t_train = np.array(X_train), np.array(t_train)

In [5]:
# Collapsing t_train into a one-dimensional vector
t_train = np.reshape(t_train, -1)

In [6]:
# Adding the bias term: a leading column of ones lets the first weight act as
# the intercept. The guard keeps this cell idempotent, so re-running it does
# not stack a second column of ones onto an already-augmented matrix.
if not np.all(np.asarray(X_train)[:, 0] == 1):
    X_train = np.hstack([np.ones([X_train.shape[0], 1]), X_train])

In [7]:
# Reporting the dimensions of both arrays as a sanity check
for label, array in (("X_train shape: ", X_train), ("t_train shape: ", t_train)):
    print(label, array.shape)

X_train shape:  (3196, 10)
t_train shape:  (3196,)


In [8]:
# Defining a function to save the results in a json file
def create_empty_json_file(filename):
    """Ensure ``results/<filename>`` exists, seeding it with an empty JSON list.

    If the file is already present it is left untouched. Otherwise the
    containing directory is created when missing and the file is written
    with ``[]`` as its content.

    Parameters
    ----------
    filename : str
        Name of the JSON file, relative to the ``results`` directory.
    """
    path = f"results/{filename}"

    # Nothing to do when the file is already there
    if os.path.isfile(path):
        return

    # open(..., "w") does not create missing directories, so make sure the
    # parent folder exists first (a fresh checkout has no results/ folder)
    os.makedirs(os.path.dirname(path), exist_ok=True)

    # Creating the file with an empty list as its content
    with open(path, "w") as json_file:
        json.dump([], json_file)

def save_results(results, filename):
    """Append ``results`` to the JSON list stored in ``results/<filename>``.

    The file is created (holding an empty list) by ``create_empty_json_file``
    when it does not exist yet; the whole list is then read, extended with
    the new entry and written back.

    Parameters
    ----------
    results : object
        JSON-serialisable entry to append.
    filename : str
        Name of the JSON file, relative to the ``results`` directory.
    """
    # Make sure there is a file to read from
    create_empty_json_file(filename)

    target = f"results/{filename}"

    # Read the entries stored so far
    with open(target) as source:
        stored_entries = json.load(source)

    # Add the new entry at the end
    stored_entries.append(results)

    # Write the extended list back, replacing the previous content
    with open(target, "w") as sink:
        json.dump(stored_entries, sink)

In [9]:
# Defining the sigmoid function
def sigmoid(z):
    """Logistic function ``1 / (1 + exp(-z))``, evaluated without overflow.

    The textbook formula computes ``exp(-z)``, which overflows (and emits a
    RuntimeWarning) for large negative ``z``. Here only ``exp(-|z|)`` is
    evaluated, which always lies in (0, 1], and the algebraically equivalent
    form ``exp(z) / (1 + exp(z))`` is used for negative inputs.

    Parameters
    ----------
    z : scalar or array_like
        Input value(s).

    Returns
    -------
    numpy.float64 or numpy.ndarray
        Element-wise sigmoid; a scalar input yields a scalar result.
    """
    z = np.asarray(z, dtype=float)
    e = np.exp(-np.abs(z))  # never overflows: the exponent is <= 0
    out = np.where(z >= 0, 1.0 / (1.0 + e), e / (1.0 + e))
    # Indexing with () turns a 0-d array back into a scalar and is a no-op
    # (returns the array itself) for every other shape
    return out[()]

In [10]:
# Small offset added inside the logarithms to guard against log(0)
epsilon = 1e-5

# Defining the cost function
def cost_function(X, t, w):
    """Mean binary cross-entropy of the logistic model ``sigmoid(X . w)``.

    Parameters
    ----------
    X : numpy.ndarray
        Input matrix with one sample per row.
    t : numpy.ndarray
        Target values, one per row of ``X``.
    w : numpy.ndarray
        Parameter vector.

    Returns
    -------
    float
        Negative log-likelihood (with ``epsilon`` added inside each
        logarithm) divided by the number of rows of ``X``.
    """
    n_rows = X.shape[0]
    probabilities = sigmoid(np.dot(X, w))

    # Contribution of each sample for t = 1 and for t = 0 respectively
    positive_term = t * np.log(probabilities + epsilon)
    negative_term = (1 - t) * np.log(1 - probabilities + epsilon)

    log_likelihood = np.sum(positive_term + negative_term)
    return -log_likelihood / n_rows

In [11]:
# Settings shared by all the Gradient Descent runs
n_iterations = 700                            # maximum number of iterations per run
n_samples, n_parameters = X_train.shape       # rows and columns of the design matrix
learning_rates = [0.001, 0.01, 0.1, 1, 3, 5]  # step sizes to compare
threshold = 1e-7                              # minimum cost change before stopping

In [12]:
# Running Gradient Descent once for every learning rate

# Seeded generator: the random starting weights, and therefore the saved
# results, are the same on every Restart & Run All (the seed value is arbitrary)
rng = np.random.default_rng(42)

for lr in learning_rates:
    # Row i of W holds the parameter vector after i updates
    W = np.zeros([n_iterations, n_parameters])

    # Starting from small random weights (mean 0, standard deviation 0.1)
    W[0, :] = rng.normal(0, 0.1, n_parameters)

    # cost_function_values[i] is the cost evaluated at W[i, :]
    cost_function_values = [cost_function(X_train, t_train, W[0, :])]

    # Gradient Descent loop
    for i in range(1, n_iterations):
        # Model output under the previous parameters
        h = sigmoid(np.dot(X_train, W[i - 1, :]))
        # Gradient of the mean cross-entropy with respect to the parameters
        gradient = np.dot(X_train.T, (h - t_train)) / n_samples
        # Taking one step against the gradient
        W[i, :] = W[i - 1, :] - lr * gradient

        # Recording the cost at the new parameters
        cost_function_values.append(cost_function(X_train, t_train, W[i, :]))

        # Stopping as soon as the cost changes by less than the threshold
        if np.abs(cost_function_values[i] - cost_function_values[i - 1]) < threshold:
            print(f"Convergence reached after {i} iterations")

            # Padding with the converged cost so that every run stores
            # exactly n_iterations values
            cost_function_values.extend(
                [cost_function_values[i]] * (n_iterations - i - 1)
            )
            # The last row of W is the one that gets saved below
            W[n_iterations - 1, :] = W[i, :]
            break

    # Saving the results of this run
    results = {
        "learning_rate": lr,
        "cost_function_values": cost_function_values,
        "optimal_parameters": W[n_iterations - 1, :].tolist()
    }
    save_results(results, "gradient_descent.json")

Convergence reached after 198 iterations
Convergence reached after 23 iterations
Convergence reached after 6 iterations
Convergence reached after 7 iterations
