In [1]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.optimize import minimize
from scipy.optimize import Bounds
import seaborn as sns
import statistics
from tqdm import tqdm

In [3]:
def log_beta(beta0, i, c=1):
    """
    Logarithmic annealing schedule for beta.

    Args:
        beta0 (float): value of beta at iteration 0
        i (int): iteration index
        c (float): constant in logarithmic annealing schedule

    Output:
        beta (float): log(exp(c*beta0) + i) / c
    """
    # exp(c*beta0) is the offset that makes the schedule start at beta0 when i == 0
    shifted = np.exp(c*beta0) + i
    return np.log(shifted)/c

In [5]:
# Non-negativity constraint for a scalar step size: eta in [0, +inf).
bounds = Bounds(lb=[0], ub=[np.inf])
def langevin(x0, d, func, grad, maxiter = 100, eta0 = 0.001, beta0 = 1, beta_schedule = log_beta, c=1):
    """
    Gradient Langevin Algorithm where the step size eta is chosen at every
    iteration by a numerical line search (SLSQP, eta >= 0).

    Each iteration applies the update
        x_new = x - eta*grad(x) + sqrt(2*eta/beta)*epsilon
    with epsilon drawn from a standard normal via the global ``np.random`` state
    (seed it with ``np.random.seed`` for reproducible runs).

    Args:
        x0 (numpy darray): initial point
        d (int): dimension of the objective function
        func (Callable): objective function
        grad (Callable): gradient of objective function
        maxiter (int): number of iterates stored, including the initial point
        eta0 (float): initial guess for eta handed to the line search
        beta0 (float): initial value for beta
        beta_schedule (Callable): annealing schedule, called as beta_schedule(beta0, i, c=c)
        c (float): constant in logarithmic annealing schedule

    Output:
        f_list (numpy darray): the list of function values for each iteration
        x_list (numpy darray): the list of x values for each iteration
    """
    # Built locally so the line search does not depend on a notebook-level global.
    step_bounds = Bounds([0], [np.inf])

    x_list = np.zeros((maxiter,d))
    x_list[0,:] = x0
    f_list = np.zeros((maxiter,))
    f_list[0] = func(x0)
    for i in range(1,maxiter):
        epsilon = np.random.normal(0, 1, d)
        beta = beta_schedule(beta0, i, c=c)
        x_prev = x_list[i-1,:]
        # The gradient is constant along the search line: evaluate it once per
        # iteration instead of once per line-search function evaluation.
        g = np.asarray(grad(x_prev))

        def iterate_at(eta):
            # Clamp so that a marginally negative eta from the solver cannot
            # turn the square root (and every later iterate) into NaN.
            eta = np.maximum(eta, 0.0)
            return x_prev - eta*g + np.sqrt(2*eta/beta)*epsilon

        def objective_function(eta):
            return func(iterate_at(eta))

        # line search on the step size
        result = minimize(objective_function, eta0, method = "SLSQP", bounds=step_bounds)
        eta = result.x[0]  # minimize returns a length-1 array; use the scalar
        x_list[i,:] = iterate_at(eta)
        f_list[i] = func(x_list[i,:])
    return f_list, x_list

In [7]:
def gradient_descent(x0, d, func, grad, maxiter = 100, eta0 = 0.001):
    """
    Gradient Descent where the step size eta is chosen at every iteration by a
    numerical line search (SLSQP, eta >= 0) along the negative gradient.

    Args:
        x0 (numpy darray): initial point
        d (int): dimension of the objective function
        func (Callable): objective function
        grad (Callable): gradient of objective function
        maxiter (int): number of iterates stored, including the initial point
        eta0 (float): initial guess for eta handed to the line search

    Output:
        f_list (numpy darray): the list of function values for each iteration
        x_list (numpy darray): the list of x values for each iteration
    """
    # Built locally so the line search does not depend on a notebook-level global.
    step_bounds = Bounds([0], [np.inf])

    x_list = np.zeros((maxiter,d))
    x_list[0,:] = x0
    f_list = np.zeros((maxiter,))
    f_list[0] = func(x0)
    for i in range(1,maxiter):
        x_prev = x_list[i-1,:]
        # The gradient is constant along the search line: evaluate it once per
        # iteration instead of once per line-search function evaluation.
        g = np.asarray(grad(x_prev))

        def objective_function(eta):
            return func(x_prev - eta*g)

        # line search on the step size
        result = minimize(objective_function, eta0, method = "SLSQP", bounds=step_bounds)
        eta = result.x[0]  # minimize returns a length-1 array; use the scalar
        x_list[i,:] = x_prev - eta*g
        f_list[i] = func(x_list[i,:])
    return f_list, x_list

In [9]:
def _scatter_on_contours(X, Y, Z, points, label, filename):
    """Draw filled contours of Z with ``points`` overlaid, save to ``filename`` and show."""
    contours = plt.contourf(X, Y, Z, levels=50, cmap='viridis')
    plt.colorbar(contours, label='Function Value')
    plt.scatter(points[:, 0], points[:, 1], c='white', s=20, edgecolor='k', label=label)
    plt.legend()
    plt.savefig(filename)
    plt.show()


def plot_and_compare(d, func, grad, maxiter=400, eta0_gld=1e-2, eta0_gd=1e-2, beta0=1, beta_schedule=log_beta, c=1, var=1, n_trials=100):
    """
    Runs Gradient Langevin and Gradient Descent from the same random initial
    points and:
    1) plots the semi-log plot of average convergence curve;
    2) plots the boxplot of the final value of both algorithms;
    3) counts the runs whose final value is below 1e-6 and below 1e-7;
    4) computes Q1, median, Q3 and mean of the final value of both algorithms;
    5) for d == 2, plots the sampled initial points and the final iterates of
       both algorithms on top of the contours of the objective.

    Figures are written to mean.jpg, boxplot.jpg, initial_points.jpg,
    gld_conv.png and gd_conv.png in the current working directory.

    Args:
        d (int): dimension of the objective function
        func (Callable): objective function
        grad (Callable): gradient of objective function
        maxiter (int): maximum number of iterations
        eta0_gld (float): initial value for eta used in Gradient Langevin
        eta0_gd (float): initial value for eta used in Gradient Descent
        beta0 (float): initial value for beta
        beta_schedule (Callable): annealing schedule for beta, forwarded to langevin
        c (float): constant in logarithmic annealing schedule
        var (float): spread of the zero-mean normal the initial point is sampled
            from. NOTE: despite its name it is passed as the ``scale`` (standard
            deviation) of ``np.random.normal``; the contour grid spans +/- 4*var.
        n_trials (int): number of independent random initial points (at least 2)

    Raises:
        ValueError: if n_trials is smaller than 2 (quartiles need two samples)
    """
    if n_trials < 2:
        raise ValueError("n_trials must be at least 2 to compute quartiles")

    # One column per run, one row per iteration; preallocated instead of being
    # grown with np.concatenate inside the loop.
    gld_result = np.zeros((maxiter, n_trials))
    gd_result = np.zeros((maxiter, n_trials))
    x_init = np.zeros((n_trials, d))
    gld_x = np.zeros((n_trials, d))
    gd_x = np.zeros((n_trials, d))
    for i in tqdm(range(n_trials)):
        x0 = np.random.normal(0, var, size=(d,))
        x_init[i,:] = x0
        f_list_gld, x_list_gld = langevin(x0, d, func, grad, maxiter=maxiter, eta0 = eta0_gld, beta0 = beta0, beta_schedule = beta_schedule, c=c)
        f_list_grad, x_list_gd = gradient_descent(x0, d, func, grad, maxiter=maxiter, eta0 = eta0_gd)
        gld_x[i,:] = x_list_gld[-1,:]
        gd_x[i,:] = x_list_gd[-1,:]
        gld_result[:, i] = f_list_gld
        gd_result[:, i] = f_list_grad

    # Extract final output
    gld_final = gld_result[-1,:]
    gd_final = gd_result[-1,:]

    mean_gld = np.mean(gld_result, axis=1)
    mean_gd = np.mean(gd_result, axis=1)

    # The y-axis label assumes the optimal value f^* is 0: the raw mean of
    # func is what gets plotted.
    plt.plot(np.arange(0,maxiter), np.log10(mean_gld), label = "Gradient Langevin")
    plt.plot(np.arange(0,maxiter), np.log10(mean_gd), label = "Gradient Descent")
    plt.legend()
    plt.xlabel("Number of Gradient Evaluations")
    plt.ylabel("log(f(x_k) - f^*)")
    plt.savefig("mean.jpg")

    plt.figure(figsize=(4,6))
    sns.boxplot(data=[gld_final, gd_final], palette='pastel')
    plt.title("Boxplot of Final Objective Value")
    plt.ylabel("Function Value")
    plt.xticks([0,1], ["Gradient Langevin", 'Gradient Descent'])
    plt.grid(True, axis='y', linestyle='--', alpha=0.5)
    plt.savefig("boxplot.jpg")
    plt.show()

    print("Average for GLD:", mean_gld[-1])
    print("Average for GD:", mean_gd[-1])

    # Count the runs (not iterations) whose final objective value is below each threshold.
    for threshold, tag in ((1e-6, "1e-6"), (1e-7, "1e-7")):
        gld_count = int(np.count_nonzero(gld_final < threshold))
        gd_count = int(np.count_nonzero(gd_final < threshold))
        print("number of runs that achieve below " + tag + " for Gradient Langevin:", gld_count)
        print("number of runs that achieve below " + tag + " for Gradient Descent:", gd_count)

    # quartiles – split into 4 bins (q2 == median)
    for finals, name in ((gld_final, "Gradient Langevin"), (gd_final, "Gradient Descent")):
        q1, q2, q3 = statistics.quantiles(finals, n=4)
        print("Q1, Q2(median), Q3, mean for " + name + ":", q1, q2, q3, statistics.mean(finals))

    if d != 2:
        # The contour plots evaluate func on a 2-D grid, so they only make
        # sense for a 2-dimensional objective.
        print("Contour plots skipped: they require d == 2.")
        return

    # Generate a grid over the domain
    x_s = np.linspace(-4*var, 4*var, 400)
    y_s = np.linspace(-4*var, 4*var, 400)
    X, Y = np.meshgrid(x_s, y_s)
    XY = np.stack([X.ravel(), Y.ravel()], axis=1)
    Z = np.array([func(xy) for xy in XY]).reshape(X.shape)

    # Plot the contour and overlay the points
    _scatter_on_contours(X, Y, Z, x_init, 'Sampled Initial Points', "initial_points.jpg")
    _scatter_on_contours(X, Y, Z, gld_x, 'Gradient Langevin Algorithm Output', "gld_conv.png")
    _scatter_on_contours(X, Y, Z, gd_x, 'Gradient Descent Algorithm Output', "gd_conv.png")