In [1]:
import numpy as np
import matplotlib.pyplot as plt
from solvers import steepest_descent, newton, BFGS, DFP
from objectives import get_rosenbrock, get_lgt_obj, get_zakharov

Common plotting function shared by all three objective functions

In [50]:
def plot_results(dim, results, title, filename, max_iterations=100):
    """Plot function-value and gradient-norm histories side by side and save them.

    Parameters
    ----------
    dim : object
        Dimension (or regularisation value) of the run. Not used by the
        plotting code; kept so existing call sites keep working.
    results : dict
        Maps a method label to a ``(function_history, gradient_norms,
        cumulative_times)`` tuple. Only the first two sequences are drawn.
    title : str
        Prefix used for both subplot titles.
    filename : str
        Destination passed to ``Figure.savefig``.
    max_iterations : int or None, optional
        Number of leading entries of each history to draw. Defaults to 100;
        ``None`` draws the full history.
    """
    fig, (ax_value, ax_grad) = plt.subplots(1, 2, figsize=(20, 10))
    try:
        # (axes, index into the results tuple, quantity name) for each panel.
        panels = (
            (ax_value, 0, 'Function Value'),
            (ax_grad, 1, 'Gradient Norm'),
        )
        for ax, series_index, quantity in panels:
            for label, histories in results.items():
                ax.plot(
                    histories[series_index][:max_iterations],
                    label=f'{label} - {quantity}',
                    linewidth=2,
                )
            ax.set_title(f'{title} - {quantity}', fontsize=20)
            ax.set_xlabel('Iterations', fontsize=32)
            ax.set_ylabel(quantity, fontsize=32)
            ax.legend(fontsize=25)

        fig.tight_layout()
        fig.savefig(filename)
    finally:
        # Always release the figure, even if saving fails, so repeated calls
        # in a loop do not accumulate open figures.
        plt.close(fig)

Common helper to save a per-algorithm comparison (number of iterations, time, final function value, and final gradient norm) to CSV

In [3]:
import pandas as pd

def save_results_to_csv(results, filename):
    """Flatten nested optimisation results into one table and write it as CSV.

    ``results`` maps an outer key (a dimension or a lambda value) to a dict
    that maps each algorithm label to a ``(function_history, gradient_norms,
    cumulative_times)`` tuple. One row is written per (outer key, algorithm)
    pair, holding the last entry of each sequence plus the length of
    ``function_history``. The file is written without the index column.
    """
    column_order = [
        'Algorithm',
        'Dimension_or_Lambda',
        'Time',
        'Final Function Value',
        'Final Gradient Norm',
        'Iterations',
    ]

    # One tuple per row, in the same order as column_order.
    rows = [
        (algo, setting, times[-1], f_hist[-1], g_hist[-1], len(f_hist))
        for setting, per_algo in results.items()
        for algo, (f_hist, g_hist, times) in per_algo.items()
    ]

    pd.DataFrame(rows, columns=column_order).to_csv(filename, index=False)


For Rosenbrock

In [52]:
# Problem sizes at which the Rosenbrock benchmark is solved.
dimensions = [2, 5, 10, 50]

# Solver table; it does not depend on the dimension, so it is built once.
methods = {
    'Steepest Descent': steepest_descent,
    'Newton': newton,
    'BFGS': BFGS,
    'DFP': DFP
}

# results[dim][label] -> (function_history, gradient_norms, cumulative_times)
results = {}

for dim in dimensions:
    f, gradf, hessf, x0 = get_rosenbrock(dim)
    results[dim] = {}

    for label, method in methods.items():
        # Only Newton's method receives the Hessian as a fourth positional argument.
        extra_args = (hessf,) if label == 'Newton' else ()
        xsol, function_history, cumulative_times, gradient_norms = method(x0, f, gradf, *extra_args)
        results[dim][label] = (function_history, gradient_norms, cumulative_times)
        print(f"{label} done for dim={dim}")

    # One figure per dimension, comparing all four solvers.
    plot_results(dim, results[dim], f'Rosenbrock Function Optimization for Dimension {dim}', f'rosenbrock_{dim}.png')

# A single CSV summarising every (dimension, solver) pair.
save_results_to_csv(results, 'rosenbrock_results.csv')

Steepest Descent done for dim=2
Newton done for dim=2
BFGS done for dim=2
DFP done for dim=2
Steepest Descent done for dim=5
Newton done for dim=5
BFGS done for dim=5
DFP done for dim=5
Steepest Descent done for dim=10
Newton done for dim=10
BFGS done for dim=10
DFP done for dim=10
Steepest Descent done for dim=50
Newton done for dim=50
BFGS done for dim=50
DFP done for dim=50


For Zakharov

In [51]:
# Zakharov benchmark. Results are keyed by the actual problem dimension so the
# 'Dimension_or_Lambda' column of the CSV reports that dimension, and the
# dimension is written once instead of being repeated as a literal.
zakharov_dimensions = [2]
results = {}

methods = {
    'Steepest Descent': steepest_descent,
    'Newton': newton,
    'BFGS': BFGS,
    'DFP': DFP
}

# Iterate over each dimension
for dim in zakharov_dimensions:
    f, gradf, hessf, x0 = get_zakharov(dim)
    results[dim] = {}

    for label, method in methods.items():
        # Only Newton's method receives the Hessian as a fourth positional argument.
        if label == 'Newton':
            xsol, function_history, cumulative_times, gradient_norms = method(x0, f, gradf, hessf)
        else:
            xsol, function_history, cumulative_times, gradient_norms = method(x0, f, gradf)
        results[dim][label] = (function_history, gradient_norms, cumulative_times)
        print(f"{label} done for dim={dim}")

    plot_results(dim, results[dim], f'Zakharov Function Optimization for Dimension {dim}', f'zakharov_{dim}.png')

save_results_to_csv(results, 'zakharov_results.csv')


Steepest Descent done for dim=2
Newton done for dim=2
BFGS done for dim=2
DFP done for dim=2


For Logistic Regression on MNIST

In [53]:
import time

# Regularisation strengths to compare.
lambdas = [0.001, 0.01, 0.1]

# results[lam][label] -> (function_history, gradient_norms, cumulative_times)
results = {}

# Solver wrappers carrying the line-search settings used for this objective.
# They take the problem as arguments and capture nothing from the loop below,
# so the table is built once.
# NOTE(review): BFGS and DFP stop at grad_tol=1e-3 while Steepest Descent and
# Newton use 1e-4, so iteration counts are not like-for-like - confirm this is
# intended.
methods = {
    'Steepest Descent': lambda x0, f, gradf: steepest_descent(x0, f, gradf, c0=0.001, c1=0.9, t0=1e-3, grad_tol=1e-4),
    'Newton': lambda x0, f, gradf, hessf: newton(x0, f, gradf, hessf, c0=0.001, c1=0.9, t0=1e-3, grad_tol=1e-4),
    'BFGS': lambda x0, f, gradf: BFGS(x0, f, gradf, c0=0.001, c1=0.8, t0=1e-3, grad_tol=1e-3),
    'DFP': lambda x0, f, gradf: DFP(x0, f, gradf, c0=0.001, c1=0.5, t0=1e-3, grad_tol=1e-3)
}

for lam in lambdas:
    f, gradf, hessf, x0 = get_lgt_obj(lam)
    results[lam] = {}

    for label, method in methods.items():
        print(f"Running {label} for λ={lam}")
        # perf_counter is a monotonic, high-resolution clock meant for measuring
        # intervals; time.time() can jump if the system clock is adjusted.
        start_time = time.perf_counter()
        if label == 'Newton':
            xsol, function_history, cumulative_times, gradient_norms = method(x0, f, gradf, hessf)
        else:
            xsol, function_history, cumulative_times, gradient_norms = method(x0, f, gradf)
        # Stop the clock before any bookkeeping so only the solver is timed.
        end_time = time.perf_counter()
        results[lam][label] = (function_history, gradient_norms, cumulative_times)
        print(f"{label} done for λ={lam} in {end_time - start_time} seconds in cumulative time {cumulative_times[-1]}")

    plot_results(lam, results[lam], f'LGT Function Optimization with λ={lam}', f'lgt_{lam}.png')

save_results_to_csv(results, 'lgt_results.csv')


Running Steepest Descent for λ=0.001
Steepest Descent done for λ=0.001 in 2.600860118865967 seconds in cumulative time 183
Running Newton for λ=0.001
Newton done for λ=0.001 in 8.772995710372925 seconds in cumulative time 183
Running BFGS for λ=0.001
BFGS done for λ=0.001 in 14.251598834991455 seconds in cumulative time 409
Running DFP for λ=0.001
DFP done for λ=0.001 in 12.488960981369019 seconds in cumulative time 720
Running Steepest Descent for λ=0.01
Steepest Descent done for λ=0.01 in 2.861194133758545 seconds in cumulative time 248
Running Newton for λ=0.01
Newton done for λ=0.01 in 11.562780141830444 seconds in cumulative time 248
Running BFGS for λ=0.01
BFGS done for λ=0.01 in 10.199692249298096 seconds in cumulative time 304
Running DFP for λ=0.01
DFP done for λ=0.01 in 5.242429971694946 seconds in cumulative time 309
Running Steepest Descent for λ=0.1
Steepest Descent done for λ=0.1 in 2.0232250690460205 seconds in cumulative time 219
Running Newton for λ=0.1
Newton done for

Tuning c1 (with c0 held fixed) - simply for debugging


In [47]:
import time

# Values of c1 to sweep over.
c1_values = [0.5, 0.7, 0.9]

# Settings held constant for the whole sweep.
c0_fixed = 0.001        # c0 passed to every solver
lambda_fixed = 0.001    # argument to get_lgt_obj
t0_fixed = 1e-3         # t0 passed to every solver
grad_tol_fixed = 1e-4   # grad_tol passed to every solver

# Build the objective once; every run starts from the same x0.
f, gradf, hessf, x0 = get_lgt_obj(lambda_fixed)

# results[c1][label] -> summary dict for that run
results = {}

for c1 in c1_values:
    results[c1] = {}

    # Steepest Descent is not part of this sweep.
    # NOTE: DFP is run with c1 - 0.4, but its results are stored and printed
    # under the un-shifted grid value c1.
    methods = {
        'Newton': lambda x, f, gradf, hessf: newton(x, f, gradf, hessf, c0=c0_fixed, c1=c1, t0=t0_fixed, grad_tol=grad_tol_fixed),
        'BFGS': lambda x, f, gradf: BFGS(x, f, gradf, c0=c0_fixed, c1=c1, t0=t0_fixed, grad_tol=grad_tol_fixed),
        'DFP': lambda x, f, gradf: DFP(x, f, gradf, c0=c0_fixed, c1=(c1 - 0.4), t0=t0_fixed, grad_tol=grad_tol_fixed)
    }

    for label, method in methods.items():
        # Only Newton's method receives the Hessian as a fourth positional argument.
        extra_args = (hessf,) if label == 'Newton' else ()

        start_time = time.time()
        xsol, function_history, cumulative_times, gradient_norms = method(x0, f, gradf, *extra_args)
        end_time = time.time()

        # Keep only the summary numbers; values are rounded to 5 decimals.
        summary = {}
        summary["iterations"] = len(function_history)
        summary["final_f"] = round(function_history[-1], 5)
        summary["final_grad_norm"] = round(gradient_norms[-1], 5)
        summary["time"] = cumulative_times[-1]
        results[c1][label] = summary

        print(f"{label} done for c1={c1} in {end_time - start_time} seconds")
        print(f"Iterations: {len(function_history)}, Final Function Value: {round(function_history[-1], 5)}, Final Gradient Norm: {round(gradient_norms[-1], 5)}")

# Convert to a heatmap-friendly format


Newton done for c1=0.5 in 8.88641619682312 seconds
Iterations: 164, Final Function Value: 0.03127, Final Gradient Norm: 0.0001
BFGS done for c1=0.5 in 6.263967990875244 seconds
Iterations: 172, Final Function Value: 0.03127, Final Gradient Norm: 0.0001
DFP done for c1=0.5 in 18.34773874282837 seconds
Iterations: 912, Final Function Value: 0.03127, Final Gradient Norm: 9e-05
Newton done for c1=0.7 in 6.870253086090088 seconds
Iterations: 143, Final Function Value: 0.03127, Final Gradient Norm: 0.0001
BFGS done for c1=0.7 in 17.31073808670044 seconds
Iterations: 423, Final Function Value: 0.03127, Final Gradient Norm: 0.0001
DFP done for c1=0.7 in 104.17127990722656 seconds
Iterations: 896, Final Function Value: 0.03127, Final Gradient Norm: 0.0001
Newton done for c1=0.9 in 9.158504962921143 seconds
Iterations: 184, Final Function Value: 0.03127, Final Gradient Norm: 0.0001
BFGS done for c1=0.9 in 45.334630727767944 seconds
Iterations: 1396, Final Function Value: 0.03127, Final Gradient 

In [48]:
# Flatten the nested sweep results ({c1: {method: metrics}}) into one row per
# (method, c1) pair. The outer key is the c1 value, matching the "c1" column.
# A comprehension keeps the loop variables local, so the notebook-level
# `methods` solver table is not overwritten by a metrics dict.
data = [
    [method_name, c1_value, metrics["iterations"], metrics["final_f"], metrics["final_grad_norm"]]
    for c1_value, per_method in results.items()
    for method_name, metrics in per_method.items()
]

df = pd.DataFrame(data, columns=["Method", "c1", "Iterations", "Final Function Value", "Final Gradient Norm"])

In [49]:
# Store df from the previous cell to CSV, omitting the index column.
df.to_csv('lgt_hyperparam_tuning.csv', index=False)