In [1]:
import numpy as np
from scipy.linalg import solve
from decimal import Decimal, getcontext

getcontext().prec = 100



# Steepest descent function

In [2]:


def steepest_descent(function, f_gradient, x0, epsilon=1e-4, max_iter=100000, c1=1e-4):
    """
    Steepest descent optimization algorithm with Wolfe conditions.
    
    Args:
    - function: Objective function to minimize.
    - f_gradient: Function to compute the gradient of the objective function.
    - x0: Initial guess.
    - epsilon: Convergence criterion based on the norm of the gradient.
    - max_iter: Maximum number of iterations.
    - c1: Parameter for Armijo condition.
    - c2: Parameter for curvature condition.
    
    Returns:
    - x_opt: Optimal solution.
    - num_iterations: Number of iterations until convergence.
    """
    x = x0
    num_iterations = 0

    gradient = f_gradient(x)
    direction = -gradient
            
    smth_else = direction.T @ direction

   
    while True:
        # Step 1: Check convergence criterion
        gradient = f_gradient(x)
      
        if np.linalg.norm(gradient) < epsilon or num_iterations >= max_iter:
            break
        
        # Step 2: Compute direction (negative gradient)
        direction = -gradient
        
        step_size = smth_else/ direction.T @ direction
        # Step 3: Update x with step size satisfying Wolfe conditions
        grad_x_dir =  np.dot(gradient, direction)
        while True:
            # Armijo condition
            if function(x + step_size * direction) <= function(x) + c1 * step_size * grad_x_dir:
                break      
            step_size *= 0.9
            # print("STEP LENGHT:", step_size)
        smth_else = -step_size * direction.T @ direction

        # Step 4: Update x
        x = x + step_size * direction
        
        # Step 5: Increase iteration count
        num_iterations += 1
    
    return x, num_iterations


# Hilbert matrix function

In [3]:
def hilbert_matrix(n):
     """
    Hilbert matrix.

    Args:
    - n: number of dimensions

    Returns:
    - Hilbert matrix with n dimensions 
    """
     return np.array([[1 / (i + j - 1) for j in range(1, n + 1)] for i in range(1, n + 1)])


## Conjugate gradient

For сonjugate gradient we are using more precise values of numbers 

In [4]:
def cg_solver(A, b, x0, tol=Decimal(1e-6), max_iter=1000):
    """
    Conjugate Gradient Solver for Linear Systems.

    This function implements the conjugate gradient method to solve linear systems of the form Ax = b.

    Args:
    - A: Coefficient matrix of the linear system.
    - b: Right-hand side vector of the linear system.
    - x0: Initial guess for the solution.
    - tol: Tolerance for convergence (default: 1e-6).
    - max_iter: Maximum number of iterations (default: 1000).

    Returns:
    - x: Approximate solution to the linear system.
    - iterations: Number of iterations until convergence.

    """

    x = x0
    r = np.dot(A, x)
    r = [Decimal(value) for value in r]
    r = np.array(r)
    b = np.array([Decimal(value) for value in b])
    r = b - r
    p = -r
    residual_norm = Decimal(np.linalg.norm(r))
    iterations = 0
    
    while residual_norm > tol and iterations < max_iter:
    
        A_decimal = np.array([[Decimal(value) for value in row] for row in A])
        
        Ap = A_decimal @ p
        
        alpha = Decimal(r.T @ r) / (p.T @ Ap)
        x = np.array([Decimal(value) for value in x])  
        x =  x + alpha * p  
        r_new = r + alpha * Ap
        beta = Decimal(r_new.T @ r_new) / (r.T @ r)
        p = -r_new + beta * p
        r = r_new
        residual_norm = Decimal(np.linalg.norm(r))
        iterations += 1
        
    return x, iterations

## Solving quadratic function with steepest descent and Conjugate gradient
Here we solving quadratic function with different dimensions of Hilbert matrix and comraning with exact solution x* (Qx = b).
- **dimensions = [5,8,12,20,30]**
- starting points for **SD** is **exact solution + noise**
- starting points for **CG** is **zeros**

In [5]:
dimensions = [5,8,12,20,30]
# dimensions = [5,8,12]

for n in dimensions:
    # Generate Hilbert matrix Q and vector b
    Q = hilbert_matrix(n)
    b = np.ones(n)

    x0 = np.zeros(n)


    #Quadratic function and it's gradient

    def quadratic_function(x):
        return 0.5 * np.dot(x, np.dot(Q, x)) - np.dot(b, x)

    
    def gradient_quadratic_function(x):
        return np.dot(Q, x) - b
    
  
    print(f"Dimension: {n}") 
    print('*'*100)
    print("STEEPEST DESCENT\n")

    # Solve linear system Qx = b to find exact solution x*
    exact_solution = solve(Q, b) 
    noise = np.random.normal(0, 1, size=n) 
    x0_SD = exact_solution + noise  
    
    # print("Starting points", x0)
    # print("\n")
    # print("Exact solution :",exact_solution)
    
    # Optimize using steepest descent
    solution_SD, num_iterations_SD = steepest_descent(quadratic_function, 
                                                gradient_quadratic_function, 
                                                x0_SD)
    
    print(f"Solution SD: {solution_SD} \nNumber of iterations SD: {num_iterations_SD}")

    print("\nCONJUGATE GRADIENT")
    
    x0_CG = np.zeros(n)
    solution_CG, num_iterations_CG = cg_solver(Q, b, x0_CG)
    print(f"Solution CG: {solution_CG} \nNumber of iterations CG: {num_iterations_CG}")


    print('_'*100)

Dimension: 5
****************************************************************************************************
STEEPEST DESCENT

Solution SD: [    5.40439021  -120.53900019   629.51959784 -1119.87708561
   629.94477442] 
Number of iterations SD: 100000

CONJUGATE GRADIENT
Solution CG: [Decimal('-4.999999999989692689439392639799913552445224869860575861374048138620182990352016536423130888541908510')
 Decimal('119.9999999998360078468098087866396329109703147996264076005534481353734064052378135538665253524914973')
 Decimal('-629.9999999993677290976998716633050738316697808687946705653088086132909210483134909740510169305898713')
 Decimal('1119.999999999121518268198213528419399993191279503021812123610484575983575872807072797136446969804305')
 Decimal('-629.9999999995972621569257233314289555357738013473209549377384764637190912660018752166848390455373552')] 
Number of iterations CG: 5
____________________________________________________________________________________________________
Dimensio

  exact_solution = solve(Q, b)


Solution SD: [-1.22469343e+01  1.82691687e+03 -6.35486925e+04  9.48423272e+05
 -7.55539445e+06  3.58352874e+07 -1.07176656e+08  2.07239527e+08
 -2.58446725e+08  2.00601048e+08 -8.81030667e+07  1.67194335e+07] 
Number of iterations SD: 100000

CONJUGATE GRADIENT
Solution CG: [Decimal('9.608817957053070641389379310176078876020834960617147866602948585043665534925174539054833409833396840')
 Decimal('-815.3954743187867052914593401517018192609416444244613618734671120243445076471906834659620116006274560')
 Decimal('16496.58192719707389224815781192545220749154499530747841630431690146327149594958773277220527425733392')
 Decimal('-135510.5652248625864464767215784837069242367962474416197669528088853782446367073692295662458113300359')
 Decimal('536482.0768489097468458374589634672344399588188442387319160259774630522744213959028230013313444074722')
 Decimal('-1025400.889447593486696573778618330315849560994292370620940082208480730812649911936795424696685199237')
 Decimal('642579.601772412430747333798

  exact_solution = solve(Q, b)


Solution SD: [-2.62139654e+01  4.30237391e+03 -1.89865805e+05  3.62631905e+06
 -3.72981102e+07  2.29900621e+08 -8.93067432e+08  2.19698466e+09
 -3.19974825e+09  1.90348954e+09  1.66222840e+09 -3.44431179e+09
  6.70327261e+08  2.80681504e+09 -2.55090193e+09  7.25686986e+08
 -7.41746060e+08  1.37490617e+09 -9.17188995e+08  2.10483377e+08] 
Number of iterations SD: 100000

CONJUGATE GRADIENT
Solution CG: [Decimal('10.97398463137440515602271017783417644200254010711841934288385889479851157551628388698330571844436522')
 Decimal('-1050.865041667615461948009521305417933612630775792313290939164012189274303771788923479961711186797849')
 Decimal('23955.47637883338629888669626210582556350023162871809465416607816455408870336119208366333999413758273')
 Decimal('-220422.2548436554626224126569564669481095786698636814896540109638618516864601284546472027641564016284')
 Decimal('965342.0233580153192668212582030685980863581977160721636483596838414231841088760477040400451693309848')
 Decimal('-1990107.4208

  exact_solution = solve(Q, b)


Solution SD: [-1.76898406e+01  2.50529913e+03 -9.30028426e+04  1.43443477e+06
 -1.11133332e+07  4.54428966e+07 -8.72613489e+07  7.20189497e+06
  2.63016791e+08 -4.12804562e+08  4.65909807e+08 -1.32778299e+09
  2.22340808e+09 -9.67189566e+08 -1.33485668e+08 -1.54773867e+09
  1.73502252e+09 -2.33952925e+08  1.89805272e+09 -2.36541695e+09
 -1.06047520e+09  1.40449738e+09  1.06823900e+09 -7.88639371e+08
 -7.70915772e+08  4.85022971e+08  6.42532592e+08 -7.28502509e+08
  1.63214928e+08  3.23737213e+07] 
Number of iterations SD: 100000

CONJUGATE GRADIENT
Solution CG: [Decimal('12.54723534369395884771081470758315593373915631670731405569644259018057181154536091680502097140551258')
 Decimal('-1386.777414670394825223866044095356416616686575857634667287241888679634744896227490928554938368878526')
 Decimal('36691.46108365100523785925257584901810584970571045333991693894200420738684410949402546236687928264588')
 Decimal('-396073.69475325235521570794644693883137057321594230039740264358361507377689188

When we compared the steepest descent and conjugate gradient methods for solving quadratic functions, we noticed some big differences. With the steepest descent method, it used up all its chances to solve the quadratic function, no matter how many dimensions the Hilbert matrix had. Similarly, the conjugate gradient method also used up all its tries when dealing with a 30-dimensional matrix. However, when we reduced the dimensions to 5, both methods gave similar results. But when we went beyond 5 dimensions, we saw significant differences in the solutions provided by these two methods. This shows that the steepest descent and conjugate gradient methods behave quite differently when solving quadratic functions.