In [31]:
import numpy as np
from scipy.linalg import solve
from decimal import Decimal, getcontext

getcontext().prec = 100



# Steepest descent function

In [32]:


def steepest_descent(function, f_gradient, x0, epsilon=1e-4, max_iter=100000, c1=1e-4):
    """
    Steepest descent optimization algorithm with Wolfe conditions.
    
    Args:
    - function: Objective function to minimize.
    - f_gradient: Function to compute the gradient of the objective function.
    - x0: Initial guess.
    - epsilon: Convergence criterion based on the norm of the gradient.
    - max_iter: Maximum number of iterations.
    - c1: Parameter for Armijo condition.
    - c2: Parameter for curvature condition.
    
    Returns:
    - x_opt: Optimal solution.
    - num_iterations: Number of iterations until convergence.
    """
    x = x0
    num_iterations = 0

    gradient = f_gradient(x)
    direction = -gradient
            
    smth_else = direction.T @ direction

   
    while True:
        # Step 1: Check convergence criterion
        gradient = f_gradient(x)
      
        if np.linalg.norm(gradient) < epsilon or num_iterations >= max_iter:
            break
        
        # Step 2: Compute direction (negative gradient)
        direction = -gradient
        
        step_size = smth_else/ direction.T @ direction
        # Step 3: Update x with step size satisfying Wolfe conditions
        grad_x_dir =  np.dot(gradient, direction)
        while True:
            # Armijo condition
            if function(x + step_size * direction) <= function(x) + c1 * step_size * grad_x_dir:
                break      
            step_size *= 0.9
            # print("STEP LENGHT:", step_size)
        smth_else = -step_size * direction.T @ direction

        # Step 4: Update x
        x = x + step_size * direction
        
        # Step 5: Increase iteration count
        num_iterations += 1

  
    
    print("Final gradient norm:", gradient)   

    x = np.array([float(value) for value in x])
    
    return x, num_iterations


# Hilbert matrix function

In [33]:
def hilbert_matrix(n):
     """
    Hilbert matrix.

    Args:
    - n: number of dimensions

    Returns:
    - Hilbert matrix with n dimensions 
    """
     return np.array([[1 / (i + j - 1) for j in range(1, n + 1)] for i in range(1, n + 1)])


## Conjugate gradient

For сonjugate gradient we are using more precise values of numbers 

In [34]:
def cg_solver(A, b, x0, tol=Decimal(1e-6), max_iter=1000):
    """
    Conjugate Gradient Solver for Linear Systems.

    This function implements the conjugate gradient method to solve linear systems of the form Ax = b.

    Args:
    - A: Coefficient matrix of the linear system.
    - b: Right-hand side vector of the linear system.
    - x0: Initial guess for the solution.
    - tol: Tolerance for convergence (default: 1e-6).
    - max_iter: Maximum number of iterations (default: 1000).

    Returns:
    - x: Approximate solution to the linear system.
    - iterations: Number of iterations until convergence.

    """

    x = x0
    r = np.dot(A, x)
    r = [Decimal(value) for value in r]
    r = np.array(r)
    b = np.array([Decimal(value) for value in b])
    r = b - r
    p = -r
    residual_norm = Decimal(np.linalg.norm(r))
    iterations = 0
    
    while residual_norm > tol and iterations < max_iter:
    
        A_decimal = np.array([[Decimal(value) for value in row] for row in A])
        
        Ap = A_decimal @ p
        
        alpha = Decimal(r.T @ r) / (p.T @ Ap)
        x = np.array([Decimal(value) for value in x])  
        x =  x + alpha * p  
        r_new = r + alpha * Ap
        beta = Decimal(r_new.T @ r_new) / (r.T @ r)
        p = -r_new + beta * p
        r = r_new
        residual_norm = Decimal(np.linalg.norm(r))
        iterations += 1
        

    
    print("Final gradient norm:", residual_norm)

    x = np.array([float(value) for value in x])
        
    return x, iterations
        
  

## Solving quadratic function with steepest descent and Conjugate gradient
Here we solving quadratic function with different dimensions of Hilbert matrix and comraning with exact solution x* (Qx = b).
- **dimensions = [5,8,12,20,30]**
- starting points for **SD** is **exact solution + noise**
- starting points for **CG** is **zeros**

In [35]:
dimensions = [5,8,12,20,30]
# dimensions = [5,8,12]

for n in dimensions:
    # Generate Hilbert matrix Q and vector b
    Q = hilbert_matrix(n)
    b = np.ones(n)

    x0 = np.zeros(n)


    #Quadratic function and it's gradient

    def quadratic_function(x):
        return 0.5 * np.dot(x, np.dot(Q, x)) - np.dot(b, x)

    
    def gradient_quadratic_function(x):
        return np.dot(Q, x) - b
    
  
    print(f"Dimension: {n}") 
    print('*'*100)
    print("STEEPEST DESCENT\n")

    # Solve linear system Qx = b to find exact solution x*
    exact_solution = solve(Q, b) 
    noise = np.random.normal(0, 1, size=n) 
    x0_SD = exact_solution + noise  
    
    # print("Starting points", x0)
    # print("\n")
    # print("Exact solution :",exact_solution)
    
    # Optimize using steepest descent
    solution_SD, num_iterations_SD = steepest_descent(quadratic_function, 
                                                gradient_quadratic_function, 
                                                x0_SD)
    dist_to_solution_SD = np.linalg.norm(exact_solution - solution_SD) 
    print("Distant from solution SD:", dist_to_solution_SD)
    
    print(f"Solution SD: {solution_SD} \nNumber of iterations SD: {num_iterations_SD}")

    print("\nCONJUGATE GRADIENT")
    
    x0_CG = np.zeros(n)
    solution_CG, num_iterations_CG = cg_solver(Q, b, x0_CG)
    dist_to_solution_CG = np.linalg.norm(exact_solution - solution_CG) 
    print("Distant from solution CG:", dist_to_solution_CG)
    print(f"Solution CG: {solution_CG} \nNumber of iterations CG: {num_iterations_CG}")


    print('_'*100)

Dimension: 5
****************************************************************************************************
STEEPEST DESCENT

Final gradient norm: [1.41803699 0.72833724 0.48738971 0.36450109 0.29017402]
Distant from solution SD: 1.8036247812828234
Solution SD: [    6.21705584  -119.46123369   629.89900118 -1119.3300038
   628.98882612] 
Number of iterations SD: 100000

CONJUGATE GRADIENT
Final gradient norm: 4.362585917722405764282883728491949678651014845473771234377266710420888395998294034406012246981006540E-89
Distant from solution CG: 2872.368360786777
Solution CG: [  -5.  120. -630. 1120. -630.] 
Number of iterations CG: 5
____________________________________________________________________________________________________
Dimension: 8
****************************************************************************************************
STEEPEST DESCENT

Final gradient norm: [-0.56592479 -0.39465496 -0.32206851 -0.27464184 -0.23978083 -0.21278235
 -0.1911925  -0.17352275]
Dista

  exact_solution = solve(Q, b)


Final gradient norm: [0.79721741 0.4827401  0.3376216  0.25266504 0.19711705 0.15828361
 0.12986094 0.10834201 0.09161704 0.0783415  0.0676196  0.05883284]
Distant from solution SD: 3.524533336591125
Solution SD: [-1.26593384e+01  1.82785893e+03 -6.35492523e+04  9.48423843e+05
 -7.55539363e+06  3.58352877e+07 -1.07176655e+08  2.07239527e+08
 -2.58446727e+08  2.00601047e+08 -8.81030655e+07  1.67194335e+07] 
Number of iterations SD: 100000

CONJUGATE GRADIENT
Final gradient norm: 6.466145993833794101868851712656275895582491757266270669370003845468053632973568506372315384982178401E-7
Distant from solution CG: 413389745.8508265
Solution CG: [ 9.60881796e+00 -8.15395474e+02  1.64965819e+04 -1.35510565e+05
  5.36482077e+05 -1.02540089e+06  6.42579602e+05  6.57590809e+05
 -8.04244710e+05 -6.63072100e+05  1.24127990e+06 -4.65506744e+05] 
Number of iterations CG: 10
____________________________________________________________________________________________________
Dimension: 20
***************

  exact_solution = solve(Q, b)


Final gradient norm: [-6.39057887 -4.88600671 -4.08309138 -3.54974246 -3.15942597 -2.85712552
 -2.61397815 -2.4130134  -2.2434473  -2.09802777 -1.97166055 -1.86064246
 -1.76220521 -1.67423069 -1.59506583 -1.5233995  -1.45817551 -1.39853275
 -1.34376103 -1.29326907]
Distant from solution SD: 8.81788187624377
Solution SD: [-2.50133356e+01  4.30168295e+03 -1.89868192e+05  3.62631738e+06
 -3.72981120e+07  2.29900619e+08 -8.93067433e+08  2.19698466e+09
 -3.19974825e+09  1.90348954e+09  1.66222839e+09 -3.44431180e+09
  6.70327262e+08  2.80681504e+09 -2.55090193e+09  7.25686986e+08
 -7.41746060e+08  1.37490617e+09 -9.17188996e+08  2.10483375e+08] 
Number of iterations SD: 100000

CONJUGATE GRADIENT
Final gradient norm: 6.247623538244119522042585815978123929295687631917687020417270444132947613600915563476081535989414510E-7
Distant from solution CG: 7270082119.249533
Solution CG: [ 1.09739846e+01 -1.05086504e+03  2.39554764e+04 -2.20422255e+05
  9.65342023e+05 -1.99010742e+06  1.25271616e+06  1

  exact_solution = solve(Q, b)


Final gradient norm: [-4.2194893  -2.76711074 -2.13521703 -1.7646059  -1.51598538 -1.33554241
 -1.19756242 -1.08804374 -0.99864675 -0.92406409 -0.86074328 -0.80620756
 -0.75867332 -0.71681941 -0.67964526 -0.64637739 -0.61640789 -0.58925147
 -0.56451589 -0.54188    -0.52107844 -0.50188979 -0.48412754 -0.46763331
 -0.45227226 -0.43792798 -0.42449995 -0.41190074 -0.40005396 -0.38889244]
Distant from solution SD: 6.861125767249022
Solution SD: [-1.78796560e+01  2.50444748e+03 -9.30056137e+04  1.43443556e+06
 -1.11133348e+07  4.54428958e+07 -8.72613457e+07  7.20189745e+06
  2.63016791e+08 -4.12804564e+08  4.65909807e+08 -1.32778299e+09
  2.22340808e+09 -9.67189566e+08 -1.33485669e+08 -1.54773867e+09
  1.73502252e+09 -2.33952925e+08  1.89805272e+09 -2.36541695e+09
 -1.06047520e+09  1.40449738e+09  1.06823900e+09 -7.88639369e+08
 -7.70915773e+08  4.85022973e+08  6.42532594e+08 -7.28502510e+08
  1.63214926e+08  3.23737176e+07] 
Number of iterations SD: 100000

CONJUGATE GRADIENT
Final gradient

When we compared the steepest descent and conjugate gradient methods for solving quadratic functions, we noticed some big differences. With the steepest descent method, it used up all its chances to solve the quadratic function, no matter how many dimensions the Hilbert matrix had. However, when we reduced the dimensions to 5, both methods gave similar results. But when we went beyond 5 dimensions, we saw significant differences in the solutions provided by these two methods. This shows that the steepest descent and conjugate gradient methods behave quite differently when solving quadratic functions.