##  Gradient Descent with Backtracking Line Search:

In [284]:
# Imports
import matplotlib.pyplot as plt
import numpy as np
from numpy import log
import shutil
import sys
import os.path

Given function:
\begin{align*}
    f(x_1, x_2, x_3) = x_{3} \log \Big( e^{\frac{x_{1}} {x_{3}}}+ e^{\frac{x_{2}} {x_{3}}} \Big) + (x_{3}-2)^2 + e^{\frac{1}{x_{1} + x_{2}}}
\end{align*}

$ \textbf{dom} \; f: \{ \mathbf{x} \in \mathbb{R}^3 : x_1 + x_2 > 0, \; x_3 > 0 \} $

In [285]:
# Defining our function
def my_f(x):
    """Evaluate the objective function at the point ``x``.

    f(x1, x2, x3) = x3 * log(exp(x1/x3) + exp(x2/x3)) + (x3 - 2)**2 + exp(1/(x1 + x2))

    Parameters
    ----------
    x : indexable with three numeric entries
        The point (x1, x2, x3), read as ``x[0]``, ``x[1]``, ``x[2]``.
        The formula is intended for x1 + x2 > 0 and x3 > 0; this function
        does not check that itself.

    Returns
    -------
    The scalar function value.
    """
    x1, x2, x3 = x[0], x[1], x[2]
    # np.logaddexp(a, b) evaluates log(exp(a) + exp(b)) without forming the
    # individual exponentials, so large ratios x_i / x3 no longer overflow to inf.
    log_sum_exp = np.logaddexp(x1 / x3, x2 / x3)
    val = x3 * log_sum_exp + (x3 - 2)**2 + np.exp(1 / (x1 + x2))
    return val

Defining the gradient (the vector of first-order partial derivatives):

$\nabla f = [ \partial f/\partial x_1 \; \partial f/\partial x_2 \; \partial f/\partial x_3]^T   $

$$ \implies \nabla f = \begin{Bmatrix}
\frac{e^{\frac{x_{1}} {x_{3}}}}{e^{\frac{x_{1}} {x_{3}}}+ e^{\frac{x_{2}} {x_{3}}}} - \frac{e^{ \frac{1}{x_1 + x_2}}}{(x_1 +x_2)^2}  \\ \\
\frac{e^{\frac{x_{2}} {x_{3}}}}{e^{\frac{x_{1}} {x_{3}}}+ e^{\frac{x_{2}} {x_{3}}}} - \frac{e^{ \frac{1}{x_1 + x_2}}}{(x_1 +x_2)^2} \\ \\
 \log\Big(e^{\frac{x_{1}} {x_{3}}}+ e^{\frac{x_{2}} {x_{3}}}\Big) - \frac{x_1 e^{\frac{x_{1}} {x_{3}}} + x_2 e^{\frac{x_{2}} {x_{3}}}}{x_3 ( e^{\frac{x_{1}} {x_{3}}}+ e^{\frac{x_{2}} {x_{3}}}) } + 2(x_3-2)
\end{Bmatrix}$$

In [286]:
# Defining the first derivative of the function
def nabla_f(x):
    """Gradient of the objective function at the point ``x``.

    Parameters
    ----------
    x : indexable with three numeric entries
        The point (x1, x2, x3), read as ``x[0]``, ``x[1]``, ``x[2]``.

    Returns
    -------
    numpy.ndarray of shape (3, 1)
        Column vector [df/dx1, df/dx2, df/dx3]^T.
    """
    x1, x2, x3 = x[0], x[1], x[2]
    a1, a2 = x1 / x3, x2 / x3

    # log(exp(a1) + exp(a2)) evaluated without overflow.
    log_sum_exp = np.logaddexp(a1, a2)
    # Weights exp(a_i) / (exp(a1) + exp(a2)), written as exp(a_i - log_sum_exp)
    # so that large a_i do not produce inf / inf = nan.
    w1 = np.exp(a1 - log_sum_exp)
    w2 = np.exp(a2 - log_sum_exp)

    # Derivative of exp(1 / (x1 + x2)) up to sign; identical for x1 and x2.
    s = x1 + x2
    recip_term = (1 / (s**2)) * np.exp(1 / s)

    f = np.array([
        [w1 - recip_term],
        [w2 - recip_term],
        [log_sum_exp - (x1 * w1 + x2 * w2) / x3 + 2 * (x3 - 2)],
    ])
    return f

Defining parameters for backtracking search:

In [287]:
# Backtracking line-search settings.
alp, beta = 0.4, 0.5   # sufficient-decrease fraction; step-size shrink factor
eps = 10 ** -5         # stop once the gradient norm is no larger than this

Start Point:

In [288]:
# Initial iterate (x1, x2, x3) = (3, 4, 5); satisfies x1 + x2 > 0 and x3 > 0.
x_start = np.array((3, 4, 5))

Ensuring domain:

$ \text{While} \; x + t\Delta x \notin \textbf{dom} f, \text{ set } t := \beta t $ \
where, $\Delta x = -\nabla f(x)$

In [289]:
# Ensuring Domain
def domain_t(x):
    """Largest step t in {1, beta, beta**2, ...} keeping x - t * grad f(x) in dom f.

    Starting from t = 1, the step is multiplied by the global ``beta`` until
    the trial point v = x - t * nabla_f(x) satisfies v[2] > 0 and
    v[0] + v[1] > 0.

    Parameters
    ----------
    x : numpy.ndarray with three entries
        Current iterate (x1, x2, x3).

    Returns
    -------
    The first step size t for which the trial point lies in the domain.

    Raises
    ------
    ValueError
        If t shrinks all the way to 0 without producing a point in the domain
        (for example when the gradient contains NaN). Previously this case
        either looped forever or yielded a useless zero step.
    """
    # x is fixed during the search, so the gradient is evaluated only once.
    grad = nabla_f(x).flatten()

    t = 1
    while t > 0:
        v = x - t * grad
        if v[2] > 0 and (v[0] + v[1] > 0):
            return t
        # Trial point violates x3 > 0 or x1 + x2 > 0: shrink the step.
        t *= beta

    raise ValueError(
        "domain_t: no positive step size keeps x - t * grad f(x) inside dom f"
    )

Backtracking algorithm:

$
\text{Given a descent direction } \Delta x = -\nabla f(x) \text{ for } f \text{ at } x \in \textbf{dom} f, \alpha \in (0, 0.5), \beta \in (0, 1).$

\begin{array}{l}
\text{Set } t := 1. \\ 
\text{Ensure domain:} \; \text{While} \; x + t\Delta x \notin \textbf{dom} f, \text{ set } t := \beta t \\
\text{While } f(x + t\Delta x) > f(x) + \alpha t \nabla f(x)^T \Delta x, \text{ set } t := \beta t.
\end{array}



In [290]:
# Backtracking Algorithm
def Backtrack_t(x):
    """Step size for the direction -grad f(x), chosen by backtracking line search.

    The search starts from the step returned by ``domain_t(x)`` and multiplies
    it by the global ``beta`` while

        f(x - t * g) > f(x) - alp * t * (g . g),    g = nabla_f(x),

    i.e. until the sufficient-decrease condition with the global ``alp`` holds.

    Parameters
    ----------
    x : numpy.ndarray with three entries
        Current iterate (x1, x2, x3).

    Returns
    -------
    The accepted step size t.
    """
    t = domain_t(x)

    # Everything below depends only on x, so it is computed once instead of
    # being re-evaluated on every backtracking pass.
    grad = nabla_f(x).flatten()
    f_x = my_f(x)
    grad_sq = np.dot(grad, grad)

    # Left side: value at the trial point. Right side: required decrease.
    while my_f(x - t * grad) > f_x - alp * t * grad_sq:
        t *= beta
    return t

### Algorithm: Gradient Descent

1. **Input:** Starting point $x$ in $\text{dom} \, f$

2. **Repeat until stopping criterion is satisfied:**

    a. $\Delta x := -\nabla f(x)$
    
    b. **Line search:** Choose step size $t$ via backtracking line search
    
    c. **Update:** $x := x + t \Delta x$


In [291]:
# Running Gradient Descent with Backtracking Line Search
# Safety cap so a non-converging run stops with an error instead of hanging the kernel.
MAX_ITER = 100000

# The gradient at the current iterate is evaluated once and reused for both
# the stopping test and the descent direction.
grad = nabla_f(x_start).flatten()
norm_nabla_f = np.dot(grad, grad)**0.5

# NOTE: `iter` shadows the Python builtin and `x_start` is overwritten with
# each new iterate (re-running this cell resumes from the last point). Both
# names are kept unchanged in case other cells rely on them.
iter = 0
while norm_nabla_f > eps:
    if iter >= MAX_ITER:
        raise RuntimeError(
            "Gradient descent did not reach the tolerance within MAX_ITER iterations"
        )
    direction = -grad                      # steepest-descent direction
    t = Backtrack_t(x_start)               # step size from backtracking line search
    x_start = x_start + t * direction      # x := x + t * dx
    grad = nabla_f(x_start).flatten()
    norm_nabla_f = np.dot(grad, grad)**0.5
    iter = iter + 1

print("Optimal solution:", x_start)
fopt = my_f(x_start)
print("Optimal function value:", fopt)
print("Number of iterations taken to converge:", iter)

Optimal solution: [0.92618727 0.92622965 1.65342641]
Optimal function value: 3.908113786397637
Number of iterations taken to converge: 30
