The aim of this assignment is coding the steepest descent algorithm without including any additional packages. The minimum values of 2 following objective functions will be found:

$$f(x_1,x_2) = (5x_1-x_2)^2 + (x_1-2)^2 + x_1 - 2x_2 + 12$$

$$f(x_1,x_2) = 100(x_2-x_1^2)^2 + (1-x_1)^2$$

In [3]:
objective1 <- expression(
  (5*x1 - x2)^4 + (x1 - 2)^2 + x1 -2*x2 + 12
)

objective2 <- expression(
  100*(x2-x1^2)^2 + (1-x1)^2
)

Here is the function for calculating the norm of a vector.

In [4]:
norm_vec <- function(vec){
  sqrt(sum(vec^2))
}

Golden Section Method function is used for finding the minimum of a one-dimensional function.

In [5]:

GoldenSectionMethod <- function(a,b,e2,func) {
  gamma <- 1.618
  x <- b - (1/gamma)*(b-a)
  y <- a + (1/gamma)*(b-a)
  fx <- func(x)
  fy <- func(y)
  
  while (b-a>=e2) {
    if(fx > fy) {
      a <- x
      x <- y
      y <- a + (1/gamma)*(b-a)
      fx <- fy
      fy <- func(y)
    }
    
    else {
      b <- y
      y <- x
      x <- b - (1/gamma)*(b-a)
      fy <- fx
      fx <- func(x)
    }
  }
  
  return(x)
}

Argmin function is used to find argument minimum of the alpha parameter.

In [6]:
argmin <- function(objective,xk,e2,direction){
  
  func <- function(alpha){
    return(objective(xk+(alpha*direction)))
  }
  min <- GoldenSectionMethod(-100,100,e2,func)
  return(min)
}

Steepest descent algorithm is shown below.

In [7]:
steepest_descent <- function(e1,e2,exprs,x0){
  
  objective_st <- function(xvec){
    x1_st <- xvec[1]
    x2_st <- xvec[2]
    return(eval(exprs,envir = list(x1=x1_st,x2=x2_st)))
  }
  
  
  k <- 0
  x <- list()
  alpha <- list()
  direction_list <- list()
  x[[as.character(k)]] <- x0
  
  while(TRUE){
    
    d1  <- eval(D(exprs,"x1"),envir = list(x1=x[[as.character(k)]][1],x2=x[[as.character(k)]][2]))
    d2  <- eval(D(exprs,"x2"),envir = list(x1=x[[as.character(k)]][1],x2=x[[as.character(k)]][2]))
    direction <- c(-d1,-d2)
    direction <- direction/norm_vec(direction)
    direction_list[[as.character(k)]] <- direction
    alpha[[as.character(k)]] <- argmin(objective_st,x[[as.character(k)]],e2,direction)
    x[[as.character(k+1)]] <- x[[as.character(k)]]+(alpha[[as.character(k)]]*direction)
    k <- k + 1
    if(abs(objective_st(x[[as.character(k)]])-objective_st(x[[as.character(k-1)]]))<e1){
      return(list("x"=x,"alpha"=alpha,"direction"=direction_list))
    }
  }
}

Give result function is used for compute the results of the steepest descent algorithm with different parameters.

In [8]:
give_result <- function(e1,e2,exprs,x0){
  
  objective_res <- function(xvec){
    x1_res <- xvec[1]
    x2_res <- xvec[2]
    eval(exprs,envir = list(x1=x1_res,x2=x2_res))
  }
  
  sol <- steepest_descent(e1,e2,exprs,x0)
  x1 <- numeric(0)
  x2 <- numeric(0)
  f <- numeric(0)
  a <- numeric(0)
  d1 <- numeric(0)
  d2 <- numeric(0)
  
  for(i in 0:length(sol$x)){
    x1 <- c(x1,sol$x[[as.character(i)]][1])
    x2 <- c(x2,sol$x[[as.character(i)]][2])
    a <- c(a,sol$alpha[[as.character(i)]])
    f <- c(f,objective_res(sol$x[[as.character(i)]]))
    d1 <- c(d1,sol$direction[[as.character(i)]][1])
    d2 <- c(d2,sol$direction[[as.character(i)]][2])
  }
  
  x1 <- as.character(round(x1,3))
  x2 <- as.character(round(x2,3))
  d1 <- as.character(round(d1,3))
  d2 <- as.character(round(d2,3))
  f <- round(f,3)
  X <- paste(x1,x2,sep=" , ")
  D <- paste(d1,d2,sep=" , ")
  Xk1  <- c(X[2:(length(X))],NA)
  a[length(a)+1]=NA
  D[length(D)+1]=NA
  
  results <- data.frame("Iteration"=(0:(length(sol$x)-1)),"X(k)"=X,"f(x1,x2)"=f,"d"=D,"alpha"=a,"X(k+1)"=Xk1)
  return(results)
}

Here is the result table of objective function 1, with different epsilon values and different initial points.

In [9]:
give_result(0.001,0.005,objective1,c(31,20))
give_result(0.005,0.005,objective1,c(100,100))

Iteration,X.k.,f.x1.x2.,d,alpha,X.k.1.
<int>,<chr>,<dbl>,<chr>,<dbl>,<chr>
0,"31 , 20",332151469.0,"-0.981 , 0.196",26.613654353,"4.903 , 25.219"
1,"4.903 , 25.219",-24.862,"0.252 , 0.968",1.7611594,"5.348 , 26.923"
2,"5.348 , 26.923",-25.291,"-0.968 , 0.252",0.106076463,"5.245 , 26.95"
3,"5.245 , 26.95",-25.849,"0.261 , 0.965",0.772074534,"5.447 , 27.695"
4,"5.447 , 27.695",-26.018,"-0.965 , 0.262",0.052583651,"5.396 , 27.709"
5,"5.396 , 27.709",-26.207,"-0.136 , 0.991",0.02489991,"5.393 , 27.734"
6,"5.393 , 27.734",-26.213,"0.992 , 0.128",0.007541442,"5.4 , 27.735"
7,"5.4 , 27.735",-26.218,"0.219 , 0.976",2.836657527,"6.022 , 30.503"
8,"6.022 , 30.503",-26.786,"-0.976 , 0.219",0.072857103,"5.951 , 30.519"
9,"5.951 , 30.519",-27.136,"0.41 , 0.912",0.024226233,"5.96 , 30.541"


Iteration,X.k.,f.x1.x2.,d,alpha,X.k.1.
<int>,<chr>,<dbl>,<chr>,<dbl>,<chr>
0,"100 , 100",25600010000.0,"-0.981 , 0.196",78.69674,"22.831 , 115.434"
1,"22.831 , 115.434",240.568,"-0.17 , -0.985",23.72981,"18.797 , 92.049"
2,"18.797 , 92.049",142.798,"-0.985 , 0.17",0.6096871,"18.196 , 92.153"
3,"18.196 , 92.153",110.094,"-0.22 , -0.975",7.386606,"16.569 , 84.948"
4,"16.569 , 84.948",90.512,"0.975 , -0.22",0.1891425,"16.753 , 84.906"
5,"16.753 , 84.906",78.288,"-0.225 , -0.974",5.309843,"15.559 , 79.732"
6,"15.559 , 79.732",66.04,"0.974 , -0.225",0.1617731,"15.717 , 79.696"
7,"15.717 , 79.696",58.005,"-0.231 , -0.973",3.726041,"14.855 , 76.071"
8,"14.855 , 76.071",50.347,"0.973 , -0.231",0.1378628,"14.989 , 76.039"
9,"14.989 , 76.039",45.058,"-0.275 , -0.962",0.9452466,"14.73 , 75.13"


Here is the result table of objective function 2, with different epsilon values and different initial points. However, objective function 2 is the Rosenbrock's curved valley function and it doesn't have a symmetric shape. Steepest descent algortihm couldn't approach the optimum sollution in such non-symmetric functions.

In [10]:
give_result(0.001,0.005,objective2,c(5,5))
give_result(0.005,0.005,objective2,c(100,100))

Iteration,X.k.,f.x1.x2.,d,alpha,X.k.1.
<int>,<chr>,<dbl>,<chr>,<dbl>,<chr>
0,"5 , 5",40016.0,"-0.995 , 0.099",7.429212635,"-2.392 , 5.739"
1,"-2.392 , 5.739",11.533,"-0.935 , -0.356",-5.5289392787,"2.775 , 7.707"
2,"2.775 , 7.707",3.156,"0.961 , -0.277",-6.0763630954,"-3.064 , 9.389"
3,"-3.064 , 9.389",16.518,"1 , 0.005",6.1334203142,"3.069 , 9.421"
4,"3.069 , 9.421",4.281,"-0.99 , -0.139",6.055556785,"-2.928 , 8.58"
5,"-2.928 , 8.58",15.434,"-0.745 , -0.667",0.0005668612,"-2.928 , 8.58"
6,"-2.928 , 8.58",15.434,,,


Iteration,X.k.,f.x1.x2.,d,alpha,X.k.1.
<int>,<chr>,<dbl>,<chr>,<dbl>,<chr>
0,"100 , 100",9801010000.0,"-1 , 0.005",89.97853,"10.023 , 100.45"
1,"10.023 , 100.45",81.408,"-1 , 0.018",0.0007950133,"10.022 , 100.45"
2,"10.022 , 100.45",81.411,,,
