## SQP

In [1]:
# QP

# Linesearch with penalty

# Active set strategy

# BFGS for approximating Hessian



In [2]:
# Import libraries
import torch as t
import numpy as np
from matplotlib import pyplot as plt

import torch.nn as nn
from torch.autograd import Variable
from torch.autograd.functional import jacobian, hessian

In [3]:
# Part 1
"""
Decision variable: x0
State variables: x1
"""

m = 2 # No. of constraints (g1 and g2 returned by f)
n = 2 # No. of variables (x[0] and x[1])
# NOTE(review): with m == n this evaluates to 0, while the note above names x0
# as a decision variable -- confirm which is intended.
d = n - m # No. of decision variables


def f(x):
    """Evaluate the objective and the two constraint functions at ``x``.

    Args:
        x: Indexable pair; only ``x[0]`` and ``x[1]`` are read.

    Returns:
        A 3-tuple ``(objective, g1, g2)`` where
        ``objective = x0**2 + (x1 - 3)**2``,
        ``g1 = x1**2 - 2*x0`` and
        ``g2 = (x1 - 1)**2 + 5*x0 - 15``.
    """
    x0, x1 = x[0], x[1]

    objective = x0 ** 2 + (x1 - 3) ** 2
    g1 = (x1 ** 2) - (2 * x0)
    g2 = (x1 - 1) ** 2 + (5 * x0) - 15

    return objective, g1, g2


def Lag(x, mu):
    """Evaluate the Lagrangian ``objective + mu[0]*g1 + mu[1]*g2`` at ``x``.

    Args:
        x: Indexable pair; only ``x[0]`` and ``x[1]`` are read.
        mu: Indexable pair of multipliers, one per constraint.

    Returns:
        The scalar value of the Lagrangian.
    """
    x0, x1 = x[0], x[1]

    objective = x0 ** 2 + (x1 - 3) ** 2
    constraint_1 = (x1 ** 2) - (2 * x0)
    constraint_2 = (x1 - 1) ** 2 + (5 * x0) - 15

    return objective + mu[0] * constraint_1 + mu[1] * constraint_2


# Compute the Jacobian of the objective and the constraints
def jac(x, n=n):
    """Return the stacked Jacobian of ``f`` at ``x``.

    Args:
        x: Point at which to differentiate.
        n: Number of variables (columns of the result).

    Returns:
        Tensor of shape ``(m + 1, n)``. Row 0 is the gradient of the
        objective; rows 1..m are the gradients of the constraints, in the
        order ``f`` returns them.
    """
    # `jacobian` returns a tuple with one gradient tensor per output of `f`.
    # Evaluate it once instead of once per row.
    grads = jacobian(f, x)

    # Copy the tuple into a single tensor for the ease of indexing.
    J = t.zeros((m + 1, n))
    for i in range(m + 1):
        J[i] = grads[i]
    return J


# Compute the gradient of the Lagrangian with respect to x
def jacL(x, mu, n=n):
    """Return the gradient of the Lagrangian with respect to ``x``.

    Args:
        x: Point at which to differentiate.
        mu: Multipliers, held fixed during differentiation.
        n: Number of variables (columns of the result).

    Returns:
        Tensor of shape ``(1, n)`` holding the gradient as a row.
    """
    # Only the derivative with respect to x is needed, so mu is bound in a
    # closure rather than passed as a second differentiation input.
    grad_x = jacobian(lambda point: Lag(point, mu), x)

    J = t.zeros((1, n))
    J[0] = grad_x
    return J

# Evaluate Constraints
def hFunc(x, m=m, n=n):
    """Evaluate the constraints at ``x`` as a column vector.

    Args:
        x: Point at which to evaluate the constraints.
        m: Number of constraints to read from ``f``.
        n: Unused; kept so existing call sites keep working.

    Returns:
        Tensor of shape ``(m, 1)`` with the constraint values.
    """
    # `f` returns (objective, g1, ..., gm): evaluate it once and skip the
    # objective. The constraints always start at index 1, independent of the
    # number of decision variables.
    constraint_values = f(x)[1:]

    H = t.zeros((m, 1))
    for i in range(m):
        H[i] = constraint_values[i]
    return H


def Lag1(x, mu):
    """Evaluate the Lagrangian using ``f`` and ``hFunc``.

    Args:
        x: Point at which to evaluate.
        mu: Multipliers, matrix-multiplied with the constraint column.

    Returns:
        Objective value plus ``mu @ hFunc(x)``.
    """
    objective = f(x)[0]
    weighted_constraints = mu @ hFunc(x)
    return objective + weighted_constraints


In [4]:
# Armijo line search
def F(alpha, x, s, wj):
    """Merit function: objective plus weighted constraint violation.

    Args:
        alpha: Step length along ``s``.
        x: Current point.
        s: Search direction.
        wj: Penalty weights, one per constraint.

    Returns:
        Scalar tensor ``f(x + alpha*s) + sum_j wj[j] * max(0, g_j(x + alpha*s))``.
    """
    trial_point = x + alpha * s

    # Only violated constraints (g > 0) are penalised. `clamp` keeps the
    # dtype of the constraint values instead of mixing in an int64 zero tensor.
    violation = t.clamp(hFunc(trial_point), min=0)

    # Reshape the weights to a row so 1-D and column-shaped weights both work.
    penalty = t.sum(wj.reshape(1, -1) @ violation)
    return f(trial_point)[0] + penalty

def phi(alpha, x, s, wj, t0=0.5):
    """Armijo reference line for the merit function.

    Args:
        alpha: Step length being tested.
        x: Current point.
        s: Search direction.
        wj: Penalty weights, one per constraint.
        t0: Slope fraction of the sufficient-decrease condition.

    Returns:
        ``F(0) + t0 * alpha * F'(0)``.
    """
    # The reference line is anchored at the current point (alpha = 0).
    # Anchoring it at F(alpha) would reduce the test `F(alpha) > phi(alpha)`
    # to `0 > t0 * alpha * dFda`, which ignores the decrease actually achieved.
    return F(0, x, s, wj) + t0 * alpha * dFda(0, x, s, wj)

def dFda(alpha, x, s, wj):
    """Directional derivative of the merit function along ``s`` at ``x``.

    Args:
        alpha: Unused; the derivative is evaluated at ``x`` itself.
        x: Current point.
        s: Search direction.
        wj: Penalty weights, one per constraint.

    Returns:
        Objective slope along ``s`` plus the weighted slopes of the
        constraints that are currently violated.
    """
    J = jac(x)
    H = hFunc(x)

    # Rows 1.. of J are the constraint gradients; project them onto s.
    dgda = J[1:, :] @ s.reshape(-1, 1)

    # Constraints that are satisfied (g <= 0) add no penalty, so their slope
    # is zeroed out.
    dgda[H <= 0] = 0

    # Reshape the weights to a row so 1-D and column-shaped weights both work.
    return J[0] @ s + t.sum(wj.reshape(1, -1) @ dgda)

def lineSearch(x, s, mu, wj0, k, K=25):
    """Backtracking line search on the merit function ``F``.

    Args:
        x: Current point.
        s: Search direction.
        mu: Multipliers used to set the penalty weights.
        wj0: Penalty weights from the previous iteration.
        k: Iteration counter; on iteration 0 the weights are ``|mu|``.
        K: Maximum number of step halvings.

    Returns:
        Tuple ``(alpha, wj)`` of the accepted step length and the penalty
        weights that were used.
    """
    abs_mu = t.abs(mu)
    if k == 0:
        wj = abs_mu
    else:
        wj = t.max(abs_mu, 0.5 * (wj0 + abs_mu))
    print('wj:', wj)

    alpha = 1
    print('\nFa: ', F(alpha, x, s, wj))
    print('\nphi: ', phi(alpha, x, s, wj))
    print('\ndFda: ', dFda(alpha, x, s, wj))

    # Halve the step until the merit value drops to the reference line or
    # the halving budget is used up.
    halvings = 0
    while F(alpha, x, s, wj) > phi(alpha, x, s, wj) and halvings < K:
        alpha = 0.5 * alpha
        print(alpha)
        halvings += 1

    return alpha, wj

In [5]:
# x = t.tensor([1., -0.1], dtype=t.float, requires_grad=True)
# mu = t.tensor([1., 1.], dtype=t.float, requires_grad=True)
# W = t.eye(n)
# # s = t.tensor([-1.61881188, 6.23811881], requires_grad=True)

# s = t.tensor([-1.6, 6.5], requires_grad=True)
# # wt = t.tensor([1., 1.], dtype=t.float)
# wt = t.tensor([5., 5.], dtype=t.float)


In [6]:
# a, w1 = lineSearch(x, s, mu, wt, k=3)

In [7]:
def BFGS(W, x, s, dx, mu):
    """Damped BFGS update of the Lagrangian Hessian approximation.

    Args:
        W: Current ``(n, n)`` Hessian approximation.
        x: Point reached after the step, so the previous point is ``x - dx``.
        s: Search direction. Unused: the update is built from the step that
            was actually taken (``dx``). Kept so existing call sites keep
            working.
        dx: Step actually taken.
        mu: Multipliers at which the Lagrangian gradient is evaluated.

    Returns:
        The updated ``(n, n)`` Hessian approximation.
    """
    step = dx.reshape(-1)

    # Change in the Lagrangian gradient across the step.
    grad_prev = jacL(x - dx, mu)
    grad_curr = jacL(x, mu)
    dL = (grad_curr - grad_prev).reshape(-1)

    W_step = W @ step
    step_W = step @ W
    Q = step_W @ step
    curvature = step @ dL

    # Damping: fall back to a blend of dL and W @ step when the measured
    # curvature is too small relative to Q.
    if curvature >= 0.2 * Q:
        theta = 1
    else:
        theta = (0.8 * Q) / (Q - curvature)

    y = theta * dL + (1 - theta) * W_step

    # Both rank-one terms use the same step vector that defined y and theta;
    # using the raw search direction instead is only equivalent when the
    # step length is 1.
    gain = (y.reshape(-1, 1) @ y.reshape(1, -1)) / (y @ step)
    loss = (W_step.reshape(-1, 1) @ step_W.reshape(1, -1)) / Q

    return W + gain - loss

In [8]:
def QP(x, W, A):
    """Solve the QP subproblem with every constraint treated as an equality.

    Solves the linear system ``[[W, A^T], [A, 0]] [s; mu] = -[grad f; h]``.

    NOTE(review): a later definition named ``QP`` in this file replaces this
    one once that cell has been run.

    Args:
        x: Current point.
        W: ``(n, n)`` Hessian approximation.
        A: Ignored; the constraint Jacobian is recomputed from ``x``. Kept so
            existing call sites keep working.

    Returns:
        Tuple ``(s, mu)``: the first ``n`` rows of the solution (the step)
        and the remaining rows (the multipliers), both as column tensors.
    """
    # Evaluate the Jacobian once: row 0 is the objective gradient, the other
    # rows are the constraint gradients.
    J = jac(x)
    A = J[1:, :]
    fx = J[0]
    h = hFunc(x)

    zero_block = t.zeros(A.shape[0], A.shape[0])
    C = t.vstack((t.hstack((W, A.T)), t.hstack((A, zero_block))))

    # The right-hand side is negated because the stationarity and linearised
    # constraint equations are W s + A^T mu = -grad f and A s = -h.
    D = -t.vstack((fx.reshape(-1, 1), h))

    X = t.linalg.solve(C, D)
    s = X[:n, :]
    mu = X[n:, :]

    return s, mu

In [9]:
def activeSet(x, s, mu):
    """Print which constraint would be added to or removed from the working set.

    The candidate to add is the linearised constraint with the largest
    positive value; the candidate to remove is the one with the most
    negative multiplier.

    NOTE(review): the candidates are only printed. The returned list is never
    populated, so callers always receive an empty list -- confirm the intended
    return value before relying on it.

    Args:
        x: Current point.
        s: Trial step; assumed to be an ``(n, 1)`` column -- TODO confirm.
        mu: Multipliers; assumed to be a tensor reducible along dim 0 --
            TODO confirm.

    Returns:
        list: ``active``, currently always empty.
    """
    active = []
    add = []
    remove = []

    A = jac(x)[1:, :]

    # Linearised constraints at the trial step.
    constraintQP = A @ s + hFunc(x)

    val1, idx1 = t.max(constraintQP, 0)
    print(constraintQP)

    if val1 > 0:
        add.append(idx1.item())

    val2, idx2 = t.min(mu, 0)

    if val2 < 0:
        remove.append(idx2.item())

    print('Add indices; ', add)
    print('Remove indices; ', remove)

    return active
    

In [None]:
# Initial working set, Hessian approximation and starting point.
active = [0]
W = t.eye(n, dtype=t.float32)
x = t.ones(2, dtype=t.float32, requires_grad=True)

In [10]:
# NOTE(review): `s` is never assigned at module level in the visible source and
# `mu` is only assigned in a later cell -- confirm the intended cell order
# before running this call.
activeSet(x, s, mu)

NameError: name 'x' is not defined

In [11]:
def solveQP(x, W, active):
    """Solve the QP subproblem with the constraints in ``active`` as equalities.

    Args:
        x: Current point.
        W: ``(n, n)`` Hessian approximation.
        active: Indices of the constraints to enforce as equalities.

    Returns:
        Tuple ``(s, mu)``. With an empty ``active`` the step ``s`` is the
        1-D solution of ``W s = -grad f`` and ``mu`` is an empty list.
        Otherwise ``s`` is an ``(n, 1)`` column and ``mu`` a column with one
        multiplier per active constraint, in the order given by ``active``.
    """
    # Evaluate the Jacobian once: row 0 is the objective gradient, the other
    # rows are the constraint gradients.
    J = jac(x)
    A = J[1:, :]
    dfx = J[0]
    h = hFunc(x)

    if len(active) == 0:
        # No constraints enforced: plain Newton-type step.
        s = t.linalg.solve(W, -dfx)
        mu = []

    else:
        A = A[active]
        h = h[active]

        zero_block = t.zeros(A.shape[0], A.shape[0])
        C = t.vstack((t.hstack((W, A.T)), t.hstack((A, zero_block))))

        # Negated because the equations are W s + A^T mu = -grad f, A s = -h.
        # Named `rhs` so it does not shadow the module-level `d`.
        rhs = -t.vstack((dfx.reshape(-1, 1), h))

        X = t.linalg.solve(C, rhs)
        s = X[:n, :]
        mu = X[n:, :]

    return s, mu

In [None]:
def QP(x, W, max_iter=20):
    """Solve the inequality-constrained QP subproblem with an active-set loop.

    Starting from an empty working set, the equality-constrained problem is
    solved with ``solveQP``. After each solve the constraint with the most
    negative multiplier is dropped and the most violated inactive linearised
    constraint is added. The loop stops when neither exists or after
    ``max_iter`` solves.

    The add/remove test is done here rather than through ``activeSet``,
    because that helper only prints its candidates and returns an empty list.

    Args:
        x: Current point.
        W: ``(n, n)`` Hessian approximation.
        max_iter: Upper bound on the number of working-set updates, so a
            cycling working set cannot loop forever.

    Returns:
        Tuple ``(s, mu)``: the step as an ``(n, 1)`` column and the
        multipliers as a column with one row per constraint (zero for
        constraints outside the final working set).
    """
    A = jac(x)[1:, :]  # constraint gradients, one row per constraint
    h = hFunc(x)       # constraint values as a column
    n_con = A.shape[0]

    active = []
    for _ in range(max(1, max_iter)):
        s, mu_active = solveQP(x, W, active)
        s = s.reshape(-1, 1)

        # Scatter the working-set multipliers into a full-length column.
        mu = t.zeros((n_con, 1), dtype=s.dtype)
        drop = None
        if active:
            mu_active = mu_active.reshape(-1)
            mu[active, 0] = mu_active
            lowest, pos = t.min(mu_active, 0)
            if lowest < 0:
                drop = active[pos.item()]

        # Most violated linearised constraint among the inactive ones.
        add = None
        inactive = [i for i in range(n_con) if i not in active]
        if inactive:
            residual = (A[inactive] @ s + h[inactive]).reshape(-1)
            largest, pos = t.max(residual, 0)
            if largest > 0:
                add = inactive[pos.item()]

        if drop is None and add is None:
            break

        if drop is not None:
            active.remove(drop)
        if add is not None:
            active.append(add)

    return s, mu

In [None]:
# Part 4
# Initialization
def SQP(x, mu, W, wj, max_iter=100):
    """Run the SQP iteration and print the progress of each step.

    Each iteration solves the QP subproblem for a step and multipliers,
    moves ``x``, updates the Hessian approximation with ``BFGS`` and
    re-evaluates the norm of the Lagrangian gradient.

    Args:
        x: Starting point.
        mu: Starting multipliers.
        W: Starting ``(n, n)`` Hessian approximation.
        wj: Penalty weights for the line search; unused while the line
            search is disabled.
        max_iter: Upper bound on the number of iterations, so a
            non-converging run terminates.

    Returns:
        None. The gradient norm and the iterate are printed every iteration.
    """
    tol = 1e-3  # Error threshold on the norm of the Lagrangian gradient
    e = t.norm(jacL(x, mu))

    k = 0
    while e > tol and k < max_iter:
        s, mu = QP(x, W)
        # Copy an existing tensor with clone().detach() rather than
        # re-wrapping it with t.tensor(); plain sequences are still converted.
        if isinstance(mu, t.Tensor):
            mu = mu.clone().detach()
        else:
            mu = t.tensor(mu)
        s = s.reshape(1, -1)[0]

        # Part 4.1
        # TODO: replace the fixed full step with lineSearch(x, s, mu, wj, k).
        alpha = 1

        # Update the point. dx already includes alpha, so it is applied as is.
        dx = alpha * s
        with t.no_grad():
            x = x + dx

        # Part 4.4
        W = BFGS(W, x, s, dx, mu)

        # Part 4.5
        e = t.norm(jacL(x, mu))
        print(e)
        print(f'\nk: {k}, x: {x}')
        k += 1

In [None]:
# Starting point, zero multipliers, identity Hessian approximation and zero
# penalty weights for the SQP run.
x = t.tensor([1.0, 0.0], dtype=t.float32, requires_grad=True)
mu = t.zeros(2, dtype=t.float32, requires_grad=True)
W = t.eye(n, dtype=t.float32)
wj = t.zeros(2, dtype=t.float32)
SQP(x, mu, W, wj)
