## **FZCGS**: Faster Zeroth-Order Conditional Gradient Sliding Method

In [None]:
import numpy as np
import random
import tensorflow as tf
from tensorflow import keras



### Model

In [None]:
# Three-layer feed-forward network: 3 inputs -> 64 ReLU units -> 3 softmax outputs.
model = keras.Sequential(
    layers=[
        keras.layers.Input(shape=(3,)),
        keras.layers.Dense(units=64, activation='relu'),
        keras.layers.Dense(units=3, activation='softmax'),
    ]
)

# Compiled with the Adam optimiser and categorical cross-entropy loss;
# accuracy is reported as an additional metric.
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

### Parameters

In [None]:
# Global parameters for FZCGS. Entries set to None are placeholders that must
# be filled in before this cell can run; the derived values below raise until
# they are.
x = None  # x_0, the starting point handed to FZCGS -- TODO: set
n = 5  # number of component functions (TODO confirm)

OMEGA = []  # points of the convex feasible set in R^d; V iterates over this list -- TODO: fill in

K = 1000  # iteration budget
L = None  # TODO: set; only used to derive gamma below (presumably the smoothness constant)

# NOTE(review): both samples are drawn from x itself; confirm that x is the
# intended population rather than the indices of the component functions.
S1 = random.sample(x, n)
# random.sample requires an integer sample size, and np.sqrt returns a float,
# so the size is truncated to an int.
S2 = random.sample(x, int(np.sqrt(n)))


d = None  # TODO: set; estimate_gradient loops over d coordinates (presumably the dimension of x)

q = len(S2)

mu = 1 / np.sqrt(d * K)
# Parenthesised on purpose: `1/3*L` evaluates as (1/3)*L, not 1/(3*L).
gamma = 1 / (3 * L)
eta = 1 / K

### Coordinate-Wise Gradient Estimator and Conditional Gradient

In [None]:
def f(x):
    """Evaluate the objective at ``x`` by running the module-level ``model`` on it."""
    prediction = model.predict(x)
    return prediction

def estimate_gradient(x):
    """Estimate the gradient of ``f`` at ``x`` with coordinate-wise central differences.

    For every coordinate ``j`` the estimator adds
    ``(f(x + mu*e_j) - f(x - mu*e_j)) / (2*mu) * e_j``, where ``e_j`` is the
    basis vector whose ``j``-th entry is 1 and all others 0.

    Reads the module-level ``f`` (objective), ``mu`` (smoothing parameter)
    and ``d`` (number of coordinates to visit).

    Args:
        x: NumPy array at which the gradient is estimated.

    Returns:
        The accumulated gradient estimate, initialised as zeros of ``x.shape``.
    """
    grad = np.zeros(x.shape)
    # Indices are 0-based, so range(d) covers all d coordinates; starting the
    # loop at 1 would leave the first gradient component at zero.
    for j in range(d):
        e = np.zeros(x.shape)
        e[j] = 1  # j-th basis vector
        part = ((f(x + mu * e) - f(x - mu * e)) / (2 * mu)) * e
        grad = grad + part
    return grad



def V(g: np.array, u: np.array, gamma, u_t: np.array) -> np.array:
    """Return the maximum of ``<g + (u_t - u)/gamma, u_t - p>`` over all ``p`` in ``OMEGA``.

    ``OMEGA`` is the module-level collection of feasible points. ``np.max``
    raises ``ValueError`` when that collection is empty.
    """
    gap_values = [
        np.inner(g + (1/gamma)*(u_t - u), u_t - candidate)
        for candidate in OMEGA  # original convex set
    ]
    return np.max(gap_values)


def condg(g: np.array, u: np.array, gamma, eta):
    """Run the conditional-gradient (Frank-Wolfe) inner loop starting from ``u``.

    Each iteration scores every point ``p`` of the module-level ``OMEGA`` with
    ``<g + (u_t - u)/gamma, u_t - p>``. The largest score is the Frank-Wolfe
    gap (the same quantity ``V`` returns) and the point attaining it is the
    vertex ``v_t`` the iterate moves towards. The loop stops as soon as the
    gap is at most ``eta``.

    Args:
        g: Gradient estimate.
        u: Starting point; also the centre of the ``(1/gamma)`` proximal term.
        gamma: Step-size parameter of the proximal term (must be non-zero).
        eta: Tolerance on the Frank-Wolfe gap.

    Returns:
        The iterate ``u_t`` at which the gap first drops to ``eta`` or below.

    Raises:
        ValueError: If ``OMEGA`` is empty (raised by ``np.argmax``).
    """
    u_t = u
    while True:
        direction = g + (1 / gamma) * (u_t - u)
        scores = [np.inner(direction, u_t - point) for point in OMEGA]
        best = int(np.argmax(scores))
        gap = scores[best]

        if gap <= eta:
            break

        # The vertex is the maximising point itself, not the gap value.
        v_t = np.asarray(OMEGA[best])
        step = v_t - u_t
        # Squared Euclidean norm (default of np.linalg.norm), not the 1-norm.
        norm = np.linalg.norm(step) ** 2
        arg = np.inner((1 / gamma) * (u - u_t) - g, step) / ((1 / gamma) * norm)
        alpha_t = np.min([1, arg])
        # Convex combination of the current iterate and the vertex.
        u_t = (1 - alpha_t) * u_t + alpha_t * v_t

    return u_t

In [None]:
def FZCGS(x_0, q, mu, K, eta, gamma, n):
    """Run K iterations of FZCGS from ``x_0`` and return one randomly chosen iterate.

    Every ``q``-th iteration the gradient estimate ``v_k`` is recomputed from
    scratch with ``estimate_gradient``. In between, it is updated recursively
    as ``v_k = grad(x_k) - grad(x_{k-1}) + v_{k-1}``. The next iterate is then
    obtained from ``condg(v_k, x_k, gamma, eta)``.

    Args:
        x_0: Starting point.
        q: Period (in iterations) between full gradient re-estimations.
        mu: Smoothing parameter. Not read here; ``estimate_gradient`` uses the
            module-level ``mu``.
        K: Number of iterations.
        eta: Frank-Wolfe gap tolerance forwarded to ``condg``.
        gamma: Step-size parameter forwarded to ``condg``.
        n: Number of component functions. Not read by this implementation.

    Returns:
        A one-element list (the shape ``random.sample(..., 1)`` produces)
        holding an iterate drawn uniformly from ``x_0, ..., x_{K-1}``.

    Raises:
        ValueError: If ``K`` or ``q`` is smaller than 1.
    """
    if K < 1:
        raise ValueError("K must be at least 1")
    if q < 1:
        raise ValueError("q must be at least 1")

    v_k = None  # always assigned at k == 0, because 0 % q == 0
    x_k = x_0
    x_k_prev = x_0
    iterates = []

    for k in range(K):
        iterates.append(x_k)
        if k % q == 0:
            v_k = estimate_gradient(x_k)
        else:
            g = estimate_gradient(x_k)
            g_prev = estimate_gradient(x_k_prev)
            # estimate_gradient takes no component index, so averaging over a
            # mini-batch would average identical terms. The update is applied
            # once and kept as a vector, since summing its entries would hand
            # condg a scalar.
            v_k = (g - g_prev) + v_k

        # Remember the current iterate before overwriting it, so the next
        # recursive update really sees x_{k-1}.
        x_k_prev = x_k
        x_k = condg(v_k, x_k, gamma, eta)

    return random.sample(iterates, 1)
    