In [None]:
import numpy as np
import random
import matplotlib.pyplot as plt
from dppy.multivariate_jacobi_ope import MultivariateJacobiOPE
import scipy.stats as stats
from scipy.stats import multivariate_normal
from scipy.sparse.linalg.eigen.arpack import eigsh as largest_eigsh
from scipy.io import savemat
from scipy.io import loadmat
from scipy.optimize import minimize
from dppy.finite_dpps import FiniteDPP
import math
import array

In [None]:
from Data_generation import generate_data_uniform
from Data_generation import generate_data_beta
from Data_generation import generate_data_mixture_Gaussian
from Jacobi_parameter import fit_Jacobi_parameters

In [None]:
# Function for dividing the data into batches
def get_batches(X,y,batch_size,row,sampleops,i):
    """Select the rows of ``X`` and ``y`` that form one mini-batch.

    Parameters
    ----------
    X : array, indexed as ``X[idx, :]``.
    y : array, indexed as ``y[idx]``.
    batch_size : int
        Number of indices drawn without replacement when the sampler is 'iid'.
    row : int
        Indices are drawn from ``range(row)`` when the sampler is 'iid'.
    sampleops : object
        Must expose ``name`` ('iid' or 'dpp'); for 'dpp' it must also expose
        ``DPP_list``, a sequence of pre-computed index collections.
    i : int
        Position in ``sampleops.DPP_list`` to use when the sampler is 'dpp'.

    Returns
    -------
    (X_new, y_new, idx) where ``idx`` is the sorted index array that was used.

    Raises
    ------
    ValueError
        If ``sampleops.name`` is neither 'iid' nor 'dpp'.
    """
    if sampleops.name == 'iid':
        idx = random.sample(range(row), batch_size)
    elif sampleops.name == 'dpp':
        idx = sampleops.DPP_list[i]
    else:
        # Previously this fell through and failed later with an unbound `idx`.
        raise ValueError(
            "get_batches supports sampleops.name 'iid' or 'dpp', got %r" % (sampleops.name,))
    idx = np.sort(idx)
    return X[idx,:], y[idx], idx

# Function for computing the gradient
def get_gradient(X,y,theta,weight,loss_type,elambda):
    """Weighted gradient of the L2-regularised loss at ``theta``.

    Computes ``X.T @ (weight * loss) + elambda * theta`` where ``loss`` is

    * ``X @ theta - y`` for ``loss_type == 'linear_regression'``;
    * ``-y * sigmoid(-y * (X @ theta))`` for ``loss_type == 'logistic_regression'``.

    Parameters
    ----------
    X, y, theta : arrays
        In this notebook they are passed as (n, d), (n, 1) and (d, 1).
    weight : array
        Per-row weights, multiplied element-wise with ``loss``.
    loss_type : str
        'linear_regression' or 'logistic_regression'.
    elambda : float
        Ridge coefficient applied to ``theta``.

    Raises
    ------
    ValueError
        If ``loss_type`` is not one of the two supported values.
    """
    hypothesis = np.dot(X, theta)
    if loss_type == 'linear_regression':
        loss = hypothesis - y
    elif loss_type == 'logistic_regression':
        # 1 - 1/(1 + exp(-m)) == 1/(1 + exp(m)) == exp(-logaddexp(0, m)).
        # The logaddexp form neither overflows for very negative margins nor
        # cancels to exactly 0 for large positive ones.
        margin = hypothesis * y
        loss = - y * np.exp(-np.logaddexp(0, margin))
    else:
        raise ValueError(
            "loss_type must be 'linear_regression' or 'logistic_regression', got %r" % (loss_type,))
    gradient = np.dot(X.T, weight * loss) + elambda * theta
    return gradient

# Function for generating the alternative gradient
def get_gradient_dppway2(X,y,theta,batch_size,loss_type,elambda,sampleops):
    """Alternative gradient estimate built from points sampled by ``sampleops.dpp``.

    For each of the first ``batch_size`` rows ``xsam`` of ``sampleops.dpp.sample()``
    the rows of ``X`` are weighted by ``sampleops.var.pdf(xsam - X)``, the
    weighted gradient ``X.T @ (weight * loss) / N`` is formed and divided by
    ``sampleops.dpp.K(xsam, eval_pointwise=False)`` and
    ``sampleops.dpp.eval_w(xsam)``. The contributions are summed and the ridge
    term ``elambda * theta`` is added once at the end.

    Parameters
    ----------
    X, y, theta : arrays
        ``X`` must be 2-D; each sampled point is reshaped to ``(1, X.shape[1])``.
    batch_size : int
        Number of sampled rows consumed.
    loss_type : str
        'linear_regression' or 'logistic_regression'.
    elambda : float
        Ridge coefficient.
    sampleops : object
        Must expose ``dpp`` (with ``sample``, ``K``, ``eval_w``) and ``var``
        (with ``pdf``).

    Raises
    ------
    ValueError
        If ``loss_type`` is not one of the two supported values.
    """
    N, d = X.shape
    hypothesis = np.dot(X, theta)
    if loss_type == 'linear_regression':
        loss = hypothesis - y
    elif loss_type == 'logistic_regression':
        # Stable form of 1 - 1/(1 + exp(-m)); see np.logaddexp.
        loss = - y * np.exp(-np.logaddexp(0, hypothesis * y))
    else:
        raise ValueError(
            "loss_type must be 'linear_regression' or 'logistic_regression', got %r" % (loss_type,))
    # Sample from the DPP carried by `sampleops` (the same object used for K and
    # eval_w below) instead of whatever global named `dpp` the notebook holds.
    Xsample = sampleops.dpp.sample()
    gradient = 0
    for i in range(batch_size):
        xsam = Xsample[i,:]
        # Broadcasting yields the same (N, d) array as tiling xsam over N rows.
        tmp = np.reshape(xsam,(1,d)) - X
        weight = np.reshape(sampleops.var.pdf(tmp), (N,1))
        nablahat = np.dot(X.T, weight * loss) / N
        gradient = gradient + nablahat / sampleops.dpp.K(xsam, eval_pointwise=False) / sampleops.dpp.eval_w(xsam)
    gradient = gradient + elambda * theta
    return gradient

# Function for computing the function value
def get_fun_value(X,y,theta,N,loss_type,elambda):
    """Value of the L2-regularised objective at ``theta``.

    * 'linear_regression':   ``0.5 * ||X theta - y||^2 / N + 0.5 * elambda * theta.T theta``
    * 'logistic_regression': ``sum(log(1 + exp(-y * X theta))) / N + 0.5 * elambda * theta.T theta``

    Parameters
    ----------
    X, y, theta : arrays
        In this notebook ``theta`` is a (d, 1) column, so the ridge term (and
        therefore the result) is a (1, 1) array.
    N : int
        Normaliser of the data-fit term.
    loss_type : str
        'linear_regression' or 'logistic_regression'.
    elambda : float
        Ridge coefficient.

    Raises
    ------
    ValueError
        If ``loss_type`` is not one of the two supported values.
    """
    hypothesis = np.dot(X, theta)
    ridge = 0.5 * elambda * np.dot(theta.T,theta)
    if loss_type == 'linear_regression':
        loss = hypothesis - y
        return 0.5 * np.dot(loss.T,loss) / N + ridge
    if loss_type == 'logistic_regression':
        # log(1 + exp(z)) evaluated as logaddexp(0, z): stays finite where
        # exp(z) alone would overflow to inf.
        return np.sum(np.logaddexp(0, -hypothesis * y)) / N + ridge
    raise ValueError(
        "loss_type must be 'linear_regression' or 'logistic_regression', got %r" % (loss_type,))

# Function for generating the DPP kernel for first way of gradient estimation
def generate_DPP_kernel(X,N,p,dpp,gammatildeX):
    """Build a rank-``p`` projection kernel from a rescaled ``dpp`` kernel.

    The matrix ``(1/N) * D @ Kq @ D`` is formed with ``Kq = dpp.K(X,
    eval_pointwise=False)`` and ``D = diag(sqrt(dpp.eval_w(X) / gammatildeX))``.
    Its ``p`` largest-magnitude eigenvectors ``V`` are kept and all retained
    eigenvalues are set to 1, so the resulting kernel is ``V @ V.T``.

    Parameters
    ----------
    X : array passed unchanged to ``dpp.K`` and ``dpp.eval_w``.
    N : int
        Normaliser of the rescaled kernel.
    p : int
        Number of eigenvectors requested from the eigensolver.
    dpp : object exposing ``K(X, eval_pointwise=False)`` and ``eval_w(X)``.
    gammatildeX : 1-D array
        Divides ``dpp.eval_w(X)`` element-wise (assumed 1-D, as the original
        ``np.diag`` construction required).

    Returns
    -------
    (eig_vals, eig_vecs, diag) : ``np.ones(p)``, the eigenvector matrix ``V``,
    and the diagonal of ``V @ V.T``.
    """
    Kq = dpp.K(X, eval_pointwise=False)
    qX = dpp.eval_w(X)
    scale = np.sqrt(np.divide(qX, gammatildeX))
    # Row/column scaling by broadcasting; same entries as (1/N * D) @ Kq @ D
    # without materialising the dense diagonal matrix D.
    Ktilde = ((scale / N)[:, None] * Kq) * scale[None, :]
    # Only the eigenvectors are used; the eigenvalues are replaced by ones.
    _, evecs_large_sparse = largest_eigsh(Ktilde, p, which='LM')
    evals_large_sparse = np.ones(p)
    # diag(V @ V.T) is the row-wise sum of squares of V, so the N x N product
    # is not needed.
    diagKtilde = np.sum(evecs_large_sparse * evecs_large_sparse, axis=1)
    return evals_large_sparse, evecs_large_sparse, diagKtilde

# Function for sampling the finite DPP
def generate_DPP_list_of_samples(eig_vals, eig_vecs, maxit):
    """Draw ``maxit`` exact samples from a projection finite DPP.

    The DPP is a ``FiniteDPP`` with a 'correlation' kernel, ``projection=True``,
    defined by the eigendecomposition ``(eig_vals, eig_vecs)``. Each draw uses
    ``sample_exact(mode='GS')``; the sampler's ``list_of_samples`` attribute is
    returned after the last draw.
    """
    eigen_decomposition = (eig_vals, eig_vecs)
    sampler = FiniteDPP(kernel_type='correlation',
                        projection=True,
                        K_eig_dec=eigen_decomposition)
    draws_done = 0
    for draws_done in range(maxit):
        sampler.sample_exact(mode='GS')
    return sampler.list_of_samples

In [None]:
# Mini-Batch SGD
def MiniBatchSGD(X,y,theta,loss_type,elambda,batch_size,maxiter,sampleops,thetastar=np.nan):
    """Run ``maxiter`` mini-batch SGD steps with step size ``1 / (i + 1) ** 0.9``.

    Sampler behaviour is selected by ``sampleops.name``:

    * 'iid'  - batches come from ``get_batches``; every row has weight ``1 / batch_size``.
    * 'dpp'  - batches come from ``get_batches``; row weights are
      ``1 / sampleops.kernel_diag / N``.
    * 'dpp2' - the step direction comes from ``get_gradient_dppway2``.

    After every step the full-data objective and the 2-norm of the full-data
    gradient (uniform weights ``1 / N``) are recorded. When ``thetastar`` is
    not entirely NaN, the 2-norm of ``theta - thetastar`` is recorded as well.

    Returns
    -------
    ``(theta, loss_total, gradient_total)`` when ``thetastar`` is all NaN,
    otherwise ``(theta, loss_total, gradient_total, error)``. The history
    arrays have one row per iteration.
    """
    n_rows, _ = X.shape
    sampler_name = sampleops.name

    # Per-row weights used for mini-batch gradients (not needed for 'dpp2').
    if sampler_name == 'iid':
        weight = (1 / batch_size) * np.ones((n_rows, 1))
    if sampler_name == 'dpp':
        weight = np.reshape(1 / sampleops.kernel_diag / n_rows, (n_rows,1))

    track_error = not np.isnan(thetastar).all()

    loss_total = np.array([[0.0]] * maxiter)
    gradient_total = np.array([[0.0]] * maxiter)
    if track_error:
        error = np.array([[0.0]] * maxiter)

    # Uniform weights for the full-data gradient diagnostic.
    full_weight = (1 / n_rows) * np.ones((n_rows, 1))

    for step in range(maxiter):
        if sampler_name == 'dpp2':
            direction = get_gradient_dppway2(X,y,theta,batch_size,loss_type,elambda,sampleops)
        else:
            X_batch, y_batch, rows = get_batches(X,y,batch_size,n_rows,sampleops,step)
            direction = get_gradient(X_batch,y_batch,theta,weight[rows],loss_type,elambda)

        theta = theta - (1/(step+1)**0.9) * direction

        loss_total[step] = get_fun_value(X,y,theta,n_rows,loss_type,elambda)
        full_gradient = get_gradient(X,y,theta,full_weight,loss_type,elambda)
        gradient_total[step] = np.linalg.norm(full_gradient, 2)
        if track_error:
            error[step] = np.linalg.norm(theta - thetastar, 2)

    if track_error:
        return theta, loss_total, gradient_total, error
    return theta, loss_total, gradient_total

In [None]:
class sample_ops:
    """Plain attribute holder describing how mini-batches are sampled.

    A new instance only has ``name``, initialised to an empty list. Other
    cells of this notebook overwrite ``name`` with a string and attach further
    attributes (e.g. ``kernel_diag``, ``DPP_list``) directly on the instance.
    """

    def __init__(self):
        self.name = list()

# Seed the global generators so a fresh "Restart & Run All" reproduces the run.
# `random` drives the 'iid' batches in get_batches; NumPy's global generator is
# presumably what the data generators and dppy draw from - TODO confirm.
SEED = 0
random.seed(SEED)
np.random.seed(SEED)

# Problem size (N samples, d features) and the loss being minimised.
N, d = 1000, 2
losstype = 'linear_regression'

X, y = generate_data_uniform(N, d)
# Alternative data set:
# X ,y = generate_data_mixture_Gaussian(N, d, 4)
if losstype == 'logistic_regression':
    # Turn targets into +/-1 labels; exact zeros are mapped to +1.
    y = np.sign(y)
    # Boolean mask instead of y[np.argwhere(y == 0)]: on a 2-D y, argwhere
    # returns (row, col) pairs, and using them as a row index also overwrote
    # row 0 (the column index) whenever any zero was present.
    y[y == 0] = 1
if losstype == 'linear_regression':
    # Features and targets stacked column-wise; Z has d + 1 columns.
    Z = np.concatenate((X,y),axis = 1)
    dcom = d + 1
elif losstype == 'logistic_regression':
    Z = X
    dcom = d

# Starting point (d x 1 column of zeros) and ridge coefficient.
theta0 = np.zeros((d, 1))
lambda_input = 0.1

In [None]:
# Reference minimiser `theta_direct` of the regularised objective.
if losstype == 'linear_regression':
    # Ridge normal equations: (X^T X + N * lambda * I) theta = X^T y.
    # Solve the linear system directly instead of forming the explicit inverse.
    gram = np.dot(X.T, X) + N * lambda_input * np.identity(d)
    theta_direct = np.linalg.solve(gram, np.dot(X.T, y))
if losstype == 'logistic_regression':
    def objfun(x):
        """Regularised logistic objective at the flat parameter vector ``x``."""
        x = np.reshape(x,(d,1))
        hypothesis = np.dot(X, x)
        # logaddexp(0, z) == log(1 + exp(z)) without overflow for large z.
        fun_value = np.sum(np.logaddexp(0, -hypothesis * y)) / N + 0.5 * lambda_input * np.dot(x.T,x)
        fun_value = fun_value[0,0]
        return fun_value
    theta_solve = minimize(objfun, np.reshape(theta0,(d,)), tol=1e-16)
    theta_direct = np.reshape(theta_solve.x,(d,1))
# Sanity check: the full-data gradient (uniform weights 1/N) at the reference
# solution should be close to zero.
gradient = get_gradient(X,y,theta_direct,(1 / N) * np.ones((N, 1)),losstype,lambda_input)
norm_gradient = np.linalg.norm(gradient, 2)
print('True solution obtained, with norm of gradient = ',norm_gradient)

In [None]:
# Build the DPP kernel and the density estimate `gammatilde`, then run SGD
# with DPP mini-batches.
# Jacobi parameters are fitted to Z; `gammatilde` is a Gaussian KDE of Z with
# Silverman bandwidth, evaluated back at the data points themselves.
jac_params = fit_Jacobi_parameters(Z)
gammatilde = stats.gaussian_kde(Z.T)
gammatilde.set_bandwidth(bw_method='silverman')
gammatildeZ = gammatilde.evaluate(Z.T)

# p4 is used as the batch size, as the number of eigenvectors kept by
# generate_DPP_kernel, and as the first argument of MultivariateJacobiOPE.
p4 = 10
# Iteration count chosen so that maxit4 * p4 equals 2 * N when p4 divides 2 * N.
maxit4 = int(2 * N / p4)
ops2 = sample_ops()
ops2.name = 'dpp'
dpp = MultivariateJacobiOPE(p4, jac_params)
eig_vals, eig_vecs, diagKtilde = generate_DPP_kernel(Z,N,p4,dpp,gammatildeZ)
# The kernel diagonal is what MiniBatchSGD turns into per-row weights.
ops2.kernel_diag = diagKtilde
# One pre-drawn DPP sample per SGD iteration (get_batches reads DPP_list[i]).
ops2.DPP_list = generate_DPP_list_of_samples(eig_vals, eig_vecs, maxit4)

# theta_direct is passed as thetastar, so the error history is returned too.
theta4, loss_total4, grad_total4, error4 = MiniBatchSGD(X, y, theta0, loss_type=losstype, elambda=lambda_input,
                     batch_size=p4, maxiter=maxit4, sampleops=ops2, thetastar=theta_direct)

In [None]:
plt.rcParams.update({'font.size': 20})


def _plot_vs_budget(fig_num, values, ylabel, log_scale=False, title=None):
    """Plot one per-iteration history against the sampling budget.

    The x axis is ``range(0, maxit4 * p4, p4)``, i.e. one point every ``p4``
    units of budget. ``fig_num`` is the matplotlib figure number, ``ylabel``
    the y-axis label, ``log_scale`` switches the y axis to log scale and
    ``title`` (optional) is set as the figure suptitle. Returns the figure.
    """
    fig = plt.figure(fig_num)
    plt.plot(range(0,maxit4*p4,p4), values, label='dpp_p10')
    plt.xlabel('budget')
    plt.ylabel(ylabel)
    if title is not None:
        fig.suptitle(title)
    plt.legend(fontsize=20)
    if log_scale:
        plt.yscale('log')
    plt.show()
    return fig


# Objective value, full-gradient norm and distance to the reference solution.
plot1 = _plot_vs_budget(3, loss_total4, 'function value')
plot2 = _plot_vs_budget(4, grad_total4, 'norm of gradient', log_scale=True,
                        title='Norm of gradient v.s. budget')
# Raw string: '\T' is not a valid escape sequence in a normal string literal.
plot3 = _plot_vs_budget(5, error4, r'$||\Theta_t-\Theta_*||_2$', log_scale=True)