In [None]:
import numpy as np
import random
import matplotlib.pyplot as plt
from dppy.multivariate_jacobi_ope import MultivariateJacobiOPE
import scipy.stats as stats
from scipy.stats import multivariate_normal
from scipy.sparse.linalg.eigen.arpack import eigsh as largest_eigsh
from scipy.io import savemat
from scipy.io import loadmat
from scipy.optimize import minimize
from dppy.finite_dpps import FiniteDPP
import math
import array
from random import seed
from random import randint
from scipy.stats.distributions import chi2

In [None]:
from Data_generation import generate_data_uniform
from Data_generation import generate_data_beta
from Data_generation import generate_data_mixture_Gaussian
from Jacobi_parameter import fit_Jacobi_parameters

In [None]:
# Function for dividing the data into batches
def get_batches(X,y,batch_size,row,sampleops,i):
    """Select one mini-batch of rows from ``X`` and ``y``.

    Parameters
    ----------
    X : array indexed as ``X[idx, :]``.
    y : array indexed as ``y[idx]``.
    batch_size : number of indices drawn in ``'iid'`` mode.
    row : indices are drawn from ``range(row)`` in ``'iid'`` mode.
    sampleops : object with a ``name`` attribute (``'iid'`` or ``'dpp'``);
        in ``'dpp'`` mode it must also provide ``DPP_list``.
    i : position of the pre-drawn sample in ``sampleops.DPP_list``
        (only read in ``'dpp'`` mode).

    Returns
    -------
    (X_new, y_new, idx) : the selected rows of ``X`` and ``y`` and the sorted
        index array used to select them.

    Raises
    ------
    ValueError
        If ``sampleops.name`` is neither ``'iid'`` nor ``'dpp'``.
    """
    if sampleops.name == 'iid':
        # Draw batch_size distinct row indices without replacement.
        idx = random.sample(range(row), batch_size)
    elif sampleops.name == 'dpp':
        # Reuse the i-th sample that was drawn ahead of time.
        idx = sampleops.DPP_list[i]
    else:
        # Previously this fell through and failed later on an unbound `idx`.
        raise ValueError(
            "unknown sampling scheme %r (expected 'iid' or 'dpp')" % (sampleops.name,)
        )
    idx = np.sort(idx)
    X_new = X[idx,:]
    y_new = y[idx]
    return X_new, y_new, idx

# Function for computing the gradient
def get_gradient(X,y,theta,weight,loss_type,elambda):
    """Weighted gradient of the regularised loss at ``theta``.

    Parameters
    ----------
    X : feature matrix; ``np.dot(X, theta)`` must be valid.
    y : targets, broadcast-compatible with ``np.dot(X, theta)``.
    theta : parameter at which the gradient is evaluated.
    weight : per-row weights multiplied into the per-row loss derivative.
    loss_type : ``'linear_regression'`` or ``'logistic_regression'``.
    elambda : coefficient of the ``elambda * theta`` regularisation term.

    Returns
    -------
    ``np.dot(X.T, weight * loss) + elambda * theta``.

    Raises
    ------
    ValueError
        If ``loss_type`` is not one of the two supported names.
    """
    hypothesis = np.dot(X, theta)
    if loss_type == 'linear_regression':
        # Residual of the linear prediction.
        loss = hypothesis - y
    elif loss_type == 'logistic_regression':
        # -y * (1 - sigmoid(y * hypothesis)).
        loss = - y * (1 - 1 / (1 + np.exp(- hypothesis * y)))
    else:
        # Previously `loss` stayed unbound and the next line failed obscurely.
        raise ValueError(
            "unknown loss_type %r (expected 'linear_regression' or "
            "'logistic_regression')" % (loss_type,)
        )
    gradient = np.dot(X.T, weight * loss) + elambda * theta
    return gradient

# Function for generating the alternative gradient
def get_gradient_dppway2(X,y,theta,batch_size,loss_type,elambda,sampleops):
    """Alternative gradient estimate built from a fresh sample of ``sampleops.dpp``.

    The per-row loss derivative is computed on every row of ``X``. For each of
    the first ``batch_size`` sampled points, the rows are weighted by
    ``sampleops.var.pdf(sample - row)``, averaged over the ``N`` rows, and the
    result is divided by ``sampleops.dpp.K(sample)`` and
    ``sampleops.dpp.eval_w(sample)``. The contributions are summed and
    ``elambda * theta`` is added.

    Parameters
    ----------
    X : (N, d) feature matrix (shape is read via ``X.shape``).
    y : targets, broadcast-compatible with ``np.dot(X, theta)``.
    theta : parameter at which the gradient is evaluated.
    batch_size : number of rows of the DPP sample that are used.
    loss_type : ``'linear_regression'`` or ``'logistic_regression'``.
    elambda : coefficient of the ``elambda * theta`` regularisation term.
    sampleops : object providing ``dpp`` (with ``sample``, ``K``, ``eval_w``)
        and ``var`` (with ``pdf``).

    Returns
    -------
    The summed estimate plus ``elambda * theta``.

    Raises
    ------
    ValueError
        If ``loss_type`` is not one of the two supported names.
    """
    N, d = X.shape
    hypothesis = np.dot(X, theta)
    if loss_type == 'linear_regression':
        loss = hypothesis - y
    elif loss_type == 'logistic_regression':
        loss = - y * (1 - 1 / (1 + np.exp(- hypothesis * y)))
    else:
        raise ValueError(
            "unknown loss_type %r (expected 'linear_regression' or "
            "'logistic_regression')" % (loss_type,)
        )
    # Sample from the same DPP whose kernel and weight are evaluated below.
    # The original read a notebook-level global `dpp`, which only worked when
    # a previous cell happened to have defined it.
    Xsample = sampleops.dpp.sample()
    gradient = 0
    for i in range(batch_size):
        xsam = Xsample[i,:]
        # Offset of the sampled point from every data row (broadcast over rows).
        tmp = np.reshape(xsam,(1,d)) - X
        weight = np.reshape(sampleops.var.pdf(tmp), (N,1))
        nablahat = np.dot(X.T, weight * loss) / N
        gradient = gradient + nablahat / sampleops.dpp.K(xsam, eval_pointwise=False) / sampleops.dpp.eval_w(xsam)
    gradient = gradient + elambda * theta
    return gradient

# Function for computing the function value
def get_fun_value(X,y,theta,N,loss_type,elambda):
    """Regularised objective value at ``theta``.

    Parameters
    ----------
    X : feature matrix; ``np.dot(X, theta)`` must be valid.
    y : targets, broadcast-compatible with ``np.dot(X, theta)``.
    theta : parameter at which the objective is evaluated.
    N : divisor applied to the summed data-fit term.
    loss_type : ``'linear_regression'`` or ``'logistic_regression'``.
    elambda : coefficient of the ``0.5 * elambda * theta.T @ theta`` penalty.

    Returns
    -------
    Data-fit term divided by ``N`` plus the penalty (has the shape of
    ``np.dot(theta.T, theta)``).

    Raises
    ------
    ValueError
        If ``loss_type`` is not one of the two supported names.
    """
    hypothesis = np.dot(X, theta)
    if loss_type == 'linear_regression':
        loss = hypothesis - y
        fun_value = 0.5 * np.dot(loss.T,loss) / N + 0.5 * elambda * np.dot(theta.T,theta)
    elif loss_type == 'logistic_regression':
        # log(1 + exp(-z)) evaluated as logaddexp(0, -z): the naive form
        # overflows to inf once -z exceeds ~709, logaddexp stays finite.
        fun_value = np.sum(np.logaddexp(0, -hypothesis * y)) / N + 0.5 * elambda * np.dot(theta.T,theta)
    else:
        # Previously `fun_value` stayed unbound and the return failed obscurely.
        raise ValueError(
            "unknown loss_type %r (expected 'linear_regression' or "
            "'logistic_regression')" % (loss_type,)
        )
    return fun_value

# Function for generating the DPP kernel for first way of gradient estimation
def generate_DPP_kernel(X,N,p,dpp,gammatildeX):
    """Build a rank-``p`` projection kernel on the data points.

    ``dpp.K(X)`` is rescaled on both sides by ``sqrt(dpp.eval_w(X) / gammatildeX)``
    and by ``1 / N``; the ``p`` largest-magnitude eigenvectors of that matrix
    are kept and their eigenvalues are replaced by ones.

    Parameters
    ----------
    X : points passed to ``dpp.K`` and ``dpp.eval_w``.
    N : normalising constant applied as ``1 / N``.
    p : number of eigenvectors requested.
    dpp : object providing ``K(X, eval_pointwise=False)`` and ``eval_w(X)``.
    gammatildeX : values dividing ``dpp.eval_w(X)`` element-wise.

    Returns
    -------
    (ones of length p, eigenvectors, diagonal of eigenvectors @ eigenvectors.T)
    """
    base_kernel = dpp.K(X, eval_pointwise=False)
    weight_ratio = np.divide(dpp.eval_w(X), gammatildeX)
    scaling = np.diag(np.sqrt(weight_ratio))
    rescaled = 1. / N * scaling @ base_kernel @ scaling
    # The computed eigenvalues are discarded; only the eigenvectors are used.
    _, top_vecs = largest_eigsh(rescaled, p, which='LM')
    projection = np.dot(top_vecs, top_vecs.T)
    return np.ones(p), top_vecs, np.diag(projection)

# Function for sampling the finite DPP
def generate_DPP_list_of_samples(eig_vals, eig_vecs, maxit):
    """Draw ``maxit`` exact samples from a projection correlation-kernel DPP.

    The DPP is built from the eigendecomposition ``(eig_vals, eig_vecs)`` and
    sampled with ``mode='GS'``; the sampler's accumulated ``list_of_samples``
    is returned.
    """
    spectral_pair = (eig_vals, eig_vecs)
    sampler = FiniteDPP(kernel_type='correlation', projection=True, K_eig_dec=spectral_pair)
    n_drawn = 0
    for n_drawn in range(maxit):
        sampler.sample_exact(mode='GS')
    return sampler.list_of_samples

In [None]:
class sample_ops:
    """Bare attribute container for sampling options.

    Only ``name`` is initialised here. The helper functions in this file read
    ``name``, ``DPP_list``, ``dpp`` and ``var`` from their ``sampleops``
    argument (presumably an instance of this class); those other attributes
    must be attached by the caller.
    """
    def __init__(self):
        # Placeholder value. NOTE(review): get_batches compares ``name`` with
        # the strings 'iid' / 'dpp', so callers are expected to overwrite it.
        self.name = []
# Loss name passed to get_gradient by the experiment cells below.
losstype = 'linear_regression'

In [None]:
# Experiment: variance of the mini-batch gradient estimate versus batch size,
# for iid sampling and for DPP sampling, with d = 1 and all targets set to 1.
# NOTE(review): no random seed is set, so numbers differ between runs.
N, d = 1000, 1
X, y = generate_data_uniform(N, d)
# The generated targets are discarded and replaced by a constant.
y = np.ones((N,1))
# The DPP is built on the features only in this cell.
Z = X

# Regularisation coefficient (0 disables the penalty term).
lambda_input = 0

# Closed-form regularised least-squares solution; the gradient variance is
# measured at this point.
inv = np.linalg.inv(np.dot(X.T, X) + N * lambda_input * np.identity(d))
theta_direct = np.dot(inv, np.dot(X.T, y))
# Sanity check: full-data gradient (uniform weights 1/N) at the solution.
gradient = get_gradient(X,y,theta_direct,(1 / N) * np.ones((N, 1)),losstype,lambda_input)
norm_gradient = np.linalg.norm(gradient, 2)
print('True solution obtained, with norm of gradient = ',norm_gradient)

# Parameters for the Jacobi ensemble fitted on Z (helper defined in Jacobi_parameter).
jac_params = fit_Jacobi_parameters(Z)
# Gaussian kernel density estimate of Z with Silverman bandwidth, evaluated at the data points.
gammatilde = stats.gaussian_kde(Z.T)
gammatilde.set_bandwidth(bw_method='silverman')
gammatildeZ = gammatilde.evaluate(Z.T)

theta = theta_direct
batch_size_list = np.array([5,10,15,20,25,30,35,40])
# Number of gradient estimates drawn per batch size and per sampling scheme.
compute_grad_num = 1000

# One variance value per batch size, stored as (len(batch_size_list), 1) arrays.
grad_var_iid = np.array(len(batch_size_list)*[[0.0]])
grad_var_dpp = np.array(len(batch_size_list)*[[0.0]])
for k in range(len(batch_size_list)):
    batch_size = int(batch_size_list[k])

    # Rank-batch_size projection kernel on the data points.
    dpp = MultivariateJacobiOPE(batch_size, jac_params)
    eig_vals, eig_vecs, diagKtilde = generate_DPP_kernel(Z,N,batch_size,dpp,gammatildeZ)

    # iid: every sampled row gets weight 1/batch_size.
    weight_iid = (1 / batch_size) * np.ones((N, 1))
    # DPP: row i gets weight 1 / (N * diagKtilde[i]).
    weight_dpp = np.reshape(1 / diagKtilde / N, (N,1))

    # iid estimates: rows drawn without replacement; one column per estimate.
    for j in range(compute_grad_num):
        idx = np.sort(random.sample(range(N), batch_size))
        grad_iid_tmp = get_gradient(X[idx,:],y[idx],theta,weight_iid[idx],losstype,lambda_input)
        if j == 0:
            grad_iid = grad_iid_tmp
        else:
            grad_iid = np.hstack((grad_iid, grad_iid_tmp)) 
    # Sample variance over all collected entries (d = 1 here).
    grad_var_iid[k] = np.var(grad_iid, ddof=1)

    # DPP estimates: all samples are drawn up front, then one gradient per sample.
    DPP_list = generate_DPP_list_of_samples(eig_vals, eig_vecs, compute_grad_num)
    for j in range(compute_grad_num):
        idx = np.sort(DPP_list[j])
        grad_dpp_tmp = get_gradient(X[idx,:],y[idx],theta,weight_dpp[idx],losstype,lambda_input)
        if j == 0:
            grad_dpp = grad_dpp_tmp
        else:
            grad_dpp = np.hstack((grad_dpp, grad_dpp_tmp)) 
    grad_var_dpp[k] = np.var(grad_dpp, ddof=1)
    print(batch_size,' finished')
    
# 2.5% and 97.5% chi-square quantiles with compute_grad_num-1 degrees of freedom,
# used to scale the DPP variance into upper / lower bounds
# (the usual chi-square interval for a variance; presumably assumes near-normal estimates).
A = chi2.ppf(0.05/2, df=compute_grad_num-1)
B = chi2.ppf(1-0.05/2, df=compute_grad_num-1)
var_high = (compute_grad_num-1) / A * grad_var_dpp
var_low = (compute_grad_num-1) / B * grad_var_dpp
# Least-squares line fit of log(DPP variance) on log(batch size) with intercept,
# solved through the normal equations; mle_para = [slope, intercept].
xmle = np.reshape(np.log(batch_size_list),(len(batch_size_list),1)) 
ymle = np.reshape(np.log(grad_var_dpp),(len(batch_size_list),1)) 
xmle = np.hstack((xmle,np.ones((len(batch_size_list),1))))
mle_para = np.reshape(np.dot(np.linalg.inv(np.dot(xmle.T, xmle)), np.dot(xmle.T, ymle)),(2,))
mle_y = np.exp(mle_para[0]*np.log(batch_size_list)+mle_para[1])
mle_slope = mle_para[0]

# Log-log plot: green line = fitted power law, open circles = iid variance,
# black dots = DPP variance, red / blue dots = upper / lower bounds.
# Only the fitted line carries a legend label.
plt.rcParams.update({'font.size': 20})
fig, ax = plt.subplots() 
plt.loglog(batch_size_list,mle_y,'g-',label='MLE slope=%.2f' %mle_slope)
plt.loglog(batch_size_list,grad_var_iid, 'o', markeredgecolor='k', markerfacecolor='w', markersize=5)
plt.loglog(batch_size_list,grad_var_dpp,'ko', markersize=3)
plt.loglog(batch_size_list,var_high,'ro', markersize=2)
plt.loglog(batch_size_list,var_low,'bo', markersize=2)
plt.xlabel('batch size')
plt.ylabel('variance')
# NOTE(review): the tick positions set here are replaced by the next line;
# this call may still influence the x-axis limits, so it is kept.
ax.set_xticks( [5,50] )
plt.xticks( ticks = [10,40], labels = ['$10^{1}$' , r'$4 \times 10^{1}$'] )
plt.legend(fontsize=18,loc='best')
plt.show()

In [None]:
# Experiment: variance of the mini-batch gradient estimate versus batch size,
# for iid sampling and for DPP sampling, with d = 1 and noisy targets.
# NOTE(review): no random seed is set, so numbers differ between runs.
N, d = 1000, 1
X, y = generate_data_uniform(N, d)
# Alternative data source (disabled):
# X ,y = generate_data_mixture_Gaussian(N, d, 4)
# Add Gaussian noise (mean 0.1, standard deviation 0.5) to the targets ...
y = y + np.random.normal(0.1, 0.5, size=(N, 1))
# ... and clip them to [-0.95, 0.95]
# (presumably to stay inside the domain of the Jacobi ensemble - TODO confirm).
y = np.minimum(np.maximum(y,-0.95),0.95)
# The DPP is built on the joint (feature, target) points.
Z = np.concatenate((X,y),axis = 1)

# Regularisation coefficient (0 disables the penalty term).
lambda_input = 0

# Closed-form regularised least-squares solution; the gradient variance is
# measured at this point.
inv = np.linalg.inv(np.dot(X.T, X) + N * lambda_input * np.identity(d))
theta_direct = np.dot(inv, np.dot(X.T, y))
# Sanity check: full-data gradient (uniform weights 1/N) at the solution.
gradient = get_gradient(X,y,theta_direct,(1 / N) * np.ones((N, 1)),losstype,lambda_input)
norm_gradient = np.linalg.norm(gradient, 2)
print('True solution obtained, with norm of gradient = ',norm_gradient)

# Parameters for the Jacobi ensemble fitted on Z (helper defined in Jacobi_parameter).
jac_params = fit_Jacobi_parameters(Z)
# Gaussian kernel density estimate of Z with Silverman bandwidth, evaluated at the data points.
gammatilde = stats.gaussian_kde(Z.T)
gammatilde.set_bandwidth(bw_method='silverman')
gammatildeZ = gammatilde.evaluate(Z.T)

theta = theta_direct
batch_size_list = np.array([10,20,30,40,50,60,70,80,90,100])
# Number of gradient estimates drawn per batch size and per sampling scheme.
compute_grad_num = 1000

# One variance value per batch size, stored as (len(batch_size_list), 1) arrays.
grad_var_iid = np.array(len(batch_size_list)*[[0.0]])
grad_var_dpp = np.array(len(batch_size_list)*[[0.0]])
for k in range(len(batch_size_list)):
    batch_size = int(batch_size_list[k])

    # Rank-batch_size projection kernel on the data points.
    dpp = MultivariateJacobiOPE(batch_size, jac_params)
    eig_vals, eig_vecs, diagKtilde = generate_DPP_kernel(Z,N,batch_size,dpp,gammatildeZ)

    # iid: every sampled row gets weight 1/batch_size.
    weight_iid = (1 / batch_size) * np.ones((N, 1))
    # DPP: row i gets weight 1 / (N * diagKtilde[i]).
    weight_dpp = np.reshape(1 / diagKtilde / N, (N,1))

    # iid estimates: rows drawn without replacement; one column per estimate.
    for j in range(compute_grad_num):
        idx = np.sort(random.sample(range(N), batch_size))
        grad_iid_tmp = get_gradient(X[idx,:],y[idx],theta,weight_iid[idx],losstype,lambda_input)
        if j == 0:
            grad_iid = grad_iid_tmp
        else:
            grad_iid = np.hstack((grad_iid, grad_iid_tmp)) 
    # Sample variance over all collected entries (d = 1 here).
    grad_var_iid[k] = np.var(grad_iid, ddof=1)

    # DPP estimates: all samples are drawn up front, then one gradient per sample.
    DPP_list = generate_DPP_list_of_samples(eig_vals, eig_vecs, compute_grad_num)
    for j in range(compute_grad_num):
        idx = np.sort(DPP_list[j])
        grad_dpp_tmp = get_gradient(X[idx,:],y[idx],theta,weight_dpp[idx],losstype,lambda_input)
        if j == 0:
            grad_dpp = grad_dpp_tmp
        else:
            grad_dpp = np.hstack((grad_dpp, grad_dpp_tmp)) 
    grad_var_dpp[k] = np.var(grad_dpp, ddof=1)
    print(batch_size,' finished')

# 2.5% and 97.5% chi-square quantiles with compute_grad_num-1 degrees of freedom,
# used to scale the DPP variance into upper / lower bounds
# (the usual chi-square interval for a variance; presumably assumes near-normal estimates).
A = chi2.ppf(0.05/2, df=compute_grad_num-1)
B = chi2.ppf(1-0.05/2, df=compute_grad_num-1)
var_high = (compute_grad_num-1) / A * grad_var_dpp
var_low = (compute_grad_num-1) / B * grad_var_dpp
# Least-squares line fit of log(DPP variance) on log(batch size) with intercept,
# solved through the normal equations; mle_para = [slope, intercept].
xmle = np.reshape(np.log(batch_size_list),(len(batch_size_list),1)) 
ymle = np.reshape(np.log(grad_var_dpp),(len(batch_size_list),1)) 
xmle = np.hstack((xmle,np.ones((len(batch_size_list),1))))
mle_para = np.reshape(np.dot(np.linalg.inv(np.dot(xmle.T, xmle)), np.dot(xmle.T, ymle)),(2,))
mle_y = np.exp(mle_para[0]*np.log(batch_size_list)+mle_para[1])
mle_slope = mle_para[0]

# Log-log plot: green line = fitted power law, open circles = iid variance,
# black dots = DPP variance, red / blue dots = upper / lower bounds.
# Only the fitted line carries a legend label.
plt.rcParams.update({'font.size': 20})
fig, ax = plt.subplots() 
plt.loglog(batch_size_list,mle_y,'g-',label='MLE slope=%.2f' %mle_slope)
plt.loglog(batch_size_list,grad_var_iid, 'o', markeredgecolor='k', markerfacecolor='w', markersize=5)
plt.loglog(batch_size_list,grad_var_dpp,'ko', markersize=3)
plt.loglog(batch_size_list,var_high,'ro', markersize=2)
plt.loglog(batch_size_list,var_low,'bo', markersize=2)
plt.xlabel('batch size')
plt.ylabel('variance')
plt.legend(fontsize=18,loc='best')
plt.show()

In [None]:
# Experiment: variance of the mini-batch gradient estimate versus batch size,
# for iid sampling and for DPP sampling, with d = 2 and noisy targets.
# The reported variance is the trace of the gradient covariance matrix.
# NOTE(review): no random seed is set, so numbers differ between runs.
N, d = 1000, 2
X, y = generate_data_uniform(N, d)
# Alternative data source (disabled):
# X ,y = generate_data_mixture_Gaussian(N, d, 4)
# Add Gaussian noise (mean 0.1, standard deviation 0.5) to the targets ...
y = y + np.random.normal(0.1, 0.5, size=(N, 1))
# ... and clip them to [-0.95, 0.95]
# (presumably to stay inside the domain of the Jacobi ensemble - TODO confirm).
y = np.minimum(np.maximum(y,-0.95),0.95)
# The DPP is built on the joint (feature, target) points.
Z = np.concatenate((X,y),axis = 1)

# Regularisation coefficient (0 disables the penalty term).
lambda_input = 0

# Closed-form regularised least-squares solution; the gradient variance is
# measured at this point.
inv = np.linalg.inv(np.dot(X.T, X) + N * lambda_input * np.identity(d))
theta_direct = np.dot(inv, np.dot(X.T, y))
# Sanity check: full-data gradient (uniform weights 1/N) at the solution.
gradient = get_gradient(X,y,theta_direct,(1 / N) * np.ones((N, 1)),losstype,lambda_input)
norm_gradient = np.linalg.norm(gradient, 2)
print('True solution obtained, with norm of gradient = ',norm_gradient)

# Parameters for the Jacobi ensemble fitted on Z (helper defined in Jacobi_parameter).
jac_params = fit_Jacobi_parameters(Z)
# Gaussian kernel density estimate of Z with Silverman bandwidth, evaluated at the data points.
gammatilde = stats.gaussian_kde(Z.T)
gammatilde.set_bandwidth(bw_method='silverman')
gammatildeZ = gammatilde.evaluate(Z.T)

theta = theta_direct
batch_size_list = np.array([10,20,30,40,50,60,70,80,90,100])
# Number of gradient estimates drawn per batch size and per sampling scheme.
compute_grad_num = 1000

# One variance value per batch size, stored as (len(batch_size_list), 1) arrays.
grad_var_iid = np.array(len(batch_size_list)*[[0.0]])
grad_var_dpp = np.array(len(batch_size_list)*[[0.0]])
for k in range(len(batch_size_list)):
    batch_size = int(batch_size_list[k])

    # Rank-batch_size projection kernel on the data points.
    dpp = MultivariateJacobiOPE(batch_size, jac_params)
    eig_vals, eig_vecs, diagKtilde = generate_DPP_kernel(Z,N,batch_size,dpp,gammatildeZ)

    # iid: every sampled row gets weight 1/batch_size.
    weight_iid = (1 / batch_size) * np.ones((N, 1))
    # DPP: row i gets weight 1 / (N * diagKtilde[i]).
    weight_dpp = np.reshape(1 / diagKtilde / N, (N,1))

    # iid estimates: rows drawn without replacement; one column per estimate.
    for j in range(compute_grad_num):
        idx = np.sort(random.sample(range(N), batch_size))
        grad_iid_tmp = get_gradient(X[idx,:],y[idx],theta,weight_iid[idx],losstype,lambda_input)
        if j == 0:
            grad_iid = grad_iid_tmp
        else:
            grad_iid = np.hstack((grad_iid, grad_iid_tmp)) 
    # Rows of grad_iid are gradient coordinates, columns are estimates:
    # sum the per-coordinate variances via the trace of the covariance matrix.
    grad_var_iid[k] = np.trace(np.cov(grad_iid, ddof=1))

    # DPP estimates: all samples are drawn up front, then one gradient per sample.
    DPP_list = generate_DPP_list_of_samples(eig_vals, eig_vecs, compute_grad_num)
    for j in range(compute_grad_num):
        idx = np.sort(DPP_list[j])
        grad_dpp_tmp = get_gradient(X[idx,:],y[idx],theta,weight_dpp[idx],losstype,lambda_input)
        if j == 0:
            grad_dpp = grad_dpp_tmp
        else:
            grad_dpp = np.hstack((grad_dpp, grad_dpp_tmp)) 
    grad_var_dpp[k] = np.trace(np.cov(grad_dpp, ddof=1))
    print(batch_size,' finished')

# First figure: fit over all batch sizes.
# 2.5% and 97.5% chi-square quantiles with compute_grad_num-1 degrees of freedom,
# used to scale the DPP variance into upper / lower bounds.
# NOTE(review): the chi-square scaling is applied to a trace of a covariance
# here, so the bounds are presumably only approximate.
A = chi2.ppf(0.05/2, df=compute_grad_num-1)
B = chi2.ppf(1-0.05/2, df=compute_grad_num-1)
var_high = (compute_grad_num-1) / A * grad_var_dpp
var_low = (compute_grad_num-1) / B * grad_var_dpp
# Least-squares line fit of log(DPP variance) on log(batch size) with intercept,
# solved through the normal equations; mle_para = [slope, intercept].
xmle = np.reshape(np.log(batch_size_list),(len(batch_size_list),1)) 
ymle = np.reshape(np.log(grad_var_dpp),(len(batch_size_list),1)) 
xmle = np.hstack((xmle,np.ones((len(batch_size_list),1))))
mle_para = np.reshape(np.dot(np.linalg.inv(np.dot(xmle.T, xmle)), np.dot(xmle.T, ymle)),(2,))
mle_y = np.exp(mle_para[0]*np.log(batch_size_list)+mle_para[1])
mle_slope = mle_para[0]
# Log-log plot: green line = fitted power law, open circles = iid variance,
# black dots = DPP variance, red / blue dots = upper / lower bounds.
# Only the fitted line carries a legend label.
plt.rcParams.update({'font.size': 20})
fig, ax = plt.subplots() 
plt.loglog(batch_size_list,mle_y,'g-',label='MLE slope=%.2f' %mle_slope)
plt.loglog(batch_size_list,grad_var_iid, 'o', markeredgecolor='k', markerfacecolor='w', markersize=5)
plt.loglog(batch_size_list,grad_var_dpp,'ko', markersize=3)
plt.loglog(batch_size_list,var_high,'ro', markersize=2)
plt.loglog(batch_size_list,var_low,'bo', markersize=2)
plt.xlabel('batch size')
plt.ylabel('variance')
plt.legend(fontsize=18,loc='best')
plt.show()

# Second figure: same data, but the line is refitted without the smallest
# batch size. The bounds and the log arrays are recomputed with the same
# formulas as above before the first row is dropped.
A = chi2.ppf(0.05/2, df=compute_grad_num-1)
B = chi2.ppf(1-0.05/2, df=compute_grad_num-1)
var_high = (compute_grad_num-1) / A * grad_var_dpp
var_low = (compute_grad_num-1) / B * grad_var_dpp
xmle = np.reshape(np.log(batch_size_list),(len(batch_size_list),1)) 
ymle = np.reshape(np.log(grad_var_dpp),(len(batch_size_list),1)) 
xmle = np.hstack((xmle,np.ones((len(batch_size_list),1))))
# Drop the first batch size from the regression only; all points are still plotted.
xmle = xmle[1:len(batch_size_list),:]
ymle = ymle[1:len(batch_size_list),:]
mle_para = np.reshape(np.dot(np.linalg.inv(np.dot(xmle.T, xmle)), np.dot(xmle.T, ymle)),(2,))
mle_y = np.exp(mle_para[0]*np.log(batch_size_list[1:len(batch_size_list)])+mle_para[1])
mle_slope = mle_para[0]
plt.rcParams.update({'font.size': 20})
fig, ax = plt.subplots() 
# The fitted line is drawn only over the batch sizes used in the fit.
plt.loglog(batch_size_list[1:len(batch_size_list)],mle_y,'g-',label='MLE slope=%.2f' %mle_slope)
plt.loglog(batch_size_list,grad_var_iid, 'o', markeredgecolor='k', markerfacecolor='w', markersize=5)
plt.loglog(batch_size_list,grad_var_dpp,'ko', markersize=3)
plt.loglog(batch_size_list,var_high,'ro', markersize=2)
plt.loglog(batch_size_list,var_low,'bo', markersize=2)
plt.xlabel('batch size')
plt.ylabel('variance')
plt.legend(fontsize=18,loc='best')
plt.show()