In [None]:
import numpy as np
import pandas as pd
from sklearn.metrics.pairwise import euclidean_distances
from numpy import linalg as la
from sklearn.datasets import make_blobs
from sklearn.cluster import KMeans
from dppy.finite_dpps import FiniteDPP
import seaborn as sns
import scipy.io as sio
import time
import matplotlib.pyplot as plt 


# To plot consistent and pretty figures
%matplotlib inline
import matplotlib as mpl
# Global matplotlib styling applied to every figure created after this point.
mpl.rc('axes', labelsize=14)   # font size of the axis labels
mpl.rc('xtick', labelsize=12)  # font size of the x tick labels
mpl.rc('ytick', labelsize=12)  # font size of the y tick labels
mpl.rcParams['font.family'] = 'times new roman'
mpl.rcParams['lines.linewidth'] = 2
# Same resolution for on-screen figures and for figures written by savefig.
mpl.rcParams['figure.dpi'] = 120
mpl.rcParams['savefig.dpi'] = 120
import matplotlib
# The statements below are bare attribute look-ups: nothing is called or
# assigned, and each result is discarded. Their only possible effect is an
# AttributeError if one of the names cannot be resolved.
# NOTE(review): presumably left over from a matplotlib gallery example's
# "references" list -- confirm and consider removing.
matplotlib.axes.Axes.contour
matplotlib.pyplot.contour
matplotlib.axes.Axes.contourf
matplotlib.pyplot.contourf
matplotlib.figure.Figure.colorbar
matplotlib.pyplot.colorbar
matplotlib.axes.Axes.legend
matplotlib.pyplot.legend
matplotlib.contour.ContourSet
matplotlib.contour.ContourSet.legend_elements
def landmark_uniform(X, m):
    '''Uniform landmark selection.

    Draws m distinct positions from range(len(X)) with the global NumPy
    random generator and returns the corresponding entries X[positions].
    '''
    chosen_rows = np.random.choice(len(X), m, replace=False)
    landmarks = X[chosen_rows]
    return landmarks

def landmark_kmeans(X,m):
    '''K-means landmark selection.

    Fits a KMeans model with m clusters (at most 100 iterations) on X and
    returns the fitted cluster centers as the landmarks.
    '''
    model = KMeans(n_clusters=m, max_iter=100)
    model.fit(X)
    return model.cluster_centers_

def landmark_dpp(K_org,X,max_eig,m):
    '''DPP landmark selection.

    Builds a FiniteDPP with correlation kernel K_org / max_eig, draws a
    k-DPP sample of size m with the MCMC sampler and returns X indexed by
    the sampled positions.

    Parameters
    ----------
    K_org : kernel matrix handed to FiniteDPP after scaling.
    X : data array; must support X[ind] and expose X.shape[1].
    max_eig : scalar divisor applied to K_org.
        NOTE(review): presumably the largest eigenvalue of K_org, so the
        scaled kernel is a valid correlation kernel -- confirm at the caller.
    m : requested sample size.

    Returns
    -------
    X[ind] on success. If sampling or indexing raises, a warning is issued
    and an all-ones placeholder of shape (m, X.shape[1]) is returned, so
    callers running repeated trials keep going (best-effort fallback).
    '''
    DPP = FiniteDPP(kernel_type='correlation',
                    projection=False,
                    K=K_org / max_eig)
    try:
        ind = DPP.sample_mcmc_k_dpp(size=m)
        return X[ind]
    except Exception as err:
        # Catch Exception rather than everything: a bare ``except`` would
        # also swallow KeyboardInterrupt/SystemExit and make long experiment
        # loops impossible to interrupt.
        import warnings
        warnings.warn(
            'DPP landmark sampling failed (%s: %s); returning all-ones '
            'placeholder landmarks.' % (type(err).__name__, err),
            RuntimeWarning,
        )
        return np.ones((m, X.shape[1]))

def landmark_importanceSampling(X,sigma,m,frac,omega):
    '''Proposed importance sampling landmark selection.

    Coarse-to-fine landmark selection strategy:
      * floor(m/2) landmarks are rows of X drawn uniformly at random;
      * ceil(m/2) landmarks are K-means centers fitted on a subset of the
        remaining rows, drawn from an importance sampling distribution
        that favours rows far from the uniform landmarks.

    Parameters
    ----------
    X : data array, one point per row (indexed with integer arrays).
    sigma : scale dividing the distances. The distances are normalised by
        their sum afterwards, so sigma cancels out of the probabilities.
    m : total number of landmarks to return.
    frac : fraction of len(X) that sets how many rows are drawn by
        importance sampling and passed to K-means.
    omega : mixing weight; each remaining row gets probability
        (1 - omega) * uniform + omega * (normalised distance to its
        nearest uniform landmark).

    Returns
    -------
    Array whose first floor(m/2) rows are the uniform landmarks and whose
    remaining ceil(m/2) rows are the K-means centers.
    '''
    n = len(X)

    # Builtin int replaces np.int, an alias that was removed in NumPy 1.24.
    n_uniform = int(np.floor(m / 2))
    n_kmeans = int(np.ceil(m / 2))

    # Stage 1: uniform landmarks.
    ind0 = np.random.choice(n, n_uniform, replace=False)
    Z1 = X[ind0]

    # Stage 2: work only with the rows not already picked in stage 1.
    ind_remain = np.delete(np.arange(n), ind0)
    X_sub = X[ind_remain]

    # Sampling without replacement cannot draw more rows than remain, so
    # cap the request (frac close to 1 would otherwise raise ValueError).
    n_important = min(int(np.floor(frac * n)), len(X_sub))

    # Distance of every remaining row to its nearest uniform landmark.
    Dist = euclidean_distances(X_sub, Z1, squared=False) / sigma
    DistMin = Dist.min(axis=1)

    # Mixture of a uniform and a distance-proportional distribution.
    Prob = (1 - omega) / len(ind_remain) + omega * (DistMin / np.sum(DistMin))
    ind1 = np.random.choice(len(X_sub), size=n_important, replace=False, p=Prob)

    # K-means on the importance-sampled subset gives the second half.
    Z2 = KMeans(n_clusters=n_kmeans, max_iter=100).fit(X_sub[ind1]).cluster_centers_

    return np.concatenate((Z1, Z2), axis=0)

def nystrom(X,Z,sigma,r):
    '''Rank-r Nystrom approximation of the Gaussian kernel matrix of X.

    With C = exp(-||x - z||^2 / sigma^2) between rows of X and landmarks Z,
    and W the same kernel among the landmarks, the approximation
    C pinv(W) C^T is factorised through a reduced QR of C: C = Q R, so only
    the small matrix R pinv(W) R^T has to be decomposed.

    Returns the leading r approximate eigenvectors (columns of Q V) and the
    matching r leading values from the SVD of the small matrix.
    '''
    bandwidth_sq = sigma**2
    cross_kernel = np.exp(-euclidean_distances(X, Z, squared=True) / bandwidth_sq)
    landmark_kernel = np.exp(-euclidean_distances(Z, squared=True) / bandwidth_sq)
    # Symmetrise to remove round-off asymmetry before the pseudo-inverse.
    landmark_kernel = (landmark_kernel + landmark_kernel.T) / 2

    ortho_basis, upper = la.qr(cross_kernel, mode='reduced')
    core = la.multi_dot([upper, la.pinv(landmark_kernel), upper.T])
    left_vecs, sing_vals, _ = la.svd(core)

    return ortho_basis @ left_vecs[:, :r], sing_vals[:r]