In [None]:
## K-Center and MEB Testing
## Karan Vombatkere, Dec 2021

#Imports
import numpy as np

import sys
sys.path.append('../algorithms/')

import coreset_util as c_util
import coreset_kcenter as c_kcenter
import coreset_meb as c_meb
import coreset_median as c_median
import helper_functions as hf



In [None]:
#Function to import data from data folder
def import_dataset(dataset_name, col_list):
    '''
    Load a dataset through c_util.Coreset_Util and hand back its data array.
    Args:
        dataset_name: csv file name, stored in data folder
        col_list: list of columns to compute Coreset on
    Returns:
        The X_array attribute of the Coreset_Util object built from the arguments
    '''
    # The utility object is only needed long enough to read its X_array
    return c_util.Coreset_Util(dataset_name, col_list).X_array


#Code to run k-center implementation and return metrics
def run_kcenter(x_arr, k_val, epsilon):
    '''
    Build a k-center coreset for the input, re-run greedy k-center on that
    coreset, and print how the coreset cost compares with the full-data cost.
    Args:
        x_arr: input data - numpy ndarray
        k_val: k value for number of cluster centers
        epsilon: epsilon value
    Returns:
        None - all results are reported on stdout
    '''
    #Initialize kcenter coreset object with parameters
    print("============================================================================")
    kcenter_obj = c_kcenter.Coreset_kCenter(x_arr, k_val, epsilon)
    computed_coreset = kcenter_obj.compute_kCenter_Coreset()

    #R_val of the full-data object is used as the reference k-center cost
    kcenter_cost = kcenter_obj.R_val
    min_range = (1-epsilon)*kcenter_cost
    max_range = (1+epsilon)*kcenter_cost

    #Initialize kcenter coreset object with coreset
    print("---------------------------------------------------------------------------")
    print('Computing centers on Coreset')
    kcenter_coreset_obj = c_kcenter.Coreset_kCenter(computed_coreset, k_val, epsilon)
    kcenter_coreset_obj.greedy_kcenter()
    coreset_kcenter_cost = kcenter_coreset_obj.R_val

    print("(1+e) approximation range = [{:.1f}, {:.1f}]".format(min_range, max_range))

    #Report both costs and the verdict so the printed range can actually be
    #checked; previously the coreset cost was computed and then discarded
    in_range = bool(min_range <= coreset_kcenter_cost <= max_range)
    print("k-center cost on full data = {:.1f}".format(kcenter_cost))
    print("k-center cost on coreset = {:.1f}".format(coreset_kcenter_cost))
    print("Coreset cost within range: {}".format(in_range))

    return None
    



In [None]:
#Dataset file and the columns the coreset is computed on
dataset_name = 'UNdata_CO2Emissions.csv'
col_list = ['Year', 'Value']

#k-center parameters and number of repeated runs
k = 6
epsilon = 0.4
num_iterations = 2

#Load the data once and reuse it for every run
UN_data = import_dataset(dataset_name, col_list)

#Repeat the k-center experiment num_iterations times
for run_idx in range(num_iterations):
    run_kcenter(UN_data, k, epsilon)

In [None]:
# Five 2-D Gaussian clusters drawn with a seeded generator
rng = np.random.default_rng(12345)

means1 = [[5, 5], [-6, -4], [0, 0], [8, 8], [4, -2]]

# Diagonal covariances: unit variance everywhere except the third cluster,
# whose first-axis variance is 7
covs1 = [np.array([[x_var, 0], [0, 1]]) for x_var in (1, 1, 7, 1, 1)]

# The size list holds one entry of 2000 per cluster (presumably the per-cluster
# sample count - confirm against hf.simulate_gaussian_clusters)
gaussian_stream = hf.simulate_gaussian_clusters(rng, [2000] * 5, 5, means1, covs1)

In [None]:
#Test kcenter on the synthetic Gaussian clusters held in gaussian_stream
#(this cell needs the cell that defines gaussian_stream to have been run first)

#Earlier alternative input, kept for reference: 1000 random integer 3-D points
# x_arr = []
# for i in range(1000):
#     x_val, y_val, z_val = np.random.randint(-100,101), np.random.randint(-100,101), np.random.randint(-100,101)
#     x_arr.append([x_val, y_val, z_val])

#Positional arguments follow run_kcenter's usage: data, k (5, matching the five
#generated clusters), epsilon (0.4).
#NOTE(review): the 4th argument (True) is not passed anywhere else in this
#notebook - presumably a plotting/verbose flag; confirm against Coreset_kCenter.
test_kcenter = c_kcenter.Coreset_kCenter(gaussian_stream, 5, 0.4, True)
test_coreset = test_kcenter.compute_kCenter_Coreset()


In [None]:
#Test MEB on synthetic data
eps=0.05
plotting=True

#Seeded generator local to this cell so the sampled points - and therefore the
#computed ball - are identical on every run, independent of other cells
meb_rng = np.random.default_rng(12345)

#10000 integer points, each coordinate uniform on [-100, 100] (upper bound 101
#is exclusive). Drawn in one vectorised call; .tolist() keeps the same
#list-of-[x, y] input type the constructor was given before.
x_arr = meb_rng.integers(-100, 101, size=(10000, 2)).tolist()

#Alternative input kept for manual experiments: Gaussian cloud with
#x ~ N(5, 10) and y ~ N(0, 4)
# x_arr = np.column_stack([meb_rng.normal(5, 10, 10000), meb_rng.normal(0, 4, 10000)]).tolist()

meb_test = c_meb.Coreset_MinimumEnclosingBall(x_arr, eps, plotting)
meb_test.compute_minimumEnclosingBall()