# Blurred Ball Cover

Data Source: https://archive.ics.uci.edu/ml/datasets/Epileptic+Seizure+Recognition

# Import Modules and Packages

In [123]:
%matplotlib inline
import numpy as np
import scipy as sp
import matplotlib.pyplot as plt
import random
from collections import namedtuple
import numpy.linalg as npl
from sets import Set

# Declare Data Structures

In [124]:
Ball = namedtuple("Ball", ["center", "radius"])
BlurredBall = namedtuple("BlurredBall", ["k", "MEB"])

# Methods to Generate and Read Data

In [125]:
def generate_normal_data(num_of_points, num_of_dimensions):
    mean = random.sample(np.array(range(100)), num_of_dimensions)
    scalar_variances = 10.0 * np.ones(num_of_dimensions)
    covariance = np.diag(scalar_variances)
    points = np.random.multivariate_normal(mean, covariance, num_of_points)
    return points

def generate_multivariate_data(num_of_points, num_of_dimensions):
    probabilities = [float(1.0 / num_of_dimensions)] * num_of_dimensions
    points = np.random.multinomial(20, probabilities, size = num_of_points)
    return points

def read_data(data_file):
    data = []
    with open(data_file) as data_reader:
        lines = data_reader.readlines()
        for line in lines:
            values = line.split()
            #values = values[1:]
            values = values[:400]
            row = [float(value) for value in values]
            data.append(row)
        data = np.array(data)
        print(data.shape)
        return data, data.shape[0], data.shape[1]

def write_blurred_balls(blurred_balls, file_name):
    N = len(blurred_balls)
    D = num_of_dimensions

    with open(file_name, "w") as outFile:
        outFile.write(repr(N) + " " + repr(D) + "\n")
        for ball in blurred_balls:
            print ball.MEB.center, ball.MEB.radius
            line = ""
            for d in range(D):
                line += repr(ball.MEB.center[d]) + " "
            line += repr(ball.MEB.radius) + "\n"
            outFile.write(line)

# Helper Functions for Computing Clarkson's (2/epsilon)-coreset and MEB 

In [126]:
def inside_ball_eps(blurred_ball, point, epsilon):
    #print "inside_ball"
    center, radius = blurred_ball.MEB
    distance = npl.norm(point - center)
    if distance <= (1 + epsilon) * radius:
        return True
    else:
        return False

In [127]:
def approx_meb_light(points, epsilon):
    coreset = []
    num_of_points = len(points)
    furthest_point = None
    center = points[0]
    radius = 0.0
    num_iterations = int(1.0 / (epsilon * epsilon))
    distances = {}
    prev_radius = -10000.0
    coreset = [points[0]]
    for i in range(1, num_iterations):
        for j in range(num_of_points):
            distances[j] = npl.norm(points[j] - center)
        furthest_point_index = max(distances, key = distances.get)
        furthest_point = points[furthest_point_index]
        furthest_distance = distances[furthest_point_index]

        center = center + (1.0 / i) * (furthest_point - center)   
        #if abs(furthest_distance - prev_radius) < 0.01:
        #    break
        prev_radius = furthest_distance
            
    radius = npl.norm(furthest_point - center)
    meb = Ball(center, radius)
    return meb

In [128]:
def epsilon_coreset(points, epsilon):
    coreset = []
    num_of_points = len(points)
    furthest_point = None
    center = points[0]
    radius = 0.0

    num_iterations = int(2.0 / (epsilon))
    print("Total Iterations: " + repr(num_iterations))
    distances = {}
    coreset_indices = Set()

    prev_radius = -10000.0
    
    coreset = [points[0]]
    for i in range(1, num_iterations):
        print("Iteration: ", i, " radius: ", prev_radius)
        # Find furthest point
        for j in range(num_of_points):
            distances[j] = npl.norm(points[j] - center)
        # Calculated furthest point    
        furthest_point_index = max(distances, key = distances.get)
        furthest_point = points[furthest_point_index]
        furthest_distance = distances[furthest_point_index]
        if furthest_point_index in coreset_indices:
            continue
        coreset_indices.add(furthest_point_index)
        coreset.append(furthest_point)
        coreset_meb = approx_meb_light(coreset, 0.01)
        center = coreset_meb.center
        radius = coreset_meb.radius
        #if radius - prev_radius < 0.001:
        #    break
        prev_radius = radius
        #print center, coreset_meb.radius
        
    return coreset

# Clarkson's 2/epsilon coreset and MEB Algorithm

In [129]:
def approx_meb(points, epsilon):
    coreset = epsilon_coreset(points, epsilon)
    meb = approx_meb_light(coreset, epsilon)
    return BlurredBall(coreset, meb)
    

In [130]:
points, num_of_points, num_of_dimensions = read_data("human_activity.txt")
np.random.shuffle(points)
print num_of_points, num_of_dimensions
blurred_ball = approx_meb(points, 0.1)
print blurred_ball.MEB.radius

(7352, 400)
7352 400
Total Iterations: 20
('Iteration: ', 1, ' radius: ', -10000.0)
('Iteration: ', 2, ' radius: ', 11.952528556439267)
('Iteration: ', 3, ' radius: ', 12.418314785828446)
('Iteration: ', 4, ' radius: ', 12.51540587642973)
('Iteration: ', 5, ' radius: ', 12.56190772180674)
('Iteration: ', 6, ' radius: ', 12.591252625389943)
('Iteration: ', 7, ' radius: ', 12.601211438633202)
('Iteration: ', 8, ' radius: ', 12.613647424040412)
('Iteration: ', 9, ' radius: ', 12.616084766165375)
('Iteration: ', 10, ' radius: ', 12.618068042066445)
('Iteration: ', 11, ' radius: ', 12.620364521456677)
('Iteration: ', 12, ' radius: ', 12.620364521456677)
('Iteration: ', 13, ' radius: ', 12.620364521456677)
('Iteration: ', 14, ' radius: ', 12.620364521456677)
('Iteration: ', 15, ' radius: ', 12.620364521456677)
('Iteration: ', 16, ' radius: ', 12.620364521456677)
('Iteration: ', 17, ' radius: ', 12.620364521456677)
('Iteration: ', 18, ' radius: ', 12.620364521456677)
('Iteration: ', 19, ' rad

In [131]:
for point in points:
    if inside_ball_eps(blurred_ball, point, 0.05) == False:
        print "Missed"
        break
print num_of_points, num_of_dimensions

7352 400


# Blurred Ball Cover Algorithm's Helper Method

In [132]:
def update(blurred_balls, A, epsilon):
    print "update"
    K = []
    for blurred_ball in blurred_balls:
        K.extend(blurred_ball.k)


    covered_flag = True    
    for point in A:
        point_flag = False
        for blurred_ball in blurred_balls:
            if inside_ball_eps(blurred_ball, point, epsilon) == True:
                point_flag = True
                break
        if point_flag == False:
            covered_flag = False
            break
    
                
    if covered_flag == True:
        print("This point is covered")
        return blurred_balls
    #else:
        #print("This point is not covered")
        
    K_union_A = list(K)
    K_union_A.extend(A)
    blurred_ball_new = approx_meb(K_union_A, epsilon / 3.0)
    
    discardables = []
    for blurred_ball in blurred_balls:
        lhs = blurred_ball.MEB.radius
        rhs = epsilon * blurred_ball_new.MEB.radius / 4.0
        #print("lhs <> rhs: " + repr(lhs) + " <> " + repr(rhs))
        if blurred_ball.MEB.radius <= (epsilon * blurred_ball_new.MEB.radius / 4.0):
            print "Found a discardable"
            discardables.append(blurred_ball)
    
    blurred_balls = [bb for bb in blurred_balls if bb not in np.array(discardables)]
    
    blurred_balls.append(blurred_ball_new)
    
    return blurred_balls
        

# Blurred Ball Cover Algorithm

In [133]:
def agarwal_single_scan(points, largest_ball, num_of_points, num_of_dimensions, epsilon):
    batch_size = 100
    blurred_balls = []
    initial_points = points[0:num_of_dimensions, :]
    if largest_ball == None:
        blurred_ball_init = approx_meb(initial_points, epsilon / 3.0)
        blurred_balls = [blurred_ball_init]
    else:
        blurred_balls = [largest_ball]
            
    for i in range(num_of_dimensions, num_of_points, batch_size):
        print "Iteration: " + repr(i)
        sid = i
        fid = min(i + batch_size, num_of_points)
        A = points[sid : fid, :]
        blurred_balls = update(blurred_balls, A, epsilon)
        
    return blurred_balls

In [134]:
def agarwal_multi_scan(points, num_of_points, num_of_dimensions, num_of_scans, epsilon):
    if num_of_scans == 1:
        return agarwal_single_scan(points, None, num_of_points, num_of_dimensions, epsilon)
    else:
        largest_ball = None
        
        for i in range(1, num_of_scans + 1):
            print "Running scan: " + repr(i)
            blurred_balls = agarwal_single_scan(points, largest_ball, num_of_points, num_of_dimensions, epsilon)
            write_blurred_balls(blurred_balls, "activity" + repr(i) + ".txt")
            largest_ball = blurred_balls[-1]
    return blurred_balls
            
    

# Drawing Utility Methods : For Debug and Visualization Purpose

In [135]:
def draw_blurred_balls(blurred_balls):
    xmin = min(points[:, 0])
    xmax = max(points[:, 0])
    ymin = min(points[:, 1])
    ymax = max(points[:, 1])

    print(xmin, xmax, ymin, ymax)
    # ball <- \phi
    ball = None
    count = 0
    processed_points = []
    fig, ax = plt.subplots(1, 1) 
    #ax.set_xlim(xmin - 10, xmax + 10)
    #ax.set_ylim(ymin - 10, ymax + 10)
    ax.set_aspect('equal', 'datalim')
    ax.set_xlim(xmin - 10, xmax + 10)
    ax.set_ylim(ymin - 10, ymax + 10)


    X = points[:, 0]
    Y = points[:, 1]
    scatters = plt.scatter(X, Y)

    for blurred_ball in blurred_balls:
        #print blurred_ball
        center, radius = blurred_ball.MEB
        circle = plt.Circle(center, radius, color='r', alpha = 0.1)
        ax.add_artist(circle)
        ax.add_artist(scatters)
    plt.show()

In [136]:
def draw_points_and_meb(points, meb):
    xmin = min(points[:, 0])
    xmax = max(points[:, 0])
    ymin = min(points[:, 1])
    ymax = max(points[:, 1])
    print(xmin, xmax, ymin, ymax)
    processed_points = []
    fig, ax = plt.subplots(1, 1) 
    #ax.set_xlim(xmin - 10, xmax + 10)
    #ax.set_ylim(ymin - 10, ymax + 10)
    ax.set_aspect('equal', 'datalim')
    ax.set_xlim(xmin - 50, xmax + 50)
    ax.set_ylim(ymin - 50, ymax + 50)
    X = points[:, 0]
    Y = points[:, 1]
    scatters = plt.scatter(X, Y)
    center = meb.center
    radius = meb.radius
    circle = plt.Circle(center, radius, color='r', alpha = 0.1)
    ax.add_artist(circle)

# Read Data from File

# Run Blurred Ball Cover Algorithm

In [137]:
blurred_balls = agarwal_multi_scan(points, num_of_points, num_of_dimensions, 4, 0.1)
#draw_blurred_balls(blurred_balls)

Running scan: 1
Total Iterations: 60
('Iteration: ', 1, ' radius: ', -10000.0)
('Iteration: ', 2, ' radius: ', 8.7248764487312283)
('Iteration: ', 3, ' radius: ', 9.2086710411081665)
('Iteration: ', 4, ' radius: ', 9.3494918419003508)
('Iteration: ', 5, ' radius: ', 9.4408381705058932)
('Iteration: ', 6, ' radius: ', 9.4632298643704207)
('Iteration: ', 7, ' radius: ', 9.4762119216422374)
('Iteration: ', 8, ' radius: ', 9.4912317412607941)
('Iteration: ', 9, ' radius: ', 9.498543282520032)
('Iteration: ', 10, ' radius: ', 9.4984944803664018)
('Iteration: ', 11, ' radius: ', 9.4994737211715865)
('Iteration: ', 12, ' radius: ', 9.4994737211715865)
('Iteration: ', 13, ' radius: ', 9.4994737211715865)
('Iteration: ', 14, ' radius: ', 9.4994737211715865)
('Iteration: ', 15, ' radius: ', 9.4994737211715865)
('Iteration: ', 16, ' radius: ', 9.4994737211715865)
('Iteration: ', 17, ' radius: ', 9.4994737211715865)
('Iteration: ', 18, ' radius: ', 9.4994737211715865)
('Iteration: ', 19, ' radius:



('Iteration: ', 2, ' radius: ', 9.4850073624425946)
('Iteration: ', 3, ' radius: ', 9.8071243423078833)
('Iteration: ', 4, ' radius: ', 10.228534325745107)
('Iteration: ', 5, ' radius: ', 10.376820248294081)
('Iteration: ', 6, ' radius: ', 10.419755453855494)
('Iteration: ', 7, ' radius: ', 10.426979728835246)
('Iteration: ', 8, ' radius: ', 10.429761109828325)
('Iteration: ', 9, ' radius: ', 10.430664885431332)
('Iteration: ', 10, ' radius: ', 10.43223468081128)
('Iteration: ', 11, ' radius: ', 10.43223468081128)
('Iteration: ', 12, ' radius: ', 10.43223468081128)
('Iteration: ', 13, ' radius: ', 10.43223468081128)
('Iteration: ', 14, ' radius: ', 10.43223468081128)
('Iteration: ', 15, ' radius: ', 10.43223468081128)
('Iteration: ', 16, ' radius: ', 10.43223468081128)
('Iteration: ', 17, ' radius: ', 10.43223468081128)
('Iteration: ', 18, ' radius: ', 10.43223468081128)
('Iteration: ', 19, ' radius: ', 10.43223468081128)
('Iteration: ', 20, ' radius: ', 10.43223468081128)
('Iteration:

('Iteration: ', 2, ' radius: ', 11.760167598776651)
('Iteration: ', 3, ' radius: ', 11.97551292355336)
('Iteration: ', 4, ' radius: ', 12.297557163714588)
('Iteration: ', 5, ' radius: ', 12.311427206580259)
('Iteration: ', 6, ' radius: ', 12.324232859111405)
('Iteration: ', 7, ' radius: ', 12.328149331574632)
('Iteration: ', 8, ' radius: ', 12.331170737660491)
('Iteration: ', 9, ' radius: ', 12.332310326188397)
('Iteration: ', 10, ' radius: ', 12.332038222108221)
('Iteration: ', 11, ' radius: ', 12.332780822343583)
('Iteration: ', 12, ' radius: ', 12.33250094749498)
('Iteration: ', 13, ' radius: ', 12.33250094749498)
('Iteration: ', 14, ' radius: ', 12.33250094749498)
('Iteration: ', 15, ' radius: ', 12.33250094749498)
('Iteration: ', 16, ' radius: ', 12.33250094749498)
('Iteration: ', 17, ' radius: ', 12.33250094749498)
('Iteration: ', 18, ' radius: ', 12.33250094749498)
('Iteration: ', 19, ' radius: ', 12.33250094749498)
('Iteration: ', 20, ' radius: ', 12.33250094749498)
('Iteration

  -2.78181868e-01  -5.32108922e-01  -7.60659446e-01  -5.23215160e-01] 10.9436884814
[ 0.42013683  0.05081239 -0.17458024 -0.10889082 -0.0215111  -0.21321413
 -0.12123907 -0.04838061 -0.21169737 -0.03946987  0.01976108 -0.36234123
  0.14502231  0.0787427   0.22502742  0.04603943 -0.16348888 -0.62523009
 -0.42213201 -0.20349833 -0.21278096 -0.31445397 -0.04738732  0.01350096
 -0.18532467 -0.08827261  0.03419212  0.01349188  0.23084456  0.05798909
 -0.00998128  0.14416122  0.12633239  0.25768475  0.0073195   0.03503773
  0.02242325 -0.29918318 -0.3208686   0.0250478   0.24145986  0.22137395
  0.09556683 -0.8600753  -0.77431106 -0.85682011 -0.86205475 -0.77580643
 -0.85341993  0.23558907  0.23990479  0.11317675  0.24041247  0.16485245
  0.06282596 -0.06195699 -0.15274386 -0.43479327 -0.58752951 -0.87162239
 -0.7821876  -0.8448737  -0.32092328 -0.74656395 -0.70522675 -0.56982702
  0.62617275 -0.68322855  0.74105258 -0.27669399  0.2931851  -0.36101891
  0.44856332 -0.02183775  0.12247708 -0.

update
This point is covered
Iteration: 4800
update
This point is covered
Iteration: 4900
update
This point is covered
Iteration: 5000
update
This point is covered
Iteration: 5100
update
This point is covered
Iteration: 5200
update
This point is covered
Iteration: 5300
update
This point is covered
Iteration: 5400
update
This point is covered
Iteration: 5500
update
This point is covered
Iteration: 5600
update
This point is covered
Iteration: 5700
update
This point is covered
Iteration: 5800
update
This point is covered
Iteration: 5900
update
This point is covered
Iteration: 6000
update
This point is covered
Iteration: 6100
update
This point is covered
Iteration: 6200
update
This point is covered
Iteration: 6300
update
This point is covered
Iteration: 6400
update
This point is covered
Iteration: 6500
update
This point is covered
Iteration: 6600
update
This point is covered
Iteration: 6700
update
This point is covered
Iteration: 6800
update
This point is covered
Iteration: 6900
update
Thi

# Inspect Blurred Ball Cover's Output

In [138]:
for i in range(len(blurred_balls)):
    print i, len(blurred_balls[i].k), blurred_balls[i].MEB.radius

0 12 12.3323910656


# Check if all the points are covered by the largest Ball

In [139]:
largest_ball = blurred_balls[-1]
epsilon = 0.1
covered_flag = True
for point in points:
    point_flag = False
    for blurred_ball in blurred_balls:
        if inside_ball_eps(blurred_ball, point, epsilon) == True:
            point_flag = True
    if point_flag == False:
        covered_flag = False
        break
        
if covered_flag == True:
    print "covered"
else:
    print "missed"

covered


# Write the Blurred Balls into a File

In [140]:
N = len(blurred_balls)
D = num_of_dimensions

with open("eplileptic_balls.txt", "w") as outFile:
    outFile.write(repr(N) + " " + repr(D) + "\n")
    for ball in blurred_balls:
        print ball.MEB.center, ball.MEB.radius
        line = ""
        for d in range(D):
            line += repr(ball.MEB.center[d]) + " "
        line += repr(ball.MEB.radius) + "\n"
        outFile.write(line)
        #print line

[ 0.42013683  0.05081239 -0.17458024 -0.10889082 -0.0215111  -0.21321413
 -0.12123907 -0.04838061 -0.21169737 -0.03946987  0.01976108 -0.36234123
  0.14502231  0.0787427   0.22502742  0.04603943 -0.16348888 -0.62523009
 -0.42213201 -0.20349833 -0.21278096 -0.31445397 -0.04738732  0.01350096
 -0.18532467 -0.08827261  0.03419212  0.01349188  0.23084456  0.05798909
 -0.00998128  0.14416122  0.12633239  0.25768475  0.0073195   0.03503773
  0.02242325 -0.29918318 -0.3208686   0.0250478   0.24145986  0.22137395
  0.09556683 -0.8600753  -0.77431106 -0.85682011 -0.86205475 -0.77580643
 -0.85341993  0.23558907  0.23990479  0.11317675  0.24041247  0.16485245
  0.06282596 -0.06195699 -0.15274386 -0.43479327 -0.58752951 -0.87162239
 -0.7821876  -0.8448737  -0.32092328 -0.74656395 -0.70522675 -0.56982702
  0.62617275 -0.68322855  0.74105258 -0.27669399  0.2931851  -0.36101891
  0.44856332 -0.02183775  0.12247708 -0.22276021  0.32016765  0.33960575
 -0.16546677 -0.01338341  0.22154487  0.03307224  0

In [141]:
import numpy as np
from sklearn.decomposition import PCA

def get_pca_transformer(points_hd):
    pca = PCA(n_components=2)
    pca.fit(points_hd)
    return pca

def get_blurred_ball_2d(blurred_ball, pca):
    points_hd = blurred_ball.k
    center_hd = blurred_ball.MEB.center
    points_hd.append(center_hd)
    
    points_2d = pca.fit_transform(points_hd)
    num_of_points = len(points_2d)
    center_2d = points_2d[num_of_points - 1, :]
    radius = blurred_ball.MEB.radius
    points_2d = points_2d[:num_of_points - 2, :]
    blurred_ball_2d = BlurredBall(points_2d, Ball(center_2d, radius))
    return blurred_ball_2d

In [142]:
'''
pca = get_pca_transformer(points)
blurred_balls_2d = [get_blurred_ball_2d(blurred_ball, pca) for blurred_ball in blurred_balls]
print(blurred_balls_2d[0])
draw_blurred_balls(blurred_balls_2d)
'''

'\npca = get_pca_transformer(points)\nblurred_balls_2d = [get_blurred_ball_2d(blurred_ball, pca) for blurred_ball in blurred_balls]\nprint(blurred_balls_2d[0])\ndraw_blurred_balls(blurred_balls_2d)\n'

# Timothy Chan's Algorithm

In [143]:
Ball = namedtuple("Ball", ["center", "radius"])

def inside_ball(ball, point):
    center, radius = ball
    distance = npl.norm(point - center)
    if distance < radius:
        return True
    else:
        return False

def meb_ball_and_point(ball, p):
    c, r = ball
    pc_scalar = npl.norm(c - p)
    pc_vector = c - p
    radius_unit = pc_vector / pc_scalar
    p_mirror = radius_unit * r + c
    c_prime = (p + p_mirror) / 2.0
    r_prime = npl.norm(p_mirror - p) / 2.0
    meb = Ball(c_prime, r_prime)
    return meb

def create_initial_ball(point):
    return Ball(point, 0.0)

def chan(points):
    count = 0
    center = None
    radius = None
    ball = None
    for point in points:
        if ball == None:
            ball = create_initial_ball(point)
            continue
        if inside_ball(ball, point):
            continue
        else:
            count += 1
            ball = meb_ball_and_point(ball, point)
            center, radius = ball
    return Ball(center, radius)

In [144]:
import random
#np.random.shuffle(points)
print points.shape
meb = chan(points)
print(meb.radius)
    

(7352, 400)
14.9812650394


# TODO

## Compute MEB from MEBs

# Extension of Chan's Algorithm?