In [1]:
import math
import os
import warnings
import numpy as np
from snnpy import *
from sklearn.neighbors import BallTree
from sklearn.neighbors import NearestNeighbors
from sklearn.neighbors import KDTree
from bf_search import *
from tqdm import tqdm


    
def sigificant_digit(number, digits=2):
    """Round ``number`` to ``digits`` significant digits.

    Parameters
    ----------
    number : int or float
        Value to round.
    digits : int, default 2
        How many significant digits to keep.

    Returns
    -------
    int or float
        ``number`` rounded with the built-in ``round``. Zero, NaN and
        infinities are returned unchanged because they have no decimal
        magnitude to round against.
    """
    # log10(0) raises ValueError and floor(inf/nan) raises as well, so values
    # without a finite, non-zero magnitude are passed through untouched.
    if number == 0 or not math.isfinite(number):
        return number

    # Position of the leading digit: 0 for 1-9, 1 for 10-99, -1 for 0.1-0.9, ...
    magnitude = int(math.floor(math.log10(abs(number))))
    return round(number, digits - magnitude - 1)


# Install a catch-all "ignore" filter: from this point on, warnings of every
# category are suppressed for the whole kernel session.
# NOTE(review): presumably meant to keep the progress output readable; it also
# hides deprecation/numerical warnings, so narrow it if those matter.
warnings.filterwarnings("ignore")

### Average radius-query return count vs. dataset size (n_dim = 2)

In [2]:
# Sweep configuration: ten dataset sizes (n_samples, n_samples + unit_inc, ...)
# in n_dim dimensions, each probed with every radius in Rlist.
n_samples = 3000
unit_inc = 3000
n_dim = 2
Rlist = [0.1, 0.3, 0.5, 0.7]

rng = np.random.RandomState(0)
xrange = np.arange(n_samples, n_samples + 10*unit_inc, unit_inc)
print(", ".join(map(str, xrange)))

# One entry per radius: mean number of items returned per query, pooled over
# all dataset sizes.
query_num = []

for R in tqdm(Rlist):
    qm_answer = 0   # total number of returned items for this radius
    qm_count = 0    # total number of queries issued for this radius

    for n_points in xrange:
        # Draw 2*n_points uniform samples and standardise every column to
        # zero mean / unit standard deviation.
        data = rng.random_sample((2*n_points, n_dim))
        data = (data - data.mean(axis=0))/data.std(axis=0)

        # First half is handed to the model, second half is used as queries.
        X, Query = data[:n_points], data[n_points:]

        # snn
        snn = build_snn_model(X)

        qm_answer += sum(len(snn.radius_single_query(point, R)) for point in Query)
        qm_count += Query.shape[0]

    qm = qm_answer / qm_count
    print("average query returns:", sigificant_digit(qm))
    query_num.append(qm)
    print()

  0%|          | 0/4 [00:00<?, ?it/s]

3000, 6000, 9000, 12000, 15000, 18000, 21000, 24000, 27000, 30000


 25%|██▌       | 1/4 [00:07<00:21,  7.03s/it]

average query returns: 54.0



 50%|█████     | 2/4 [00:18<00:19,  9.65s/it]

average query returns: 460.0



 75%|███████▌  | 3/4 [00:35<00:13, 13.13s/it]

average query returns: 1200.0



100%|██████████| 4/4 [00:59<00:00, 14.79s/it]

average query returns: 2200.0






In [3]:
### save the data
out_path = 'result/query_r/size/query_num_d2_radius.npy'

# Create the output directory up front: open(..., 'wb') does not create
# missing parent directories and would otherwise raise FileNotFoundError,
# discarding the results computed above.
os.makedirs(os.path.dirname(out_path), exist_ok=True)

query_num = np.array(query_num)

with open(out_path, 'wb') as f:
    np.save(f, query_num)

### Average radius-query return count vs. dataset size (n_dim = 50)

In [4]:
# Sweep configuration: ten dataset sizes (n_samples, n_samples + unit_inc, ...)
# in n_dim dimensions, each probed with every radius in Rlist.
n_samples = 3000
unit_inc = 3000
n_dim = 50
Rlist = [8, 10, 12, 14]

rng = np.random.RandomState(0)
xrange = np.arange(n_samples, n_samples + 10*unit_inc, unit_inc)

# One entry per radius: mean number of items returned per query, pooled over
# all dataset sizes.
query_num = []

for R in tqdm(Rlist):
    qm_answer = 0   # total number of returned items for this radius
    qm_count = 0    # total number of queries issued for this radius

    for n_points in xrange:
        # Draw 2*n_points uniform samples and standardise every column to
        # zero mean / unit standard deviation.
        data = rng.random_sample((2*n_points, n_dim))
        data = (data - data.mean(axis=0))/data.std(axis=0)

        # First half is handed to the model, second half is used as queries.
        X, Query = data[:n_points], data[n_points:]

        # snn
        snn = build_snn_model(X)

        qm_answer += sum(len(snn.radius_single_query(point, R)) for point in Query)
        qm_count += Query.shape[0]

    qm = qm_answer / qm_count
    print("average query returns:", sigificant_digit(qm))
    query_num.append(qm)
    print()

 25%|██▌       | 1/4 [02:17<06:53, 137.91s/it]

average query returns: 210.0



 50%|█████     | 2/4 [05:27<05:36, 168.11s/it]

average query returns: 11000.0



 75%|███████▌  | 3/4 [07:47<02:35, 155.31s/it]

average query returns: 21000.0



100%|██████████| 4/4 [10:23<00:00, 155.86s/it]

average query returns: 21000.0






In [5]:
### save the data
out_path = 'result/query_r/size/query_num_radius.npy'

# Create the output directory up front: open(..., 'wb') does not create
# missing parent directories and would otherwise raise FileNotFoundError,
# discarding the results computed above.
os.makedirs(os.path.dirname(out_path), exist_ok=True)

query_num = np.array(query_num)

with open(out_path, 'wb') as f:
    np.save(f, query_num)

### Average radius-query return count vs. number of dimensions

In [6]:
# Sweep configuration: a fixed number of points, ten dimensionalities
# (2, 2 + unit_inc, ...), each probed with every radius in Rlist.
n_samples = 10000
unit_inc = 30
Rlist = [1.5, 9, 15.5, 22]
rng = np.random.RandomState(0)

xrange = np.arange(2, 2 + 10*unit_inc, unit_inc)

# One entry per radius: mean number of items returned per query, pooled over
# all dimensionalities.
query_num = []

for R in tqdm(Rlist):
    qm_answer = 0   # total number of returned items for this radius
    qm_count = 0    # total number of queries issued for this radius

    for n_features in xrange:
        # Draw 2*n_samples uniform samples and standardise every column to
        # zero mean / unit standard deviation.
        data = rng.random_sample((2*n_samples, n_features))
        data = (data - data.mean(axis=0))/data.std(axis=0)

        # First half is handed to the model, second half is used as queries.
        X, Query = data[:n_samples], data[n_samples:]

        # snn
        snn = build_snn_model(X)

        qm_answer += sum(len(snn.radius_single_query(point, R)) for point in Query)
        qm_count += Query.shape[0]

    qm = qm_answer / qm_count
    print("average query returns:", sigificant_digit(qm))
    query_num.append(qm)
    print()


 25%|██▌       | 1/4 [01:15<03:45, 75.22s/it]

average query returns: 390.0



 50%|█████     | 2/4 [02:59<03:04, 92.32s/it]

average query returns: 1900.0



 75%|███████▌  | 3/4 [04:47<01:39, 99.58s/it]

average query returns: 4500.0



100%|██████████| 4/4 [06:48<00:00, 102.13s/it]

average query returns: 8500.0






In [7]:
### save the data
out_path = 'result/query_r/dim/query_num_radius.npy'

# Create the output directory up front: open(..., 'wb') does not create
# missing parent directories and would otherwise raise FileNotFoundError,
# discarding the results computed above.
os.makedirs(os.path.dirname(out_path), exist_ok=True)

query_num = np.array(query_num)

with open(out_path, 'wb') as f:
    np.save(f, query_num)