In [1]:
import math
import os
import warnings

import numpy as np
from snnpy import *
from sklearn.neighbors import BallTree
from sklearn.neighbors import NearestNeighbors
from sklearn.neighbors import KDTree
from bf_search import *
from tqdm import tqdm

    
def sigificant_digit(number, digits=2):
    """Round ``number`` to ``digits`` significant digits.

    Parameters
    ----------
    number : int or float
        Value to round.
    digits : int, optional
        Number of significant digits to keep (default 2).

    Returns
    -------
    int or float
        ``number`` rounded to ``digits`` significant digits. Zero and
        non-finite values (NaN, +/-inf) are returned unchanged because they
        have no order of magnitude to round against.
    """
    # log10 is undefined at 0, and floor() cannot convert NaN/inf to an int;
    # short-circuit instead of raising in the middle of a long experiment.
    if number == 0 or not math.isfinite(number):
        return number
    # Position of the leading digit relative to the decimal point.
    magnitude = int(math.floor(math.log10(abs(number))))
    return round(number, digits - magnitude - 1)


# Suppress every Python warning from this point on.
# NOTE(review): the filter is global, so it also hides warnings raised by the
# numerical libraries used below — consider narrowing it to specific categories.
warnings.filterwarnings("ignore")

### Varying dataset size (ndim=2)

In [2]:
# Experiment: average number of items returned by a radius query as the
# dataset grows, for 2-dimensional data.
n_samples = 3000                 # size of the smallest dataset
unit_inc = 3000                  # size increment between consecutive runs
n_dim = 2
Rlist = [0.1, 0.3, 0.5, 0.7]     # query radii the average is taken over

query_num = list()               # one averaged value per dataset size

rng = np.random.RandomState(0)   # fixed seed so the run is reproducible
xrange = np.arange(n_samples, n_samples + 10*unit_inc, unit_inc)
print(", ".join([str(i) for i in xrange]))
for i in tqdm(xrange):
    # Draw 2*i points, standardise each column, then use the first half as
    # the indexed set and the second half as the queries.
    data = rng.random_sample((2*i, n_dim))
    data = (data - data.mean(axis=0))/data.std(axis=0)
    X = data[:i]
    Query = data[i:]

    # The model is built from X alone, so build it once per dataset instead of
    # once per radius (assumes radius_single_query does not modify the model
    # — TODO confirm).
    snn = build_snn_model(X)

    qm_answer = 0   # total number of items returned over all radii and queries
    qm_count = 0    # total number of queries issued

    for R in Rlist:
        for j in range(Query.shape[0]):
            sind = snn.radius_single_query(Query[j], R)
            qm_answer = qm_answer + len(sind)

        qm_count = qm_count + Query.shape[0]

    qm = qm_answer / qm_count
    print("average query returns:", sigificant_digit(qm))
    query_num.append(qm)
    print()

  0%|          | 0/10 [00:00<?, ?it/s]

3000, 6000, 9000, 12000, 15000, 18000, 21000, 24000, 27000, 30000


 10%|█         | 1/10 [00:00<00:05,  1.64it/s]

average query returns: 140.0



 20%|██        | 2/10 [00:02<00:08,  1.11s/it]

average query returns: 280.0



 30%|███       | 3/10 [00:04<00:11,  1.71s/it]

average query returns: 430.0



 40%|████      | 4/10 [00:08<00:14,  2.45s/it]

average query returns: 570.0



 50%|█████     | 5/10 [00:13<00:16,  3.36s/it]

average query returns: 710.0



 60%|██████    | 6/10 [00:19<00:17,  4.41s/it]

average query returns: 850.0



 70%|███████   | 7/10 [00:27<00:16,  5.66s/it]

average query returns: 990.0



 80%|████████  | 8/10 [00:37<00:14,  7.11s/it]

average query returns: 1100.0



 90%|█████████ | 9/10 [00:50<00:08,  8.72s/it]

average query returns: 1300.0



100%|██████████| 10/10 [01:05<00:00,  6.50s/it]

average query returns: 1400.0






In [3]:
### save the data
# Persist the per-size averages collected in `query_num` as a NumPy array.
query_num = np.array(query_num)

out_path = 'result/query_r/size/query_num_d2.npy'
# Create the output directory if needed so the save cannot fail with
# FileNotFoundError after the experiment has already finished.
os.makedirs(os.path.dirname(out_path), exist_ok=True)
with open(out_path, 'wb') as f:
    np.save(f, query_num)

### Varying dataset size (ndim=50)

In [4]:
# Experiment: average number of items returned by a radius query as the
# dataset grows, for 50-dimensional data.
n_samples = 3000                 # size of the smallest dataset
unit_inc = 3000                  # size increment between consecutive runs
n_dim = 50
Rlist = [8, 10, 12, 14]          # query radii the average is taken over

query_num = list()               # one averaged value per dataset size

rng = np.random.RandomState(0)   # fixed seed so the run is reproducible
xrange = np.arange(n_samples, n_samples + 10*unit_inc, unit_inc)
for i in tqdm(xrange):
    # Draw 2*i points, standardise each column, then use the first half as
    # the indexed set and the second half as the queries.
    data = rng.random_sample((2*i, n_dim))
    data = (data - data.mean(axis=0))/data.std(axis=0)
    X = data[:i]
    Query = data[i:]

    # The model is built from X alone, so build it once per dataset instead of
    # once per radius (assumes radius_single_query does not modify the model
    # — TODO confirm).
    snn = build_snn_model(X)

    qm_answer = 0   # total number of items returned over all radii and queries
    qm_count = 0    # total number of queries issued

    for R in Rlist:
        for j in range(Query.shape[0]):
            sind = snn.radius_single_query(Query[j], R)
            qm_answer = qm_answer + len(sind)

        qm_count = qm_count + Query.shape[0]

    qm = qm_answer / qm_count
    print("average query returns:", sigificant_digit(qm))
    query_num.append(qm)
    print()

 10%|█         | 1/10 [00:01<00:15,  1.77s/it]

average query returns: 1900.0



 20%|██        | 2/10 [00:08<00:35,  4.43s/it]

average query returns: 3800.0



 30%|███       | 3/10 [00:21<00:59,  8.52s/it]

average query returns: 5700.0



 40%|████      | 4/10 [00:44<01:25, 14.23s/it]

average query returns: 7600.0



 50%|█████     | 5/10 [01:19<01:48, 21.80s/it]

average query returns: 9400.0



 60%|██████    | 6/10 [02:09<02:05, 31.40s/it]

average query returns: 11000.0



 70%|███████   | 7/10 [03:20<02:12, 44.13s/it]

average query returns: 13000.0



 80%|████████  | 8/10 [04:56<02:01, 60.69s/it]

average query returns: 15000.0



 90%|█████████ | 9/10 [07:02<01:21, 81.27s/it]

average query returns: 17000.0



100%|██████████| 10/10 [09:46<00:00, 58.68s/it] 

average query returns: 19000.0






In [5]:
### save the data
# Persist the per-size averages collected in `query_num` as a NumPy array.
query_num = np.array(query_num)

out_path = 'result/query_r/size/query_num.npy'
# Create the output directory if needed so the save cannot fail with
# FileNotFoundError after the experiment has already finished.
os.makedirs(os.path.dirname(out_path), exist_ok=True)
with open(out_path, 'wb') as f:
    np.save(f, query_num)

### Varying dimensionality (n_samples=10000)

In [6]:
# Experiment: average number of items returned by a radius query as the
# dimensionality grows, with the dataset size held fixed.
n_samples = 10000                # number of indexed points (and of queries)
unit_inc = 30                    # dimension increment between consecutive runs
Rlist = [1.5, 9, 15.5, 22]       # query radii the average is taken over
rng = np.random.RandomState(0)   # fixed seed so the run is reproducible

query_num = list()               # one averaged value per dimensionality

xrange = np.arange(2, 2 + 10*unit_inc, unit_inc)
for dim in tqdm(xrange):
    # Draw 2*n_samples points with `dim` columns, standardise each column,
    # then use the first half as the indexed set and the rest as the queries.
    data = rng.random_sample((2*n_samples, dim))
    data = (data - data.mean(axis=0))/data.std(axis=0)
    X = data[:n_samples]
    Query = data[n_samples:]

    # The model is built from X alone, so build it once per dataset instead of
    # once per radius (assumes radius_single_query does not modify the model
    # — TODO confirm).
    snn = build_snn_model(X)

    qm_answer = 0   # total number of items returned over all radii and queries
    qm_count = 0    # total number of queries issued

    for R in Rlist:
        for j in range(Query.shape[0]):
            sind = snn.radius_single_query(Query[j], R)
            qm_answer = qm_answer + len(sind)

        qm_count = qm_count + Query.shape[0]

    qm = qm_answer / qm_count
    print("average query returns:", sigificant_digit(qm))
    query_num.append(qm)
    print()


 10%|█         | 1/10 [00:04<00:40,  4.55s/it]

average query returns: 8500.0



 20%|██        | 2/10 [00:14<01:02,  7.80s/it]

average query returns: 7200.0



 30%|███       | 3/10 [00:30<01:20, 11.57s/it]

average query returns: 5000.0



 40%|████      | 4/10 [00:50<01:28, 14.80s/it]

average query returns: 5000.0



 50%|█████     | 5/10 [01:19<01:39, 19.96s/it]

average query returns: 3600.0



 60%|██████    | 6/10 [01:54<01:40, 25.04s/it]

average query returns: 2500.0



 70%|███████   | 7/10 [02:44<01:40, 33.34s/it]

average query returns: 2500.0



 80%|████████  | 8/10 [03:43<01:23, 41.53s/it]

average query returns: 2400.0



 90%|█████████ | 9/10 [05:01<00:52, 52.84s/it]

average query returns: 1300.0



100%|██████████| 10/10 [06:31<00:00, 39.10s/it]

average query returns: 150.0






In [7]:
### save the data
# Persist the per-dimension averages collected in `query_num` as a NumPy array.
query_num = np.array(query_num)

out_path = 'result/query_r/dim/query_num.npy'
# Create the output directory if needed so the save cannot fail with
# FileNotFoundError after the experiment has already finished.
os.makedirs(os.path.dirname(out_path), exist_ok=True)
with open(out_path, 'wb') as f:
    np.save(f, query_num)