In [43]:
from rtree import index
from sklearn.neighbors import NearestNeighbors, BallTree, KDTree
import numpy as np
import networkx as nx
import sys, os
from scipy.spatial.distance import cdist
import time

In [44]:
#from multiprocessing.pool import ThreadPool
# --- Configuration -----------------------------------------------------------
# Naming/format constants for result files.
RESULT_FILE_PREFIX="tree-pair-distance-"
HEADER_CSV="Scenario, Type, Time"

# Root directory for outputs. The default is the Comet scratch project used
# originally; set the PAIR_DISTANCE_BASE_DIR environment variable to run
# elsewhere without editing the notebook.
# Dask has issues with NFS home directory on Comet, hence scratch storage.
# Previously used alternatives:
#   os.getcwd()
#   '/scratch/luckow/7146882'
#   '/scratch/luckow/7218009/'
BASE_DIRECTORY=os.environ.get("PAIR_DISTANCE_BASE_DIR",
                              '/oasis/scratch/comet/luckow/temp_project')
OUT_DIR=os.path.join(BASE_DIRECTORY, "npy_stack")
# RESULT_DIR was previously first assigned the bare string "results" and then
# immediately overwritten; only the effective definition is kept.
RESULT_DIR=os.path.join(BASE_DIRECTORY, "results")

# Input datasets (atom position arrays), ordered by size.
FILENAMES=["../132k_dataset/atom_pos_132K.npy", "../145K_dataset/atom_pos_145K.npy", 
          "../300K_dataset/atom_pos_291K.npy", '../840K_dataset/atom_pos_839K.npy']

# Dummy Data and Scikit

In [3]:
# Small synthetic point set: `number_points` rows of 3 coordinates, filled with
# consecutive integers 0, 1, 2, ... in row-major order.
number_points = 10
points_local_np = np.arange(3 * number_points).reshape((-1, 3))

In [40]:
def pairwise_distance_balltree(points_np, cutoff=15.0):
    """Find all index pairs of points lying within ``cutoff`` using a BallTree.

    Parameters
    ----------
    points_np : array-like of shape (n_points, n_dims)
        Point coordinates; the tree is built from and queried with this array.
    cutoff : float, default 15.0
        Search radius handed to ``BallTree.query_radius``.
        NOTE(review): query_radius appears to treat the radius as inclusive,
        whereas ``pairwise_distance_cdist`` uses a strict ``<`` -- confirm if
        boundary pairs matter.

    Returns
    -------
    numpy.ndarray of shape (n_pairs, 2)
        Rows ``[i, j]`` with ``i < j``, so each unordered pair appears once and
        self-matches are dropped. Rows are ordered by ``i``, then in the order
        the tree returned the neighbours of ``i``.
    """
    tree = BallTree(points_np, leaf_size=40)
    # One array of neighbour indices per query point.
    neighbors = tree.query_radius(points_np, cutoff)
    # Expand to flat (source, target) columns with array operations instead of
    # materialising a Python list per pair.
    counts = np.array([len(dest_list) for dest_list in neighbors])
    sources = np.repeat(np.arange(len(neighbors)), counts)
    targets = np.concatenate(list(neighbors))
    # Every query point matches itself and each pair is found from both ends;
    # keeping only i < j removes both kinds of duplicates.
    keep = sources < targets
    return np.column_stack((sources[keep], targets[keep]))

In [41]:
def pairwise_distance_kdtree(points_np, cutoff=15.0):
    """Find all index pairs of points lying within ``cutoff`` using a KDTree.

    Parameters
    ----------
    points_np : array-like of shape (n_points, n_dims)
        Point coordinates; the tree is built from and queried with this array.
    cutoff : float, default 15.0
        Search radius handed to ``KDTree.query_radius``.
        NOTE(review): query_radius appears to treat the radius as inclusive,
        whereas ``pairwise_distance_cdist`` uses a strict ``<`` -- confirm if
        boundary pairs matter.

    Returns
    -------
    numpy.ndarray of shape (n_pairs, 2)
        Rows ``[i, j]`` with ``i < j``, so each unordered pair appears once and
        self-matches are dropped. Rows are ordered by ``i``, then in the order
        the tree returned the neighbours of ``i``.
    """
    tree = KDTree(points_np, leaf_size=40)
    # One array of neighbour indices per query point.
    neighbors = tree.query_radius(points_np, cutoff)
    # Expand to flat (source, target) columns with array operations instead of
    # materialising a Python list per pair.
    counts = np.array([len(dest_list) for dest_list in neighbors])
    sources = np.repeat(np.arange(len(neighbors)), counts)
    targets = np.concatenate(list(neighbors))
    # Every query point matches itself and each pair is found from both ends;
    # keeping only i < j removes both kinds of duplicates.
    keep = sources < targets
    return np.column_stack((sources[keep], targets[keep]))

In [42]:
def pairwise_distance_cdist(points_np, cutoff=15.0):
    """Brute-force search for index pairs of points closer than ``cutoff``.

    Computes the full distance matrix with ``scipy.spatial.distance.cdist``,
    so memory and time grow quadratically with the number of points; intended
    as a reference implementation for small inputs.

    Parameters
    ----------
    points_np : array-like of shape (n_points, n_dims)
        Point coordinates.
    cutoff : float, default 15.0
        A pair is reported when its distance is strictly less than this value.

    Returns
    -------
    numpy.ndarray of shape (n_pairs, 2)
        Rows ``[i, j]`` with ``i < j``; shape ``(0, 2)`` when no pair qualifies.
    """
    distances = cdist(points_np, points_np)
    # np.nonzero yields the row and column indices of all matches. The previous
    # np.array(zip(...)) only produced a 2-D array where zip returns a list
    # (Python 2); on Python 3 it wraps the iterator in a 0-d object array and
    # the subsequent column indexing fails.
    rows, cols = np.nonzero(distances < cutoff)
    # Keep the strict upper triangle: drops self-pairs and mirrored duplicates.
    upper = rows < cols
    return np.column_stack((rows[upper], cols[upper]))

In [46]:
# Load the 132K atom-position array and keep only its first 50,000 rows for
# the comparison cells that follow.
atoms = np.load("../132k_dataset/atom_pos_132K.npy")[:50000]

In [47]:
%%time
# Brute-force reference run. pairwise_distance_cdist calls cdist(points, points),
# i.e. an n x n distance matrix; with the 50,000 atoms selected above that is
# about 2.5e9 entries. The recorded run of this cell ended in KeyboardInterrupt.
res_cdist=pairwise_distance_cdist(atoms)

KeyboardInterrupt: 

In [None]:
%%time
res_tree=pairwise_distance_tree(atoms)

In [None]:
# Number of (i, j) pairs returned by the tree-based search.
len(res_tree)

In [None]:
# Number of (i, j) pairs returned by the brute-force cdist search.
len(res_cdist)

In [None]:
# First five pairs from the tree-based result, for a visual spot check.
res_tree[:5]

In [None]:
# First five pairs from the cdist result, for a visual spot check.
res_cdist[:5]

In [19]:
# Turn both pair lists into networkx graphs (g1: tree result, g2: cdist result)
# so their edge sets can be compared.
g1, g2 = [nx.from_edgelist(pairs) for pairs in (res_tree, res_cdist)]

In [25]:
# Edges present in g1 (tree result) but absent from g2 (cdist result).
# NOTE(review): this check is one-directional -- pairs found only by cdist
# would not be listed here -- and nx.difference presumably requires both graphs
# to have the same node set; confirm whether a symmetric comparison is wanted.
nx.difference(g1, g2).edges()

[]

In [45]:
# Benchmark the three pair-search implementations on growing prefixes of the
# 839K dataset, repeating the whole sweep 10 times.
# Each output line is "<method>, MBP, <number of points>, <seconds>".

# The dataset does not change between repetitions, so load it once up front
# instead of re-reading the file on every outer iteration.
atoms = np.load("../840K_dataset/atom_pos_839K.npy")

# (label, function, largest n to run it for; None means no limit).
# cdist builds a dense n x n matrix, so it is only run for n <= 50000.
# The KDTree entry previously called `pairwise_distance_tree`, which this
# notebook never defines; it now calls the KDTree implementation it is
# labelled as.
benchmarks = [
    ("BallTree", pairwise_distance_balltree, None),
    ("KDTree", pairwise_distance_kdtree, None),
    ("cdist", pairwise_distance_cdist, 50000),
]

for i in range(10):
    for n in [10000, 20000, 40000, 80000, 160000, 320000, 640000, 839000]:
        a = atoms[:n]
        for label, func, max_points in benchmarks:
            if max_points is not None and n > max_points:
                continue
            start = time.time()
            res = func(a)
            end = time.time()
            print ("%s, MBP, %d, %.4f"%(label, n, end-start))

BallTree, MBP, 10000, 0.2709
KDTree, MBP, 10000, 0.2299
cdist, MBP, 10000, 1.1918
BallTree, MBP, 20000, 0.5060
KDTree, MBP, 20000, 0.5134
cdist, MBP, 20000, 4.3979
BallTree, MBP, 40000, 1.0088
KDTree, MBP, 40000, 1.8102
cdist, MBP, 40000, 82.2562
BallTree, MBP, 80000, 5.4268
KDTree, MBP, 80000, 4.9246
BallTree, MBP, 160000, 7.4169
KDTree, MBP, 160000, 9.1029
BallTree, MBP, 320000, 17.0483
KDTree, MBP, 320000, 15.4173
BallTree, MBP, 640000, 39.1077
KDTree, MBP, 640000, 41.1997
BallTree, MBP, 839000, 57.6504
KDTree, MBP, 839000, 59.1428
BallTree, MBP, 10000, 0.2405
KDTree, MBP, 10000, 0.2480
cdist, MBP, 10000, 1.2526
BallTree, MBP, 20000, 0.5209
KDTree, MBP, 20000, 0.5209
cdist, MBP, 20000, 5.2524
BallTree, MBP, 40000, 2.0861
KDTree, MBP, 40000, 2.7132
cdist, MBP, 40000, 66.7035
BallTree, MBP, 80000, 6.2341
KDTree, MBP, 80000, 3.6110
BallTree, MBP, 160000, 8.9416
KDTree, MBP, 160000, 9.2471
BallTree, MBP, 320000, 17.8004
KDTree, MBP, 320000, 14.5516
BallTree, MBP, 640000, 40.6775
KDTree,

In [30]:
# List the contents of the 840K dataset directory (shell command).
!ls ../840K_dataset 

atom_pos_839K.npy
