# Index Tests
The index-creation tests use data sets with the following composition:
- test1: 20 3-dimensional points
- test2: 30 3-dimensional points
- test3: 100 4-dimensional points

## Import Vantage Point Tree Classes

In [1]:
# Mount Google Drive at /content/gdrive; every project path used further down
# in this notebook lives under that mount point.
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [2]:
!pip install import-ipynb



In [3]:
# Switch to the project's notebooks folder so that index_creation can be found
# by the import below.
%cd "/content/gdrive/MyDrive/[MIRCV]FoodWebSearch/notebooks"

# import_ipynb must be imported first: it is what allows the index_creation
# notebook to be imported like a regular module.
import import_ipynb
from index_creation import VP_Tree, Node

print("\nClass VP_Tree and Node has been imported correctly")

/content/gdrive/.shortcut-targets-by-id/1Lqbq3dboVPe48Cxj_XZyhRs54U4hXhEM/[MIRCV]FoodWebSearch/notebooks
importing Jupyter notebook from index_creation.ipynb
Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).

Class VP_Tree and Node has been imported correctly


## Initialize Notebook

In [5]:
import numpy as np
import os
from scipy.spatial import distance as d

# Folder holding the test fixtures; saved indexes are written here as well.
TEST_PATH = "/content/gdrive/My Drive/[MIRCV]FoodWebSearch/antonio-tests"

# Every data set is a pair of .npy files inside TEST_PATH: the feature vectors
# and the names that label them.
FEATURES_PATH_TEST_1 = f"{TEST_PATH}/features-test-1.npy"
FEATURES_NAMES_TEST_1 = f"{TEST_PATH}/features-names-test-1.npy"

FEATURES_PATH_TEST_2 = f"{TEST_PATH}/features-test-2.npy"
FEATURES_NAMES_TEST_2 = f"{TEST_PATH}/features-names-test-2.npy"

FEATURES_PATH_TEST_3 = f"{TEST_PATH}/features-test-3.npy"
FEATURES_NAMES_TEST_3 = f"{TEST_PATH}/features-names-test-3.npy"

## Partition By Median Tests

In [6]:
# Partition By Median Tests 1
# Seven labelled 3-dimensional points, named image0..image6 in list order.
coordinates = [
    [0, 2, 1], [2, 3, 6], [5, 3, 2], [5, 6, 4],
    [5, 16, 1], [2, 6, 2], [1, 3, 1],
]
data = [("image" + str(position), np.array(values))
        for position, values in enumerate(coordinates)]

vantage_point_tree = VP_Tree("Index_Test", 5)
node, s_1, s_2 = vantage_point_tree.partition_by_median(data)

# Show the pivot and median stored on the returned node, then the two subsets.
for label, shown in (("Node:", node.pivot), ("Median:", node.median),
                     ("Set 1:", s_1), ("Set 2:", s_2)):
    print(label, shown)

Node: ('image3', array([5, 6, 4]))
Median: 5.260683827334365
Set 1: [('image2', array([5, 3, 2])), ('image5', array([2, 6, 2])), ('image1', array([2, 3, 6]))]
Set 2: [('image6', array([1, 3, 1])), ('image0', array([0, 2, 1])), ('image4', array([ 5, 16,  1]))]


In [7]:
# Partition By Median Tests 2
# Four labelled 2-dimensional points, named img_1..img_4 in list order.
coordinates = [[0, 0], [0, 1], [2, 2], [3, 3]]
data = [("img_" + str(number), np.array(values))
        for number, values in enumerate(coordinates, start=1)]

vantage_point_tree = VP_Tree("Index_Test", 5)
node, s_1, s_2 = vantage_point_tree.partition_by_median(data)

# NOTE(review): in the recorded run img_3 is listed in both Set 1 and Set 2 —
# confirm whether partition_by_median is meant to place a point in both subsets.
for label, shown in (("Pivot:", node.pivot), ("Median:", node.median),
                     ("Set 1:", s_1), ("Set 2:", s_2)):
    print(label, shown)

Pivot: ('img_1', array([0, 0]))
Median: 2.8284271247461903
Set 1: [('img_2', array([0, 1])), ('img_3', array([2, 2]))]
Set 2: [('img_3', array([2, 2])), ('img_4', array([3, 3]))]


## Creation Tests

In [8]:
# Create Tree Test 1
# Build the index from the test-1 fixtures and print its structure.
vantage_point_tree = VP_Tree("Index_Test_1", 4)
vantage_point_tree.create_vptree(FEATURES_NAMES_TEST_1, FEATURES_PATH_TEST_1)

result = VP_Tree.print_tree(vantage_point_tree.root, level=1)
print("Result:\n" + result + "\n")

# Save the tree under TEST_PATH/<index name>.
index_name = vantage_point_tree.index_name
save_target = os.path.join(TEST_PATH, index_name)
path = VP_Tree.save_vptree(save_target, vantage_point_tree)
print("Saved on:", path)

# Load it back from the returned path and print it again, so the two dumps
# can be compared.
tree_loaded = VP_Tree.load_vptree(path)
print("\nTree Loaded\n")

result = VP_Tree.print_tree(tree_loaded.root, level=1)
print(result)

Number of data: 21
The max height of the tree is: 3
Tree is building
Building of the tree completed in: 0.08094048500061035 s
Result:
id: 0 ('img5', array([2, 6, 2]))
	id: 00 ('img2', array([5, 3, 2]))
		id: 000 ('img3', array([5, 6, 4]))
			/content/gdrive/My Drive/[MIRCV]FoodWebSearch/antonio-tests/Index_Test_1/leaves_3/000_subset_1.npy
			/content/gdrive/My Drive/[MIRCV]FoodWebSearch/antonio-tests/Index_Test_1/leaves_3/000_subset_2.npy
		id: 001 ('img13', array([ 2,  3, 10]))
			/content/gdrive/My Drive/[MIRCV]FoodWebSearch/antonio-tests/Index_Test_1/leaves_3/001_subset_1.npy
			/content/gdrive/My Drive/[MIRCV]FoodWebSearch/antonio-tests/Index_Test_1/leaves_3/001_subset_2.npy
	id: 01 ('img8', array([223,  23,  61]))
		id: 010 ('img10', array([77, 61, 46]))
			/content/gdrive/My Drive/[MIRCV]FoodWebSearch/antonio-tests/Index_Test_1/leaves_3/010_subset_1.npy
			/content/gdrive/My Drive/[MIRCV]FoodWebSearch/antonio-tests/Index_Test_1/leaves_3/010_subset_2.npy
		id: 011 ('img16', array(

In [None]:
# Create Tree Test 2
# Build the index from the test-2 fixtures and print its structure.
vantage_point_tree = VP_Tree("Index_Test_2", 4)
vantage_point_tree.create_vptree(FEATURES_NAMES_TEST_2, FEATURES_PATH_TEST_2)
result = VP_Tree.print_tree(vantage_point_tree.root, level=1)
print("Result:\n" + result + "\n")

# Take the index name from the tree built in this cell, so the save location
# always matches this index and the cell does not depend on a variable left
# behind by another cell.
index_name = vantage_point_tree.index_name
path = VP_Tree.save_vptree(os.path.join(TEST_PATH, index_name), vantage_point_tree)

print("Saved on:", path)

# Load the tree back from the returned path and print it again for comparison.
tree_loaded = VP_Tree.load_vptree(path)

print("\nTree Loaded\n")
result = VP_Tree.print_tree(tree_loaded.root, level=1)
print(result)

Number of data: 30
The max height of the tree is: 3
Level n.1
Level n.2
Level n.2
Building of the tree completed in: 2.3370540142059326 s
Result:
id: 0 ('img_12', array([38., 86., 60.]))
	id: 00 ('img_16', array([56., 97., 40.]))
		id: 000 ('img_15', array([11., 71., 14.]))
			/content/gdrive/My Drive/[MIRCV]FoodWebSearch/antonio-tests/Index_Test_2/leaves_3/000_subset_1.npy
			/content/gdrive/My Drive/[MIRCV]FoodWebSearch/antonio-tests/Index_Test_2/leaves_3/000_subset_2.npy
		id: 001 ('img_21', array([66., 61., 89.]))
			/content/gdrive/My Drive/[MIRCV]FoodWebSearch/antonio-tests/Index_Test_2/leaves_3/001_subset_1.npy
			/content/gdrive/My Drive/[MIRCV]FoodWebSearch/antonio-tests/Index_Test_2/leaves_3/001_subset_2.npy
	id: 01 ('img_13', array([66., 13., 90.]))
		id: 010 ('img_8', array([98., 27., 22.]))
			/content/gdrive/My Drive/[MIRCV]FoodWebSearch/antonio-tests/Index_Test_2/leaves_3/010_subset_1.npy
			/content/gdrive/My Drive/[MIRCV]FoodWebSearch/antonio-tests/Index_Test_2/leaves_

In [None]:
# Create Tree Test 3
# Build the index from the test-3 fixtures and print its structure.
vantage_point_tree = VP_Tree("Index_Test_3", 6)
vantage_point_tree.create_vptree(FEATURES_NAMES_TEST_3, FEATURES_PATH_TEST_3)
result = VP_Tree.print_tree(vantage_point_tree.root, level=1)
print("Result:\n" + result + "\n")

# Take the index name from the tree built in this cell, so the save location
# always matches this index and the cell does not depend on a variable left
# behind by another cell.
index_name = vantage_point_tree.index_name
path = VP_Tree.save_vptree(os.path.join(TEST_PATH, index_name), vantage_point_tree)

print("Saved on:", path)

# Load the tree back from the returned path and print it again for comparison.
tree_loaded = VP_Tree.load_vptree(path)

print("\nTree Loaded\n")
result = VP_Tree.print_tree(tree_loaded.root, level=1)
print(result)

Number of data: 100
The max height of the tree is: 5
Level n.1
Level n.2
Level n.3
Level n.4
Level n.4
Level n.3
Level n.4
Level n.4
Level n.2
Level n.3
Level n.4
Level n.4
Level n.3
Level n.4
Level n.4
Building of the tree completed in: 9.011357307434082 s
Result:
id: 0 ('img_40', array([14, 34, 30, 85]))
	id: 00 ('img_75', array([ 2, 17, 25, 31]))
		id: 000 ('img_68', array([43, 34, 30, 29]))
			id: 0000 ('img_45', array([11, 29, 18, 31]))
				id: 00000 ('img_90', array([33, 13, 16, 34]))
					/content/gdrive/My Drive/[MIRCV]FoodWebSearch/antonio-tests/Index_Test_3/leaves_5/00000_subset_1.npy
					/content/gdrive/My Drive/[MIRCV]FoodWebSearch/antonio-tests/Index_Test_3/leaves_5/00000_subset_2.npy
				id: 00001 ('img_83', array([38,  2, 36, 51]))
					/content/gdrive/My Drive/[MIRCV]FoodWebSearch/antonio-tests/Index_Test_3/leaves_5/00001_subset_1.npy
					/content/gdrive/My Drive/[MIRCV]FoodWebSearch/antonio-tests/Index_Test_3/leaves_5/00001_subset_2.npy
			id: 0001 ('img_32', array([

In [None]:
# Create Tree Test 4
# Same fixtures as Test 1, but the index is built with disk_mode=True and an
# explicit leaves_path.
vantage_point_tree = VP_Tree("Index_Test_4", 4, disk_mode=True, leaves_path=TEST_PATH)
vantage_point_tree.create_vptree(FEATURES_NAMES_TEST_1, FEATURES_PATH_TEST_1)
result = VP_Tree.print_tree(vantage_point_tree.root, level=1, disk_mode=True)

print("Result:\n" + result + "\n")

# Take the index name from the tree built in this cell, so the save location
# always matches this index and the cell does not depend on a variable left
# behind by another cell.
index_name = vantage_point_tree.index_name
path = VP_Tree.save_vptree(os.path.join(TEST_PATH, index_name), vantage_point_tree)

print("Saved on:", path)

# Load the tree back from the returned path and print it again for comparison.
tree_loaded = VP_Tree.load_vptree(path)

print("\nTree Loaded\n")
# NOTE(review): unlike the first dump above, this call does not pass
# disk_mode=True — confirm whether that is intended for a reloaded tree.
result = VP_Tree.print_tree(tree_loaded.root, level=1)
print(result)

Number of data: 21
The max height of the tree is: 3
Level n.1
Level n.2
Level n.2
Building of the tree completed in: 2.7408978939056396 s
Result:
id: 0 ('img9', array([15, 32, 22]))
	id: 00 ('img13', array([ 2,  3, 10]))
		id: 000 ('img18', array([ 5, 23,  1]))
			/content/gdrive/My Drive/[MIRCV]FoodWebSearch/antonio-tests/Index_Test_4/leaves_3/000_subset_1.npy
			/content/gdrive/My Drive/[MIRCV]FoodWebSearch/antonio-tests/Index_Test_4/leaves_3/000_subset_2.npy
		id: 001 ('img12', array([30, 61, 20]))
			/content/gdrive/My Drive/[MIRCV]FoodWebSearch/antonio-tests/Index_Test_4/leaves_3/001_subset_1.npy
			/content/gdrive/My Drive/[MIRCV]FoodWebSearch/antonio-tests/Index_Test_4/leaves_3/001_subset_2.npy
	id: 01 ('img0', array([0, 2, 1]))
		id: 010 ('img16', array([ 1, 30, 60]))
			/content/gdrive/My Drive/[MIRCV]FoodWebSearch/antonio-tests/Index_Test_4/leaves_3/010_subset_1.npy
			/content/gdrive/My Drive/[MIRCV]FoodWebSearch/antonio-tests/Index_Test_4/leaves_3/010_subset_2.npy
		id: 011

## Searching Tests

In [None]:
# k-NN search Test 1
# Build an index over the test-1 fixtures and ask it for the k nearest
# neighbours of the query.
vantage_point_tree = VP_Tree("Index_Test", 4)
vantage_point_tree.create_vptree(FEATURES_NAMES_TEST_1, FEATURES_PATH_TEST_1)

query, k = [0, 1, 1], 3

print("Query:", query, "\n")

nn, d_nn = vantage_point_tree.knn_search(k, query)

print("\nk-NN search on the tree")
print("Points:", nn)
print("Distances:", d_nn)

# Brute-force baseline: Euclidean distance from the query to every row of the
# features array, ordered by increasing distance, to compare with the tree.
print("\n\nSequential Scan on the array")
data = np.load(FEATURES_PATH_TEST_1)
distances = []
for point in data:
    distances.append(d.euclidean(point, query))
zipped = list(zip(data, distances))
zipped.sort(key=lambda pair: pair[1])
for element in zipped[:k]:
    print(element)

Number of data: 21
The max height of the tree is: 3
Level n.1
Level n.2
Level n.2
Building of the tree completed in: 0.09337711334228516 s
Query: [0, 1, 1] 

Query answered in 0.019068241119384766  s

k-NN search on the tree
Points: [('img0', array([0, 2, 1])), array(['img0', array([0, 2, 1])], dtype=object), array(['img6', array([1, 3, 1])], dtype=object)]
Distances: [1.0, 1.0, 2.23606797749979]


Sequential Scan on the array
(array([0, 2, 1]), 1.0)
(array([1, 3, 1]), 2.23606797749979)
(array([5, 3, 2]), 5.477225575051661)


In [None]:
# k-NN search Test 2
# Build an index over the test-2 fixtures and ask it for the k nearest
# neighbours of the query.
# NOTE(review): the index is named "Index_Test_3" although it is built from the
# test-2 fixtures — confirm whether this name is intended.
vantage_point_tree = VP_Tree("Index_Test_3", 4)
vantage_point_tree.create_vptree(FEATURES_NAMES_TEST_2, FEATURES_PATH_TEST_2)

query, k = [0, 1, 1], 3

nn, d_nn = vantage_point_tree.knn_search(k, query)

print("Query:", query)

print("k-NN search on the tree")
print("Points:", nn)
print("Distances:", d_nn)

# Brute-force baseline: Euclidean distance from the query to every row of the
# features array, ordered by increasing distance, to compare with the tree.
print("\n\nSequential Scan on the array")
data = np.load(FEATURES_PATH_TEST_2)
distances = []
for point in data:
    distances.append(d.euclidean(point, query))
zipped = list(zip(data, distances))
zipped.sort(key=lambda pair: pair[1])
for element in zipped[:k]:
    print(element)

Number of data: 30
The max height of the tree is: 3
Level n.1
Level n.2
Level n.2
Building of the tree completed in: 2.4061920642852783 s
Query answered in 0.01263284683227539  s
Query: [0, 1, 1]
k-NN search on the tree
Points: [('img_10', array([ 4.,  9., 22.])), array(['img_24', array([26.,  3., 10.])], dtype=object), array(['img_6', array([32., 13.,  6.])], dtype=object)]
Distances: [22.825424421026653, 27.586228448267445, 34.539832078341085]


Sequential Scan on the array
(array([ 4.,  9., 22.]), 22.825424421026653)
(array([26.,  3., 10.]), 27.586228448267445)
(array([32., 13.,  6.]), 34.539832078341085)


In [None]:
# k-NN search Test 3
# Build an index over the test-3 fixtures and ask it for the k nearest
# neighbours of a 4-dimensional query.
vantage_point_tree = VP_Tree("Index_Test_3", 4)
vantage_point_tree.create_vptree(FEATURES_NAMES_TEST_3, FEATURES_PATH_TEST_3)

query, k = [2, 17, 26, 31], 6

nn, d_nn = vantage_point_tree.knn_search(k, query)

print("Query:", query)

print("k-NN search on the tree")
print("Points:", nn)
print("Distances:", d_nn)

# Brute-force baseline: Euclidean distance from the query to every row of the
# features array, ordered by increasing distance, to compare with the tree.
print("\n\nSequential Scan on the array")
data = np.load(FEATURES_PATH_TEST_3)
distances = []
for point in data:
    distances.append(d.euclidean(point, query))
zipped = list(zip(data, distances))
zipped.sort(key=lambda pair: pair[1])
for element in zipped[:k]:
    print(element)

Number of data: 100
The max height of the tree is: 5
Level n.1
Level n.2
Level n.3
Level n.3
Level n.2
Level n.3
Level n.3
Building of the tree completed in: 5.186305999755859 s
Query answered in 0.03465151786804199  s
Query: [2, 17, 26, 31]
k-NN search on the tree
Points: [array(['img_75', array([ 2, 17, 25, 31])], dtype=object), array(['img_45', array([11, 29, 18, 31])], dtype=object), array(['img_80', array([10, 31, 30, 25])], dtype=object), array(['img_95', array([14, 26, 10, 15])], dtype=object), ('img_20', array([20, 28, 34, 12])), array(['img_46', array([19, 28, 47, 39])], dtype=object)]
Distances: [1.0, 17.0, 17.663521732655695, 27.147743920996454, 29.49576240750525, 30.24896692450835]


Sequential Scan on the array
(array([ 2, 17, 25, 31]), 1.0)
(array([11, 29, 18, 31]), 17.0)
(array([10, 31, 30, 25]), 17.663521732655695)
(array([14, 26, 10, 15]), 27.147743920996454)
(array([20, 28, 34, 12]), 29.49576240750525)
(array([19, 28, 47, 39]), 30.24896692450835)


In [None]:
# k-NN search Test 4
# Same fixtures as Test 3, but the index is built with disk_mode=True.
vantage_point_tree = VP_Tree("Index_Test_3", 4, disk_mode=True)
vantage_point_tree.create_vptree(FEATURES_NAMES_TEST_3, FEATURES_PATH_TEST_3)

query, k = [0, 1, 1, 2], 6

nn, d_nn = vantage_point_tree.knn_search(k, query)

print("Query:", query)

print("k-NN search on the tree")
print("Points:", nn)
print("Distances:", d_nn)

# Brute-force baseline: Euclidean distance from the query to every row of the
# features array, ordered by increasing distance, to compare with the tree.
print("\n\nSequential Scan on the array")
data = np.load(FEATURES_PATH_TEST_3)
distances = []
for point in data:
    distances.append(d.euclidean(point, query))
zipped = list(zip(data, distances))
zipped.sort(key=lambda pair: pair[1])
for element in zipped[:k]:
    print(element)

Number of data: 100
The max height of the tree is: 5
Level n.1
Level n.2
Level n.3
Level n.3
Level n.2
Level n.3
Level n.3
Building of the tree completed in: 0.15247058868408203 s
Query answered in 0.024530410766601562  s
Query: [0, 1, 1, 2]
k-NN search on the tree
Points: [array(['img_95', array([14, 26, 10, 15])], dtype=object), array(['img_75', array([ 2, 17, 25, 31])], dtype=object), array(['img_45', array([11, 29, 18, 31])], dtype=object), array(['img_20', array([20, 28, 34, 12])], dtype=object), array(['img_80', array([10, 31, 30, 25])], dtype=object), array(['img_90', array([33, 13, 16, 34])], dtype=object)]
Distances: [32.72613634390714, 40.95119045888654, 45.110974274559844, 48.14561246884289, 48.68264577855234, 49.8196748283246]


Sequential Scan on the array
(array([14, 26, 10, 15]), 32.72613634390714)
(array([ 2, 17, 25, 31]), 40.95119045888654)
(array([11, 29, 18, 31]), 45.110974274559844)
(array([20, 28, 34, 12]), 48.14561246884289)
(array([10, 31, 30, 25]), 48.6826457785