In [None]:
from scikitplot import annoy

# Display the package version together with the names exposed by the
# package and by its `Annoy` class (rendered as the cell's output tuple).
(
    annoy.__version__,
    dir(annoy),
    dir(annoy.Annoy),
)

In [None]:
# Seed the stdlib RNG so the random vectors generated in later cells are
# reproducible across runs.
import random

random.seed(0)

from scikitplot.annoy import Annoy, AnnoyIndex, Index

# NOTE: the interactive `AnnoyIndex?` help lookup that used to end this cell
# was removed. It is IPython-only syntax (a SyntaxError in plain Python) and
# opens a pager on "Run All". Call `help(AnnoyIndex)` ad hoc when needed.

In [None]:
# =============================================================
# 1. Construction
# =============================================================
# Create an index passing 0 as the dimension argument, then report its
# `f` and `metric` attributes, its printed form, and `info()`.
idx = AnnoyIndex(0)
for label, attr_name in (("Index dimension:", "f"), ("Metric         :", "metric")):
    print(label, getattr(idx, attr_name))
print(idx)
print(idx.info())

In [None]:
from scikitplot import annoy as a

# Print the three public class names. Per the original author's notes,
# `AnnoyBase` should show the extension type, while `AnnoyIndex` and `Annoy`
# should both show <class '..._base.Index'>.
for cls_name in ("AnnoyBase", "AnnoyIndex", "Annoy"):
    print(getattr(a, cls_name))

# Check how the current `idx` relates to the wrapper and base classes.
for cls_name in ("Index", "AnnoyBase"):
    print(isinstance(idx, getattr(a, cls_name)))

# Show the concrete type, its defining module, and its full MRO.
print(type(idx))
idx_class = idx.__class__
print(idx_class.__module__)
print(idx_class.__mro__)

In [None]:
# =============================================================
# 1. Construction
# =============================================================
# Create a 3-dimensional index without naming a metric and report it.
idx = AnnoyIndex(f=3)
for label, attr_name in (("Index dimension:", "f"), ("Metric         :", "metric")):
    print(label, getattr(idx, attr_name))
print(idx)

In [None]:
# =============================================================
# 1. Construction
# =============================================================
# Create a 3-dimensional index with the "angular" metric named explicitly.
idx = AnnoyIndex(f=3, metric="angular")
for label, attr_name in (("Index dimension:", "f"), ("Metric         :", "metric")):
    print(label, getattr(idx, attr_name))

In [None]:
# =============================================================
# 2. Add items
# =============================================================
# Insert the three unit basis vectors under ids 0, 1 and 2.
for item_id, vector in enumerate(([1, 0, 0], [0, 1, 0], [0, 0, 1])):
    idx.add_item(item_id, vector)

print("Number of items:", idx.get_n_items())
for label, attr_name in (("Index dimension:", "f"), ("Metric         :", "metric")):
    print(label, getattr(idx, attr_name))

In [None]:
# =============================================================
# 1. Construction
# =============================================================
# Create a 100-dimensional angular index, report it, then call
# on_disk_build() with the target file "annoy_test.annoy".
idx = AnnoyIndex(100, metric="angular")
for label, attr_name in (("Index dimension:", "f"), ("Metric         :", "metric")):
    print(label, getattr(idx, attr_name))
idx.on_disk_build("annoy_test.annoy")

In [None]:
# =============================================================
# 2. Add items
# =============================================================
# Fill the index with n random Gaussian vectors of length f, printing a
# progress line every tenth of the way through.
f = 100
n = 1000
progress_step = n // 10

for i in range(n):
    if i % progress_step == 0:
        print(f"{i} / {n} = {1.0 * i / n}")
    v = [random.gauss(0, 1) for _ in range(f)]
    idx.add_item(i, v)

print("Number of items:", idx.get_n_items())
for label, attr_name in (("Index dimension:", "f"), ("Metric         :", "metric")):
    print(label, getattr(idx, attr_name))
print(idx)

In [None]:
# =============================================================
# 3. Build index
# =============================================================
# Build with 10 trees, then report tree count, memory usage and index info.
idx.build(10)

tree_count = idx.get_n_trees()
print("Trees:", tree_count)

memory_bytes = idx.memory_usage()
print("Memory usage:", memory_bytes, "bytes")

print(idx)
print(idx.info())

In [None]:
# Call unbuild() on the index (presumably discards the trees built above —
# confirm against the API docs) and print the index to show its state.
idx.unbuild()
print(idx)

In [None]:
# Build again with 10 trees after the unbuild() call in the previous cell,
# then print the index to show its state.
idx.build(10)
print(idx)

In [None]:
# =============================================================
# 1. Construction
# =============================================================
# Create an angular index passing 0 as the dimension argument; the cell
# below adds 100-element vectors to it.
idx = AnnoyIndex(0, metric="angular")
for label, attr_name in (("Index dimension:", "f"), ("Metric         :", "metric")):
    print(label, getattr(idx, attr_name))

In [None]:
# =============================================================
# 2. Add items
# =============================================================
# Fill the index with n random Gaussian vectors of length f, printing a
# progress line every tenth of the way through.
f = 100
n = 1000
progress_step = n // 10

for i in range(n):
    if i % progress_step == 0:
        print(f"{i} / {n} = {1.0 * i / n}")
    v = [random.gauss(0, 1) for _ in range(f)]
    idx.add_item(i, v)

print("Number of items:", idx.get_n_items())
for label, attr_name in (("Index dimension:", "f"), ("Metric         :", "metric")):
    print(label, getattr(idx, attr_name))
print(idx)

In [None]:
# =============================================================
# 3. Build index
# =============================================================
# Build with 10 trees, then report tree count, memory usage and index info.
idx.build(10)

tree_count = idx.get_n_trees()
print("Trees:", tree_count)

memory_bytes = idx.memory_usage()
print("Memory usage:", memory_bytes, "bytes")

print(idx)
print(idx.info())

In [None]:
# =============================================================
# 4. Query — return NNSResult
# =============================================================
# Ask for the 5 nearest neighbours of item 0, with distances; search_k is
# left at its default.
res = idx.get_nns_by_item(0, 5, include_distances=True)
print(res)

In [None]:
# =============================================================
# 8. Query using vector
# =============================================================
# Draw a fresh random query vector of length f and ask for its 5 nearest
# neighbours, with distances.
query_vector = [random.gauss(0, 1) for _ in range(f)]
res2 = idx.get_nns_by_vector(query_vector, 5, include_distances=True)
print("\nQuery by vector:", res2)

In [None]:
# =============================================================
# 9. Low-level (non-result) mode
# =============================================================
# Without distances: the call's return value is printed as-is.
items = idx.get_nns_by_item(0, 2, include_distances=False)
print("\nLow-level items only:", items)

# With distances: the return value is unpacked into two names.
ids_and_distances = idx.get_nns_by_item(0, 2, include_distances=True)
items_low, d_low = ids_and_distances
print("Low-level tuple return:", items_low, d_low)

In [None]:
# =============================================================
# 10. Persistence
# =============================================================
# Save the current index to disk, printing it before and after so any
# change in its reported state is visible.
print("\n=== Saving with binary annoy ===")
print(idx)
idx.save("annoy_test.annoy")
print(idx)

# Load into a fresh index. Construction and load() are separate statements so
# that `idx2` is always the index object, whatever load() itself returns
# (upstream Annoy documents a boolean). This matches the `j = ...; j.load(...)`
# pattern used by the round-trip cells further down.
print("Loading...")
idx2 = AnnoyIndex(100, metric='angular')
idx2.load("annoy_test.annoy")
print("Loaded index:", idx2)

In [None]:
# =============================================================
# 11. Raw serialize / deserialize
# =============================================================
# Round-trip the index through serialize()/deserialize() into a new
# 100-dimensional angular index, then print both for comparison.
print("\n=== Raw serialize ===")
new_idx = AnnoyIndex(100, metric='angular')
buf = idx.serialize()
new_idx.deserialize(buf)
print("Deserialized index n_items:", new_idx.get_n_items())
for index_obj in (idx, new_idx):
    print(index_obj)

In [None]:
# Call unload() on the index, then print it to show its state afterwards.
idx.unload()
print(idx)

In [None]:

f = 10
idx = AnnoyIndex(f, "angular")

# Item i is the constant vector [i, i, ..., i]: distinct content per item, so
# a mismatch after the save/load round trip is easy to spot.
for i in range(20):
    constant_vector = [float(i)] * f
    idx.add_item(i, constant_vector)
idx.build(10)
idx.save("tmp_current.tree")

# Vector 15 as seen by the index that wrote the file...
u = idx.get_item_vector(15)

# ...and as seen by a second index that loads the same file.
j = AnnoyIndex(f, "angular")
j.load("tmp_current.tree")
v = j.get_item_vector(15)

print("idx.f:", idx.f, "j.f:", j.f)
print("len(u):", len(u), "len(v):", len(v))
for label, vector in (("u:", u), ("v:", v)):
    print(label, vector)
vectors_match = u == v
print("u == v:", vectors_match)

In [None]:
import os

# 1. Recreate a fresh index with the current build, this time filled with
#    20 all-zero vectors, and overwrite "tmp_current.tree" with it.
f = 10
idx = AnnoyIndex(f, "angular")
for i in range(20):
    idx.add_item(i, [0.0 for _ in range(f)])
idx.build(10)
idx.save("tmp_current.tree")
u = idx.get_item_vector(15)

# Compare the on-disk size of the new file with the reference fixture.
for label, tree_path in (
    ("tmp_current.tree size:", "tmp_current.tree"),
    ("test.tree size  :", "../../../../scikitplot/annoy/tests/test.tree"),
):
    print(label, os.path.getsize(tree_path))
u


In [None]:

# Quick sanity load: read "tmp_current.tree" (written by the previous cell)
# into a fresh index and fetch vector 15 from it.
j = AnnoyIndex(f, "angular")
j.load("tmp_current.tree")  # expected to succeed if the build is OK
j.get_item_vector(15)       # smoke check only; the result is discarded
v = j.get_item_vector(15)
v

In [None]:
# Fetch vector 15 from the writer (`idx`) and the reader (`j`) and display
# both alongside their equality.
u = idx.get_item_vector(15)
v = j.get_item_vector(15)

(u, v, u == v)

In [None]:
import random
from pathlib import Path

# Re-seed so the generated fixture is reproducible.
random.seed(0)

# Target file, expressed relative to the current working directory.
HERE = Path.cwd().resolve()
OUT = HERE.joinpath("../../../../scikitplot/annoy/tests", "test_v2.tree")

f = 10
n = 1000
idx = AnnoyIndex(f, "angular")
for i in range(n):
    gaussian_vector = [random.gauss(0, 1) for _ in range(f)]
    idx.add_item(i, gaussian_vector)

idx.build(10)
idx.save(str(OUT))
print("Wrote", OUT)

In [None]:
# Display the result of a 10-neighbour query for item 0 on the index built above.
idx.get_nns_by_item(0, 10)

In [None]:
# Display whatever info() reports for the current index.
idx.info()

In [1]:
import joblib

# Load a previously dumped index from "test.joblib" and display it.
# The file must already exist: the dump below is commented out, and the only
# cell that writes it appears later in this notebook.
# NOTE(review): this rebinds `a`, which an earlier cell uses as the alias for
# the `scikitplot.annoy` module. Any later cell that expects `a` to be the
# module must re-import it first.
# NOTE(review): joblib.load unpickles the file — only load trusted files.
# joblib.dump(idx, "test.joblib")
a = joblib.load("test.joblib")
a

Annoy(f=10, metric='angular', n_items=20, n_trees=10, on_disk_path=None)

In [4]:
# Fetch vector 9 from `a`, the object loaded from "test.joblib" in the cell above.
a.get_item_vector(9)

[9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0]

In [None]:
f = 10
# Use the `annoy` module name imported in the first cell. The alias `a`
# cannot be used here: the joblib cell above rebinds `a` to a loaded index
# object, so `a.AnnoyBase` would be an attribute lookup on that object and
# fail on a top-to-bottom run.
idx = annoy.AnnoyBase(f, "angular")

# Distinct non-zero content so we can see mismatches clearly
for i in range(20):
    idx.add_item(i, [float(i)] * f)
idx.build(10)

In [None]:
import joblib
from scikitplot import annoy as a

# Wrap the low-level index built in the previous cell via
# Index.from_low_level, rebinding `idx` to the result, then dump it to
# "test.joblib". The dump's return value is shown as the cell output.
idx = a.Index.from_low_level(idx)
joblib.dump(idx, "test.joblib")

In [None]:

# Write the index with save_to_file() and read it straight back with
# Index.load_from_file(); both results are displayed as a tuple.
# NOTE(review): this reuses the path "test.joblib", so it overwrites the file
# that joblib.dump wrote in the previous cell — confirm that is intended.
idx.save_to_file("test.joblib"), a.Index.load_from_file("test.joblib")

In [None]:
# Display a 10-neighbour query for item 0 together with item 0's stored vector.
idx.get_nns_by_item(0, 10), idx.get_item_vector(0)

In [None]:
# Display the metric attribute of the current index.
idx.metric

In [None]:
# Reassigning the metric on an existing index is expected to fail with:
#   AttributeError: Cannot change metric after the index has been created.
# idx.metric = "dot"

In [None]:
idx  # display only; the trailing .unload() call is deliberately commented out

In [None]:
import random
from pathlib import Path

# Re-seed so the generated fixture is reproducible.
random.seed(0)

# Target file, expressed relative to the current working directory.
HERE = Path.cwd().resolve()
OUT = HERE.joinpath("../../../../scikitplot/annoy/tests", "test_v2.tree")

f = 10
n = 1000
idx = AnnoyIndex(f, "angular")
for i in range(n):
    gaussian_vector = [random.gauss(0, 1) for _ in range(f)]
    idx.add_item(i, gaussian_vector)

idx.build(10)
idx.save(str(OUT))
print("Wrote", OUT)
idx

In [None]:
# Small subset → DataFrame/CSV
# Export items in the range start=0, stop=1000 (the index above was filled
# with n = 1000 items) and write them to "sample.csv" without the row index.
df = idx.to_dataframe(start=0, stop=1000)
df.to_csv("sample.csv", index=False)

In [None]:
import pandas as pd

# Read back the CSV written by the previous cell and display it.
pd.read_csv("sample.csv")

In [None]:
# Large export → memory-safe .npy
# Exports items [0, n_items) into a memmapped .npy
# (The two lines above are the original author's description of the API;
# they have not been verified from this notebook.)
idx.save_vectors_npy("annoy_vectors.npy")

In [None]:
# Range-only export (strict, sized)
# NOTE(review): the index built above holds n = 1000 items, but stop is
# 1_000_000. If the range check really is strict this call may raise —
# confirm, or bound stop by idx.get_n_items().
idx.save_vectors_npy("chunk_0_1m.npy", start=0, stop=1_000_000)


In [None]:
# Streaming CSV (warning: huge)
# NOTE(review): stop=100_000 exceeds the n = 1000 items added above — confirm
# how to_csv handles a stop beyond the item count.
idx.to_csv("annoy_vectors.csv", start=0, stop=100_000)
