In [1]:
!pip install hnswlib

Collecting hnswlib
  Downloading hnswlib-0.8.0.tar.gz (36 kB)
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Building wheels for collected packages: hnswlib
  Building wheel for hnswlib (pyproject.toml) ... [?25l[?25hdone
  Created wheel for hnswlib: filename=hnswlib-0.8.0-cp312-cp312-linux_x86_64.whl size=2528139 sha256=f5676776d25284ad0584d67ef5466870011e87fc72ce2169630aced6b99c8ebb
  Stored in directory: /root/.cache/pip/wheels/ac/39/b3/cbd7f9cbb76501d2d5fbc84956e70d0b94e788aac87bda465e
Successfully built hnswlib
Installing collected packages: hnswlib
Successfully installed hnswlib-0.8.0


In [2]:
import hnswlib
import numpy as np
import time
from sklearn.neighbors import NearestNeighbors

# ===========================
# 1. Buat data 2D acak
# ===========================
num_elements = 1000
dim = 2
data = np.random.random((num_elements, dim)).astype(np.float32)

# Query point
query = np.array([[0.5, 0.5]], dtype=np.float32)
k = 5  # cari 5 tetangga terdekat

# ===========================
# 2. Exact NN (Brute Force)
# ===========================
nn = NearestNeighbors(n_neighbors=k, algorithm='brute', metric='euclidean')
nn.fit(data)

start = time.time()
distances, indices = nn.kneighbors(query)
end = time.time()

print("=== Exact NN ===")
print("Indices:", indices)
print("Distances:", distances)
print("Waktu:", end - start, "detik")

# ===========================
# 3. HNSW
# ===========================
# Inisialisasi index HNSW
p = hnswlib.Index(space='l2', dim=dim)

# Ukuran maksimum elemen yang bisa ditampung
p.init_index(max_elements=num_elements, ef_construction=100, M=16)

# Tambahkan data
p.add_items(data)

# Set parameter pencarian
p.set_ef(50)   # tradeoff speed vs accuracy

start = time.time()
labels, distances = p.knn_query(query, k=k)
end = time.time()

print("\n=== HNSW ===")
print("Indices:", labels)
print("Distances:", distances)
print("Waktu:", end - start, "detik")


=== Exact NN ===
Indices: [[797 592 494 476  15]]
Distances: [[0.01642255 0.01692873 0.02015443 0.0202848  0.0237461 ]]
Waktu: 0.08613872528076172 detik

=== HNSW ===
Indices: [[797 592 494 476  15]]
Distances: [[0.0002697  0.00028658 0.0004062  0.00041147 0.00056388]]
Waktu: 0.0002155303955078125 detik


In [3]:
import hnswlib
import numpy as np
import time
import pandas as pd
from sklearn.neighbors import NearestNeighbors

def run_experiment(n_data, dim, metric_type):
    print(f"Running experiment: {n_data} data, {dim}D, Metric: {metric_type}...")

    # 1. Generate Dataset
    np.random.seed(42)
    data = np.random.random((n_data, dim)).astype(np.float32)
    query = np.random.random((1, dim)).astype(np.float32)
    k = 5 # Mencari 5 tetangga terdekat

    # Untuk Inner Product, normalisasi data sangat penting
    # agar setara dengan Cosine Similarity.
    if metric_type == 'ip':
        norm_data = np.linalg.norm(data, axis=1)
        data = data / norm_data[:, np.newaxis]
        norm_query = np.linalg.norm(query, axis=1)
        query = query / norm_query[:, np.newaxis]
        sklearn_metric = 'cosine'
    else:
        sklearn_metric = 'euclidean'


    # 2. Exact NN (Brute Force)
    nn = NearestNeighbors(n_neighbors=k, algorithm='brute', metric=sklearn_metric)
    nn.fit(data)
    start_time = time.time()
    _, indices_exact = nn.kneighbors(query)
    time_exact = time.time() - start_time


    # 3. HNSW (Approximate NN)
    p = hnswlib.Index(space=metric_type, dim=dim)

    # Waktu Build
    start_build = time.time()
    p.init_index(max_elements=n_data, ef_construction=200, M=16)
    p.add_items(data)
    time_build = time.time() - start_build

    # Waktu Search
    p.set_ef(50)
    start_search = time.time()
    indices_hnsw, _ = p.knn_query(query, k=k)
    time_hnsw = time.time() - start_search

    # 4. Hitung Recall (Akurasi)
    true_neighbors = set(indices_exact[0])
    approx_neighbors = set(indices_hnsw[0])
    recall = len(true_neighbors.intersection(approx_neighbors)) / k

    return {
        "Ukuran Data": n_data,
        "Dimensi": dim,
        "Metrik Jarak": metric_type.upper(),
        "Waktu Build (HNSW) (s)": round(time_build, 6),
        "Waktu Search (Exact) (s)": round(time_exact, 6),
        "Waktu Search (HNSW) (s)": round(time_hnsw, 6),
        f"Recall@{k}": recall
    }

# --- Eksekusi Semua Skenario ---
scenarios = [
    (1000, 2, 'l2'),
    (1000, 5, 'l2'),
    (1000000, 2, 'l2'),
    (1000000, 5, 'l2'),
    (1000, 2, 'ip'),
    (1000, 5, 'ip'),
    (1000000, 2, 'ip'),
    (1000000, 5, 'ip'),
]

results = []
for n_data, dim, metric in scenarios:
    results.append(run_experiment(n_data, dim, metric))

df_results = pd.DataFrame(results)
print("\n--- Hasil Eksperimen ---")
# Konversi ke Markdown
md_table = df_results.to_markdown(index=False)
print(md_table)

Running experiment: 1000 data, 2D, Metric: l2...
Running experiment: 1000 data, 5D, Metric: l2...
Running experiment: 1000000 data, 2D, Metric: l2...
Running experiment: 1000000 data, 5D, Metric: l2...
Running experiment: 1000 data, 2D, Metric: ip...
Running experiment: 1000 data, 5D, Metric: ip...
Running experiment: 1000000 data, 2D, Metric: ip...
Running experiment: 1000000 data, 5D, Metric: ip...

--- Hasil Eksperimen ---
|   Ukuran Data |   Dimensi | Metrik Jarak   |   Waktu Build (HNSW) (s) |   Waktu Search (Exact) (s) |   Waktu Search (HNSW) (s) |   Recall@5 |
|--------------:|----------:|:---------------|-------------------------:|---------------------------:|--------------------------:|-----------:|
|          1000 |         2 | L2             |                 0.214559 |                   0.001589 |                  8.7e-05  |          1 |
|          1000 |         5 | L2             |                 0.147348 |                   0.00306  |                  7.8e-05  |        

| Ukuran Data | Dimensi | Metrik Jarak | Waktu Build (HNSW) (s) | Waktu Search (Exact) (s) | Waktu Search (HNSW) (s) | Recall@5 |
| :---------: | :-----: | :----------: | :--------------------: | :----------------------: | :---------------------: | :------: |
| 1000 | 2 | L2 | 0.214559 | 0.001589 | 8.7e-05 | 1 |
| 1000 | 5 | L2 | 0.147348 | 0.00306 | 7.8e-05 | 1 |
| 1000000 | 2 | L2 | 121.839 | 0.055544 | 0.000103 | 1 |
| 1000000 | 5 | L2 | 180.781 | 0.030031 | 0.000135 | 1 |
| 1000 | 2 | IP | 0.161587 | 0.018055 | 4.9e-05 | 1 |
| 1000 | 5 | IP | 0.14942 | 0.006016 | 7.7e-05 | 1 |
| 1000000 | 2 | IP | 153.142 | 0.077408 | 8.1e-05 | 0 |
| 1000000 | 5 | IP | 160.29 | 0.04719 | 8.8e-05 | 1 |