## PRAKTIKUM 3

In [None]:
import hnswlib
import numpy as np
import time
from sklearn.neighbors import NearestNeighbors

# ===========================
# 1. Generate random 2D data
# ===========================
num_elements = 1000
dim = 2
data = np.random.random((num_elements, dim)).astype(np.float32)

# Query point
query = np.array([[0.5, 0.5]], dtype=np.float32)
k = 5  # number of nearest neighbours to retrieve

# ===========================
# 2. Exact NN (Brute Force)
# ===========================
nn = NearestNeighbors(n_neighbors=k, algorithm='brute', metric='euclidean')
nn.fit(data)

# perf_counter() is the clock intended for measuring short durations;
# time.time() can be too coarse for a sub-millisecond query.
start = time.perf_counter()
distances, indices = nn.kneighbors(query)
end = time.perf_counter()

print("=== Exact NN ===")
print("Indices:", indices)
print("Distances:", distances)
print("Waktu:", end - start, "detik")

# ===========================
# 3. HNSW
# ===========================
# Initialise the HNSW index
p = hnswlib.Index(space='l2', dim=dim)

# max_elements is the maximum number of items the index can hold
p.init_index(max_elements=num_elements, ef_construction=100, M=16)

# Insert the data
p.add_items(data)

# Search-time parameter: trade-off between speed and accuracy
p.set_ef(50)

start = time.perf_counter()
labels, distances = p.knn_query(query, k=k)
end = time.perf_counter()

# hnswlib's 'l2' space reports *squared* Euclidean distances. Take the square
# root so the numbers are directly comparable with the Exact NN output above.
distances = np.sqrt(distances)

print("\n=== HNSW ===")
print("Indices:", labels)
print("Distances:", distances)
print("Waktu:", end - start, "detik")


=== Exact NN ===
Indices: [[993 964 788 528  16]]
Distances: [[0.01145547 0.02310762 0.0234505  0.02675764 0.0279675 ]]
Waktu: 0.05717778205871582 detik

=== HNSW ===
Indices: [[993 964 788 528  16]]
Distances: [[0.00013123 0.00053396 0.00054993 0.00071597 0.00078218]]
Waktu: 0.0001513957977294922 detik


In [None]:
import hnswlib
import numpy as np
import time
from sklearn.neighbors import NearestNeighbors

# Function to run HNSW experiment
def run_hnsw_experiment(n_points, dim, metric_type):
    """Run one exact-vs-HNSW nearest-neighbour comparison and print the results.

    Random data and a single random query are generated after seeding NumPy's
    global RNG with 42, so repeated calls with the same arguments use the same
    points. The 5 nearest neighbours are searched with hnswlib, and, when the
    dataset has at most 10000 points and the metric has a scikit-learn
    equivalent, also with a brute-force ``NearestNeighbors`` search.

    Args:
        n_points: Number of random data points to index.
        dim: Dimensionality of each point.
        metric_type: hnswlib space name ('l2', 'ip' or 'cosine').

    Returns:
        None. Indices, distances and elapsed query time are printed.
    """
    np.random.seed(42)
    data = np.random.random((n_points, dim)).astype(np.float32)
    query = np.random.random((1, dim)).astype(np.float32)
    k = 5

    print(f"\n--- Experiment: n_points={n_points}, dim={dim}, metric={metric_type} ---")

    # Exact NN (Brute Force) - for comparison (only for smaller datasets and supported metrics)
    if n_points <= 10000:
        # hnswlib space name -> scikit-learn metric name; 'ip' has no entry.
        sklearn_metric = {'l2': 'euclidean', 'cosine': 'cosine'}.get(metric_type)

        if sklearn_metric:
            nn = NearestNeighbors(n_neighbors=k, algorithm='brute', metric=sklearn_metric)
            nn.fit(data)
            # perf_counter() is the clock intended for timing short intervals.
            start = time.perf_counter()
            distances_exact, indices_exact = nn.kneighbors(query)
            time_exact = time.perf_counter() - start
            print("=== Exact NN ===")
            print("Indices:", indices_exact[0])
            print("Distances:", distances_exact[0])
            print("Waktu:", round(time_exact, 6), "detik")
        else:
            print("=== Exact NN (Brute Force) Skipped: Metric not supported by scikit-learn NearestNeighbors ===")

    # HNSW
    p = hnswlib.Index(space=metric_type, dim=dim)
    p.init_index(max_elements=n_points, ef_construction=100, M=16)
    p.add_items(data)
    p.set_ef(50)

    start = time.perf_counter()
    labels_hnsw, distances_hnsw = p.knn_query(query, k=k)
    time_hnsw = time.perf_counter() - start

    if metric_type == 'l2':
        # hnswlib's 'l2' space reports *squared* Euclidean distances; take the
        # square root so the values match the 'euclidean' Exact NN output.
        distances_hnsw = np.sqrt(distances_hnsw)

    print("\n=== HNSW ===")
    print("Indices:", labels_hnsw[0])
    print("Distances:", distances_hnsw[0])
    print("Waktu:", round(time_hnsw, 6), "detik")


# Run experiments
# HNSW supports 'l2', 'ip', 'cosine'

# Each entry is (n_points, dim, metric_type); entries run in the listed order.
experiment_configs = [
    (1000, 2, 'l2'),         # Experiment 1: 1000 points, 2D, L2
    (1000, 2, 'ip'),         # Experiment 2: 1000 points, 2D, IP
    (1000, 5, 'l2'),         # Experiment 3: 1000 points, 5D, L2
    (1000, 5, 'ip'),         # Experiment 4: 1000 points, 5D, IP
    (1000000, 2, 'l2'),      # Experiment 5: 1 million points, 2D, L2
    (1000000, 2, 'ip'),      # Experiment 6: 1 million points, 2D, IP
    (1000000, 5, 'l2'),      # Experiment 7: 1 million points, 5D, L2
    (1000000, 5, 'ip'),      # Experiment 8: 1 million points, 5D, IP
    (1000, 2, 'cosine'),     # Experiment 9: 1000 points, 2D, Cosine
    (1000000, 5, 'cosine'),  # Experiment 10: 1 million points, 5D, Cosine
]

for experiment_args in experiment_configs:
    run_hnsw_experiment(*experiment_args)


--- Experiment: n_points=1000, dim=2, metric=l2 ---
=== Exact NN ===
Indices: [112 535 777 246 763]
Distances: [0.0046002  0.037096   0.04848799 0.06090931 0.06460769]
Waktu: 0.001287 detik

=== HNSW ===
Indices: [112 535 777 246 763]
Distances: [2.1161813e-05 1.3761134e-03 2.3510854e-03 3.7099437e-03 4.1741543e-03]
Waktu: 5.7e-05 detik

--- Experiment: n_points=1000, dim=2, metric=ip ---
=== Exact NN (Brute Force) Skipped: Metric not supported by scikit-learn NearestNeighbors ===

=== HNSW ===
Indices: [249 712 598 977 932]
Distances: [0.50142217 0.5034294  0.5059401  0.5082675  0.5107913 ]
Waktu: 5.2e-05 detik

--- Experiment: n_points=1000, dim=5, metric=l2 ---
=== Exact NN ===
Indices: [988 780  27 943  93]
Distances: [0.10281748 0.15009931 0.20240517 0.22791335 0.24103224]
Waktu: 0.000545 detik

=== HNSW ===
Indices: [988 780  27 943  93]
Distances: [0.01057143 0.0225298  0.04096785 0.05194449 0.05809654]
Waktu: 0.000175 detik

--- Experiment: n_points=1000, dim=5, metric=ip ---
