# Praktikum 4

In [None]:
!pip install annoy hnswlib faiss-cpu



In [None]:
import numpy as np
import time
from annoy import AnnoyIndex
import faiss
import hnswlib

# ===============================
# 1. Build a dataset of 1 million 5-D points
# ===============================
# Benchmark script: builds three nearest-neighbour indexes (Annoy, FAISS flat,
# hnswlib HNSW) over the same random data and reports build time, single-query
# time and the first five neighbour ids for each.
#
# Timing uses time.perf_counter() rather than time.time(): the queries take
# well under a millisecond, and perf_counter is the monotonic,
# highest-resolution clock intended for measuring short durations.
n_data = 1_000_000   # try 100_000 first if RAM is limited
dim = 5
X = np.random.random((n_data, dim)).astype(np.float32)

# Single query point; the k nearest neighbours are requested from every index
query = np.random.random((1, dim)).astype(np.float32)
k = 10

# ===============================
# 2. Annoy
# ===============================
print("=== Annoy ===")
ann_index = AnnoyIndex(dim, 'euclidean')

# Build time covers inserting every vector one by one plus building the trees.
start = time.perf_counter()
for i, vec in enumerate(X):
    ann_index.add_item(i, vec)
ann_index.build(10)  # 10 trees
build_time = time.perf_counter() - start

start = time.perf_counter()
# With include_distances=True the result is a pair; element 0 holds the ids.
neighbors = ann_index.get_nns_by_vector(query[0], k, include_distances=True)
query_time = time.perf_counter() - start

print("Build time:", build_time, "detik")
print("Query time:", query_time, "detik")
print("Neighbors:", neighbors[0][:5], "...")

# ===============================
# 3. FAISS (Flat Index)
# ===============================
print("\n=== FAISS (IndexFlatL2) ===")
faiss_index = faiss.IndexFlatL2(dim)

# For the flat index, "build" is only the time taken to add the vectors.
start = time.perf_counter()
faiss_index.add(X)
build_time = time.perf_counter() - start

start = time.perf_counter()
distances, indices = faiss_index.search(query, k)
query_time = time.perf_counter() - start

print("Build time:", build_time, "detik")
print("Query time:", query_time, "detik")
print("Neighbors:", indices[0][:5], "...")

# ===============================
# 4. HNSW (hnswlib)
# ===============================
print("\n=== HNSW (hnswlib) ===")
hnsw_index = hnswlib.Index(space='l2', dim=dim)

# Build time covers index initialisation plus the bulk insert.
start = time.perf_counter()
hnsw_index.init_index(max_elements=n_data, ef_construction=200, M=16)
hnsw_index.add_items(X)
build_time = time.perf_counter() - start

# Query-time ef is set outside both timed regions.
hnsw_index.set_ef(50)

start = time.perf_counter()
# NOTE: this rebinds `distances`, replacing the FAISS distances from above.
labels, distances = hnsw_index.knn_query(query, k=k)
query_time = time.perf_counter() - start

print("Build time:", build_time, "detik")
print("Query time:", query_time, "detik")
print("Neighbors:", labels[0][:5], "...")

=== Annoy ===
Build time: 26.430821895599365 detik
Query time: 0.00038433074951171875 detik
Neighbors: [620012, 256164, 296557, 563026, 631294] ...

=== FAISS (IndexFlatL2) ===
Build time: 0.008841753005981445 detik
Query time: 0.012901782989501953 detik
Neighbors: [620012 256164 296557 563026 631294] ...

=== HNSW (hnswlib) ===
Build time: 206.2334759235382 detik
Query time: 0.0005288124084472656 detik
Neighbors: [620012 256164 296557 563026 631294] ...


Lakukan percobaan pada metric distance yang berbeda. Catat hasilnya pada tabel yang Anda buat sendiri seperti pada Praktikum 1.

| Dataset                | Algoritma               | Build Time (detik) | Query Time (detik) | Kualitas Neighbor (Top-5)                | Catatan Singkat                                  |
| ---------------------- | ----------------------- | ------------------ | ------------------ | ---------------------------------------- | ------------------------------------------------ |
| **1 Juta data (5D)**   | **Annoy**               | 26.4308            | **0.000384**       | [620012, 256164, 296557, 563026, 631294] | Build cukup lama, query sangat cepat dan akurat  |
|                        | **FAISS (IndexFlatL2)** | **0.0088**         | 0.0129             | [620012, 256164, 296557, 563026, 631294] | Build super cepat, query lebih lambat dari Annoy |
|                        | **HNSW (hnswlib)**      | 206.2335           | 0.000529           | [620012, 256164, 296557, 563026, 631294] | Build paling berat, query cepat dan hasil akurat |
| **500 Ribu data (5D)** | **Annoy**               | 13.6088            | 0.000649           | [445259, 294505, 47134, 133576, 271926]  | Build sedang, query cepat; 1 tetangga terlewat   |
|                        | **FAISS (IndexFlatL2)** | **0.0055**         | 0.006299           | [445259, 220653, 294505, 47134, 133576]  | Build tercepat, query moderat                    |
|                        | **HNSW (hnswlib)**      | 108.7801           | **0.000318**       | [445259, 220653, 294505, 47134, 133576]  | Build berat, query tercepat                      |
| **100 Ribu data (5D)** | **Annoy**               | 1.7680             | 0.000232           | [69511, 51244, 88736, 78055, 37909]      | Build ringan, query cepat                        |
|                        | **FAISS (IndexFlatL2)** | **0.0011**         | 0.000920           | [69511, 51244, 88736, 78055, 37909]      | Build tercepat, hasil identik                    |
|                        | **HNSW (hnswlib)**      | 14.7013            | **0.000211**       | [69511, 51244, 88736, 78055, 37909]      | Build agak berat, query tercepat dan akurat      |


In [None]:
import numpy as np
import time
from annoy import AnnoyIndex
import faiss
import hnswlib

# ===============================
# 1. Build a dataset of 500K 5-D points
# ===============================
# Benchmark script: builds three nearest-neighbour indexes (Annoy, FAISS flat,
# hnswlib HNSW) over the same random data and reports build time, single-query
# time and the first five neighbour ids for each.
#
# Timing uses time.perf_counter() rather than time.time(): the queries take
# well under a millisecond, and perf_counter is the monotonic,
# highest-resolution clock intended for measuring short durations.
n_data = 500_000   # try 100_000 first if RAM is limited
dim = 5
X = np.random.random((n_data, dim)).astype(np.float32)

# Single query point; the k nearest neighbours are requested from every index
query = np.random.random((1, dim)).astype(np.float32)
k = 10

# ===============================
# 2. Annoy
# ===============================
print("=== Annoy ===")
ann_index = AnnoyIndex(dim, 'euclidean')

# Build time covers inserting every vector one by one plus building the trees.
start = time.perf_counter()
for i, vec in enumerate(X):
    ann_index.add_item(i, vec)
ann_index.build(10)  # 10 trees
build_time = time.perf_counter() - start

start = time.perf_counter()
# With include_distances=True the result is a pair; element 0 holds the ids.
neighbors = ann_index.get_nns_by_vector(query[0], k, include_distances=True)
query_time = time.perf_counter() - start

print("Build time:", build_time, "detik")
print("Query time:", query_time, "detik")
print("Neighbors:", neighbors[0][:5], "...")

# ===============================
# 3. FAISS (Flat Index)
# ===============================
print("\n=== FAISS (IndexFlatL2) ===")
faiss_index = faiss.IndexFlatL2(dim)

# For the flat index, "build" is only the time taken to add the vectors.
start = time.perf_counter()
faiss_index.add(X)
build_time = time.perf_counter() - start

start = time.perf_counter()
distances, indices = faiss_index.search(query, k)
query_time = time.perf_counter() - start

print("Build time:", build_time, "detik")
print("Query time:", query_time, "detik")
print("Neighbors:", indices[0][:5], "...")

# ===============================
# 4. HNSW (hnswlib)
# ===============================
print("\n=== HNSW (hnswlib) ===")
hnsw_index = hnswlib.Index(space='l2', dim=dim)

# Build time covers index initialisation plus the bulk insert.
start = time.perf_counter()
hnsw_index.init_index(max_elements=n_data, ef_construction=200, M=16)
hnsw_index.add_items(X)
build_time = time.perf_counter() - start

# Query-time ef is set outside both timed regions.
hnsw_index.set_ef(50)

start = time.perf_counter()
# NOTE: this rebinds `distances`, replacing the FAISS distances from above.
labels, distances = hnsw_index.knn_query(query, k=k)
query_time = time.perf_counter() - start

print("Build time:", build_time, "detik")
print("Query time:", query_time, "detik")
print("Neighbors:", labels[0][:5], "...")


=== Annoy ===
Build time: 13.608788251876831 detik
Query time: 0.0006492137908935547 detik
Neighbors: [445259, 294505, 47134, 133576, 271926] ...

=== FAISS (IndexFlatL2) ===
Build time: 0.00547027587890625 detik
Query time: 0.006299257278442383 detik
Neighbors: [445259 220653 294505  47134 133576] ...

=== HNSW (hnswlib) ===
Build time: 108.78008031845093 detik
Query time: 0.00031757354736328125 detik
Neighbors: [445259 220653 294505  47134 133576] ...


In [None]:
import numpy as np
import time
from annoy import AnnoyIndex
import faiss
import hnswlib

# ===============================
# 1. Build a dataset of 100K 5-D points
# ===============================
# Benchmark script: builds three nearest-neighbour indexes (Annoy, FAISS flat,
# hnswlib HNSW) over the same random data and reports build time, single-query
# time and the first five neighbour ids for each.
#
# Timing uses time.perf_counter() rather than time.time(): the queries take
# well under a millisecond, and perf_counter is the monotonic,
# highest-resolution clock intended for measuring short durations.
n_data = 100_000   # smallest of the three dataset sizes benchmarked here
dim = 5
X = np.random.random((n_data, dim)).astype(np.float32)

# Single query point; the k nearest neighbours are requested from every index
query = np.random.random((1, dim)).astype(np.float32)
k = 10

# ===============================
# 2. Annoy
# ===============================
print("=== Annoy ===")
ann_index = AnnoyIndex(dim, 'euclidean')

# Build time covers inserting every vector one by one plus building the trees.
start = time.perf_counter()
for i, vec in enumerate(X):
    ann_index.add_item(i, vec)
ann_index.build(10)  # 10 trees
build_time = time.perf_counter() - start

start = time.perf_counter()
# With include_distances=True the result is a pair; element 0 holds the ids.
neighbors = ann_index.get_nns_by_vector(query[0], k, include_distances=True)
query_time = time.perf_counter() - start

print("Build time:", build_time, "detik")
print("Query time:", query_time, "detik")
print("Neighbors:", neighbors[0][:5], "...")

# ===============================
# 3. FAISS (Flat Index)
# ===============================
print("\n=== FAISS (IndexFlatL2) ===")
faiss_index = faiss.IndexFlatL2(dim)

# For the flat index, "build" is only the time taken to add the vectors.
start = time.perf_counter()
faiss_index.add(X)
build_time = time.perf_counter() - start

start = time.perf_counter()
distances, indices = faiss_index.search(query, k)
query_time = time.perf_counter() - start

print("Build time:", build_time, "detik")
print("Query time:", query_time, "detik")
print("Neighbors:", indices[0][:5], "...")

# ===============================
# 4. HNSW (hnswlib)
# ===============================
print("\n=== HNSW (hnswlib) ===")
hnsw_index = hnswlib.Index(space='l2', dim=dim)

# Build time covers index initialisation plus the bulk insert.
start = time.perf_counter()
hnsw_index.init_index(max_elements=n_data, ef_construction=200, M=16)
hnsw_index.add_items(X)
build_time = time.perf_counter() - start

# Query-time ef is set outside both timed regions.
hnsw_index.set_ef(50)

start = time.perf_counter()
# NOTE: this rebinds `distances`, replacing the FAISS distances from above.
labels, distances = hnsw_index.knn_query(query, k=k)
query_time = time.perf_counter() - start

print("Build time:", build_time, "detik")
print("Query time:", query_time, "detik")
print("Neighbors:", labels[0][:5], "...")


=== Annoy ===
Build time: 1.7679774761199951 detik
Query time: 0.00023245811462402344 detik
Neighbors: [69511, 51244, 88736, 78055, 37909] ...

=== FAISS (IndexFlatL2) ===
Build time: 0.001056671142578125 detik
Query time: 0.0009202957153320312 detik
Neighbors: [69511 51244 88736 78055 37909] ...

=== HNSW (hnswlib) ===
Build time: 14.701262712478638 detik
Query time: 0.0002105236053466797 detik
Neighbors: [69511 51244 88736 78055 37909] ...
