Based on https://www.pinecone.io/learn/series/faiss/vector-indexes/ and https://github.com/pgvector/pgvector

# Set up runtime

In [172]:
# Install the PostgreSQL driver (psycopg2) and the pgvector Python client with %pip.
# NOTE(review): versions are not pinned — TODO pin them for reproducible runs.
%pip install psycopg2
%pip install pgvector

Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.
Collecting sqlalchemy
  Obtaining dependency information for sqlalchemy from https://files.pythonhosted.org/packages/cb/a7/3c8c8a36f336880e4dca47bf30d5c723384c40b67e649b35a582d6df45ef/SQLAlchemy-2.0.20-cp311-cp311-macosx_11_0_arm64.whl.metadata
  Downloading SQLAlchemy-2.0.20-cp311-cp311-macosx_11_0_arm64.whl.metadata (9.4 kB)
Downloading SQLAlchemy-2.0.20-cp311-cp311-macosx_11_0_arm64.whl (2.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.0/2.0 MB[0m [31m9.9 MB/s[0m eta [36m0:00:00[0mta [36m0:00:01[0m
[?25hInstalling collected packages: sqlalchemy
Successfully installed sqlalchemy-2.0.20
Note: you may need to restart the kernel to use updated packages.


In [229]:
import os
import shutil
import psycopg2
import numpy as np
import pgvector
import math
import pandas as pd
pd.set_option('display.max_colwidth', 190)
from pgvector.psycopg2 import register_vector
from psycopg2.extras import execute_values
import urllib.request as request
from contextlib import closing

# Load and prepare demo data

In [2]:
# Fetch the Sift1M archive over FTP and stream it into sift.tar.gz in the working directory
with closing(request.urlopen('ftp://ftp.irisa.fr/local/texmex/corpus/sift.tar.gz')) as response, \
        open('sift.tar.gz', 'wb') as archive:
    shutil.copyfileobj(response, archive)

In [3]:
import tarfile

# the download leaves us with a tar.gz file, we unzip it into the working directory.
# The context manager closes the archive handle once extraction is done
# (previously the handle was left open for the lifetime of the kernel).
# NOTE(review): extractall() trusts the member paths stored in the archive; this file
# comes from a remote server, so only run this against a source you trust.
with tarfile.open('sift.tar.gz', "r:gz") as tar:
    tar.extractall()

In [133]:
import numpy as np

# now define a function to read the fvecs file format of Sift1M dataset
def read_fvecs(fp):
    """Read an .fvecs file into a 2-D float32 array.

    The file is read as a flat stream of int32 values. The first value is
    taken as the vector dimension ``d``; the stream is then viewed as records
    of ``d + 1`` values, the leading value of every record is dropped, and the
    remaining values are reinterpreted as float32.

    Parameters
    ----------
    fp : str or file-like
        Path (or open file) accepted by ``numpy.fromfile``.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(n_vectors, d)`` and dtype float32. An empty file
        yields an empty ``(0, 0)`` float32 array instead of raising.

    Raises
    ------
    ValueError
        If the number of values in the file is not a multiple of ``d + 1``
        (raised by ``reshape``).
    """
    raw = np.fromfile(fp, dtype='int32')
    if raw.size == 0:
        # Nothing to decode: there is no leading dimension field to read.
        return np.empty((0, 0), dtype='float32')
    dim = int(raw[0])
    records = raw.reshape(-1, dim + 1)
    # Drop the per-record dimension column; copy() makes the slice contiguous
    # so the int32 payload can be reinterpreted in place as float32.
    return records[:, 1:].copy().view('float32')

In [134]:
# Base set: the vectors we will search through (1M samples)
xb = read_fvecs('./sift/sift_base.fvecs')
# Query set: load the file, then keep only its first vector as a single-row 2-D array
xq = read_fvecs('./sift/sift_query.fvecs')
xq = xq[0][np.newaxis, :]

In [135]:
# Shape of the query array: a single row holding the one query vector kept above
xq.shape

(1, 128)

In [136]:
# Shape of the vector search space: (number of base vectors, vector dimension)
xb.shape

(1000000, 128)

In [137]:
# Pair every base vector with its row number so each tuple lines up with the
# (id, embedding) columns of the target table in postgres:
data_list = list(zip(range(len(xb)), (np.array(vec) for vec in xb)))

# Upload vector embeddings to Postgres

In [177]:
# Connection settings: credentials are read from the environment rather than
# being written into the notebook.
pguser=os.getenv("PGUSER")
pgpassword=os.getenv("PGPASSWORD")
pghost="localhost"
pgdatabase="postgres"
# Pass the settings as keyword arguments instead of interpolating them into a
# connection URI: a password containing reserved characters such as '@' or '/'
# would corrupt the URI, and an unset variable would be sent as the literal
# text "None". Keyword arguments whose value is None are simply left out.
conn = psycopg2.connect(host=pghost, dbname=pgdatabase, user=pguser, password=pgpassword)
# Every statement is committed immediately; the notebook never opens explicit transactions.
conn.autocommit = True
# Teach this connection to convert between numpy arrays and the pgvector `vector` type.
register_vector(conn)
# Single shared cursor used by all following cells.
cur = conn.cursor()

In [376]:
# Ask the server which PostgreSQL version it runs and show the answer as a one-row frame
cur.execute("SHOW server_version;")
pd.DataFrame(cur.fetchall(), columns=['server_version'])

Unnamed: 0,server_version
0,15.4


In [115]:
# Create the target table: an integer id plus a pgvector column declared as vector(128),
# matching the 128 columns of the arrays loaded above.
# NOTE(review): not idempotent — re-running this cell fails once the table exists.
cur.execute("""
CREATE TABLE embeddings (
            id int,
            embedding vector(128)
            );
""")

In [116]:
# Upload the vector embeddings
# execute_values expands the single %s placeholder into multi-row VALUES lists built
# from the (id, vector) tuples in data_list.
# NOTE(review): running this cell twice inserts every row twice.
execute_values(cur, "INSERT INTO embeddings (id, embedding) VALUES %s", data_list)

In [370]:
# Peek at ten of the uploaded rows to confirm the ids and vectors arrived
cur.execute("SELECT * FROM embeddings LIMIT 10;")
pd.DataFrame(cur.fetchall(), columns=['id', 'embedding'])

Unnamed: 0,id,embedding
0,0,"[0.0, 16.0, 35.0, 5.0, 32.0, 31.0, 14.0, 10.0, 11.0, 78.0, 55.0, 10.0, 45.0, 83.0, 11.0, 6.0, 14.0, 57.0, 102.0, 75.0, 20.0, 8.0, 3.0, 5.0, 67.0, 17.0, 19.0, 26.0, 5.0, 0.0, 1.0, 22.0, 6..."
1,1,"[14.0, 35.0, 19.0, 20.0, 3.0, 1.0, 13.0, 11.0, 16.0, 119.0, 85.0, 5.0, 0.0, 5.0, 24.0, 26.0, 0.0, 27.0, 119.0, 13.0, 3.0, 9.0, 19.0, 0.0, 0.0, 11.0, 73.0, 9.0, 10.0, 3.0, 5.0, 0.0, 92.0,..."
2,2,"[0.0, 1.0, 5.0, 3.0, 44.0, 40.0, 20.0, 14.0, 10.0, 100.0, 63.0, 7.0, 44.0, 47.0, 9.0, 6.0, 7.0, 70.0, 114.0, 82.0, 11.0, 5.0, 3.0, 4.0, 30.0, 16.0, 33.0, 55.0, 5.0, 0.0, 0.0, 4.0, 104.0,..."
3,3,"[12.0, 47.0, 14.0, 25.0, 2.0, 3.0, 4.0, 7.0, 14.0, 122.0, 90.0, 7.0, 0.0, 0.0, 6.0, 14.0, 0.0, 24.0, 122.0, 22.0, 2.0, 4.0, 6.0, 0.0, 0.0, 10.0, 93.0, 10.0, 6.0, 6.0, 0.0, 0.0, 122.0, 31..."
4,4,"[1.0, 1.0, 0.0, 0.0, 14.0, 16.0, 30.0, 50.0, 2.0, 40.0, 81.0, 25.0, 65.0, 37.0, 7.0, 3.0, 13.0, 121.0, 121.0, 15.0, 2.0, 4.0, 4.0, 6.0, 0.0, 11.0, 71.0, 106.0, 2.0, 2.0, 0.0, 0.0, 92.0, ..."
5,5,"[48.0, 69.0, 9.0, 6.0, 2.0, 3.0, 7.0, 25.0, 64.0, 130.0, 33.0, 4.0, 0.0, 0.0, 0.0, 9.0, 1.0, 22.0, 66.0, 44.0, 14.0, 6.0, 0.0, 0.0, 1.0, 5.0, 7.0, 4.0, 19.0, 40.0, 1.0, 0.0, 130.0, 17.0,..."
6,6,"[0.0, 42.0, 55.0, 4.0, 16.0, 26.0, 24.0, 10.0, 1.0, 91.0, 100.0, 12.0, 42.0, 70.0, 8.0, 0.0, 17.0, 48.0, 111.0, 73.0, 19.0, 5.0, 2.0, 2.0, 68.0, 23.0, 13.0, 15.0, 6.0, 2.0, 1.0, 27.0, 98..."
7,7,"[16.0, 36.0, 10.0, 7.0, 0.0, 0.0, 31.0, 30.0, 11.0, 69.0, 90.0, 7.0, 0.0, 17.0, 38.0, 29.0, 0.0, 15.0, 112.0, 15.0, 5.0, 9.0, 41.0, 5.0, 0.0, 3.0, 22.0, 36.0, 32.0, 10.0, 6.0, 1.0, 112.0..."
8,8,"[8.0, 35.0, 11.0, 5.0, 13.0, 4.0, 4.0, 4.0, 9.0, 41.0, 110.0, 50.0, 8.0, 0.0, 3.0, 4.0, 20.0, 47.0, 110.0, 15.0, 3.0, 3.0, 1.0, 14.0, 5.0, 18.0, 50.0, 23.0, 6.0, 6.0, 4.0, 4.0, 79.0, 56...."
9,9,"[21.0, 13.0, 18.0, 11.0, 14.0, 6.0, 4.0, 14.0, 39.0, 54.0, 52.0, 10.0, 8.0, 14.0, 5.0, 2.0, 23.0, 76.0, 65.0, 10.0, 11.0, 23.0, 3.0, 0.0, 6.0, 10.0, 17.0, 5.0, 7.0, 21.0, 20.0, 13.0, 63...."


In [199]:
# The input vector to be used for the vector queries
# (wrapped in a one-row DataFrame purely so it renders like the table samples)
pd.DataFrame({'embedding' : [xq]})

Unnamed: 0,embedding
0,"[[1.0, 3.0, 11.0, 110.0, 62.0, 22.0, 4.0, 0.0, 43.0, 21.0, 22.0, 18.0, 6.0, 28.0, 64.0, 9.0, 11.0, 1.0, 0.0, 0.0, 1.0, 40.0, 101.0, 21.0, 20.0, 2.0, 4.0, 2.0, 2.0, 9.0, 18.0, 35.0, 1.0, ..."


In [213]:
# Render the query vector as comma-separated text; the query cells wrap it in
# '[...]' to form the vector literal embedded in their SQL
query_vector = ",".join(xq.astype('str')[0])

# Exact nearest neighbor search
I.e., no index is being used.

## Exact Euclidean Distance

In [465]:
%%time
# Run a similarity query with euclidean distance (the `<->` operator), keeping the 10 closest rows.
# The statement is kept in `sql` because the next cell re-runs it under EXPLAIN ANALYZE.
sql = f"SELECT id, embedding <-> '[{query_vector}]' as distance FROM embeddings ORDER BY distance LIMIT 10;"
cur.execute(sql)
# Stored as the exact-search reference used by the recall calculations further down.
euclidean_baseline = pd.DataFrame(cur.fetchall(), columns=['id', 'distance'])
euclidean_baseline

CPU times: user 2.05 ms, sys: 3.03 ms, total: 5.08 ms
Wall time: 126 ms


Unnamed: 0,id,distance
0,932085,232.871209
1,934876,234.71472
2,561813,243.989754
3,708177,255.460369
4,706771,256.31426
5,695756,258.862898
6,435345,261.241268
7,701258,264.280154
8,455537,267.284493
9,872728,268.069021


In [466]:
# Show scan type used in embedding column in the query plan.
# The plan is requested through the same cursor that ran the query: `engine` is never
# defined in this notebook, and a separate connection would not share this session's settings.
cur.execute("EXPLAIN ANALYZE " + sql)
# EXPLAIN returns one text row per plan line; keep only the lines describing a scan node.
df = pd.DataFrame(cur.fetchall(), columns=['QUERY PLAN'])
df[df['QUERY PLAN'].str.contains('Scan')]

Unnamed: 0,QUERY PLAN
9,-> Parallel Seq Scan on embeddings (cost=0.00..76637.33 rows=416667 width=12) (actual time=0.108..97.531 rows=333333 loops=3)


## Exact Cosine Distance

In [471]:
%%time
# Run a similarity query with cosine distance (the `<=>` operator), keeping the 10 closest rows.
# The statement is kept in `sql` because the next cell re-runs it under EXPLAIN ANALYZE.
sql = f"SELECT id, embedding <=> '[{query_vector}]' as distance FROM embeddings ORDER BY distance LIMIT 10;"
cur.execute(sql)
# Stored as the exact-search reference used by the recall calculations further down.
cosine_baseline = pd.DataFrame(cur.fetchall(), columns=['id', 'distance'])
cosine_baseline

CPU times: user 1.7 ms, sys: 2.09 ms, total: 3.78 ms
Wall time: 124 ms


Unnamed: 0,id,distance
0,932085,0.105112
1,934876,0.106705
2,561813,0.115366
3,708177,0.126512
4,706771,0.127323
5,695756,0.129983
6,435345,0.132089
7,701258,0.135184
8,455537,0.138418
9,872728,0.138856


In [468]:
# Show scan type used in embedding column in the query plan.
# The plan is requested through the same cursor that ran the query: `engine` is never
# defined in this notebook, and a separate connection would not share this session's settings.
cur.execute("EXPLAIN ANALYZE " + sql)
# EXPLAIN returns one text row per plan line; keep only the lines describing a scan node.
df = pd.DataFrame(cur.fetchall(), columns=['QUERY PLAN'])
df[df['QUERY PLAN'].str.contains('Scan')]

Unnamed: 0,QUERY PLAN
9,-> Parallel Seq Scan on embeddings (cost=0.00..76637.33 rows=416667 width=12) (actual time=0.114..99.913 rows=333333 loops=3)


## Exact Dot Product Distance

In [472]:
%%time
# Run a similarity query with dot product distance (the `<#>` operator), keeping the 10 first rows.
# pgvector's `<#>` yields the negative inner product, so ascending order puts the largest
# dot products first (hence the negative values in the output).
# The statement is kept in `sql` because the next cell re-runs it under EXPLAIN ANALYZE.
sql = f"SELECT id, embedding <#> '[{query_vector}]' as distance FROM embeddings ORDER BY distance LIMIT 10;"
cur.execute(sql)
# Stored as the exact-search reference used by the recall calculations further down.
dot_product_baseline = pd.DataFrame(cur.fetchall(), columns=['id', 'distance'])
dot_product_baseline

CPU times: user 2.05 ms, sys: 2.7 ms, total: 4.75 ms
Wall time: 126 ms


Unnamed: 0,id,distance
0,932085,-230843.0
1,934876,-230600.0
2,561813,-228242.0
3,708177,-225288.0
4,706771,-225144.0
5,695756,-224256.0
6,435345,-224214.0
7,701258,-223408.0
8,872728,-222827.0
9,455537,-222341.0


In [470]:
# Show scan type used in embedding column in the query plan.
# The plan is requested through the same cursor that ran the query: `engine` is never
# defined in this notebook, and a separate connection would not share this session's settings.
cur.execute("EXPLAIN ANALYZE " + sql)
# EXPLAIN returns one text row per plan line; keep only the lines describing a scan node.
df = pd.DataFrame(cur.fetchall(), columns=['QUERY PLAN'])
df[df['QUERY PLAN'].str.contains('Scan')]

Unnamed: 0,QUERY PLAN
9,-> Parallel Seq Scan on embeddings (cost=0.00..76637.33 rows=416667 width=12) (actual time=0.202..102.335 rows=333333 loops=3)


# IVFFlat Index Nearest Neighbor Search

In [473]:
%%time
# Create three IVF indexes for euclidean, cosine and dot product distances
# (one per operator class: vector_l2_ops, vector_cosine_ops, vector_ip_ops).
# In a separate session you can check progress using: SELECT phase, tuples_done, tuples_total FROM pg_stat_progress_create_index;
# Raise the memory available to index builds for this session.
cur.execute("SET maintenance_work_mem TO '256 MB';")
# One list per 1000 rows of the base set.
number_of_lists = round(len(xb) / 1000)
# NOTE(review): not idempotent — re-running fails while the indexes still exist.
cur.execute(f"CREATE INDEX euclidean_embeddings_idx ON embeddings USING ivfflat (embedding vector_l2_ops) WITH (lists = {number_of_lists});")
cur.execute(f"CREATE INDEX cosine_embeddings_idx ON embeddings USING ivfflat (embedding vector_cosine_ops) WITH (lists = {number_of_lists});")
cur.execute(f"CREATE INDEX dotproduct_embeddings_idx ON embeddings USING ivfflat (embedding vector_ip_ops) WITH (lists = {number_of_lists});")

CPU times: user 4.9 ms, sys: 4.92 ms, total: 9.82 ms
Wall time: 1min 20s


In [474]:
# Report the combined size of the indexes on the embeddings table, in human-readable form
cur.execute("select pg_size_pretty (pg_indexes_size('embeddings'));")
pd.DataFrame(cur.fetchall(), columns=['pg_size_pretty'])

Unnamed: 0,pg_size_pretty
0,947 MB


## Euclidean Distance with IVF

In [509]:
%%time
# Run a similarity query with euclidean distance and IVF index:
# the number of lists probed per query is set to sqrt(lists) for this session
# (it stays in effect for the cosine and dot product IVF queries that follow).
number_of_probes=round(math.sqrt(number_of_lists))
cur.execute(f"SET ivfflat.probes = {number_of_probes};")
# The statement is kept in `sql` because the next cell re-runs it under EXPLAIN ANALYZE.
sql = f"SELECT id, embedding <-> '[{query_vector}]' as distance FROM embeddings ORDER BY distance LIMIT 10;"
cur.execute(sql)
# Kept for the recall comparison against euclidean_baseline.
euclidean_ivf = pd.DataFrame(cur.fetchall(), columns=['id', 'distance'])
euclidean_ivf

CPU times: user 2.08 ms, sys: 2.66 ms, total: 4.73 ms
Wall time: 71.1 ms


Unnamed: 0,id,distance
0,932085,232.871209
1,934876,234.71472
2,561813,243.989754
3,708177,255.460369
4,706771,256.31426
5,435345,261.241268
6,701258,264.280154
7,455537,267.284493
8,872728,268.069021
9,36538,270.820974


In [510]:
# Show scan type used in embedding column in the query plan.
# The plan is requested through the same cursor that ran the query, so the session-level
# `SET ivfflat.probes` issued through `cur` also applies to the explained statement;
# `engine` is never defined in this notebook.
cur.execute("EXPLAIN ANALYZE " + sql)
# EXPLAIN returns one text row per plan line; keep only the lines describing a scan node.
df = pd.DataFrame(cur.fetchall(), columns=['QUERY PLAN'])
df[df['QUERY PLAN'].str.contains('Scan')]

Unnamed: 0,QUERY PLAN
1,-> Index Scan using euclidean_embeddings_idx on embeddings (cost=105.00..6591.00 rows=1000000 width=12) (actual time=5.167..5.182 rows=10 loops=1)


In [511]:
# Calculate recall percentage for the euclidean distance IVF lookup:
# the share of exact-search (baseline) distances that also appear in the IVF result.
baseline = euclidean_baseline["distance"].to_numpy()
# np.isin replaces the deprecated np.in1d; the matches in its boolean mask are counted directly.
np.count_nonzero(np.isin(baseline, euclidean_ivf["distance"].to_numpy())) / baseline.size * 100

90.0

## Cosine Distance with IVF

In [516]:
%%time
# Run a similarity query with cosine distance and IVF index:
# The statement is kept in `sql` because the next cell re-runs it under EXPLAIN ANALYZE.
sql = f"SELECT id, embedding <=> '[{query_vector}]' as distance FROM embeddings ORDER BY distance LIMIT 10;"
cur.execute(sql)
# Kept for the recall comparison against cosine_baseline.
cosine_ivf = pd.DataFrame(cur.fetchall(), columns=['id', 'distance'])
cosine_ivf

CPU times: user 1.71 ms, sys: 1.76 ms, total: 3.46 ms
Wall time: 70.6 ms


Unnamed: 0,id,distance
0,932085,0.105112
1,934876,0.106705
2,561813,0.115366
3,708177,0.126512
4,706771,0.127323
5,695756,0.129983
6,435345,0.132089
7,701258,0.135184
8,455537,0.138418
9,872728,0.138856


In [513]:
# Show scan type used in embedding column in the query plan.
# The plan is requested through the same cursor that ran the query, so the session-level
# `SET ivfflat.probes` issued through `cur` also applies to the explained statement;
# `engine` is never defined in this notebook.
cur.execute("EXPLAIN ANALYZE " + sql)
# EXPLAIN returns one text row per plan line; keep only the lines describing a scan node.
df = pd.DataFrame(cur.fetchall(), columns=['QUERY PLAN'])
df[df['QUERY PLAN'].str.contains('Scan')]

Unnamed: 0,QUERY PLAN
1,-> Index Scan using cosine_embeddings_idx on embeddings (cost=120.00..6606.00 rows=1000000 width=12) (actual time=6.923..7.181 rows=10 loops=1)


In [514]:
# Calculate recall percentage for the cosine distance IVF lookup:
# the share of exact-search (baseline) distances that also appear in the IVF result.
baseline = cosine_baseline["distance"].to_numpy()
# np.isin replaces the deprecated np.in1d; the matches in its boolean mask are counted directly.
np.count_nonzero(np.isin(baseline, cosine_ivf["distance"].to_numpy())) / baseline.size * 100

100.0

## Dot Product Distance with IVF

In [519]:
%%time
# Run a similarity query with dot product distance and IVF index:
# The statement is kept in `sql` because the next cell re-runs it under EXPLAIN ANALYZE.
sql = f"SELECT id, embedding <#> '[{query_vector}]' as distance FROM embeddings ORDER BY distance LIMIT 10;"
cur.execute(sql)
# Kept for the recall comparison against dot_product_baseline.
dot_product_ivf = pd.DataFrame(cur.fetchall(), columns=['id', 'distance'])
dot_product_ivf

CPU times: user 2.07 ms, sys: 2.71 ms, total: 4.78 ms
Wall time: 69.7 ms


Unnamed: 0,id,distance
0,932085,-230843.0
1,934876,-230600.0
2,561813,-228242.0
3,708177,-225288.0
4,706771,-225144.0
5,435345,-224214.0
6,701258,-223408.0
7,872728,-222827.0
8,455537,-222341.0
9,562594,-221958.0


In [520]:
# Show scan type used in embedding column in the query plan.
# The plan is requested through the same cursor that ran the query, so the session-level
# `SET ivfflat.probes` issued through `cur` also applies to the explained statement;
# `engine` is never defined in this notebook.
cur.execute("EXPLAIN ANALYZE " + sql)
# EXPLAIN returns one text row per plan line; keep only the lines describing a scan node.
df = pd.DataFrame(cur.fetchall(), columns=['QUERY PLAN'])
df[df['QUERY PLAN'].str.contains('Scan')]

Unnamed: 0,QUERY PLAN
1,-> Index Scan using dotproduct_embeddings_idx on embeddings (cost=105.00..6591.00 rows=1000000 width=12) (actual time=5.197..5.221 rows=10 loops=1)


In [521]:
# Calculate recall percentage for the dot product distance IVF lookup:
# the share of exact-search (baseline) distances that also appear in the IVF result.
baseline = dot_product_baseline["distance"].to_numpy()
# np.isin replaces the deprecated np.in1d; the matches in its boolean mask are counted directly.
np.count_nonzero(np.isin(baseline, dot_product_ivf["distance"].to_numpy())) / baseline.size * 100

90.0

In [522]:
# Clean up IVF indexes so the HNSW section starts from an unindexed table
# NOTE(review): each DROP fails if its index is already gone (no IF EXISTS).
cur.execute("DROP INDEX euclidean_embeddings_idx;")
cur.execute("DROP INDEX cosine_embeddings_idx;")
cur.execute("DROP INDEX dotproduct_embeddings_idx;")

# HNSW Index Nearest Neighbor Search

In [523]:
%%time
# Create three HNSW indexes for euclidean, cosine and dot product distances
# (one per operator class: vector_l2_ops, vector_cosine_ops, vector_ip_ops).
# In a separate session you can check progress using: SELECT phase, tuples_done, tuples_total FROM pg_stat_progress_create_index;
# Raise the memory available to index builds for this session.
cur.execute("SET maintenance_work_mem TO '256 MB';")
# Build parameters shared by all three indexes.
number_of_neighbors_per_vertex = 16 # default is 16
ef_construction = 65 # default is 64
# NOTE(review): not idempotent — re-running fails while the indexes still exist.
cur.execute(f"""CREATE INDEX euclidean_embeddings_hnsw_idx ON embeddings USING hnsw (embedding vector_l2_ops)
                                                           WITH (m = {number_of_neighbors_per_vertex},
                                                                 ef_construction = {ef_construction});""")
cur.execute(f"""CREATE INDEX cosine_embeddings_hnsw_idx ON embeddings USING hnsw (embedding vector_cosine_ops)
                                                           WITH (m = {number_of_neighbors_per_vertex},
                                                                 ef_construction = {ef_construction});""")
cur.execute(f"""CREATE INDEX dotproduct_embeddings_hnsw_idx ON embeddings USING hnsw (embedding vector_ip_ops)
                                                           WITH (m = {number_of_neighbors_per_vertex},
                                                                 ef_construction = {ef_construction});""")

CPU times: user 256 ms, sys: 131 ms, total: 387 ms
Wall time: 1h 52min 43s


In [524]:
# Report the combined size of the indexes on the embeddings table, in human-readable form
cur.execute("select pg_size_pretty (pg_indexes_size('embeddings'));")
pd.DataFrame(cur.fetchall(), columns=['pg_size_pretty'])

Unnamed: 0,pg_size_pretty
0,2342 MB


## Euclidean Distance with HNSW

In [536]:
%%time
# Run a similarity query with euclidean distance and HNSW index:
# hnsw.ef_search is set for this session (it stays in effect for the cosine and
# dot product HNSW queries that follow).
ef_search = 50 # default is 40
cur.execute(f"SET hnsw.ef_search = {ef_search};")
# The statement is kept in `sql` because the next cell re-runs it under EXPLAIN ANALYZE.
sql = f"SELECT id, embedding <-> '[{query_vector}]' as distance FROM embeddings ORDER BY distance LIMIT 10;"
cur.execute(sql)
# Kept for the recall comparison against euclidean_baseline.
euclidean_hnsw = pd.DataFrame(cur.fetchall(), columns=['id', 'distance'])
euclidean_hnsw

CPU times: user 1.5 ms, sys: 1.89 ms, total: 3.38 ms
Wall time: 5.58 ms


Unnamed: 0,id,distance
0,932085,232.871209
1,934876,234.71472
2,561813,243.989754
3,708177,255.460369
4,706771,256.31426
5,695756,258.862898
6,435345,261.241268
7,701258,264.280154
8,455537,267.284493
9,872728,268.069021


In [537]:
# Show scan type used in embedding column in the query plan.
# The plan is requested through the same cursor that ran the query, so the session-level
# `SET hnsw.ef_search` issued through `cur` also applies to the explained statement;
# `engine` is never defined in this notebook.
cur.execute("EXPLAIN ANALYZE " + sql)
# EXPLAIN returns one text row per plan line; keep only the lines describing a scan node.
df = pd.DataFrame(cur.fetchall(), columns=['QUERY PLAN'])
df[df['QUERY PLAN'].str.contains('Scan')]

Unnamed: 0,QUERY PLAN
1,-> Index Scan using euclidean_embeddings_hnsw_idx on embeddings (cost=40.72..298256.72 rows=1000000 width=12) (actual time=3.026..3.047 rows=10 loops=1)


In [538]:
# Calculate recall percentage for the euclidean distance HNSW lookup:
# the share of exact-search (baseline) distances that also appear in the HNSW result.
baseline = euclidean_baseline["distance"].to_numpy()
# np.isin replaces the deprecated np.in1d; the matches in its boolean mask are counted directly.
np.count_nonzero(np.isin(baseline, euclidean_hnsw["distance"].to_numpy())) / baseline.size * 100

100.0

## Cosine Distance with HNSW

In [539]:
%%time
# Run a similarity query with cosine distance and HNSW index:
# The statement is kept in `sql` because the next cell re-runs it under EXPLAIN ANALYZE.
sql = f"SELECT id, embedding <=> '[{query_vector}]' as distance FROM embeddings ORDER BY distance LIMIT 10;"
cur.execute(sql)
# Kept for the recall comparison against cosine_baseline.
cosine_hnsw = pd.DataFrame(cur.fetchall(), columns=['id', 'distance'])
cosine_hnsw

CPU times: user 1.48 ms, sys: 1.94 ms, total: 3.42 ms
Wall time: 6.97 ms


Unnamed: 0,id,distance
0,932085,0.105112
1,934876,0.106705
2,561813,0.115366
3,708177,0.126512
4,706771,0.127323
5,695756,0.129983
6,435345,0.132089
7,701258,0.135184
8,455537,0.138418
9,872728,0.138856


In [540]:
# Show scan type used in embedding column in the query plan.
# The plan is requested through the same cursor that ran the query, so the session-level
# `SET hnsw.ef_search` issued through `cur` also applies to the explained statement;
# `engine` is never defined in this notebook.
cur.execute("EXPLAIN ANALYZE " + sql)
# EXPLAIN returns one text row per plan line; keep only the lines describing a scan node.
df = pd.DataFrame(cur.fetchall(), columns=['QUERY PLAN'])
df[df['QUERY PLAN'].str.contains('Scan')]

Unnamed: 0,QUERY PLAN
1,-> Index Scan using cosine_embeddings_hnsw_idx on embeddings (cost=40.72..298256.72 rows=1000000 width=12) (actual time=4.981..5.001 rows=10 loops=1)


In [541]:
# Calculate recall percentage for the cosine distance HNSW lookup:
# the share of exact-search (baseline) distances that also appear in the HNSW result.
baseline = cosine_baseline["distance"].to_numpy()
# np.isin replaces the deprecated np.in1d; the matches in its boolean mask are counted directly.
np.count_nonzero(np.isin(baseline, cosine_hnsw["distance"].to_numpy())) / baseline.size * 100

100.0

## Dot Product Distance with HNSW

In [542]:
%%time
# Run a similarity query with dot product distance and HNSW index:
# The statement is kept in `sql` because the next cell re-runs it under EXPLAIN ANALYZE.
sql = f"SELECT id, embedding <#> '[{query_vector}]' as distance FROM embeddings ORDER BY distance LIMIT 10;"
cur.execute(sql)
# Kept for the recall comparison against dot_product_baseline.
dot_product_hnsw = pd.DataFrame(cur.fetchall(), columns=['id', 'distance'])
dot_product_hnsw

CPU times: user 1.31 ms, sys: 1.81 ms, total: 3.12 ms
Wall time: 6.41 ms


Unnamed: 0,id,distance
0,932085,-230843.0
1,934876,-230600.0
2,561813,-228242.0
3,708177,-225288.0
4,706771,-225144.0
5,695756,-224256.0
6,435345,-224214.0
7,701258,-223408.0
8,872728,-222827.0
9,455537,-222341.0


In [543]:
# Show scan type used in embedding column in the query plan.
# The plan is requested through the same cursor that ran the query, so the session-level
# `SET hnsw.ef_search` issued through `cur` also applies to the explained statement;
# `engine` is never defined in this notebook.
cur.execute("EXPLAIN ANALYZE " + sql)
# EXPLAIN returns one text row per plan line; keep only the lines describing a scan node.
df = pd.DataFrame(cur.fetchall(), columns=['QUERY PLAN'])
df[df['QUERY PLAN'].str.contains('Scan')]

Unnamed: 0,QUERY PLAN
1,-> Index Scan using dotproduct_embeddings_hnsw_idx on embeddings (cost=40.72..298256.72 rows=1000000 width=12) (actual time=2.173..2.188 rows=10 loops=1)


In [544]:
# Calculate recall percentage for the dot product distance HNSW lookup:
# the share of exact-search (baseline) distances that also appear in the HNSW result.
baseline = dot_product_baseline["distance"].to_numpy()
# np.isin replaces the deprecated np.in1d; the matches in its boolean mask are counted directly.
np.count_nonzero(np.isin(baseline, dot_product_hnsw["distance"].to_numpy())) / baseline.size * 100

100.0

In [441]:
# Clean up HNSW indexes
# NOTE(review): each DROP fails if its index is already gone (no IF EXISTS).
cur.execute("DROP INDEX euclidean_embeddings_hnsw_idx;")
cur.execute("DROP INDEX cosine_embeddings_hnsw_idx;")
cur.execute("DROP INDEX dotproduct_embeddings_hnsw_idx;")