In [12]:
import tqdm
import psycopg2
from pgvector.psycopg2 import register_vector

In [13]:
conn = psycopg2.connect( host="localhost",
                        database="vectordb",
                        user="testuser",
                        password="testpwd")

Enable the extension

In [14]:
cur = conn.cursor()
cur.execute('CREATE EXTENSION IF NOT EXISTS vector')

Register the vector type with your connection or cursor

In [15]:
register_vector(conn)

Create a table

In [16]:
cur.execute('CREATE TABLE employees (id bigserial PRIMARY KEY, name text, email text, address text, describe text, embedding vector(1024))')

Config and index for table

In [17]:
cur.execute("SET maintenance_work_mem = '4GB'")
cur.execute("CREATE INDEX ON employees USING ivfflat (embedding vector_l2_ops) WITH (lists = 1000)")
cur.execute("SET ivfflat.probes = 64")
cur.execute("SET max_parallel_maintenance_workers = 6;")
conn.commit() 

Fake data for table

In [18]:
from faker import Faker
import numpy as np

number_data = 100000
fake = Faker()
embeddings = np.random.rand(number_data,1024)
for i in tqdm.tqdm(range(number_data)):
    name = fake.name()
    email = fake.email()
    address = fake.address()
    describe = fake.text()
    cur.execute('INSERT INTO employees (name, email, address, describe, embedding) VALUES (%s, %s, %s, %s, %s)', (name, email, address, describe, embeddings[i]))
conn.commit() 

100%|██████████| 100000/100000 [05:38<00:00, 295.02it/s]


Test Query

In [20]:
cur.execute('SELECT id, name, email, address, describe, embedding <-> %s AS similarity FROM employees ORDER BY similarity ASC LIMIT 5;', (embeddings[10],))
cur.fetchall()

[(11,
  'Jo Robinson',
  'marthanorton@example.com',
  'USCGC Mendoza\nFPO AA 85238',
  'Wife research mother senior yeah series within. Ability major matter big art.\nIdea when country rise. Us card goal him play stop cost. Reach instead indicate amount.',
  0.0),
 (99842,
  'Elizabeth Osborn',
  'larrybrown@example.com',
  '89785 Melissa Ports\nChristopherview, PA 19690',
  'Gun whatever least guess physical point. Guy push room relate office believe way.\nDream know suffer star trouble however both town. International plant material black.',
  12.19498843459566),
 (77612,
  'Kimberly Huff',
  'anthony89@example.net',
  '043 Catherine Station\nPort Larry, HI 26049',
  'Someone there seek I factor provide. Hand their soon remain.\nGive various treat myself leg run. These word blood drug involve.\nBlack for though stuff late. Drive worker billion goal western.',
  12.196697501030377),
 (52190,
  'Kristen Martinez',
  'mgates@example.com',
  '000 Joseph Hollow Apt. 991\nNew Robertfurt, 

Close Connection

In [21]:
cur.close()
conn.close()

Benchmark

In [22]:
conn = psycopg2.connect( host="localhost",
                        database="vectordb",
                        user="testuser",
                        password="testpwd")
cur = conn.cursor()

In [23]:
import random
num_query = 10000
for _ in tqdm.tqdm(range(num_query)):
    cur.execute('SELECT id, name, email, address, describe, embedding <-> %s AS similarity FROM employees ORDER BY similarity ASC LIMIT 5;', (embeddings[random.randint(0,num_query)],))

100%|██████████| 10000/10000 [01:15<00:00, 133.07it/s]


In [None]:
cur.close()
conn.close()