# Demo

## Prerequisites
- Docker
- Docker Compose v2 (the `docker compose` plugin)


## Start Container

In [None]:
# Shell escape: start the services of the project's Compose file (not shown here)
# in the background (-d = detached mode).
!docker compose up -d

## Create Connection

In [None]:
import psycopg

# Open one shared connection to the PostgreSQL server on localhost.
# autocommit=True makes every statement take effect immediately, so the cells
# below never need an explicit commit.
connection = psycopg.connect(
    'postgresql://postgres:root@127.0.0.1:5432',
    dbname='407-ki',
    autocommit=True,
)

## Enable Extension

In [None]:
from pgvector.psycopg import register_vector

# Make sure the pgvector extension is installed in this database, then register
# the vector type adapters on this connection.
# NOTE(review): the extension is created first, presumably because
# register_vector() needs the `vector` type to exist already — keep this order.
connection.execute('CREATE EXTENSION IF NOT EXISTS vector')
register_vector(connection)

## Create Table

In [None]:
# Drop any previous copy of the table so this cell can be re-run, then recreate it.
# vector(100) fixes the embedding dimension at 100.
connection.execute('DROP TABLE IF EXISTS demo')
connection.execute('CREATE TABLE demo (id bigserial, name text, embedding vector(100))')

## Import File

In [None]:
import pandas as pd

# Whitespace-separated text file: one token followed by its 100 vector components
# per line. The first line is skipped (presumably the word2vec-style
# "<rows> <dimensions>" header — verify against the file).
file = '~/Code/dewiki_20180420_100d.txt'
df = pd.read_csv(
    file,
    # Raw string: '\s' inside a plain literal is an invalid escape sequence.
    delimiter=r'\s+',
    header=None,
    skiprows=1,
    # Tokens such as "null", "NA" or "nan" are legitimate vocabulary entries;
    # with the default NA handling pandas would silently turn them into NaN.
    keep_default_na=False,
)

df.rename(columns={0: 'name'}, inplace=True)

# Collapse the vector columns into a single column holding one Python list per row
embeddings = df[df.columns[1:]].to_numpy()
df['embedding'] = embeddings.tolist()

# Only keep name and embedding columns
df = df[['name', 'embedding']]

## Import Data

In [None]:
print(f'Importing {len(df)} rows...')

# Stream all rows into the table with a binary COPY. The cursor is opened in a
# `with` block so it is closed again once the copy has finished.
with connection.cursor() as cur:
    with cur.copy('COPY demo (name, embedding) FROM STDIN WITH (FORMAT BINARY)') as copy:
        # use set_types for binary copy
        # https://www.psycopg.org/psycopg3/docs/basic/copy.html#binary-copy
        copy.set_types(['text', 'vector'])

        # Walk the two columns directly; DataFrame.iterrows() would build a
        # Series object for every single row.
        for position, (name, embedding) in enumerate(zip(df['name'], df['embedding'])):
            # show progress
            if position % 10000 == 0:
                print('.', end='', flush=True)

            # str() guards against non-string values (e.g. NaN) in the name column
            copy.write_row([str(name), embedding])

            # flush data: keep flushing while libpq reports that queued output
            # could not be sent completely yet (return value 1)
            while connection.pgconn.flush() == 1:
                print('F', end='', flush=True)

print('\n\nSuccess!')

## Create Index

In [None]:
# Session settings applied before building the index.
# NOTE(review): 16GB of maintenance memory and 7 parallel workers look tuned for
# one specific machine — adjust to the memory and cores actually available.
connection.execute("SET maintenance_work_mem = '16GB'")
connection.execute('SET max_parallel_maintenance_workers = 7')
# Build an HNSW index on the embedding column with the vector_cosine_ops operator class.
connection.execute('CREATE INDEX ON demo USING hnsw (embedding vector_cosine_ops)')

## Querying Data