# Install pgvector for PostgreSQL
- Follow the instructions in the GitHub repository https://github.com/pgvector/pgvector to build and install the vector extension for the PostgreSQL server
- Create the vector extension in PostgreSQL as an admin user
- Get started with the Python example at https://github.com/pgvector/pgvector-python/blob/master/examples/openai/example.py

- In Python, you also need to install the `pgvector` package

In [None]:
from dotenv import load_dotenv
import os
load_dotenv()

# List the environment so it is easy to confirm the .env file was picked up.
# Values of variables whose names contain one of these markers are masked:
# the environment may hold credentials such as POSTGRES_PASSWORD_DB, and
# printing them would store them in the notebook's saved output.
_SECRET_MARKERS = ("PASSWORD", "SECRET", "TOKEN", "KEY")

for key, value in os.environ.items():
    is_secret = any(marker in key.upper() for marker in _SECRET_MARKERS)
    shown = "********" if is_secret else value
    print(f"{key} = {shown}")


In [None]:
# %pip install pgvector

import os
import numpy as np

import psycopg2
from pgvector.psycopg2 import register_vector

# Connect to PostgreSQL. User, password, host and port are read from the
# environment and fall back to the literals below when a variable is unset.
# NOTE(review): the fallback password is a literal committed with this file;
# prefer setting POSTGRES_PASSWORD_DB and rotating the database password.
conn = psycopg2.connect(
    dbname="netflix_db",
    user=os.getenv("POSTGRES_USERNAME_DB", "postgres"),
    password=os.getenv("POSTGRES_PASSWORD_DB", "Post!234"),
    host=os.getenv("POSTGRES_DATABASE_HOST", "localhost"),
    port=os.getenv("POSTGRES_DATABASE_PORT", "5432")
)

# Prepare the schema in a single transaction. The cursor is closed by the
# `with` block even when a statement fails, and a failure rolls the
# transaction back so the connection stays usable for the next cell.
try:
    with conn.cursor() as cur:
        cur.execute('CREATE EXTENSION IF NOT EXISTS vector')
        # Register the pgvector type adapter on this connection; done after
        # CREATE EXTENSION so the `vector` type exists when it is looked up.
        register_vector(conn)

        # The table is recreated from scratch every time this cell runs.
        cur.execute('DROP TABLE IF EXISTS documents')
        # vector(1536) fixes the embedding length; inserted vectors must match it.
        cur.execute('CREATE TABLE documents (id bigserial PRIMARY KEY, content text, embedding vector(1536))')
    conn.commit()
except Exception:
    conn.rollback()
    raise




Insert rows into the `documents` table

In [None]:
from openai import OpenAI
# Shared OpenAI client used by embed() below.
client = OpenAI()


def embed(input):
    """Request embeddings for ``input`` and return them as a list.

    ``input`` is forwarded unchanged to ``client.embeddings.create`` together
    with the 'text-embedding-3-small' model name. The returned list holds the
    ``embedding`` attribute of every entry in the response's ``data``, in the
    order the response provides them.
    """
    result = client.embeddings.create(model='text-embedding-3-small', input=input)
    vectors = []
    for item in result.data:
        vectors.append(item.embedding)
    return vectors


# Sample sentences to store.
# NOTE(review): the name `input` shadows the builtin for the rest of the
# session; it is kept because other cells may refer to it.
input = [
    'The dog is barking',
    'The cat is purring',
    'The bear is growling'
]
embeddings = embed(input)

conn.rollback()  # Reset transaction state if previous error occurred

# Insert one row per (text, embedding) pair and commit them together. The
# cursor is closed by the `with` block even when an insert fails, and a
# failure rolls back so no partial batch is left in an open transaction.
try:
    with conn.cursor() as cur:
        for content, embedding in zip(input, embeddings):
            # The embedding is passed as a NumPy array, as in the original cell.
            cur.execute('INSERT INTO documents (content, embedding) VALUES (%s, %s)', (content, np.array(embedding)))
    conn.commit()
except Exception:
    conn.rollback()
    raise


Search for the texts most similar to the query `query = 'tiger'`

In [None]:
query = 'tiger'
# embed() takes a list and returns a list, so wrap the query and take item 0.
query_embedding = embed([query])[0]

# Fetch at most five rows ordered by distance to the query embedding.
# `<=>` is pgvector's cosine-distance operator, so the closest texts come first.
# The cursor is closed by the `with` block even when the query fails, and a
# failure rolls back so the connection is not left in an aborted transaction.
try:
    with conn.cursor() as cur:
        cur.execute('SELECT content FROM documents ORDER BY embedding <=> %s LIMIT 5', (np.array(query_embedding),))
        result = cur.fetchall()
except Exception:
    conn.rollback()
    raise

for row in result:
    print(row[0])