Explain the code

## Setup initial base

In [1]:
from dotenv import load_dotenv
import os
load_dotenv()

True

In [2]:
import os
## Create a connection string to your PostgreSQL database
connection_string = f"postgresql://{os.getenv('POSTGRES_USERNAME_DB', 'postgres')}:{os.getenv('POSTGRES_PASSWORD_DB')}@{os.getenv('POSTGRES_DATABASE_HOST', 'localhost')}:5432/netflix_db"
print(connection_string)

postgresql://postgres:Post!234@localhost:5432/netflix_db


In [None]:
from langchain_postgres.vectorstores import PGVector
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.schema import Document

# Set up the connection string and embedding function
embedding_function = OpenAIEmbeddings()

documents = [
    Document(page_content="Prerequisites"),
    Document(page_content="How to install pgvector"),
    Document(page_content="Basic usage of pgvector"),
    Document(page_content="Indexing vector data with pgvector"),
    Document(page_content="pgvector integration with other tools"),
    Document(page_content="Building a sample application with pgvector and OpenAI")
]
# Create a PGVector instance
vector_store = PGVector.from_documents(
    documents=documents,
    embedding=embedding_function,
    connection=connection_string
)


PGVector.create_collection()


In [4]:
# Perform a similarity search
query = "pgvector installation steps"
results = vector_store.similarity_search(query)
results


[Document(id='45e34107-28cf-4689-bd28-535127377d0b', metadata={}, page_content='How to install pgvector'),
 Document(id='b8aea090-b17d-4615-8e7a-608ea7036a28', metadata={}, page_content='How to install pgvector'),
 Document(id='0f243ed7-41c8-4f9c-9b23-13752f89fdc0', metadata={}, page_content='pgvector integration with other tools'),
 Document(id='eb818bb0-a5e5-4e0a-9020-ea47e51e378e', metadata={}, page_content='pgvector integration with other tools')]

Build Chatbot with LangChain

Setup a method to get embedding

In [13]:
import google.generativeai as genai
from tenacity import (retry, stop_after_attempt, wait_random_exponential)

@retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6))
def get_embedding(input):
    result = genai.embed_content(
        model="models/embedding-001",
        content=input
    )
    return result["embedding"]

Insert data into database

In [19]:
import psycopg2

# Connect to the database
conn = psycopg2.connect(
    dbname="netflix_db",
    user=os.getenv("POSTGRES_USERNAME_DB", "postgres"),
    password=os.getenv("POSTGRES_PASSWORD_DB", "Post!234"),
    host=os.getenv("POSTGRES_DATABASE_HOST", "localhost"),
    port=os.getenv("POSTGRES_DATABASE_PORT", "5432")
)

cur = conn.cursor()

In [None]:
import psycopg2

# Create a table for our documents
cur.execute("""
    CREATE TABLE IF NOT EXISTS documents (
        id SERIAL PRIMARY KEY,
        content TEXT,
        embedding vector(768)
    )
""")

# Function to add a document
def add_document(content):
    embedding = get_embedding(content)
    cur.execute("INSERT INTO documents (content, embedding) VALUES (%s, %s)", (content, embedding))
    conn.commit()

# Add some sample documents
sample_docs = [
    "The quick brown fox jumps over the lazy dog.",
    "Python is a high-level programming language.",
    "Vector databases are essential for modern AI applications.",
    "PostgreSQL is a powerful open-source relational database.",
]

for doc in sample_docs:
    add_document(doc)

# Clean up
cur.close()
conn.close()
    

In [20]:



# Function to search for similar documents
def search_documents(query, limit=5):
    query_embedding = get_embedding(query)
    cur.execute("""
        SELECT content, embedding <=> %s AS distance
        FROM documents
        ORDER BY distance
        LIMIT %s
    """, (str(list(query_embedding)), limit))
    return cur.fetchall()

# Reconnect to the database
conn = psycopg2.connect(
    dbname="netflix_db",
    user=os.getenv("POSTGRES_USERNAME_DB", "postgres"),
    password=os.getenv("POSTGRES_PASSWORD_DB", "Post!234"),
    host=os.getenv("POSTGRES_DATABASE_HOST", "localhost"),
    port=os.getenv("POSTGRES_DATABASE_PORT", "5432")
)
cur = conn.cursor()
# Perform a search
search_query = "Tell me about programming languages"
results = search_documents(search_query)
print(f"Search results for: '{search_query}'")
for i, (content, distance) in enumerate(results, 1):
    print(f"{i}. {content} (Distance: {distance:.4f})")

# Clean up
cur.close()
conn.close()

Search results for: 'Tell me about programming languages'
1. Python is a high-level programming language. (Distance: 0.2740)
2. Python is a high-level programming language. (Distance: 0.2740)
3. Python is a high-level programming language. (Distance: 0.2740)
4. Vector databases are essential for modern AI applications. (Distance: 0.3924)
5. Vector databases are essential for modern AI applications. (Distance: 0.3924)
