In [6]:
from sqlalchemy import create_engine
import pandas as pd
from dotenv import load_dotenv
import os
import sys
sys.path.append("../functions")  # Add parent directory to path
from connect_openai_api import connect_to_openai_client
# === Load environment variables from .env file ===
load_dotenv()

# === Set up database connection parameters ===
user = os.getenv("POSTGRES_USER")
password = os.getenv("POSTGRES_PASSWORD")
host = os.getenv("POSTGRES_HOST")
name = os.getenv("POSTGRES_NAME")
port = os.getenv("POSTGRES_PORT")

# === Create SQLAlchemy engine ===
engine = create_engine(f"postgresql+psycopg2://{user}:{password}@{host}:{port}/{name}")

# === Test connection: Read table or query ===
query = "SELECT count(*) from vectors;"
df = pd.read_sql(query, engine)

print(df.head())


   count
0  21800


In [24]:
df['embedding'] = df['embedding'].apply(
    lambda s: [float(x) for x in s.strip('{}').split(',')]
)

In [7]:
from openai import OpenAI
client = connect_to_openai_client()
response = client.embeddings.create(
    input = "What is the capital of France?",
    model="text-embedding-3-small"
)

Connected to OpenAI API client successfully.


In [22]:
response.data[0].embedding
query = f"SELECT * FROM vectors ORDER BY embedding = '{response.data[0].embedding}' LIMIT 1;"

In [23]:
df = pd.read_sql(query, engine)
df.head()

Unnamed: 0,id,qa_text,embedding
0,2301,"In round 2, with a value of 400, a daily doubl...","{0.055482495576143265,0.01343670766800642,0.02..."


In [24]:
print(df['qa_text'].values[0])

In round 2, with a value of 400, a daily double value of 0, in the category ZOOLOGY, the question was From Latin to gnaw, there are more of this group than all other mammals combined and the answer was rodents. This was on 1984-12-05.


In [25]:
len(response.data[0].embedding)

1536

In [None]:
import openai
import psycopg2
import numpy as np

# Set up OpenAI API (replace with your actual API key)
openai.api_key = "your_openai_api_key"

# Connect to the database
conn = psycopg2.connect("dbname=your_database user=your_username")
cur = conn.cursor()

# Create a table for our documents
cur.execute("""
    CREATE TABLE IF NOT EXISTS documents (
        id SERIAL PRIMARY KEY,
        content TEXT,
        embedding vector(1536)
    )
""")

# Function to get embeddings from OpenAI
def get_embedding(text):
    response = openai.embeddings.create(input=text, model="text-embedding-ada-002")
    return response['data'][0]['embedding']

# Function to add a document
def add_document(content):
    embedding = get_embedding(content)
    cur.execute("INSERT INTO documents (content, embedding) VALUES (%s, %s)", (content, embedding))
    conn.commit()

# Function to search for similar documents
def search_documents(query, limit=5):
    query_embedding = get_embedding(query)
    cur.execute("""
        SELECT content, embedding <-> %s AS distance
        FROM documents
        ORDER BY distance
        LIMIT %s
    """, (query_embedding, limit))
    return cur.fetchall()

# Add some sample documents
sample_docs = [
    "The quick brown fox jumps over the lazy dog.",
    "Python is a high-level programming language.",
    "Vector databases are essential for modern AI applications.",
    "PostgreSQL is a powerful open-source relational database.",
]
for doc in sample_docs:
    add_document(doc)

# Perform a search
search_query = "Tell me about programming languages"
results = search_documents(search_query)
print(f"Search results for: '{search_query}'")
for i, (content, distance) in enumerate(results, 1):
    print(f"{i}. {content} (Distance: {distance:.4f})")

# Clean up
cur.close()
conn.close()