In [1]:
from pgvector.sqlalchemy import Vector
from sentence_transformers import SentenceTransformer
from sqlalchemy import create_engine, insert, select, text, Integer, String, Text
from sqlalchemy.orm import declarative_base, mapped_column, Session

ModuleNotFoundError: No module named 'pgvector'

In [None]:

# Engine for the local PostgreSQL database used by this example
engine = create_engine('postgresql+psycopg://localhost/pgvector_example')

# Make sure the "vector" extension exists before any table uses it;
# engine.begin() commits the transaction when the block exits without error
with engine.begin() as connection:
    connection.execute(text('CREATE EXTENSION IF NOT EXISTS vector'))

# Declarative base class that the ORM models inherit from
Base = declarative_base()

# Number of dimensions of the vectors stored in the embedding column
EMBEDDING_DIM = 384


# ORM model for one stored document
class Document(Base):
    """A piece of text stored together with its embedding vector."""

    __tablename__ = 'document'

    id = mapped_column(Integer, primary_key=True)  # integer primary key
    content = mapped_column(Text)  # text of the document
    embedding = mapped_column(Vector(EMBEDDING_DIM))  # pgvector column of EMBEDDING_DIM dimensions

# Recreate the document table so that every run starts from an empty table
Base.metadata.drop_all(engine)
Base.metadata.create_all(engine)

# Sentences to embed and store
sentences = [
    'The dog is barking',
    'The cat is purring',
    'The bear is growling',
]

# Load the SentenceTransformer model
model = SentenceTransformer('all-MiniLM-L6-v2')

# Generate one embedding per sentence
embeddings = model.encode(sentences)

# Pair each sentence with its embedding as a row dictionary for the bulk insert
documents = [
    dict(content=sentence, embedding=embedding)
    for sentence, embedding in zip(sentences, embeddings)
]

# The context manager closes the session (and releases its connection) even
# if one of the statements below raises
with Session(engine) as session:
    # Insert the documents and commit so the rows are actually persisted
    session.execute(insert(Document), documents)
    session.commit()

    # Retrieve a document by its ID; the table was just recreated, so the
    # first inserted row is expected to have id 1
    doc = session.get(Document, 1)

    # Find the documents closest to it by cosine distance, excluding itself
    nearest_first = (
        select(Document)
        .filter(Document.id != doc.id)
        .order_by(Document.embedding.cosine_distance(doc.embedding))
        .limit(5)
    )
    neighbors = session.scalars(nearest_first)

    # Print the content of the similar documents while the session is open,
    # because the result rows are fetched as the loop iterates
    for neighbor in neighbors:
        print(neighbor.content)
