In [1]:
import json
import os

import numpy as np
import psycopg2
from pgvector.psycopg2 import register_vector
from sentence_transformers import SentenceTransformer

# Pgvector
- Uses a Sentence Transformers model to generate the embeddings
- More example code: [pgvector-python Sentence Transformers example](https://github.com/pgvector/pgvector-python/blob/master/examples/sentence_transformers/example.py)

In [14]:
class VectorStore():
    """Small pgvector-backed document store using sentence-transformer embeddings.

    Wraps one psycopg2 connection and cursor plus one SentenceTransformer
    model. Documents are written to, and searched in, the ``items`` table via
    its ``name``, ``content`` and ``embedding384`` columns. This class never
    creates that table, so it must already exist.

    Instances can be used as context managers; leaving the ``with`` block
    calls :meth:`terminate`.
    """

    # pgvector operators interpolated into the search SQL. They are fixed
    # class constants, never user input, so formatting them into the query
    # text is safe.
    _DISTANCE_OPERATOR = "<->"
    _COSINE_DISTANCE_OPERATOR = "<=>"

    def __init__(self, database="rag_db", user="postgres", password="password", model="all-MiniLM-L6-v2",
                 host="localhost", port="5432"):
        """Open the database connection and load the embedding model.

        Args:
            database: Name of the PostgreSQL database to connect to.
            user: Database user name.
            password: Database password. The default is only a local demo
                value; pass a real secret in from the environment instead of
                hard-coding it.
            model: Name (or path) handed to ``SentenceTransformer``.
            host: Database host. Defaults to the previously hard-coded
                ``"localhost"``.
            port: Database port. Defaults to the previously hard-coded
                ``"5432"``.

        Raises:
            Exception: Whatever ``psycopg2.connect``, ``register_vector`` or
                ``SentenceTransformer`` raise. If a step after the connect
                fails, the connection is closed before re-raising.
        """
        self.conn = psycopg2.connect(host=host, port=port, database=database, user=user, password=password)
        try:
            self.cur = self.conn.cursor()
            register_vector(self.conn)
            self.model = SentenceTransformer(model)
        except Exception:
            # Do not leak an open connection when setup fails part-way.
            self.conn.close()
            raise

    def __enter__(self):
        """Return the store itself so it can be bound with ``with ... as``."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the cursor and connection; exceptions are not suppressed."""
        self.terminate()

    def terminate(self):
        """Close the cursor and then the connection.

        The connection is closed even if closing the cursor raises.
        """
        try:
            self.cur.close()
        finally:
            self.conn.close()

    def get_embedding(self, text):
        """Return ``self.model.encode(text)``, the embedding of ``text``."""
        return self.model.encode(text)

    def add_document(self, content, name="demo"):
        """Embed ``content`` and insert it into ``items`` as one committed row.

        Args:
            content: Document text to embed and store.
            name: Value for the ``name`` column. Defaults to ``"demo"``, the
                value that was previously hard-coded.

        Raises:
            Exception: Any error from the insert or commit. The transaction is
                rolled back first so the connection stays usable.
        """
        embedding = self.get_embedding(content)
        try:
            self.cur.execute(
                "INSERT INTO items (name, content, embedding384) VALUES (%s, %s, %s)",
                (name, content, embedding),
            )
            self.conn.commit()
        except Exception:
            # Without a rollback, every later statement on this connection
            # fails because the transaction is left in an aborted state.
            self.conn.rollback()
            raise

    def _search(self, operator, alias, query, limit):
        """Run one nearest-neighbour query and return all fetched rows.

        Args:
            operator: One of the class-level pgvector operator constants.
            alias: Column alias for the computed distance; also the sort key.
            query: Text to embed and compare against ``embedding384``.
            limit: Maximum number of rows to return.
        """
        query_embedding = self.get_embedding(query)
        sql = f"""
            SELECT content, embedding384 {operator} %s AS {alias}
            FROM items
            ORDER BY {alias}
            LIMIT %s
        """
        try:
            self.cur.execute(sql, (query_embedding, limit))
            return self.cur.fetchall()
        except Exception:
            # Clear the aborted transaction so the store can be reused.
            self.conn.rollback()
            raise

    def search_documents_by_distance(self, query, limit=5):
        """Return up to ``limit`` ``(content, distance)`` rows, closest first.

        Rows are ordered ascending by the pgvector ``<->`` operator applied to
        the stored embedding and the embedding of ``query``.
        """
        return self._search(self._DISTANCE_OPERATOR, "distance", query, limit)

    def search_documents_by_cosine_distance(self, query, limit=5):
        """Return up to ``limit`` ``(content, cosine_distance)`` rows, closest first.

        Rows are ordered ascending by the pgvector ``<=>`` operator applied to
        the stored embedding and the embedding of ``query``.
        """
        return self._search(self._COSINE_DISTANCE_OPERATOR, "cosine_distance", query, limit)
    
# Connection settings are read from the PG* environment variables when they are
# set, falling back to the local demo values so the notebook still runs as-is.
# This keeps real credentials out of the notebook file.
vec = VectorStore(
    database=os.environ.get("PGDATABASE", "rag_db"),
    user=os.environ.get("PGUSER", "postgres"),
    password=os.environ.get("PGPASSWORD", "password"),
    model="all-MiniLM-L6-v2",
)

In [3]:
# Load the sample documents and index each one in the vector store.
# The file is opened as UTF-8 explicitly: the documents contain non-ASCII
# characters, and without `encoding=` Python falls back to the platform's
# locale encoding, which is not UTF-8 everywhere.
# NOTE: every document is inserted with a plain INSERT, so re-running this
# cell adds the same documents to the table again.
with open("data/sample.json", "r", encoding="utf-8") as f:
    sample_docs = json.load(f)["docs"]
    print(sample_docs)
for doc in sample_docs:
    vec.add_document(doc)

['Ash finally became the Pokémon Champion after years of hard work and determination.', 'Pikachu’s Thunderbolt attack saved the day during the intense battle.', 'The bond between trainers and their Pokémon is truly inspiring and heartwarming.', 'Team Rocket’s schemes often lead to chaos and trouble for innocent Pokémon.', 'A trainer abandoned their Pokémon, leaving it heartbroken and confused.', 'The forest was destroyed after a fierce battle between two powerful legendary Pokémon.', 'Charizard’s wings spread wide as it soared high above the clouds.', 'Trainers gathered in the arena, ready for the upcoming tournament.', 'A Poké Ball rolled to a stop on the ground, the capture uncertain.']


In [12]:
# Query the store, ranking matches by the `<->` distance (smallest first).
search_query = "Tell me more about pokeball"
results = vec.search_documents_by_distance(search_query)
print(f"Search results for: '{search_query}'")
# Each row is a (content, distance) pair; ranks are numbered from 1.
for i, row in enumerate(results, start=1):
    content, distance = row
    print(f"{i}. {content} (Distance: {distance:.4f})")

Search results for: 'Tell me more about pokeball'
1. A Poké Ball rolled to a stop on the ground, the capture uncertain. (Distance: 0.9112)
2. Team Rocket’s schemes often lead to chaos and trouble for innocent Pokémon. (Distance: 1.0872)
3. Ash finally became the Pokémon Champion after years of hard work and determination. (Distance: 1.1024)
4. The bond between trainers and their Pokémon is truly inspiring and heartwarming. (Distance: 1.1175)
5. A trainer abandoned their Pokémon, leaving it heartbroken and confused. (Distance: 1.1713)


In [18]:
# Query the store, ranking matches by the `<=>` cosine distance (smallest first).
search_query = "Tell me more about pokeball"
results = vec.search_documents_by_cosine_distance(search_query)
print(f"Search results for: '{search_query}'")
# Each row is a (content, cosine distance) pair; ranks are numbered from 1.
for i, row in enumerate(results, start=1):
    content, distance = row
    print(f"{i}. {content} (Cosine Distance: {distance:.4f})")

Search results for: 'Tell me more about pokeball'
1. A Poké Ball rolled to a stop on the ground, the capture uncertain. (Cosine Distance: 0.4152)
2. Team Rocket’s schemes often lead to chaos and trouble for innocent Pokémon. (Cosine Distance: 0.5910)
3. Ash finally became the Pokémon Champion after years of hard work and determination. (Cosine Distance: 0.6076)
4. The bond between trainers and their Pokémon is truly inspiring and heartwarming. (Cosine Distance: 0.6244)
5. A trainer abandoned their Pokémon, leaving it heartbroken and confused. (Cosine Distance: 0.6860)


# FAISS
- [FAISS vector store](https://medium.com/@amrita.thakur/understanding-faiss-vector-store-and-its-advantages-cdc7b54afe47#:~:text=In%20the%20age%20of%20information,of%20dense%20vectors%20at%20scale.)
- [Pinecone FAISS tutorial](https://www.pinecone.io/learn/series/faiss/faiss-tutorial/)
- [Similarity Search](https://www.datacamp.com/blog/faiss-facebook-ai-similarity-search)