In [1]:
# Run the setup module's init_django() first; the next cell imports models
# from the `blog` app, so this has to happen before those imports.
# NOTE(review): presumably this configures Django settings and calls
# django.setup() — confirm in setup.py.
import setup
setup.init_django()

In [2]:
from decouple import config
from blog.models import BlogPost, EMBEDDING_LENGTH
from blog import services

In [3]:
# Preview the blog posts flagged with can_delete=True.
qs = BlogPost.objects.filter(
    can_delete=True,
)
qs

<QuerySet [<BlogPost: BlogPost object (5)>, <BlogPost: BlogPost object (6)>, <BlogPost: BlogPost object (7)>, <BlogPost: BlogPost object (8)>]>

!pip install llama-index sqlalchemy llama-index-vector-stores-postgres

In [4]:
# Dependency install command, kept commented out so it does not run on every
# execution of the notebook; uncomment it when setting up a fresh environment.
#!pip install llama-index sqlalchemy llama-index-vector-stores-postgres

In [5]:
from llama_index.llms.openai import OpenAI
from llama_index.embeddings.openai import OpenAIEmbedding

In [6]:
# Model settings and credentials, read through decouple's config().
# NOTE(review): "EMEDDING" (sic) is the spelling used both for the setting keys
# and for the variable names that later cells reference, so it is kept as-is.
LLM_MODEL = config("LLM_MODEL", default="gpt-4o")
EMEDDING_LENGTH = config(
    "EMEDDING_LENGTH",
    default=1536,
    cast=int,
)
EMEDDING_MODEL = config(
    "EMEDDING_MODEL",
    default="text-embedding-3-small",
)
OPENAI_API_KEY = config("OPENAI_API_KEY")

# Chat model and embedding model clients, both authenticated with the same key.
llm = OpenAI(api_key=OPENAI_API_KEY, model=LLM_MODEL)
embed_model = OpenAIEmbedding(api_key=OPENAI_API_KEY, model=EMEDDING_MODEL)

In [7]:
from llama_index.core import Settings

# Register the chat model and the embedding model on LlamaIndex's global
# Settings object.
Settings.llm, Settings.embed_model = llm, embed_model

In [8]:
# Name of the Postgres database and of the table used for the vector store.
vector_db_name = "vector_db"
vector_db_table_name = "blogpost"

In [9]:
# Read the pooled connection string and rewrite a leading "postgres://" scheme
# to "postgresql://".
# NOTE(review): presumably needed because SQLAlchemy only accepts the
# "postgresql://" spelling — confirm against the installed version.
DATABASE_URL = config("DATABASE_URL_POOL")
if DATABASE_URL.startswith("postgres://"):
    DATABASE_URL = "postgresql://" + DATABASE_URL[len("postgres://"):]

In [10]:
# Show the connection URL with the password masked, so the credential is not
# saved into the notebook's output when the file is shared or committed.
from sqlalchemy import make_url

make_url(DATABASE_URL).render_as_string(hide_password=True)

'postgresql://neondb_owner:***@ep-holy-paper-a66bzvpf-pooler.us-west-2.aws.neon.tech/vector_db?sslmode=require'

In [11]:
# Create the vector database if it does not exist yet.
from sqlalchemy import create_engine, text

# AUTOCOMMIT is used because PostgreSQL refuses to run CREATE DATABASE inside
# a transaction block.
engine = create_engine(DATABASE_URL, isolation_level="AUTOCOMMIT")
try:
    with engine.connect() as connection:
        # Look the database up by name in the server catalogue.
        result = connection.execute(
            text("SELECT 1 FROM pg_database WHERE datname = :db_name"),
            {"db_name": vector_db_name},
        )
        db_exists = result.scalar() == 1
        if not db_exists:
            # Run on the open connection (the previous code referenced an
            # undefined `session`, which raised NameError on this path).
            # NOTE(review): extensions are installed per database, so this
            # enables pgvector only in the database DATABASE_URL connects to,
            # not in the one created below — confirm that is intended.
            connection.execute(text('CREATE EXTENSION IF NOT EXISTS vector'))
            # Identifiers cannot be passed as bound parameters; vector_db_name
            # is a constant defined in this notebook, not user input.
            connection.execute(text(f"CREATE DATABASE {vector_db_name}"))
finally:
    # Release the pooled connections held by this short-lived engine.
    engine.dispose()

In [12]:
from sqlalchemy import make_url
from llama_index.vector_stores.postgres import PGVectorStore

# Host, port and credentials are taken from DATABASE_URL; the database name
# comes from vector_db_name rather than from the URL.
url = make_url(DATABASE_URL)

# HNSW index parameters passed through to the vector store.
hnsw_options = dict(
    hnsw_m=16,
    hnsw_ef_construction=64,
    hnsw_ef_search=40,
    hnsw_dist_method="vector_cosine_ops",
)

vector_store = PGVectorStore.from_params(
    host=url.host,
    port=url.port or 5432,  # fall back to 5432 when the URL has no port
    user=url.username,
    password=url.password,
    database=vector_db_name,
    table_name=vector_db_table_name,
    embed_dim=EMEDDING_LENGTH,
    hnsw_kwargs=hnsw_options,
)

In [13]:
from llama_index.core import VectorStoreIndex, StorageContext

# Wrap the Postgres vector store in a storage context and build an index on
# top of it.
storage_context = StorageContext.from_defaults(
    vector_store=vector_store,
)
index = VectorStoreIndex.from_vector_store(
    vector_store,
    storage_context=storage_context,
    show_progress=True,
)

# Query engine with default settings.
query_engine = index.as_query_engine()

In [14]:
# Query the index as loaded from the vector store table; this notebook only
# inserts documents in a later cell.
query_engine.query("My query")

Response(response="I'm sorry, I need more information to provide a relevant answer to your query. Could you please provide more details or clarify your question?", source_nodes=[NodeWithScore(node=TextNode(id_='599d5175-1af2-445b-9ab8-ce74ceb89bdc', embedding=None, metadata={'pk': 7, 'title': 'Blog Post 3'}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='7', node_type='4', metadata={'pk': 7, 'title': 'Blog Post 3'}, hash='0093a1df42857a4b9c9651d4f5f50e9365ebbd53deb1e17a3c4b62ca4597a29f')}, metadata_template='{key}: {value}', metadata_separator='\n', text='The weather is very hot', mimetype='text/plain', start_char_idx=0, end_char_idx=23, metadata_seperator='\n', text_template='{metadata_str}\n\n{content}'), score=0.17025399715465306), NodeWithScore(node=TextNode(id_='9a508a87-ec8c-436a-83e7-8badd5906e3d', embedding=None, metadata={'pk': 8, 'title': 'Blog Post 4'}, excluded_embed_metadata_keys=[], e

In [15]:
from llama_index.core import Document

# Build one LlamaIndex Document per deletable blog post. The post id is used as
# the document id, and pk/title are attached as metadata.
qs = BlogPost.objects.filter(can_delete=True)
docs = []
for obj in qs:
    docs.append(
        Document(
            text=f"{obj.get_embedding_text_raw()}",
            doc_id=str(obj.id),
            metadata={"pk": obj.pk, "title": obj.title},
        )
    )
docs
    

[Document(id_='5', embedding=None, metadata={'pk': 5, 'title': 'Blog Post 1'}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, metadata_template='{key}: {value}', metadata_separator='\n', text_resource=MediaResource(embeddings=None, data=None, text='The dog jumped over the cat', path=None, url=None, mimetype=None), image_resource=None, audio_resource=None, video_resource=None, text_template='{metadata_str}\n\n{content}'),
 Document(id_='6', embedding=None, metadata={'pk': 6, 'title': 'Blog Post 2'}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, metadata_template='{key}: {value}', metadata_separator='\n', text_resource=MediaResource(embeddings=None, data=None, text='The cat jumped over the dog', path=None, url=None, mimetype=None), image_resource=None, audio_resource=None, video_resource=None, text_template='{metadata_str}\n\n{content}'),
 Document(id_='7', embedding=None, metadata={'pk': 7, 'title': 'Blog Post 3'}, e

In [16]:
# For each document, first delete whatever is stored under its ref doc id and
# then insert the current version.
# NOTE(review): presumably done in this order so re-running the cell does not
# accumulate duplicate nodes — confirm.
for doc in docs:
    index.delete_ref_doc(f"{doc.id_}", delete_from_docstore=True)
    index.insert(doc)
    

Parsing nodes:   0%|          | 0/1 [00:00<?, ?it/s]

Parsing nodes:   0%|          | 0/1 [00:00<?, ?it/s]

Parsing nodes:   0%|          | 0/1 [00:00<?, ?it/s]

Parsing nodes:   0%|          | 0/1 [00:00<?, ?it/s]

In [17]:
# Build a new query engine from the index and run a query against it.
query_engine = index.as_query_engine()
response = query_engine.query(
    "The dog jumped",
)

In [18]:
# Display the full Response object (answer text plus the scored source nodes).
response

Response(response='The phrase "The dog jumped" is part of the content in "Blog Post 1."', source_nodes=[NodeWithScore(node=TextNode(id_='1fb59f6d-cab8-4447-b293-4dff00516d24', embedding=None, metadata={'pk': 5, 'title': 'Blog Post 1'}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='5', node_type='4', metadata={'pk': 5, 'title': 'Blog Post 1'}, hash='03f7b786b4e2d04897226456692b333e8d23c7a95cf0b30a6a7217a31498fc8f')}, metadata_template='{key}: {value}', metadata_separator='\n', text='The dog jumped over the cat', mimetype='text/plain', start_char_idx=0, end_char_idx=27, metadata_seperator='\n', text_template='{metadata_str}\n\n{content}'), score=0.578400696611364), NodeWithScore(node=TextNode(id_='27b10a3f-0e0e-4b86-b44a-bb2416740d72', embedding=None, metadata={'pk': 6, 'title': 'Blog Post 2'}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={<NodeRelationship.SOURCE: '

In [19]:
# Print every key/value pair of each metadata entry attached to the response.
for entry in response.metadata.values():
    for field, value in entry.items():
        print(field, value)

pk 5
title Blog Post 1
pk 6
title Blog Post 2


In [20]:
# Extract just the answer text from the Response object.
str(response.response)

'The phrase "The dog jumped" is part of the content in "Blog Post 1."'