In [3]:
import os

import nest_asyncio
from dotenv import load_dotenv

# Load variables from a local .env file into the process environment so the
# os.getenv(...) lookups in later cells can read the service credentials.
load_dotenv()

# Patch asyncio to allow nested event loops; the notebook kernel already runs
# a loop of its own.
nest_asyncio.apply()

In [4]:
# LlamaParse credential taken from the environment; None when the variable is unset.
llamaparse_api_key = os.environ.get("LLAMA_CLOUD_API_KEY")

In [6]:
from llama_parse import LlamaParse

# Send the census PDF to LlamaParse and request the result as markdown.
# `parse_documents` holds whatever load_data returns and is what the indexing
# cell later passes to VectorStoreIndex.from_documents.
parse_documents = LlamaParse(
    api_key=llamaparse_api_key,
    result_type="markdown",
).load_data(["./data/School_Census_Report_2021.pdf"])

Started parsing the file under job_id 8a6a8643-8e5b-4a22-bb13-9eec040ae034
....

In [7]:
import qdrant_client

# Connection details for the Qdrant instance come from the environment.
# NOTE(review): both values are None when the variables are unset; confirm
# how QdrantClient behaves in that case before relying on it.
qdrant_api_key = os.environ.get("QDRANT_API_KEY")
qdrant_url = os.environ.get("QDRANT_URL")

# Shared client handle, reused by the vector store in the indexing cell.
client = qdrant_client.QdrantClient(url=qdrant_url, api_key=qdrant_api_key)

In [8]:


from llama_index.embeddings.huggingface import HuggingFaceEmbedding

# Identifier of the sentence-transformers model used to embed text.
modelPath = "sentence-transformers/all-MiniLM-l6-v2"

# Embedding model instance; registered on Settings in a later cell.
embed_model = HuggingFaceEmbedding(
    model_name=modelPath,
)

In [9]:
from llama_index.llms.groq import Groq

# Groq credential taken from the environment; None when the variable is unset.
groq_api_key = os.environ.get("GROQ_API_KEY")

# LLM instance; registered on Settings in a later cell.
# NOTE(review): confirm that this model id is still served by Groq.
llm = Groq(api_key=groq_api_key, model="mixtral-8x7b-32768")


In [10]:
from llama_index.core import Settings

# Register the LLM and embedding model on the global Settings object so the
# later indexing and query cells do not need them passed explicitly.
Settings.llm = llm
Settings.embed_model = embed_model

In [14]:
from llama_index.core import VectorStoreIndex, StorageContext
from llama_index.vector_stores.qdrant import QdrantVectorStore

# Qdrant collection that stores the embedded chunks of the parsed report.
collection_name = 'llamaparse_qdrant_rag'

vector_store = QdrantVectorStore(client=client, collection_name=collection_name)
storage_context = StorageContext.from_defaults(vector_store=vector_store)

# The collection persists across kernel restarts. Re-running from_documents
# embeds and uploads every chunk again, which leaves duplicate points in the
# collection and lets retrieval return the same passage more than once. Only
# build the index when the collection is missing or empty; otherwise reuse it.
# To force a rebuild (e.g. after changing the PDF or the embedding model),
# delete the collection first.
collection_is_populated = (
    client.collection_exists(collection_name)
    and client.count(collection_name=collection_name, exact=True).count > 0
)

if collection_is_populated:
    # Attach to the stored vectors without re-embedding anything.
    index = VectorStoreIndex.from_vector_store(vector_store=vector_store)
else:
    # First run: embed the parsed documents and upload them to Qdrant.
    index = VectorStoreIndex.from_documents(documents=parse_documents, storage_context=storage_context, show_progress=True)


Parsing nodes:   0%|          | 0/1 [00:00<?, ?it/s]

Generating embeddings:   0%|          | 0/136 [00:00<?, ?it/s]



In [19]:
# Build a query engine on top of the index, using its default settings.
query_engine = index.as_query_engine()

# Ask a question about the parsed census report and print the answer.
# The query text is what gets embedded for retrieval and sent to the LLM, so
# the spelling is corrected ("goverment" -> "government") and the trailing
# space is dropped.
query = "How many government schools?"
response = query_engine.query(query)
print(response)

The context information provided indicates that there are 10,146 government schools in total.
