In [None]:
# Load environment variables via python-dotenv before any later cell reads them.
# NOTE(review): presumably this is what supplies QDRANT_URL and QDRANT_API_KEY,
# which are read from os.environ when the Qdrant client is created — confirm.
from dotenv import load_dotenv
load_dotenv()


In [None]:
import os

import qdrant_client
from IPython.display import display, Markdown
from llama_index.core import Settings
from llama_index.core import StorageContext, SimpleDirectoryReader
from llama_index.core.indices.vector_store.base import VectorStoreIndex
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core.readers.file.base import Document
from llama_index.embeddings.ollama import OllamaEmbedding
from llama_index.llms.ollama import Ollama
from llama_index.vector_stores.qdrant import QdrantVectorStore
from qdrant_client.http.models import VectorParams, Distance

# Ollama-backed models: one for text generation, one for embeddings.
llm = Ollama(model="llama3.2:latest")
embed_model = OllamaEmbedding(model_name="nomic-embed-text:latest")

# Register both models as the llama-index global defaults.
Settings.embed_model = embed_model
Settings.llm = llm

# Global chunking defaults.
# NOTE(review): the SentenceSplitter created later in this notebook is given
# chunk_size=1024 / chunk_overlap=100 explicitly, which differs from the values
# below — confirm which pair is intended.
Settings.chunk_size = 512
Settings.chunk_overlap = 50

In [None]:
# Read the single source file into llama-index documents.
source_files = ["data/azure-data-factory.txt"]
reader = SimpleDirectoryReader(input_files=source_files)
documents = reader.load_data(show_progress=True)

# Quick sanity check: how many documents were loaded and what metadata the first one carries.
print(len(documents))
print(f"Document Metadata: {documents[0].metadata}")

In [None]:
# Show the raw text of the first loaded document for a visual check.
first_document_text = documents[0].text
print(first_document_text)

In [None]:
# Split the loaded documents into nodes with an explicitly configured splitter.
# NOTE(review): chunk_size=1024 / chunk_overlap=100 here differ from
# Settings.chunk_size = 512 / Settings.chunk_overlap = 50 set in the
# configuration cell — confirm which pair is intended.
splitter = SentenceSplitter(chunk_size=1024, chunk_overlap=100)
nodes = splitter.get_nodes_from_documents(documents)

# Report how many nodes were produced.
node_count = len(nodes)
print(f"Length of nodes : {node_count}\n")

# Show the first node's content, including its metadata, as a spot check.
first_node_content = nodes[0].get_content(metadata_mode="all")
print(f"get the content for node 0 :{first_node_content}")

In [None]:
# Values shared by every Qdrant call in this cell, kept in one place so they cannot drift apart.
COLLECTION_NAME = "azure-data-factory"
# Vector size of the collection; it has to equal the embedding model's output dimension.
# NOTE(review): 768 is assumed to match the nomic-embed-text model configured earlier — confirm.
EMBEDDING_DIM = 768

# Connection details come from the environment; a missing variable raises KeyError here.
client = qdrant_client.QdrantClient(
    url=os.environ["QDRANT_URL"],
    api_key=os.environ["QDRANT_API_KEY"],
)

# Create the collection only when it is missing, so re-running the cell is safe.
# create_collection() returns a plain bool, not a collection description, so the
# details are always fetched with get_collection() afterwards; calling
# model_dump_json() on the create result would raise AttributeError.
if client.collection_exists(COLLECTION_NAME):
    collection_status = "Collection already exists"
else:
    client.create_collection(
        collection_name=COLLECTION_NAME,
        vectors_config=VectorParams(size=EMBEDDING_DIM, distance=Distance.COSINE),
    )
    collection_status = "Collection created"

qdrant_collection = client.get_collection(COLLECTION_NAME)
print(f"{collection_status}:\n{qdrant_collection.model_dump_json()}")

# Wrap the collection as a llama-index vector store and expose it through a storage context.
vector_store = QdrantVectorStore(
    client=client,
    collection_name=COLLECTION_NAME,
    batch_size=64,
    parallel=1,
)
storage_context = StorageContext.from_defaults(vector_store=vector_store)

In [None]:
# Build the index from the pre-split nodes, using the Qdrant-backed storage context.
# NOTE(review): re-running this cell presumably inserts the nodes again, which may
# leave duplicate points in the collection — confirm before re-executing.
vector_index = VectorStoreIndex(
    nodes=nodes,
    storage_context=storage_context,
    show_progress=True,
)

In [None]:
# Create a query engine over the index with default options.
# Note that `query` holds the engine object, not a question string.
query = vector_index.as_query_engine()

In [None]:
# Ask the query engine a question about the indexed document.
question = "What is the Copy Activity?"
response = query.query(question)

In [None]:
# Render the answer text as Markdown in the notebook output.
# `display` and `Markdown` are imported with the other dependencies in the
# imports cell at the top, so a fresh kernel shows every dependency in one place.
display(Markdown(response.response))