##### Commands to run first (start a local Qdrant server with Docker):
1. docker info
2. docker pull qdrant/qdrant 
3. docker run -p 6333:6333 -v .:/qdrant/storage qdrant/qdrant

In [8]:
import os, yaml
from qdrant_client import QdrantClient
from langchain.vectorstores import Qdrant
from langchain.chat_models import AzureChatOpenAI
from langchain.document_loaders import PyPDFLoader
from langchain.embeddings import HuggingFaceBgeEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [2]:
# Read the settings used by the rest of the notebook from a local YAML file.
# NOTE(review): the file name is spelled 'cadentials.yaml' -- confirm it matches the file on disk.
# NOTE(review): yaml.FullLoader is kept unchanged; prefer yaml.safe_load if this
# file could ever come from an untrusted source.
with open('cadentials.yaml') as fh:
    credentials = yaml.load(fh, Loader=yaml.FullLoader)

# Export selected settings as environment variables.
# Each pair is (environment variable name, key in the YAML mapping); a missing
# key raises KeyError at the first pair that cannot be resolved.
for env_name, yaml_key in (
    ('OPENAI_API_KEY', 'OPENAI_API_KEY'),
    ('OPENAI_API_TYPE', 'OPENAI_API_TYPE'),
    ('AZURE_OPENAI_ENDPOINT', 'AD_OPENAI_API_BASE'),
    ('OPENAI_API_VERSION', 'AD_OPENAI_API_VERSION'),
    ("COHERE_API", 'COHERE_API'),
    ('ENGINE', 'ENGINE'),
):
    os.environ[env_name] = credentials[yaml_key]

In [3]:
# BGE embedder (small English model, v1.5) that returns normalized vectors.
# NOTE(review): the indexing and query cells further down use a separate
# `embeddings` object (different model name, normalization off); this
# `embedding` object is not referenced by the cells visible here.
embedding = HuggingFaceBgeEmbeddings(
    model_name="BAAI/bge-small-en-v1.5",
    model_kwargs={'device': 'mps'},  # device string is hard-coded; adjust for your hardware
    encode_kwargs={'normalize_embeddings': True},
)

# Azure-hosted chat model; deployment and model names are read from the
# `credentials` mapping loaded in the previous cell.
llm = AzureChatOpenAI(
    deployment_name=credentials['AD_DEPLOYMENT_ID'],
    model_name=credentials['AD_ENGINE'],
    temperature=0.9,
    max_tokens=256,
)

  from .autonotebook import tqdm as notebook_tqdm


In [7]:
# --- Load the PDF and split it into overlapping chunks ----------------------
loader = PyPDFLoader("data/political/UN SDG.pdf")
documents = loader.load()
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,    # maximum characters per chunk
    chunk_overlap=100,  # characters shared between neighbouring chunks
)
texts = text_splitter.split_documents(documents)

# --- Embedder used to index the chunks --------------------------------------
# The query cell passes this same `embeddings` object to its `Qdrant` wrapper,
# so queries are encoded with the model that produced the stored vectors.
embeddings = HuggingFaceBgeEmbeddings(
    model_name="BAAI/bge-small-en",
    model_kwargs={'device': 'mps'},  # device string is hard-coded; adjust for your hardware
    encode_kwargs={'normalize_embeddings': False},
)

# --- Build the collection on the local Qdrant server ------------------------
url = "http://localhost:6333"
qdrant = Qdrant.from_documents(
    texts,
    embeddings,
    url=url,
    prefer_grpc=False,
    collection_name="vector_db",
    # By default an existing collection with this name is reused and the new
    # points are appended, so every re-run of this cell would store the same
    # chunks again and the top-k search results would fill up with duplicates.
    # Recreating the collection keeps the cell idempotent under
    # "Restart & Run All". Remove this flag only if you deliberately want to
    # add documents to an already populated collection.
    force_recreate=True,
)

print("Vector DB Successfully Created!")

Vector DB Successfully Created!


In [None]:
# Connect to the local Qdrant server over HTTP (gRPC disabled).
client = QdrantClient(url="http://localhost:6333", prefer_grpc=False)

# Wrap the existing "vector_db" collection; `embeddings` comes from the
# indexing cell and is used to encode the query text.
db = Qdrant(client=client, embeddings=embeddings, collection_name="vector_db")

# Replace the placeholder with the question to search for.
query = "<Pass Your Query>"

# Fetch the 5 best-matching chunks together with their scores and print
# one dict per hit.
docs = db.similarity_search_with_score(query=query, k=5)
for doc, score in docs:
    print({"score": score, "content": doc.page_content, "metadata": doc.metadata})