## Qdrant

- Vector Database
- Open Source
- An alternative to the Pinecone vector database
- Also available as a managed cloud service

Website: https://qdrant.tech

### Setup
- Set up a free 1 GB cluster in the cloud service
- The vector database is persistent over time
- The database is reachable at a URL
- The data is accessible through simple APIs

In [None]:
# https://87cf3a23-f1db-424a-9d5f-da212d4074aa.us-east4-0.gcp.cloud.qdrant.io

In [None]:
# api_key = ''

In [None]:
# Cluster > Collection > Vector Store > Point or Vector

In [None]:
# A vector is a numerical representation of your text

In [None]:
!pip install qdrant_client openai tiktoken langchain-openai langchain_community langchain

In [None]:
import os
import qdrant_client

from langchain_community.vectorstores import Qdrant
from langchain_openai import OpenAIEmbeddings

In [None]:
# Create a Qdrant client
#
# Connection settings are read from the environment. To set them from inside
# the notebook instead, uncomment the two lines below and fill in the values:
# os.environ['QDRANT_HOST'] = ''
# os.environ['QDRANT_API_KEY'] = ''

# Display the configured host; raises KeyError if QDRANT_HOST is not set.
os.environ['QDRANT_HOST']


In [None]:
# Connect to the Qdrant instance named by the QDRANT_HOST variable.
# The API key is read from QDRANT_API_KEY so that a protected (cloud) cluster
# can authenticate. When the variable is unset, os.getenv returns None, which
# is the client's default, so the connection is made without a key as before.
client = qdrant_client.QdrantClient(
    os.getenv('QDRANT_HOST'),
    api_key=os.getenv('QDRANT_API_KEY'),
)

In [None]:
# Show the host value the client was given (None if QDRANT_HOST is unset).
os.getenv('QDRANT_HOST')

In [None]:
from qdrant_client.http import models

In [None]:
# Create a collection (a database of vectors).
# A collection needs a name, the vector size (dimension), and the distance
# metric used to compare vectors -- cosine distance here.

os.environ["QDRANT_COLLECTION_NAME"] = 'collection1'
collection_name = os.environ["QDRANT_COLLECTION_NAME"]

vector_config = models.VectorParams(
    size=1536,  # same dimension as the OpenAI embedding vectors
    distance=models.Distance.COSINE,
)

# recreate_collection() is deprecated in qdrant-client. Do the same thing
# explicitly: drop any existing collection with this name (discarding its
# points), then create it fresh so the notebook can be re-run from a clean
# state.
if client.collection_exists(collection_name=collection_name):
    client.delete_collection(collection_name=collection_name)

client.create_collection(
    collection_name=collection_name,
    vectors_config=vector_config,
)

In [None]:
# List the collections known to the Qdrant server.
client.get_collections()

In [None]:
# Create a vector store to store the documents

In [None]:
# Check that the OpenAI key is configured WITHOUT echoing the secret into the
# notebook output, where it would be saved with the file and easily shared.
# Still raises KeyError when the variable is missing; displays True when it
# is set to a non-empty value.
bool(os.environ["OPENAI_API_KEY"])

In [None]:
# Wrap the Qdrant collection in a LangChain vector store. The wrapper takes
# the client, the collection name, and the embedding model.

# To set the key from inside the notebook, uncomment and fill in:
# os.environ["OPENAI_API_KEY"] = ""

embeddings = OpenAIEmbeddings()

vector_store = Qdrant(
    client=client,
    collection_name=os.getenv("QDRANT_COLLECTION_NAME"),
    embeddings=embeddings,
)

In [None]:
# Add a document to vector store

In [None]:
# long documents > split the document into chunks > 1000 characters

In [None]:
# Load the news article that will be split into chunks below.
# The encoding is given explicitly so the text decodes the same way on every
# platform instead of depending on the locale's default encoding.
# NOTE(review): assumes tsla_news.txt is UTF-8 encoded -- confirm.
with open('tsla_news.txt', encoding='utf-8') as f:
    raw_text = f.read()

# Print once the file is closed; the handle is not needed for display.
print(raw_text)

In [None]:
from langchain.text_splitter import CharacterTextSplitter

def get_chunks(text, chunk_size=200, chunk_overlap=40):
    """Split *text* into overlapping chunks on newline boundaries.

    Args:
        text: The raw document text to split.
        chunk_size: Target chunk length, measured with ``len`` (characters).
            Defaults to 200, the value this notebook originally hard-coded.
        chunk_overlap: Target overlap between consecutive chunks, in the same
            unit. Defaults to 40, the original hard-coded value.

    Returns:
        The list of chunks returned by ``CharacterTextSplitter.split_text``.
    """
    text_splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        length_function=len,
    )

    return text_splitter.split_text(text)


In [None]:
# Split the loaded article into chunks and print them for inspection.
texts = get_chunks(raw_text)
print(texts)

In [None]:
# Number of chunks produced by get_chunks.
len(texts)

In [None]:
# Add the chunks to the vector store, which is backed by the Qdrant
# collection and uses `embeddings` to turn each text into a vector.
vector_store.add_texts(texts)

In [None]:
# query the data

In [None]:
# retriever

from langchain.llms import OpenAI
from langchain.chains import RetrievalQA
from langchain.schema import retriever


In [None]:
# Build a retrieval question-answering chain from three parts: the OpenAI
# LLM, the "stuff" chain type, and a retriever backed by the vector store.
qa = RetrievalQA.from_chain_type(
    llm=OpenAI(),
    chain_type="stuff",
    retriever=vector_store.as_retriever(),
)

In [None]:
# Ask a question about the indexed article.
# Chain.run() is deprecated in LangChain. invoke() takes the chain inputs as
# a dict and returns a dict of outputs; RetrievalQA reads the question from
# "query" and puts the answer text under "result".
query = "What was the tesla earning price"
response = qa.invoke({"query": query})["result"]
print(response)

In [None]:
# Ask for a summary of the indexed article.
# Chain.run() is deprecated in LangChain. invoke() takes the chain inputs as
# a dict and returns a dict of outputs; RetrievalQA reads the question from
# "query" and puts the answer text under "result".
query = "Give me the top 5 main key points of the news"
response = qa.invoke({"query": query})["result"]
print(response)