https://python.langchain.com/en/latest/modules/indexes/vectorstores/examples/qdrant.html

In [1]:
import os
import qdrant_client
import sys
sys.path.insert(0, '../')
import local_secrets as secrets
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Qdrant
from langchain.vectorstores import Pinecone
from langchain.document_loaders import TextLoader
from llama_index.readers.qdrant import QdrantReader
from llama_index import GPTListIndex
from langchain.chat_models import ChatOpenAI
from langchain.chains import RetrievalQA
from langchain.chains.question_answering import load_qa_chain
import ipywidgets as widgets
from IPython.display import clear_output
import pinecone

In [2]:
os.environ['OPENAI_API_KEY'] = secrets.techstyle_openai_key
os.environ['PINECONE_API_KEY'] = secrets.techstyle_pinecone_api_key

In [9]:
loader = TextLoader('../state_of_the_union.txt')
documents = loader.load()
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
docs = text_splitter.split_documents(documents)
embeddings = OpenAIEmbeddings()
qdrant = Qdrant.from_documents(docs, embeddings, url='http://localhost:6333', collection_name="state_of_the_union",)

In [94]:
reader = QdrantReader(host="localhost")
query_vector=[0.3, 0.7]*768
documents = reader.load_data(collection_name="github_llama_index", query_vector=query_vector, limit=5)
index = GPTListIndex.from_documents(documents)

In [88]:
[doc.extra_info for doc in documents]

[{'file_name': 'mock_embed_model.py',
  'file_path': 'llama_index/token_counter/mock_embed_model.py'},
 {'file_name': 'sql_query.py',
  'file_path': 'llama_index/indices/struct_store/sql_query.py'},
 {'file_name': 'base.py',
  'file_path': 'llama_index/indices/struct_store/base.py'},
 {'file_name': 'pandas_query.py',
  'file_path': 'llama_index/indices/struct_store/pandas_query.py'},
 {'file_name': 'schema.py',
  'file_path': 'llama_index/indices/query/schema.py'}]

In [92]:
len(documents[0].get_embedding())

1536

In [7]:
# https://qdrant.tech/articles/langchain-integration/
os.environ['OPENAI_API_KEY'] = secrets.techstyle_openai_key
llm = ChatOpenAI()

# qdrant
#client = qdrant_client.QdrantClient(url='http://localhost:6333')
#retriever = Qdrant(client=client, collection_name='github_llama_index', embeddings=OpenAIEmbeddings(), content_payload_key='text').as_retriever(search_kwargs={"k": 4})

# pinecone
client = pinecone.init(api_key=secrets.techstyle_pinecone_api_key, environment='us-east-1-aws')
retriever = Pinecone(index=pinecone.Index('ssk'), embedding_function=OpenAIEmbeddings, text_key='text').as_retriever()

chain = load_qa_chain(llm=llm, chain_type='stuff') 

question = 'What are the classes in the llama_index package?'
question = 'How is the llama_index Node class used'
question = 'Show an example of how to use the llama_index Node class'
question = 'How can several llama_indexes be composed?'
question = 'Explain llama_index nodes'

# documents = retriever.get_relevant_documents(question)
#chain.run(input_documents=documents, question=question)

In [8]:
def on_button_clicked(b):
    with output:
        clear_output()
        documents = retriever.get_relevant_documents(question.value)
        print(chain.run(input_documents=documents, question=question.value))
output = widgets.Output()
question = widgets.Text()
button = widgets.Button(description='Chat')
button.on_click(on_button_clicked)
widgets.VBox([question, button, output])

VBox(children=(Text(value=''), Button(description='Chat', style=ButtonStyle()), Output()))

In [25]:
chain.document_prompt

PromptTemplate(input_variables=['page_content'], output_parser=None, partial_variables={}, template='{page_content}', template_format='f-string', validate_template=True)

In [103]:
len(documents)

5

In [19]:
retriever = Pinecone(index=pinecone.Index('ssk'), embedding_function=OpenAIEmbeddings.embed_query, text_key='text').as_retriever()
qa = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=retriever, return_source_documents=True)
result = qa({"query": 'How can several llama_indexes be composed? Show an example.'})

TypeError: Pinecone.__init__() got an unexpected keyword argument 'text_field'

In [None]:
retriever = Qdrant(client=client, collection_name='github_llama_index', embeddings=OpenAIEmbeddings(), content_payload_key='text').as_retriever()
qa = RetrievalQA.from_chain_type(llm=llm, chain_type="map_reduce", retriever=retriever, return_source_documents=True)
result = qa({"query": 'How can several llama_indexes be composed? Show an example.'})

In [46]:
index = pinecone.Index('ssk')
print(index.describe_index_stats())
vectorstore = Pinecone(index=index, embedding_function=OpenAIEmbeddings.embed_query, text_key='text')
documents = vectorstore.similarity_search('How can several llama_indexes be composed?')

{'dimension': 1536,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 335}},
 'total_vector_count': 335}


TypeError: OpenAIEmbeddings.embed_query() missing 1 required positional argument: 'text'

In [40]:
import langchain
print(langchain.__version__)

0.0.171


In [52]:
index.configuration.

TypeError: 'Configuration' object is not callable