In [1]:
import logging
import os
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.llms import OpenAI
from langchain.vectorstores import Qdrant
from langchain.retrievers.self_query.base import SelfQueryRetriever
from langchain.chains.query_constructor.base import AttributeInfo
from langchain.chains import RetrievalQA
from qdrant_client import QdrantClient


# see what langchain is doing
logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger("langchain")

In [2]:
openai_api_key = !security find-generic-password -s 'OpenAI API Key' -w
os.environ["OPENAI_API_KEY"] = openai_api_key[0]

In [3]:
embeddings_model = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2"
)
llm = OpenAI()
client = QdrantClient("localhost", port=6333)

  from .autonotebook import tqdm as notebook_tqdm
INFO:sentence_transformers.SentenceTransformer:Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2
INFO:sentence_transformers.SentenceTransformer:Use pytorch device: cpu
DEBUG:httpx:load_ssl_context verify=True cert=None trust_env=True http2=False
DEBUG:httpx:load_verify_locations cafile='/Users/lukelefebure/Library/Caches/pypoetry/virtualenvs/ungd-rag-j4EQDb0v-py3.10/lib/python3.10/site-packages/certifi/cacert.pem'


In [4]:
db = Qdrant(
    client,
    collection_name="speech_parts",
    embeddings=embeddings_model
)
retriever = db.as_retriever()
qa = RetrievalQA.from_chain_type(llm=OpenAI(), chain_type="stuff", retriever=retriever, return_source_documents=True)

In [5]:
response = qa("summarize the discussion about climate change")

Batches: 100%|██████████| 1/1 [00:01<00:00,  1.42s/it]
DEBUG:httpcore.connection:connect_tcp.started host='localhost' port=6333 local_address=None timeout=5.0 socket_options=None
DEBUG:httpcore.connection:connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x10769ded0>
DEBUG:httpcore.http11:send_request_headers.started request=<Request [b'POST']>
DEBUG:httpcore.http11:send_request_headers.complete
DEBUG:httpcore.http11:send_request_body.started request=<Request [b'POST']>
DEBUG:httpcore.http11:send_request_body.complete
DEBUG:httpcore.http11:receive_response_headers.started request=<Request [b'POST']>
DEBUG:httpcore.http11:receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'transfer-encoding', b'chunked'), (b'content-encoding', b'gzip'), (b'vary', b'accept-encoding, Origin, Access-Control-Request-Method, Access-Control-Request-Headers'), (b'content-type', b'application/json'), (b'date', b'Sun, 29 Oct 2023 01:02:42 GMT')])
INFO:httpx:

In [6]:
# schema
metadata_field_info=[
    AttributeInfo(
        name="country",
        description="The country of the speaker", 
        type="string", 
    ),
    AttributeInfo(
        name="iso_code",
        description="The ISO country code of the country of the speaker", 
        type="string", 
    ),
    AttributeInfo(
        name="post",
        description="The position of the speaker", 
        type="string",
    ),
    AttributeInfo(
        name="session",
        description="The session number",
        type="float"
    ),
    AttributeInfo(
        name="speaker",
        description="The name of the speaker",
        type="string"
    ),
    AttributeInfo(
        name="year",
        description="The year of the speech",
        type="float"
    ),
]
document_content_description = "A snippet of a speech given at the United Nations General Debate"

In [11]:
retriever = SelfQueryRetriever.from_llm(
    llm,
    db,
    document_content_description,
    metadata_field_info,
    verbose=True
)

In [46]:
from langchain.prompts import PromptTemplate
prompt_template = """Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer. Cite the year and country of the speaker in your response.

{context}

Question: {question}
Helpful Answer:"""
document_prompt = PromptTemplate.from_template("Representative from {country} speaking in {year}\n{page_content}")
prompt = PromptTemplate.from_template(prompt_template)
qa = RetrievalQA.from_chain_type(
    llm=OpenAI(), chain_type="stuff", retriever=retriever, return_source_documents=True,
    chain_type_kwargs={"document_prompt": document_prompt, "prompt": prompt}
)
qa("what was discussed about climate change during the 1980s")

DEBUG:openai:message='Request to OpenAI API' method=post path=https://api.openai.com/v1/completions
DEBUG:openai:api_version=None data='{"prompt": ["Your goal is to structure the user\'s query to match the request schema provided below.\\n\\n<< Structured Request Schema >>\\nWhen responding use a markdown code snippet with a JSON object formatted in the following schema:\\n\\n```json\\n{\\n    \\"query\\": string \\\\ text string to compare to document contents\\n    \\"filter\\": string \\\\ logical condition statement for filtering documents\\n}\\n```\\n\\nThe query string should contain only text that is expected to match the contents of documents. Any conditions in the filter should not be mentioned in the query as well.\\n\\nA logical condition statement is composed of one or more comparison and logical operation statements.\\n\\nA comparison statement takes the form: `comp(attr, val)`:\\n- `comp` (eq | lt | lte | gt | gte): comparator\\n- `attr` (string):  name of attribute to ap

{'query': 'what was discussed about climate change during the 1980s',
 'result': ' During the 1980s, representatives from the UK and France discussed the possibility of climatic change caused by the increase in the greenhouse gases, leading to a global heat trap, during 1988 and 1989. They also discussed the need for a serious debate on the subject within the United Nations and the need to conclude an international agreement on climatic changes in 1989. In 1985, they discussed the need for increased dialog among the main industrialized countries.',
 'source_documents': [Document(page_content='The possibility of climatic change caused by the increase in the greenhouse gases, leading to a global heat trap, has become a real concern. The increases predicted in global temperatures would have substantial effects, within the next few decades, on life and human society. Even small variations would have great consequences in a crowded world.', metadata={'country': 'UK', 'iso_code': 'GBR', 'pos

In [45]:
qa.combine_documents_chain.llm_chain

LLMChain(prompt=PromptTemplate(input_variables=['context', 'question'], template="Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.\n\n{context}\n\nQuestion: {question}\nHelpful Answer:"), llm=OpenAI(client=<class 'openai.api_resources.completion.Completion'>, openai_api_key='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', openai_api_base='', openai_organization='', openai_proxy=''))

In [30]:
qa = RetrievalQA.from_chain_type(
    llm=OpenAI(), chain_type="stuff", retriever=retriever, return_source_documents=True,
    chain_type_kwargs={"document_prompt": ""},
)

In [31]:
qa.combine_documents_chain

StuffDocumentsChain(llm_chain=LLMChain(prompt=PromptTemplate(input_variables=['context', 'question'], template="Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.\n\n{context}\n\nQuestion: {question}\nHelpful Answer:"), llm=OpenAI(client=<class 'openai.api_resources.completion.Completion'>, openai_api_key='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', openai_api_base='', openai_organization='', openai_proxy='')), document_variable_name='context')

In [26]:
resp = qa("what was discussed about the arab-israel conflict during the 1970s")

DEBUG:openai:message='Request to OpenAI API' method=post path=https://api.openai.com/v1/completions
DEBUG:openai:api_version=None data='{"prompt": ["Your goal is to structure the user\'s query to match the request schema provided below.\\n\\n<< Structured Request Schema >>\\nWhen responding use a markdown code snippet with a JSON object formatted in the following schema:\\n\\n```json\\n{\\n    \\"query\\": string \\\\ text string to compare to document contents\\n    \\"filter\\": string \\\\ logical condition statement for filtering documents\\n}\\n```\\n\\nThe query string should contain only text that is expected to match the contents of documents. Any conditions in the filter should not be mentioned in the query as well.\\n\\nA logical condition statement is composed of one or more comparison and logical operation statements.\\n\\nA comparison statement takes the form: `comp(attr, val)`:\\n- `comp` (eq | lt | lte | gt | gte): comparator\\n- `attr` (string):  name of attribute to ap

In [27]:
resp

{'query': 'what was discussed about the arab-israel conflict during the 1970s',
 'result': ' The Arab-Israel conflict during the 1970s was discussed in terms of the October war strengthening the confidence of the Arab people in victory, the dispute being concerned with security and life, the root-cause being that the consequences of the aggression had still not been eliminated, and the Arab people being determined to shake off super-Power domination and recover their sacred territories and regain their national rights.',
 'source_documents': [Document(page_content='104.\tThe Arab people brought about an excellent situation through fighting the October war. This war has strengthened the confidence of the Arab people in victory over the Israeli aggressor and broken the stalemate that was deliberately created by the superPowers. Now, a disengagement has been effected between Egypt and Israel and between Syria and Israel, but the Middle East question is still far from being settled.', meta

In [28]:
from langchain.prompts import PromptTemplate

prompt_template = """Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.

{context}

Question: {question}
Helpful Answer:"""
PROMPT = PromptTemplate(
    template=prompt_template, input_variables=["context", "question"]
)

In [29]:
PROMPT

PromptTemplate(input_variables=['context', 'question'], template="Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.\n\n{context}\n\nQuestion: {question}\nHelpful Answer:")