In [9]:
import logging
import os
from langchain.embeddings import OpenAIEmbeddings
from langchain.llms import OpenAI
from langchain.vectorstores import Qdrant
from langchain.retrievers.self_query.base import SelfQueryRetriever
from langchain.chains.query_constructor.base import AttributeInfo
from langchain.chains import RetrievalQA
from qdrant_client import QdrantClient


# see what langchain is doing
logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger("langchain")

In [10]:
openai_api_key = !security find-generic-password -s 'OpenAI API Key' -w
os.environ["OPENAI_API_KEY"] = openai_api_key[0]

In [11]:
embeddings_model = OpenAIEmbeddings()
llm = OpenAI()
client = QdrantClient("localhost", port=6333)

DEBUG:httpx:load_ssl_context verify=True cert=None trust_env=True http2=False
DEBUG:httpx:load_verify_locations cafile='/Users/lukelefebure/Library/Caches/pypoetry/virtualenvs/ungd-rag-j4EQDb0v-py3.10/lib/python3.10/site-packages/certifi/cacert.pem'


In [21]:
db = Qdrant(
    client,
    collection_name="speech_parts_subset",
    embeddings=OpenAIEmbeddings()
)
retriever = db.as_retriever()
qa = RetrievalQA.from_chain_type(llm=OpenAI(), chain_type="stuff", retriever=retriever, return_source_documents=True)

In [23]:
response = qa("summarize the discussion about climate change")

DEBUG:openai:message='Request to OpenAI API' method=post path=https://api.openai.com/v1/embeddings
DEBUG:openai:api_version=None data='{"input": [[1264, 5730, 553, 279, 10430, 922, 10182, 2349]], "model": "text-embedding-ada-002", "encoding_format": "base64"}' message='Post details'
DEBUG:urllib3.util.retry:Converted retries value: 2 -> Retry(total=2, connect=None, read=None, redirect=None, status=None)
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): api.openai.com:443
DEBUG:urllib3.connectionpool:https://api.openai.com:443 "POST /v1/embeddings HTTP/1.1" 200 None
DEBUG:openai:message='OpenAI API response' path=https://api.openai.com/v1/embeddings processing_ms=50 request_id=ea9a602ff15099acc44bd6bcd166ba70 response_code=200
DEBUG:httpcore.connection:connect_tcp.started host='localhost' port=6333 local_address=None timeout=5.0 socket_options=None
DEBUG:httpcore.connection:connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x11b5d1330>
DEBUG:h

In [17]:
# schema
metadata_field_info=[
    AttributeInfo(
        name="country",
        description="The country of the speaker", 
        type="string", 
    ),
    AttributeInfo(
        name="iso_code",
        description="The ISO country code of the country of the speaker", 
        type="string", 
    ),
    AttributeInfo(
        name="post",
        description="The position of the speaker", 
        type="string",
    ),
    AttributeInfo(
        name="session",
        description="The session number",
        type="float"
    ),
    AttributeInfo(
        name="speaker",
        description="The name of the speaker",
        type="string"
    ),
    AttributeInfo(
        name="year",
        description="The year of the speech",
        type="float"
    ),
]
document_content_description = "A snippet of a speech given at the United Nations General Debate"

In [30]:
retriever = SelfQueryRetriever.from_llm(
    llm,
    db,
    document_content_description,
    metadata_field_info,
    chain_kwargs={"allowed_operators": ["AND", "OR", "GT", "LTE"]},
    verbose=True
)

In [31]:
docs = retriever.get_relevant_documents("what was discussed about climate change in the 1980s")

DEBUG:openai:message='Request to OpenAI API' method=post path=https://api.openai.com/v1/completions
DEBUG:openai:api_version=None data='{"prompt": ["Your goal is to structure the user\'s query to match the request schema provided below.\\n\\n<< Structured Request Schema >>\\nWhen responding use a markdown code snippet with a JSON object formatted in the following schema:\\n\\n```json\\n{\\n    \\"query\\": string \\\\ text string to compare to document contents\\n    \\"filter\\": string \\\\ logical condition statement for filtering documents\\n}\\n```\\n\\nThe query string should contain only text that is expected to match the contents of documents. Any conditions in the filter should not be mentioned in the query as well.\\n\\nA logical condition statement is composed of one or more comparison and logical operation statements.\\n\\nA comparison statement takes the form: `comp(attr, val)`:\\n- `comp` (eq | lt | lte | gt | gte): comparator\\n- `attr` (string):  name of attribute to ap

In [32]:
docs

[]