In [128]:
from operator import itemgetter
from typing import List
from chain import model, vector_search_as_retriever
from langchain_core.documents import Document
from langchain_core.output_parsers import StrOutputParser, JsonOutputParser
from langchain_core.runnables import (
    RunnableLambda,
    RunnableParallel,
    RunnablePassthrough,
)

from langchain_community.retrievers import WikipediaRetriever
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder

# wiki = WikipediaRetriever(top_k_results=6, doc_content_chars_max=2000)
# wiki = vector_search_as_retriever
# Retrieval step: fans the incoming payload ({"input": ..., "history": ...})
# out into the three keys the rest of the pipeline consumes.
# NOTE: despite the name, this is backed by `vector_search_as_retriever`,
# not by the Wikipedia retriever.
wiki = RunnableParallel(
    {
        # Result of running the retriever on the user's text.
        "docs": itemgetter("input") | vector_search_as_retriever,
        # Only the user's text. A bare RunnablePassthrough() here would forward
        # the whole payload dict (history included), and the prompt would then
        # render that dict's repr after "Question:".
        "question": itemgetter("input"),
        # Prior messages, forwarded unchanged for the prompt's history slot.
        "history": itemgetter("history"),
    }
)

def format_docs(docs: List[Document]) -> str:
    """Render retrieved documents as a single prompt-ready string.

    Args:
        docs: Documents to render. Each one must have a ``url`` entry in its
            ``metadata`` and a ``page_content`` attribute.

    Returns:
        One ``Doc URL`` / ``Doc Snippet`` entry per document, separated by
        blank lines and preceded by a blank line. An empty list yields just
        the leading blank line (``"\\n\\n"``).

    Raises:
        KeyError: If a document's metadata has no ``url`` key.
    """
    # Entries are plain text: no markup is wrapped around them, because a lone
    # closing tag without a matching opener only adds noise to the prompt.
    formatted = [
        f"Doc URL: {doc.metadata['url']}\nDoc Snippet: {doc.page_content}"
        for doc in docs
    ]
    return "\n\n" + "\n\n".join(formatted)

# Human turn of the prompt: the rendered context followed by the question.
human_template = (
    "Answer the question based only on the following context:\n"
    "{context}\n"
    "\n"
    "Question: {question}\n"
)

# Message layout sent to the model: system instruction, then the prior turns
# injected under "history", then the new human turn.
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", "You are a helpful assistant. Answer all questions to the best of your ability."),
        MessagesPlaceholder(variable_name="history"),
        ("human", human_template),
    ]
)

# Generation sub-chain, run once retrieval is done: fill the prompt, call the
# model, and reduce the model output to a plain string.
answer = prompt | model | StrOutputParser()
# Pulls the "docs" entry out of the payload and renders it with format_docs.
# NOTE: this name shadows the builtin `format` for the rest of the notebook.
format = RunnableLambda(itemgetter("docs")) | RunnableLambda(format_docs)

def parse_output(output: dict):
    """Return only the ``answer`` entry of a chain result dict."""
    answer_text = output["answer"]
    return answer_text

# End-to-end pipeline: run the retrieval step, render the retrieved docs into
# `context`, generate `answer`, then keep only the answer text and the
# rendered context string.
chain = (
    wiki
    | RunnablePassthrough.assign(context=format)
    | RunnablePassthrough.assign(answer=answer)
).pick(["answer", "context"])

In [129]:
# Print the chain's input schema, then its output schema.
for schema_model in (chain.input_schema, chain.output_schema):
    print(schema_model.schema())

{'title': 'RunnableParallel<docs,question,history>Input', 'type': 'object', 'properties': {'input': {'title': 'Input'}, 'history': {'title': 'History'}}}
{'title': 'RunnableSequenceOutput', 'type': 'object', 'properties': {'context': {'title': 'Context', 'type': 'string'}, 'answer': {'title': 'Answer', 'type': 'string'}}}


In [130]:
from langchain_community.chat_message_histories import ChatMessageHistory
from langchain_core.chat_history import BaseChatMessageHistory
from langchain_core.runnables.history import RunnableWithMessageHistory

# In-memory registry of chat histories, keyed by session id.
store = {}


def get_session_history(session_id: str) -> BaseChatMessageHistory:
    """Return the chat history for ``session_id``, creating it on first use.

    A newly created history is registered in the module-level ``store`` so
    later calls with the same id get the same object back.
    """
    try:
        return store[session_id]
    except KeyError:
        created = store[session_id] = ChatMessageHistory()
        return created


# Wrap the chain so that every call loads the session's earlier messages into
# the "history" key and records the new turn afterwards.
with_message_history = RunnableWithMessageHistory(
    chain,
    get_session_history,
    # Key of the request payload that holds the user's new message.
    input_messages_key="input",
    # Key under which earlier messages are injected into the payload.
    history_messages_key="history",
    # The chain returns a dict ({"answer", "context"}), so the wrapper has to be
    # told which entry is the assistant reply. Without this it looks for an
    # "output" key, fails to find one, and the turn is not saved to the history.
    output_messages_key="answer",
)

In [131]:
await with_message_history.ainvoke(
    {"input": "what is dlt"},
    {"configurable": {"session_id": "unused"}},
)



{'answer': 'Based on the provided context, dlt is the Python module for Delta Live Tables. Delta Live Tables is a managed service for building and deploying reliable data pipelines on the Databricks platform. The dlt module provides Python functions for creating materialized views and streaming tables in Delta Live Tables.',
 'context': '\n\nArticle Title: https://docs.databricks.com/en/delta-live-tables/python-ref.html\nArticle Snippet: Import the dlt Python module\nDelta Live Tables Python functions are defined in the dlt module. Your pipelines implemented with the Python API must import this module:  \nimport dlt\n\nCreate a Delta Live Tables materialized view or streaming table\n\nArticle Title: https://docs.databricks.com/en/partners/prep/dbt-cloud.html\nArticle Snippet: Connect to dbt Cloud \xa0  \ndbt (data build tool) is a development environment that enables data analysts and data engineers to transform data by simply writing select statements. dbt handles turning these select

In [142]:
chunks = {'context': None, 'answer': ""}
async for chunk in with_message_history.astream({"input": "what is dlt"},
    {"configurable": {"session_id": "unused"}}):
    if 'context' in chunk.keys():
        chunks['context'] = chunk['context']
        continue
    print(chunk['answer'], end='', flush=True)
    chunks['answer'] += chunk['answer']



Based on the provided context, "dlt" refers to the Python module for Delta Live Tables. Delta Live Tables is a managed service for building and deploying reliable data pipelines on the Databricks platform. It provides features like automatic testing, monitoring, and error handling for data pipelines. The "dlt" module contains functions and classes for defining and executing Delta Live Tables pipelines in Python.



In [143]:
# Display the context and the full answer collected from the stream.
chunks

{'context': '\n\nArticle Title: https://docs.databricks.com/en/delta-live-tables/python-ref.html\nArticle Snippet: Import the dlt Python module\nDelta Live Tables Python functions are defined in the dlt module. Your pipelines implemented with the Python API must import this module:  \nimport dlt\n\nCreate a Delta Live Tables materialized view or streaming table\n\nArticle Title: https://docs.databricks.com/en/partners/prep/dbt-cloud.html\nArticle Snippet: Connect to dbt Cloud \xa0  \ndbt (data build tool) is a development environment that enables data analysts and data engineers to transform data by simply writing select statements. dbt handles turning these select statements into tables and views. dbt compiles your code into raw SQL and then runs that code on the specified database in Databricks. dbt supports collaborative coding patterns and best practices such as version control, documentation, and modularity.  \ndbt does not extract or load data. dbt focuses on the transformation s

In [99]:
# Print the wrapped runnable's execution graph as ASCII art.
with_message_history.get_graph().print_ascii()

                                     +------------------------+                                       
                                     | Parallel<history>Input |                                       
                                     +------------------------+                                       
                                          ***           ***                                           
                                        **                 **                                         
                                      **                     **                                       
                       +------------------------+        +-------------+                              
                       | Lambda(_enter_history) |        | Passthrough |                              
                       +------------------------+        +-------------+                              
                                          ***           ***              