In [13]:
from chain import chain, vector_search_as_retriever, model
import asyncio
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnableParallel, RunnablePassthrough, RunnableLambda
from langchain_core.messages import AIMessage, HumanMessage
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain.memory import ChatMessageHistory

In [6]:
input_example = {
    "messages": [
        {
            "role": "user",
            "content": "What is DLT?",
        },
    ]
}

In [21]:
async for chunk in chain.astream(input_example):
    print(chunk, end="", flush=True)

Based on the context provided, DLT refers to Delta Live Tables, which is a platform for building and deploying data pipelines in Databricks. It provides a Python API for defining data pipelines, and the functions for this API are defined in the `dlt` Python module. Therefore, to use Delta Live Tables in your Python code, you should import the `dlt` module as follows:

```python
import dlt
```

Delta Live Tables focuses on the transformation step of data pipelines, using a "transform after load" architecture. It assumes that you already have a copy of your data in your database, and it provides tools for transforming that data into tables and views using select statements. Delta Live Tables is available as a cloud-based service called dbt Cloud, as well as a local version called dbt Core. Both versions can use hosted git repositories, allowing you to create a dbt project and make it available to your dbt Cloud and dbt Core users.

In [3]:
human_template = """Answer the question based only on the following context:
{context}

Question: {question}
"""

prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You are a helpful assistant. Answer all questions to the best of your ability.",
        ),
        MessagesPlaceholder(variable_name="chat_history"),
        ("human", human_template),
    ]
)

output_parser = StrOutputParser()

def retrieve_preprocess(input:dict):
    return input['input']
retrieval = RunnableLambda()

# VS Index is getting a dict when it should be getting a str. Need to figure out how to preprocess this dict with a RunnableLambda
setup_and_retrieval = RunnableParallel(
    {"context": vector_search_as_retriever, "question": RunnablePassthrough()}
)
chain = setup_and_retrieval | prompt | model | output_parser

In [4]:
from langchain_core.runnables.history import RunnableWithMessageHistory

demo_ephemeral_chat_history_for_chain = ChatMessageHistory()

chain_with_message_history = RunnableWithMessageHistory(
    chain,
    lambda session_id: demo_ephemeral_chat_history_for_chain,
    input_messages_key="input",
    output_messages_key="query_text"
    history_messages_key="chat_history",
)

In [12]:
chain_with_message_history.invoke(
    {"input": "what is dlt"},
    {"configurable": {"session_id": "unused"}},
)



Exception: Response content b'{"error_code":"MALFORMED_REQUEST","message":"Could not parse request object: Expected Scalar value for String field \'query_text\'\\n at [Source: (ByteArrayInputStream); line: 1, column: 151]\\n at [Source: java.io.ByteArrayInputStream@58867505; line: 1, column: 151]"}', status_code 400

In [27]:
for chunk in chain.stream("What is DLT"):
    print(chunk, end="", flush=True)

Delta Live Tables (DLT) is a managed service for building and deploying data pipelines in Databricks. It provides features such as automatic schema enforcement, data quality monitoring, and auto-scaling to simplify the process of creating and maintaining data pipelines. DLT can be used with Python, and the `dlt` module provides various functions for creating materialized views and streaming tables.

In [11]:
from chain import vs_index

vs_index.describe()

{'name': 'field_demos.ssc_rag_chatbot.databricks_documentation_vs_index',
 'endpoint_name': 'dbdemos_vs_endpoint',
 'primary_key': 'id',
 'index_type': 'DELTA_SYNC',
 'delta_sync_index_spec': {'source_table': 'field_demos.ssc_rag_chatbot.databricks_documentation',
  'embedding_source_columns': [{'name': 'content',
    'embedding_model_endpoint_name': 'databricks-bge-large-en'}],
  'pipeline_type': 'TRIGGERED',
  'pipeline_id': '853da714-2cdd-44a2-bb99-518659f7217c'},
 'status': {'detailed_state': 'ONLINE',
  'message': 'Index is currently online but no updates are found for Delta Live Table: https://e2-dogfood.staging.cloud.databricks.com/explore/data/field_demos/ssc_rag_chatbot/databricks_documentation_vs_index. The pipeline may belong to another workspace. ',
  'indexed_row_count': 17238,
  'ready': True,
  'index_url': 'e2-dogfood.staging.cloud.databricks.com/api/2.0/vector-search/endpoints/dbdemos_vs_endpoint/indexes/field_demos.ssc_rag_chatbot.databricks_documentation_vs_index'},
