In [0]:
%pip install chromadb textstat gradio

[43mNote: you may need to restart the kernel using dbutils.library.restartPython() to use updated packages.[0m
[43mNote: you may need to restart the kernel using dbutils.library.restartPython() to use updated packages.[0m


In [0]:
# Restart the Python process so the packages installed by the %pip cell can be used
# (the install output asks for exactly this).
dbutils.library.restartPython()

In [0]:
import os
import pandas as pd
import mlflow
import chromadb
from langchain.chains import RetrievalQA
from langchain.llms import Databricks
from langchain.chat_models import ChatDatabricks
from langchain.embeddings.databricks import DatabricksEmbeddings
from langchain.vectorstores import Chroma
from langchain_core.documents import Document

In [0]:
# Collect the PDF table to the driver as a pandas frame.
pdf_emb_table = spark.table("sarbani_dbrx_catalog.india_fe_demo.pdf_demo_tbl").toPandas()
# One plain dict per row, so rows can be handed to Chroma record by record.
pdf_emb_dict = pdf_emb_table.to_dict(orient="records")
len(pdf_emb_dict)

227

In [0]:
# Preview only the first record: echoing every row (document text plus embedding
# vector) floods the notebook output and bloats the saved file.
pdf_emb_dict[:1]

[{'id': 1,
  'url': 'dbfs:/Volumes/sarbani_dbrx_catalog/india_fe_demo/fe_demo_pdf/India_outlook_2023.pdf',
  'content': 'Market Intelligence & Analytics\nRider in the storm\nTracing India’s growth in a volatile world\nIn fiscal 2024, the Indian economy will grow a tad slower, hemmed in by sluggish exports and the lagged im- pact of rate hikes manifesting fully. Yet, corporate reve- nue will continue to grow in double digits, helped by buoyant domestic demand. Margins are expected to recover from a decadal low.\nMarch 2023\n1\n2\nAnalytical contacts\nCRISIL Economic Research\nDharmakirti Joshi Dipti Deshpande Adhish Verma Pankhuri Tandon Sharvari Rajadhyaksha\nCRISIL MI&A Research\nHetal Gandhi\nPushan Sharma Aniket Dani Surbhi Kaushal Sehul Bhatt Jignesh Surti Someet Soumyapratim Mohit Adnani Vikas Solanki Nitin Prakash Ashish Bankar Vishnu Kumar Aritra Banerjee Heena Fatwani Govind Krishnan Paurin Zaveri Rajan Kumar\nEditorial\nRaj Nambisan Subrat Mohapatra Sowmya Sivakumar Roshan Kum

In [0]:
# Default Chroma client for this notebook session.
client = chromadb.Client()
# get_or_create keeps this cell re-runnable: create_collection fails once
# "pdf_collection" already exists in the running kernel.
collection = client.get_or_create_collection("pdf_collection")

In [0]:
# Number of records sent per add() call; batching replaces one call per row.
# NOTE(review): kept small so each call stays under Chroma's per-call batch limit --
# confirm the limit for the installed chromadb version before raising it.
ADD_BATCH_SIZE = 100

for batch_start in range(0, len(pdf_emb_dict), ADD_BATCH_SIZE):
  batch = pdf_emb_dict[batch_start:batch_start + ADD_BATCH_SIZE]
  collection.add(
    # .tolist() turns each stored embedding into a plain Python list for Chroma
    embeddings=[record['embedding'].tolist() for record in batch],
    uris=[record['url'] for record in batch],
    documents=[record['content'] for record in batch],
    # Chroma ids are strings, while the table ids are numeric
    ids=[str(record['id']) for record in batch],
  )
collection.count()

227

In [0]:
# Chat model behind the Databricks serving endpoint; each answer is capped at 256 tokens.
llm = ChatDatabricks(
    endpoint="databricks-dbrx-instruct",
    max_tokens=256,
)

# create the embedding function using Databricks Foundation Model APIs
# NOTE(review): queries are embedded with this endpoint, so it should be the same model
# that produced the stored `embedding` column -- confirm against the table's pipeline.
embedding_function = DatabricksEmbeddings(endpoint="databricks-bge-large-en")

# LangChain wrapper over the existing Chroma collection (reuses the same client).
docsearch = Chroma(
    client=client,
    collection_name="pdf_collection",
    embedding_function=embedding_function,
)

qa = RetrievalQA.from_chain_type(
    llm=llm,
    # "stuff" puts all retrieved documents into a single prompt
    chain_type="stuff",
    # Retrieval options must go through search_kwargs. The previous bare `fetch_k=3`
    # keyword is not a retriever field and had no effect (fetch_k is also only used by
    # MMR search). `k` is what limits the number of retrieved documents.
    retriever=docsearch.as_retriever(search_kwargs={"k": 3}),
    # keep the retrieved documents in the chain output alongside "result"
    return_source_documents=True,
)

* 'schema_extra' has been renamed to 'json_schema_extra'


In [0]:
# Optional smoke test -- uncomment to query the chain directly:
# qa("what is Crisil India mission?")

In [0]:
def respond(message, history):
  """Answer one chat message with the retrieval QA chain.

  Args:
    message: The user's question. ``None`` or whitespace-only input is rejected.
    history: Earlier chat turns supplied by the chat UI. Accepted so the function
      matches the chat callback signature; it is not used.

  Returns:
    The chain's ``"result"`` text, or an ``ERROR ...`` string when the question
    is empty.

  Errors raised by the chain are not caught; they propagate to the caller.
  """
  # Guard before touching the chain. The None check matters because None has no .strip().
  if message is None or not message.strip():
    return "ERROR the question should not be empty"
  # invoke() is the supported entry point; calling the chain object directly is
  # deprecated. RetrievalQA reads the question from the "query" key.
  return qa.invoke({"query": message})["result"]

In [0]:
# Ask the chain for three starter questions; they are later offered as examples in the chat UI.
# NOTE(review): the second argument lands in `history`, which respond() never reads,
# so "the document is a business report" does not reach the model.
examples = respond("can you write 3 questions specific to Crisil India", "the document is a business report")

In [0]:
# Split the answer on blank lines to inspect the individual questions.
examples.split("\n\n")

['1. How does Crisil India plan to navigate the challenges of tight liquidity and elevated funding costs in the Indian market to achieve its targeted margin improvement from 3.4% to 3.7% over the next 18 to 24 months?',
 "2. With the current pace of branch expansion falling short of the target of 1,500 branches, what is Crisil India's strategy to meet its growth objectives and how does it plan to manage the replacement of eHDFC Limited liabilities with deposits?",
 "3. Given the recent stake sale in Bandhan Bank and the potential sale of Credila, how does Crisil India plan to utilize the gains from these transactions and what impact will it have on the company's financial performance in the upcoming quarters?"]

In [0]:
import gradio as gr
from gradio.themes.utils import sizes

# Soft theme with the small text / radius / spacing presets.
theme = gr.themes.Soft(
    text_size=sizes.text_sm,
    radius_size=sizes.radius_sm,
    spacing_size=sizes.spacing_sm,
)

# Conversation pane and input box, built up front so the interface call stays readable.
chat_window = gr.Chatbot(
    show_label=False,
    container=False,
    show_copy_button=True,
    bubble_full_width=True,
)
question_box = gr.Textbox(placeholder="Ask me a question", container=False, scale=7)

# Each blank-line-separated chunk of the generated answer becomes one clickable example.
# Static alternative: [["Summarize the business report?"], ["What is the recent GDP estimates?"]]
example_prompts = examples.split("\n\n")

demo = gr.ChatInterface(
    fn=respond,
    chatbot=chat_window,
    textbox=question_box,
    title="Databricks LLM RAG demo - Chat with DBRX Databricks model serving endpoint",
    description="This chatbot is a demo example for DBRX llm chatbot. <br>This content is provided as a LLM RAG educational example, without support. It is using DBRX, can hallucinate and should not be used as production content.<br>Please review our dbdemos license and terms for more details.",
    examples=example_prompts,
    cache_examples=False,
    theme=theme,
    # only the "Clear" button is kept; retry and undo are hidden
    retry_btn=None,
    undo_btn=None,
    clear_btn="Clear",
)

# share=True also publishes a temporary public gradio.live URL (see the launch output):
# anyone holding that link can query the chain.
demo.launch(share=True)

Running on local URL:  http://127.0.0.1:7862
Running on public URL: https://a8967bef9082663c34.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)


