In [None]:
%pip install -U chromadb==0.3.22 langchain==0.0.164 transformers==4.29.0 accelerate==0.19.0 requests==2.28.2 pysqlite3-binary bs4 nvidia-cudnn-cu11

In [None]:
pip install openai==0.28.1

In [None]:
import pandas as pd
from typing import Iterator
from transformers import pipeline
import torch
import pyspark.sql.functions as F
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma
import os
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline, AutoModelForSeq2SeqLM
from langchain.memory import ConversationSummaryBufferMemory
from langchain import PromptTemplate
from langchain.llms import HuggingFacePipeline
from langchain.chains.question_answering import load_qa_chain
from dotenv import load_dotenv
from langchain.llms import AzureOpenAI
import openai
from langchain.prompts.chat import (
    ChatPromptTemplate,
    MessagesPlaceholder,
    SystemMessagePromptTemplate,
    HumanMessagePromptTemplate,
)
from langchain.memory import ConversationBufferMemory
from langchain import LLMChain
from dotenv import load_dotenv



In [None]:
from dotenv import load_dotenv

# `dotenv_path` was never defined anywhere in this notebook, so the original
# call raised NameError on a fresh kernel. An explicit location can be supplied
# through the DOTENV_PATH environment variable; when it is unset (None),
# python-dotenv searches for a `.env` file on its own.
dotenv_path = os.getenv('DOTENV_PATH') or None
load_dotenv(dotenv_path)

# Azure OpenAI connection settings, all read from the environment.
openai.api_key = os.getenv('OPENAI_API_KEY')
openai.api_type = os.getenv('OPENAI_API_TYPE')
openai.api_base = os.getenv('OPENAI_API_BASE')
openai.api_version = os.getenv('OPENAI_API_VERSION')

# Model/deployment name handed to AzureOpenAI when the QA chain is built.
model_name = os.getenv('MODEL_NAME')
serp_api = os.getenv('SERPAPI_API_KEY')

In [None]:
# Pull the source table into pandas so each row can be flattened into one string.
data = sqlContext.sql("SELECT * FROM table").toPandas()

# Render every row as "<column> is <value>" fragments joined by ", " and store
# the result in a new 'text' column.
data['text'] = data.apply(
    lambda row: ', '.join(
        f'{col} is {value}' for col, value in zip(data.columns, row)
    ),
    axis=1,
)

# Only the flattened text column is persisted.
# NOTE(review): this overwrites the same table name that was read above.
a = data['text'].to_frame()
spark_df = spark.createDataFrame(a)
spark_df.write.mode("overwrite").saveAsTable("table")

In [None]:
docs_df = sqlContext.sql("SELECT * FROM table")


@F.pandas_udf("string")
def summarize(iterator: Iterator[pd.Series]) -> Iterator[pd.Series]:
    """Shorten long texts with a summarization model, batch by batch.

    Args:
        iterator: batches of text values, one pandas Series per batch.

    Yields:
        One Series per input batch. Values longer than 5000 characters are
        replaced by the model's summary; shorter values and non-string
        values (e.g. nulls) are passed through unchanged.
    """
    # The model is loaded once per UDF invocation and reused for every batch.
    torch.cuda.empty_cache()
    summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6", device_map="auto")

    def summarize_txt(text):
        # Null cells are not strings; calling len() on them would raise, so
        # they are returned as-is together with texts that are short enough.
        if not isinstance(text, str) or len(text) <= 5000:
            return text
        # truncation=True clips inputs that exceed the model's maximum input
        # length instead of handing an over-long sequence to the model.
        return summarizer(text, truncation=True)[0]['summary_text']

    for serie in iterator:
        # Summarize each row of the current batch.
        yield serie.apply(summarize_txt)
 
# Summarizing the entire dataset can take some time. The frame is collapsed to a
# single partition because only 1 GPU is assumed to be available by default.
docs_df = (
    docs_df
    .repartition(1)
    .withColumn("text_short", summarize("text"))
)

# Persist the result (schema merge enabled) and show the saved table.
(
    docs_df.write
    .mode("overwrite")
    .option("mergeSchema", "true")
    .saveAsTable("flight_training_dataset")
)
display(spark.table("flight_training_dataset"))

In [None]:
# Widget letting the user request that the chromadb embeddings be recomputed.
dbutils.widgets.dropdown(
    "reset_vector_database",
    "false",
    ["false", "true"],
    "Recompute embeddings for chromadb",
)

# Folder that holds the document database, as addressed through dbutils.fs.
flight_vector_db_path = "/dbdemos/product/llm/vector_db_flight_test"

# Don't recompute the embeddings if they're already available: the folder is
# only reset when the widget is explicitly set to "true".
widget_value = dbutils.widgets.get("reset_vector_database")
compute_embeddings = widget_value == "true"

if compute_embeddings:
  print(f"creating folder {flight_vector_db_path} under our blob storage (dbfs)")
  # Remove any previous content recursively, then recreate the empty folder.
  dbutils.fs.rm(flight_vector_db_path, True)
  dbutils.fs.mkdirs(flight_vector_db_path)

In [None]:
# Local-filesystem path (under /dbfs) that Chroma uses as its persist directory.
flight_vector_db_path = "/dbfs/dbdemos/product/llm/vector_db_flight_test"

# Sentence-transformers model used to embed documents and queries.
hf_embed = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-mpnet-base-v2",
)

# Handle on the "flight_docs" collection persisted at the path above.
chroma_db = Chroma(
    collection_name="flight_docs",
    embedding_function=hf_embed,
    persist_directory=flight_vector_db_path,
)

In [None]:
def build_qa_chain():
    """Build the question-answering chain used by the chat bot.

    The chain combines:
      * a prompt with three inputs: ``context`` (filled by langchain with the
        documents passed to the chain), ``chat_history`` (filled by the
        memory) and ``human_input`` (the user's message);
      * an AzureOpenAI LLM configured from the module-level ``model_name``;
      * a ConversationSummaryBufferMemory that uses a local t5-small
        summarization pipeline to summarize the chat history.

    Returns:
        The "stuff" QA chain created by ``load_qa_chain``.
    """
    torch.cuda.empty_cache()

    # Prompt content. langchain will load our similar documents as {context};
    # the memory injects {chat_history}; the caller supplies {human_input}.
    template =  """
    Do not generate user responses on your own and do not repeating same questions.

    You are a helpful flight ticket assistant. Your only task is to help user to understand their flight ticket purchased information. 
    The flight ticket purchased information are: departure date, flight number, ancillary purchases and flight-destination.
    To get the flight ticket purchased information, you need to collect information in the conversation such as PNR and email address. 
    Collect all of the information one by one. 
    After collecting all the information, make sure you display the details to the user at the end in this format:
    PNR: 
    Email Address:

    After user confirm the information is correct.
    Ask user what flight ticket purchased information that user want to know. 
    {context}
    Based on the context provide relevant information to user.
    For the flight ticket purchased information that do not covered. 
    Respond with just 'sorry, we dont have the information'.
     
    {chat_history}
 
    {human_input}
    """

    prompt = PromptTemplate(input_variables=['context', 'human_input', 'chat_history'], template=template)

    # Answering model: the same name is passed as both model name and engine.
    llm = AzureOpenAI(model_name=model_name,engine=model_name)

    # Model used to summarize our chat history.
    # We could use one of these models: https://huggingface.co/models?filter=summarization.
    # facebook/bart-large-cnn gives great results, we'll use t5-small for memory.
    # trust_remote_code is intentionally NOT enabled: it permits executing code
    # shipped with a checkpoint, and nothing here asks for custom model code.
    summarize_model = AutoModelForSeq2SeqLM.from_pretrained("t5-small", device_map="auto", torch_dtype=torch.bfloat16)
    summarize_tokenizer = AutoTokenizer.from_pretrained("t5-small", padding_side="left", model_max_length = 512)
    pipe_summary = pipeline("summarization", model=summarize_model, tokenizer=summarize_tokenizer)
    hf_summary = HuggingFacePipeline(pipeline=pipe_summary)

    # Empty prefixes keep speaker labels out of the stored history.
    memory = ConversationSummaryBufferMemory(llm=hf_summary, memory_key="chat_history", input_key="human_input", max_token_limit=500, human_prefix = "", ai_prefix = "")

    # verbose=True prints the full prompt on every call.
    print("loading chain, this can take some time...")
    return load_qa_chain(llm=llm, chain_type="stuff", prompt=prompt, verbose=True, memory=memory)

In [None]:
class ChatBot():
  """Small conversational front-end over a vector store and a QA chain.

  Args:
    db: object exposing ``similarity_search(query, k=...)``; used to fetch the
      documents handed to the chain as context.
  """

  def __init__(self, db):
    # Store the database first so the instance is fully usable as soon as the
    # chain has been built.
    self.db = db
    self.reset_context()

  def reset_context(self):
    """Forget the current conversation, rebuild the chain and greet the user."""
    self.sources = []
    self.discussion = []
    # Building the chain loads the models and can take some time.
    self.qa_chain = build_qa_chain()
    displayHTML("<h1>Hi! I'm a chat bot specialized in flight info. How Can I help you today?</h1>")

  def get_similar_docs(self, question, similar_doc_count):
    """Return the ``similar_doc_count`` documents closest to ``question``."""
    return self.db.similarity_search(question, k=similar_doc_count)

  def chat(self, question):
    """Send ``question`` to the chain and render the exchange as HTML.

    The last three user messages are joined to search for similar content;
    the single best match is passed to the chain as context. Nothing is
    returned: the question and answer are shown through ``displayHTML``.
    """
    # Local import keeps this block self-contained.
    from html import escape

    # Keep the last 3 discussion to search similar content
    self.discussion.append(question)
    similar_docs = self.get_similar_docs(" \n".join(self.discussion[-3:]), similar_doc_count=1)

    result = self.qa_chain({"input_documents": similar_docs, "human_input": question})

    # Cleanup the answer for better display. Only the first character is
    # upper-cased: str.capitalize() would lower-case everything else and mangle
    # case-sensitive details such as PNRs, flight numbers or e-mail addresses.
    answer = result['output_text'].strip()
    answer = answer[:1].upper() + answer[1:]

    # Both texts are untrusted (user input / model output), so they are
    # HTML-escaped before being embedded in the markup.
    result_html = f'<p><blockquote style="font-size:24px">{escape(question)}</blockquote></p>'
    result_html += f'<p><blockquote style="font-size:18px">{escape(answer)}</blockquote></p>'
    result_html += "<p><hr/></p>"
    displayHTML(result_html)
 
# Create the bot on top of the Chroma collection; the constructor builds the QA
# chain (which prints a loading message) and displays the greeting.
chat_bot = ChatBot(chroma_db)

loading chain, this can take some time...


In [None]:
# Send a first message; the question and the answer are rendered as HTML.
chat_bot.chat("Hi")