# Import the required libraries




In [1]:
from langchain.vectorstores import Chroma
from langchain.embeddings import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.llms import OpenAI
from langchain.chains import VectorDBQA, RetrievalQA
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import TextLoader, PyPDFLoader
import openai
import os



# Loading Documents


In [2]:
# Location of the PDF handed to PyPDFLoader (Nestlé HR policy, per the file name).
HR_POLICY_PDF_URL = (
    "https://www.nestle.com/sites/default/files/asset-library/"
    "documents/jobs/the_nestle_hr_policy_pdf_2012.pdf"
)

# `documents` holds whatever the loader returns for that PDF; it is the input
# to the splitting step further down.
loader = PyPDFLoader(HR_POLICY_PDF_URL)
documents = loader.load()

# Creating Vector Representations of the Text

In [3]:
# Splitter settings, named so they are easy to find and tune.
CHUNK_SIZE = 1000
CHUNK_OVERLAP = 0

# Break the loaded documents into chunks before embedding them.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
)
texts = text_splitter.split_documents(documents)

# Embed every chunk and index it in a Chroma store. The recorded cell output
# reports that this store runs without persistence, so it is rebuilt on each run.
embeddings = OpenAIEmbeddings()
vectordb = Chroma.from_documents(documents=texts, embedding=embeddings)



Using embedded DuckDB without persistence: data will be transient


# Setting Up Question-Answering System

In [4]:
# Question-answering chain: a chat model combined with a retriever over `vectordb`.
llm = ChatOpenAI(model_name="gpt-3.5-turbo")
retriever = vectordb.as_retriever()
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
)

# Defining Prompt Template

In [5]:
from langchain import PromptTemplate

# Prompt text wrapped around every user question. It is assembled line by line;
# the resulting string starts and ends with a newline and exposes a single
# placeholder, {question}.
template = (
    "\n"
    "I am a HR helpful assistant. Please answer the following question in English.\n"
    "Question: {question}\n"
    "Answer:\n"
)

# PromptTemplate that fills {question}; used as `prompt.format(question=...)`.
prompt = PromptTemplate(
    template=template,
    input_variables=["question"],
)


# Building and Launching the Chat Interface with Gradio

In [6]:
import gradio as gr

def add_text(history, text):
    """Append the user's message to the chat history and clear the textbox.

    Args:
        history: Chat history as a list of ``[user_message, bot_message]``
            pairs. ``None`` is treated as an empty history.
        text: The message the user just submitted.

    Returns:
        A ``(history, "")`` tuple: a new history list whose last entry is
        ``[text, None]`` (reply still pending), and an empty string used to
        reset the textbox. The list passed in is not modified.
    """
    updated = list(history or [])
    # Store the turn as a list rather than a tuple so the pending reply can be
    # filled in with item assignment later; a tuple would raise TypeError.
    updated.append([text, None])
    return updated, ""

def bot(history, show_source=False):
    """Answer the most recent user message and store the reply in the history.

    The latest user message is wrapped in the module-level ``prompt`` template
    and the formatted text is passed to the module-level ``qa`` chain.

    Args:
        history: Chat history as a list of ``[user_message, bot_message]``
            pairs; the last pair holds the question awaiting a reply.
        show_source: When True, append the top retrieved passage together with
            its ``source`` and ``page`` metadata to the answer. Defaults to
            False, which matches the previous behaviour (the source text was
            computed but never displayed).

    Returns:
        The same ``history`` list with the last pair replaced by
        ``[user_message, answer]``. An empty history is returned unchanged.
    """
    if not history:
        # Nothing to answer; avoid IndexError on history[-1].
        return history

    user_message = history[-1][0]
    query = prompt.format(question=user_message)
    answer = qa.run(query)

    if show_source:
        # Only hit the retriever a second time when the caller actually wants
        # the citation; use the chain's public retriever instead of the
        # private `_get_docs` helper.
        docs = qa.retriever.get_relevant_documents(query)
        if docs:
            top_doc = docs[0]
            answer = (
                answer
                + "\n\n"
                + top_doc.page_content
                + "\n"
                + "source:" + str(top_doc.metadata.get("source"))
                + ", page:" + str(top_doc.metadata.get("page"))
            )

    # Replace the whole pair so this also works if the entry is a tuple.
    history[-1] = [user_message, answer]
    return history

# Chat UI: a chatbot pane on top and a single-line textbox underneath.
with gr.Blocks() as demo:
    chatbot = gr.Chatbot(value=[], elem_id="chatbot").style(height=400)

    with gr.Row():
        with gr.Column(scale=0.6):
            txt = gr.Textbox(
                placeholder="Enter text and press enter",
                show_label=False,
            ).style(container=False)

    # On submit: first record the message and clear the box (add_text),
    # then let the bot fill in its reply.
    submit_event = txt.submit(
        fn=add_text,
        inputs=[chatbot, txt],
        outputs=[chatbot, txt],
    )
    submit_event.then(fn=bot, inputs=chatbot, outputs=chatbot)

# share=True: the recorded output shows both a local and a public gradio.live URL.
demo.launch(share=True)

Running on local URL:  http://127.0.0.1:7860
Running on public URL: https://db1924366e9519add0.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades (NEW!), check out Spaces: https://huggingface.co/spaces


