<a href="https://colab.research.google.com/github/tanishamg/tanishamg/blob/main/chatbot.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

tan

In [2]:
# Install the third-party packages used by the cells below (shell escapes).
!pip install googletrans==4.0.0-rc1
!pip install gradio
!pip install langchain
!pip install langchain_community
!pip install langchain_huggingface
!pip install pymupdf
!pip install chromadb
# NOTE(review): this line upgrades httpx/httpcore and re-resolves googletrans
# without the pin used on the first line, while the install log below shows
# googletrans 4.0.0rc1 requiring httpx==0.13.3 and httpcore==0.9.* - confirm
# the versions that end up installed are compatible with each other.
!pip install --upgrade googletrans httpcore httpx

Collecting googletrans==4.0.0-rc1
  Downloading googletrans-4.0.0rc1.tar.gz (20 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting httpx==0.13.3 (from googletrans==4.0.0-rc1)
  Downloading httpx-0.13.3-py3-none-any.whl.metadata (25 kB)
Collecting hstspreload (from httpx==0.13.3->googletrans==4.0.0-rc1)
  Downloading hstspreload-2024.8.1-py3-none-any.whl.metadata (2.1 kB)
Collecting chardet==3.* (from httpx==0.13.3->googletrans==4.0.0-rc1)
  Downloading chardet-3.0.4-py2.py3-none-any.whl.metadata (3.2 kB)
Collecting idna==2.* (from httpx==0.13.3->googletrans==4.0.0-rc1)
  Downloading idna-2.10-py2.py3-none-any.whl.metadata (9.1 kB)
Collecting rfc3986<2,>=1.3 (from httpx==0.13.3->googletrans==4.0.0-rc1)
  Downloading rfc3986-1.5.0-py2.py3-none-any.whl.metadata (6.5 kB)
Collecting httpcore==0.9.* (from httpx==0.13.3->googletrans==4.0.0-rc1)
  Downloading httpcore-0.9.1-py3-none-any.whl.metadata (4.6 kB)
Collecting h11<0.10,>=0.8 (from httpcore==0.9.*->httpx==0.13.3->googl

In [3]:
import gradio as gr
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.llms import HuggingFaceHub
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma
from langchain.chains import RetrievalQA
from langchain_community.document_loaders import PyMuPDFLoader
from googletrans import Translator
import os

# Set up your Hugging Face API token.
# A real token must never be committed with the notebook: reuse a value that
# is already present in the environment, otherwise prompt for it without
# echoing the input.
# NOTE: the literal token that used to be assigned here was published with the
# notebook and should be revoked in the Hugging Face account settings.
if not os.environ.get("HUGGINGFACEHUB_API_TOKEN"):
    from getpass import getpass

    os.environ["HUGGINGFACEHUB_API_TOKEN"] = getpass("Hugging Face API token: ")

# Initialize the translator and search history
translator = Translator()
search_history = []

# Global variables
chain = None  # RetrievalQA chain; assigned by load_doc once a PDF is indexed
last_query = None  # Most recent query, reused by regenerate_response
stop_generation = False  # Flag to control generation

# Function to load and process the PDF document
def load_doc(pdf_doc):
    """Index an uploaded PDF and build the question-answering chain.

    The PDF is read with ``PyMuPDFLoader``, split into overlapping chunks,
    embedded into a Chroma store, and wired to a ``RetrievalQA`` chain that is
    stored in the module-level ``chain`` variable.

    Args:
        pdf_doc: Value delivered by the file-upload component: either a path
            string or an object exposing the path as ``.name``. ``None`` or an
            empty value means nothing was uploaded.

    Returns:
        str: A success message, or a message starting with
        ``"Error loading document:"``. Failures are returned as text rather
        than raised so the caller can display them.
    """
    global chain
    if not pdf_doc:
        # Clicking the load button before uploading used to surface a cryptic
        # "'NoneType' object has no attribute 'name'" message.
        return "Error loading document: no PDF file was provided."
    try:
        # Accept both a bare path string and a wrapper object with ``.name``.
        pdf_path = getattr(pdf_doc, "name", pdf_doc)
        loader = PyMuPDFLoader(pdf_path)
        documents = loader.load()
        embedding = HuggingFaceEmbeddings()
        text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
        text = text_splitter.split_documents(documents)
        db = Chroma.from_documents(text, embedding)
        llm = HuggingFaceHub(repo_id="OpenAssistant/oasst-sft-1-pythia-12b", model_kwargs={"temperature": 0.5, "max_length": 150})
        chain = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=db.as_retriever())
        return 'Document has successfully been loaded'
    except Exception as e:
        return f"Error loading document: {str(e)}"

# Function to clean the responses
def clean_response(response):
    if "Use the following pieces of context" in response:
        response = response.split("Use the following pieces of context")[-1]
    if "Question:" in response:
        response = response.split("Question:")[-1]
    if "Helpful Answer:" in response:
        response = response.split("Helpful Answer:")[-1]

    return response.strip()

# Function to answer queries using the loaded PDF document
def answer_query(query):
    """Answer ``query`` against the loaded PDF and update the search history.

    Args:
        query: The question text typed by the user.

    Returns:
        tuple[str, str]: ``(answer_or_message, formatted_history)``. Problems
        are reported in the first element instead of being raised, so the
        caller can always display both values.
    """
    global stop_generation, last_query
    try:
        if stop_generation:
            # The stop flag is consumed here: it cancels exactly one call.
            stop_generation = False
            return "Response generation stopped.", format_history(search_history)

        # Do not record blank submissions in the history or send them to the model.
        if not query or not query.strip():
            return "Please enter a question.", format_history(search_history)

        # Without a loaded document there is no chain to run; say so plainly
        # instead of leaking "'NoneType' object has no attribute 'run'".
        if chain is None:
            return "Please load a PDF document before asking a question.", format_history(search_history)

        last_query = query  # Store the query for regenerate_response
        search_history.append(query)  # Add query to history
        raw_response = chain.run(query)
        return clean_response(raw_response), format_history(search_history)
    except Exception as e:
        return f"Error processing query: {str(e)}", format_history(search_history)

# Function to regenerate the response for the last query
def regenerate_response():
    """Re-run the most recent query and return only its answer text.

    Returns a fixed notice when no query has been stored in ``last_query``.
    """
    if not last_query:
        return "No previous query to regenerate."
    answer, _history = answer_query(last_query)
    return answer

# Function to clear the contents of the input box
def clear_input():
    """Return the value that empties the question input box.

    The clear button in this cell is bound to a single output (the input
    box), so exactly one value is returned. The previous version returned a
    second, surplus value that had no matching output component.
    """
    return ""

# Function to translate the response to a selected language
def translate_response(response, target_language):
    """Translate ``response`` into ``target_language`` with the shared translator.

    Args:
        response: Text to translate (the current content of the output box).
        target_language: Destination language code passed as ``dest``.

    Returns:
        str: The translated text, ``"Nothing to translate."`` when the input
        is empty or only whitespace, or a message starting with
        ``"Error translating response:"`` if the translator raises.
    """
    # Skip the translator entirely for empty input instead of sending a
    # meaningless request and showing whatever error it produces.
    if not response or not response.strip():
        return "Nothing to translate."
    try:
        translated = translator.translate(response, dest=target_language)
        return translated.text
    except Exception as e:
        return f"Error translating response: {str(e)}"

# Function to format the search history
def format_history(history):
    """Render the queries in ``history`` as a Markdown bullet list.

    Each entry becomes one line of the form ``- [query](#) `` followed by a
    newline; an empty history yields an empty string.
    """
    return "".join(f"- [{entry}](#) \n" for entry in history)

# Function to handle feedback
def handle_feedback(feedback):
    """Map a thumbs-up / thumbs-down rating to an acknowledgement message.

    Any other value (including ``None``) produces an empty string.
    """
    if feedback == "👎":
        return "Sorry to hear that. We'll strive to improve."
    return "Thank you for the feedback!" if feedback == "👍" else ""

# Function to stop generation
def stop_generation_func():
    """Raise the module-level ``stop_generation`` flag and return a status message."""
    global stop_generation
    message = "Response generation stopped."
    stop_generation = True
    return message

# Function to handle clicking on a history item
def handle_history_click(history_index):
    """Re-run the query stored at ``history_index`` and return only the answer text."""
    answer, _history = answer_query(search_history[history_index])
    return answer

# HTML and CSS for the Gradio interface
# `html` is the centered title/instructions banner passed to gr.HTML().
html = """
<div style="text-align:center; max-width: 700px;">
    <h1>ChatPDF</h1>
    <p>Upload a PDF File, click on Load PDF, and start chatting with your document.</p>
</div>"""
# `css` is handed to gr.Blocks(css=...). The .gr-button / .gr-textbox rules
# correspond to the elem_classes values assigned to components in the layout.
css = """
.container {
    max-width: 700px;
    margin: 10px auto;
    padding: 10px;
    background-color: #ffffff;  /* White background for the container */
    border-radius: 8px;
    box-shadow: 0px 4px 8px rgba(0, 0, 0, 0.1);
}

.gradio-container {
    font-family: 'Arial', sans-serif;
}

.gr-button {
    background-color: #f0f0f0;  /* Light gray for the button background */
    color: #000000;  /* Black text color */
    border-radius: 6px;
    border: 1px solid #ccc;
    padding: 6px 10px;
    font-size: 12px;
    font-weight: bold;
    margin: 2px;
}

.gr-button:hover {
    background-color: #e0e0e0;  /* Slightly darker gray for hover state */
}

.gr-textbox, .gr-dropdown, .gr-markdown {
    border-radius: 6px;
    border: 1px solid #ccc;
    padding: 6px;
    font-size: 14px;
    background-color: #ffffff;  /* White background for textboxes */
}
"""

# Gradio interface setup
# Layout: a narrow left column showing the search history and a wide right
# column with upload, question, output, translation and feedback controls.
with gr.Blocks(css=css, theme=gr.themes.Soft()) as demo:
    gr.HTML(html)
    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown("### Search History", elem_classes="history-title")
            history_display = gr.Markdown(label="History")  # Display history
        with gr.Column(scale=3):
            gr.Markdown("## ChatPDF")
            pdf_doc = gr.File(label="Load a PDF", file_types=['.pdf'], type='filepath')
            with gr.Row():
                load_pdf = gr.Button("Load PDF File", elem_classes="gr-button")
                status = gr.Textbox(label="Status", placeholder='', interactive=False, elem_classes="gr-textbox")
            with gr.Row():
                input_box = gr.Textbox(label="Type your question here", placeholder="Enter your query...", elem_classes="gr-textbox")
                submit_query = gr.Button("Submit", elem_classes="gr-button")  # Submit button next to input box
            with gr.Row():
                regenerate_button = gr.Button("Regenerate", elem_classes="gr-button")  # Regenerate button
                clear_button = gr.Button("Clear Input", elem_classes="gr-button")  # Clear button
                stop_button = gr.Button("Stop Generation", elem_classes="gr-button")  # Stop Generation button
            with gr.Row():
                output_box = gr.Textbox(label="Output", placeholder="The response will appear here.", elem_classes="gr-textbox")
            with gr.Row():
                language_selector = gr.Dropdown(choices=["es", "fr", "de", "zh-cn", "ja", "hi"], label="Select Language", value="es")
                translate_button = gr.Button("Translate", elem_classes="gr-button")  # Translate button
            with gr.Row():
                feedback_buttons = gr.Radio(["👍", "👎"], label="Rate the Response", elem_classes="gr-button")  # Feedback buttons
                feedback_output = gr.Textbox(label="Feedback", elem_classes="gr-textbox")

            # Bind the functions to Gradio components
            load_pdf.click(load_doc, inputs=pdf_doc, outputs=status)
            submit_query.click(answer_query, inputs=input_box, outputs=[output_box, history_display])  # Update history
            regenerate_button.click(regenerate_response, inputs=None, outputs=output_box)  # Regenerate response
            clear_button.click(clear_input, inputs=None, outputs=[input_box])  # Clear input box only
            stop_button.click(stop_generation_func, inputs=None, outputs=status)  # Stop generation
            translate_button.click(translate_response, inputs=[output_box, language_selector], outputs=output_box)  # Translate output
            feedback_buttons.change(handle_feedback, inputs=feedback_buttons, outputs=feedback_output)  # Feedback handling

            # No listener is attached to history_display. The earlier wiring
            # bound a `change` handler that (a) tried to register more
            # listeners while handling an event, i.e. after this Blocks
            # context had been closed, and (b) returned None into
            # history_display itself. Because answer_query writes to
            # history_display, that handler ran - and failed - after every
            # submitted query. The history entries are plain `[query](#)`
            # Markdown links, and nothing in this cell routes a click on
            # them back to Python.

# Launch the Gradio interface
demo.launch()


Setting queue=True in a Colab notebook requires sharing enabled. Setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
Running on public URL: https://0f777ae752b68602dc.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)




my_ver

In [4]:
import gradio as gr
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.llms import HuggingFaceHub
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma
from langchain.chains import RetrievalQA
from langchain_community.document_loaders import PyMuPDFLoader
from googletrans import Translator
import os

# Set up your Hugging Face API token.
# A real token must never be committed with the notebook: reuse a value that
# is already present in the environment, otherwise prompt for it without
# echoing the input.
# NOTE: the literal token that used to be assigned here was published with the
# notebook and should be revoked in the Hugging Face account settings.
if not os.environ.get("HUGGINGFACEHUB_API_TOKEN"):
    from getpass import getpass

    os.environ["HUGGINGFACEHUB_API_TOKEN"] = getpass("Hugging Face API token: ")

# Initialize the translator and search history
translator = Translator()
search_history = []

# Global variable to store the last query
last_query = None

# RetrievalQA chain; stays None until load_doc has indexed a PDF. Defining it
# here means answer_query sees a real value instead of an undefined name when
# no document has been loaded yet.
chain = None

# Function to load and process the PDF document
def load_doc(pdf_doc):
    """Index an uploaded PDF and build the question-answering chain.

    The PDF is read with ``PyMuPDFLoader``, split into overlapping chunks,
    embedded into a Chroma store, and wired to a ``RetrievalQA`` chain that is
    stored in the module-level ``chain`` variable.

    Args:
        pdf_doc: Value delivered by the file-upload component: either a path
            string or an object exposing the path as ``.name``. ``None`` or an
            empty value means nothing was uploaded.

    Returns:
        str: A success message, or a message starting with
        ``"Error loading document:"``. Failures are returned as text rather
        than raised so the caller can display them.
    """
    global chain
    if not pdf_doc:
        # Loading before uploading used to surface a cryptic
        # "'NoneType' object has no attribute 'name'" message.
        return "Error loading document: no PDF file was provided."
    try:
        # Accept both a bare path string and a wrapper object with ``.name``.
        pdf_path = getattr(pdf_doc, "name", pdf_doc)
        loader = PyMuPDFLoader(pdf_path)
        documents = loader.load()
        embedding = HuggingFaceEmbeddings()
        text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
        text = text_splitter.split_documents(documents)
        db = Chroma.from_documents(text, embedding)
        llm = HuggingFaceHub(repo_id="OpenAssistant/oasst-sft-1-pythia-12b", model_kwargs={"temperature": 0.5, "max_length": 150})
        chain = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=db.as_retriever())
        return 'Document has successfully been loaded'
    except Exception as e:
        return f"Error loading document: {str(e)}"

# Function to clean the responses
def clean_response(response):
    if "Use the following pieces of context" in response:
        response = response.split("Use the following pieces of context")[-1]
    if "Question:" in response:
        response = response.split("Question:")[-1]
    if "Helpful Answer:" in response:
        response = response.split("Helpful Answer:")[-1]

    return response.strip()

# Function to answer queries using the loaded PDF document
def answer_query(query):
    """Answer ``query`` against the loaded PDF and update the search history.

    Args:
        query: The question text typed by the user.

    Returns:
        tuple[str, str]: ``(answer_or_message, formatted_history)``. Problems
        are reported in the first element instead of being raised, so the
        caller can always display both values.
    """
    global last_query
    try:
        # Do not record blank submissions in the history or send them to the model.
        if not query or not query.strip():
            return "Please enter a question.", format_history(search_history)

        # `chain` only exists once load_doc has run in this cell; look it up
        # defensively so a missing or unset chain yields a clear instruction
        # instead of a leaked NameError/AttributeError message.
        qa_chain = globals().get("chain")
        if qa_chain is None:
            return "Please load a PDF document before asking a question.", format_history(search_history)

        last_query = query  # Store the query for regenerate_response
        search_history.append(query)  # Add query to history
        raw_response = qa_chain.run(query)
        return clean_response(raw_response), format_history(search_history)
    except Exception as e:
        return f"Error processing query: {str(e)}", format_history(search_history)

# Function to regenerate the response for the last query
def regenerate_response():
    """Re-run the most recent query and return only its answer text.

    Returns a fixed notice when no query has been stored in ``last_query``.
    """
    if not last_query:
        return "No previous query to regenerate."
    answer, _history = answer_query(last_query)
    return answer

# Function to clear the contents of both input and output boxes
def clear_input():
    """Return a pair of empty strings: one for the input box, one for the output box."""
    blank = ""
    return blank, blank

# Function to translate the response to a selected language
def translate_response(response, target_language):
    """Translate ``response`` into ``target_language`` with the shared translator.

    Args:
        response: Text to translate (the current content of the output box).
        target_language: Destination language code passed as ``dest``.

    Returns:
        str: The translated text, ``"Nothing to translate."`` when the input
        is empty or only whitespace, or a message starting with
        ``"Error translating response:"`` if the translator raises.
    """
    # Skip the translator entirely for empty input instead of sending a
    # meaningless request and showing whatever error it produces.
    if not response or not response.strip():
        return "Nothing to translate."
    try:
        translated = translator.translate(response, dest=target_language)
        return translated.text
    except Exception as e:
        return f"Error translating response: {str(e)}"

# Function to format the search history as bullet points with links
def format_history(history):
    """Render the queries in ``history`` as Markdown bullet points with ``#`` links.

    Every query contributes the line ``- [query](#) `` plus a newline; the
    lines are concatenated in order.
    """
    bullet_lines = [f"- [{item}](#) \n" for item in history]
    return "".join(bullet_lines)

# Function to handle clicking on a history item (optional implementation)
def handle_history_click(query):
    """Run ``query`` through ``answer_query`` and return only the answer text."""
    answer, _history = answer_query(query)
    return answer

# HTML and CSS for the Gradio interface
# `html` is the centered title/instructions banner passed to gr.HTML().
html = """
<div style="text-align:center; max-width: 700px;">
    <h1>ChatPDF</h1>
    <p>Upload a PDF File, then click on Load PDF File.<br>
    Once the document has been loaded, you can begin chatting with the PDF.</p>
</div>"""
# `css` is handed to gr.Blocks(css=...).
# NOTE(review): `container` has no `.` or `#` prefix, so it is a type selector
# rather than a class/id selector - confirm which element it is meant to match.
css = """container { max-width: 700px; margin-left: auto; margin-right: auto; padding: 20px; }"""

# Gradio interface setup
# Layout: a narrow left column showing the search history and a wide right
# column with upload, question, output and translation controls.
with gr.Blocks(css=css, theme=gr.themes.Monochrome()) as demo:
    gr.HTML(html)
    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown("## Search History")
            history_display = gr.Markdown(label="History")  # Receives the formatted history from answer_query
        with gr.Column(scale=3):
            gr.Markdown('ChatPDF')
            pdf_doc = gr.File(label="Load a PDF", file_types=['.pdf'], type='filepath')
            with gr.Row():
                load_pdf = gr.Button("Load PDF File")
                status = gr.Textbox(label="Status", placeholder='', interactive=False)  # Read-only; shows load_doc's message
            with gr.Row():
                input_box = gr.Textbox(label="Type in your question")
                submit_query = gr.Button("Submit")  # Submit button sits next to the input box
            with gr.Row():
                regenerate_button = gr.Button("Regenerate Response")  # Re-runs the last stored query
                clear_button = gr.Button("Clear")  # Empties the input and output boxes
            with gr.Row():
                output_box = gr.Textbox(label="Output")
            with gr.Row():
                language_selector = gr.Dropdown(choices=["es", "fr", "de", "zh-cn", "ja", "hi"], label="Select Language", value="es")
                translate_button = gr.Button("Translate")  # Translates the text currently in the output box

            # Bind the functions to Gradio components
            load_pdf.click(load_doc, inputs=pdf_doc, outputs=status)
            submit_query.click(answer_query, inputs=input_box, outputs=[output_box, history_display])  # Update history
            regenerate_button.click(regenerate_response, inputs=None, outputs=output_box)  # Only the answer is refreshed; history_display is not an output here
            clear_button.click(clear_input, inputs=None, outputs=[input_box, output_box])  # Clear input and output boxes
            translate_button.click(translate_response, inputs=[output_box, language_selector], outputs=output_box)  # Translation overwrites the output box

# Launch the Gradio interface
demo.launch()


Setting queue=True in a Colab notebook requires sharing enabled. Setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
Running on public URL: https://a91977d7d7c7c84856.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)




In [12]:
import gradio as gr
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.llms import HuggingFaceHub
from langchain.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma
from langchain.chains import RetrievalQA
from langchain_community.document_loaders import PyMuPDFLoader
from googletrans import Translator
import os

# Set up your Hugging Face API token.
# A real token must never be committed with the notebook: reuse a value that
# is already present in the environment, otherwise prompt for it without
# echoing the input.
# NOTE: the literal token that used to be assigned here was published with the
# notebook and should be revoked in the Hugging Face account settings.
if not os.environ.get("HUGGINGFACEHUB_API_TOKEN"):
    from getpass import getpass

    os.environ["HUGGINGFACEHUB_API_TOKEN"] = getpass("Hugging Face API token: ")

# Initialize the translator and search history
translator = Translator()
search_history = []
last_query = None  # Most recent query, reused by regenerate_response
stop_generation = False  # Flag to control generation
chain = None  # RetrievalQA chain; assigned by load_doc once a PDF is indexed

# Function to load and process the PDF document
def load_doc(pdf_doc):
    """Index an uploaded PDF and build the question-answering chain.

    The PDF is read with ``PyMuPDFLoader``, split into overlapping chunks,
    embedded into a Chroma store, and wired to a ``RetrievalQA`` chain that is
    stored in the module-level ``chain`` variable.

    Args:
        pdf_doc: Value delivered by the file-upload component: either a path
            string or an object exposing the path as ``.name``. ``None`` or an
            empty value means nothing was uploaded.

    Returns:
        str: A success message, or a message starting with
        ``"Error loading document:"``. Failures are returned as text rather
        than raised so the caller can display them.
    """
    global chain
    if not pdf_doc:
        # Loading before uploading used to surface a cryptic
        # "'NoneType' object has no attribute 'name'" message.
        return "Error loading document: no PDF file was provided."
    try:
        # Accept both a bare path string and a wrapper object with ``.name``.
        pdf_path = getattr(pdf_doc, "name", pdf_doc)
        loader = PyMuPDFLoader(pdf_path)
        documents = loader.load()
        embedding = HuggingFaceEmbeddings()
        text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
        text = text_splitter.split_documents(documents)
        db = Chroma.from_documents(text, embedding)
        llm = HuggingFaceHub(repo_id="OpenAssistant/oasst-sft-1-pythia-12b", model_kwargs={"temperature": 0.5, "max_length": 150})
        chain = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=db.as_retriever())
        return 'Document has successfully been loaded'
    except Exception as e:
        return f"Error loading document: {str(e)}"

# Function to clean the responses
def clean_response(response):
    if "Use the following pieces of context" in response:
        response = response.split("Use the following pieces of context")[-1]
    if "Question:" in response:
        response = response.split("Question:")[-1]
    if "Helpful Answer:" in response:
        response = response.split("Helpful Answer:")[-1]

    return response.strip()

# Function to answer queries using the loaded PDF document
def answer_query(query):
    """Answer ``query`` against the loaded PDF and update the search history.

    Args:
        query: The question text typed by the user.

    Returns:
        tuple[str, str]: ``(answer_or_message, formatted_history)``. Problems
        are reported in the first element instead of being raised, so the
        caller can always display both values.
    """
    global stop_generation, last_query
    try:
        if stop_generation:
            # The stop flag is consumed here: it cancels exactly one call.
            stop_generation = False
            return "Response generation stopped.", format_history(search_history)

        # Do not record blank submissions in the history or send them to the model.
        if not query or not query.strip():
            return "Please enter a question.", format_history(search_history)

        # Without a loaded document there is no chain to run; say so plainly
        # instead of leaking "'NoneType' object has no attribute 'run'".
        if chain is None:
            return "Please load a PDF document before asking a question.", format_history(search_history)

        last_query = query  # Store the query for regenerate_response
        search_history.append(query)  # Add query to history
        raw_response = chain.run(query)
        return clean_response(raw_response), format_history(search_history)
    except Exception as e:
        return f"Error processing query: {str(e)}", format_history(search_history)

# Function to regenerate the response for the last query
def regenerate_response():
    """Re-run the most recent query and return only its answer text.

    Returns a fixed notice when no query has been stored in ``last_query``.
    """
    if not last_query:
        return "No previous query to regenerate."
    answer, _history = answer_query(last_query)
    return answer

# Function to clear the contents of both input and output boxes
def clear_input():
    """Return a pair of empty strings: one for the input box, one for the output box."""
    blank = ""
    return blank, blank

# Function to translate the response to a selected language
def translate_response(response, target_language):
    """Translate ``response`` into ``target_language`` with the shared translator.

    Args:
        response: Text to translate (the current content of the output box).
        target_language: Destination language code passed as ``dest``.

    Returns:
        str: The translated text, ``"Nothing to translate."`` when the input
        is empty or only whitespace, or a message starting with
        ``"Error translating response:"`` if the translator raises.
    """
    # Skip the translator entirely for empty input instead of sending a
    # meaningless request and showing whatever error it produces.
    if not response or not response.strip():
        return "Nothing to translate."
    try:
        translated = translator.translate(response, dest=target_language)
        return translated.text
    except Exception as e:
        return f"Error translating response: {str(e)}"

# Function to format the search history as bullet points with links
def format_history(history):
    """Render the queries in ``history`` as Markdown bullet points with ``#`` links.

    Every query contributes the line ``- [query](#) `` plus a newline; the
    lines are concatenated in order.
    """
    pieces = []
    for past_query in history:
        pieces.append(f"- [{past_query}](#) \n")
    return "".join(pieces)

# Function to handle feedback
def handle_feedback(feedback):
    """Return the acknowledgement text for a thumbs-up or thumbs-down rating.

    Values other than the two rating symbols yield an empty string.
    """
    reply = ""
    if feedback == "👍":
        reply = "Thank you for the feedback!"
    elif feedback == "👎":
        reply = "Sorry to hear that. We'll strive to improve."
    return reply

# Function to stop generation
def stop_generation_func():
    """Raise the module-level ``stop_generation`` flag and return a status message."""
    global stop_generation
    message = "Response generation stopped."
    stop_generation = True
    return message

# Function to handle clicking on a history item
def handle_history_click(history_index):
    """Re-run the query stored at ``history_index`` and return only the answer text."""
    answer, _history = answer_query(search_history[history_index])
    return answer

# HTML and CSS for the Gradio interface
# `html` is the centered title/instructions banner passed to gr.HTML().
html = """
<div style="text-align:center; max-width: 700px;">
    <h1>ChatPDF</h1>
    <p>Upload a PDF File, then click on Load PDF File.<br>
    Once the document has been loaded, you can begin chatting with the PDF.</p>
</div>"""
# `css` is handed to gr.Blocks(css=...).
# NOTE(review): `container` has no `.` or `#` prefix, so it is a type selector
# rather than a class/id selector - confirm which element it is meant to match.
css = """container { max-width: 700px; margin-left: auto; margin-right: auto; padding: 20px; }"""

# Gradio interface setup
# Layout: a narrow left column showing the search history and a wide right
# column with upload, question, output, translation and feedback controls.
with gr.Blocks(css=css, theme=gr.themes.Soft()) as demo:
    gr.HTML(html)
    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown("### Search History", elem_classes="history-title")
            history_display = gr.Markdown(label="History")  # Receives the formatted history from answer_query
        with gr.Column(scale=3):
            gr.Markdown("## ChatPDF")
            pdf_doc = gr.File(label="Load a PDF", file_types=['.pdf'], type='filepath')
            with gr.Row():
                load_pdf = gr.Button("Load PDF File", elem_classes="gr-button")
                status = gr.Textbox(label="Status", placeholder='', interactive=False, elem_classes="gr-textbox")  # Read-only; shared by load and stop messages
            with gr.Row():
                input_box = gr.Textbox(label="Type your question here", placeholder="Enter your query...", elem_classes="gr-textbox")
                submit_query = gr.Button("Submit", elem_classes="gr-button")  # Submit button next to input box
            with gr.Row():
                regenerate_button = gr.Button("Regenerate", elem_classes="gr-button")  # Re-runs the last stored query
                clear_button = gr.Button("Clear Input", elem_classes="gr-button")  # Empties the input and output boxes
                stop_button = gr.Button("Stop Generation", elem_classes="gr-button")  # Sets the stop_generation flag
            with gr.Row():
                output_box = gr.Textbox(label="Output", placeholder="The response will appear here.", elem_classes="gr-textbox")
            with gr.Row():
                language_selector = gr.Dropdown(choices=["es", "fr", "de", "zh-cn", "ja", "hi"], label="Select Language", value="es")
                translate_button = gr.Button("Translate", elem_classes="gr-button")  # Translates the text currently in the output box
            with gr.Row():
                feedback_buttons = gr.Radio(["👍", "👎"], label="Rate the Response", elem_classes="gr-button")  # Feedback buttons
                feedback_output = gr.Textbox(label="Feedback", elem_classes="gr-textbox")

            # Bind the functions to Gradio components
            load_pdf.click(load_doc, inputs=pdf_doc, outputs=status)
            submit_query.click(answer_query, inputs=input_box, outputs=[output_box, history_display])  # Update history
            regenerate_button.click(regenerate_response, inputs=None, outputs=output_box)  # Only the answer is refreshed; history_display is not an output here
            clear_button.click(clear_input, inputs=None, outputs=[input_box, output_box])  # Clear input and output boxes
            stop_button.click(stop_generation_func, inputs=None, outputs=status)  # Message goes to the status box
            translate_button.click(translate_response, inputs=[output_box, language_selector], outputs=output_box)  # Translation overwrites the output box
            feedback_buttons.change(handle_feedback, inputs=feedback_buttons, outputs=feedback_output)  # Feedback handling

# Launch the Gradio interface
demo.launch()


Setting queue=True in a Colab notebook requires sharing enabled. Setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
Running on public URL: https://f141da524ab5e3fe1b.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)




trails