<a href="https://colab.research.google.com/github/karjalpp/genai-chatbot/blob/main/Copy_of_contract_copilot_using_streamlit.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import streamlit as st
from streamlit_option_menu import option_menu
import base64
import os
import warnings
from pathlib import Path
import backoff
import pandas as pd
import PyPDF2
import ratelimit
from google.api_core import exceptions
from tqdm import tqdm
from vertexai.preview.language_models import TextGenerationModel
from langchain.embeddings import VertexAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.llms import VertexAI
# from langchain_google_vertexai import VertexAI
from langchain import PromptTemplate, LLMChain
from langchain.chains.question_answering import load_qa_chain
from langchain.memory import ConversationBufferWindowMemory
# from langchain.vectorstores import Chroma
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory
from langchain.document_loaders import PyPDFLoader
from langchain.document_loaders import PyPDFDirectoryLoader
import pypdf
import os
import tiktoken
import docx2txt
from docx import Document
from dotenv import load_dotenv
from langchain.document_loaders import Docx2txtLoader
from langchain.document_loaders import TextLoader

# Service-account key file picked up through the GOOGLE_APPLICATION_CREDENTIALS
# environment variable; the path is relative to the process working directory.
os.environ.update(
    {'GOOGLE_APPLICATION_CREDENTIALS': r'data-engineering-gcp-practice-a400a0302437.json'}
)

# Silence every warning category for the whole process.
warnings.simplefilter("ignore")

def add_bg_from_local(image_file):
    """Use a local image as the background of the Streamlit app.

    The file is read from disk, base64-encoded and injected into the page as
    a CSS ``background-image`` data URI on the ``.stApp`` selector.

    Args:
        image_file: Path of the image file to embed.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    import mimetypes  # function-scope import keeps this block self-contained

    # Derive the MIME type from the file name instead of always claiming PNG.
    # Fall back to PNG (the previously hard-coded value) when the type is
    # unknown or is not an image type.
    mime_type, _ = mimetypes.guess_type(image_file)
    if not mime_type or not mime_type.startswith("image/"):
        mime_type = "image/png"

    # A separate name for the handle so the path argument is not shadowed.
    with open(image_file, "rb") as handle:
        encoded_string = base64.b64encode(handle.read()).decode()

    st.markdown(
    f"""
    <style>
    .stApp {{
        background-image: url(data:{mime_type};base64,{encoded_string});
        background-size: cover
    }}
    </style>
    """,
    unsafe_allow_html=True
    )

def get_option():
    """Draw the sidebar navigation menu and return the chosen entry.

    Returns:
        Whatever ``option_menu`` returns for the current selection; ``main``
        compares it against the menu labels listed below.
    """
    # (label, icon name) pairs, in display order.
    menu_entries = [
        ("Home", 'house'),
        ("Summarization", 'book'),
        ("Upload New Contract", 'cloud-upload'),
        ("QnA on Multiple Contracts", 'list'),
        ("Contract Generation", 'gear'),
        ("FAQs", 'patch-question'),
    ]
    labels = [label for label, _ in menu_entries]
    icon_names = [icon for _, icon in menu_entries]

    with st.sidebar:
        selected = option_menu(
            "Main Menu",
            labels,
            icons=icon_names,
            menu_icon="cast",
            default_index=0,
        )
    return selected

# def displayPDF(file):
#     with open(file, "rb") as f:
#         base64_pdf = base64.b64encode(f.read()).decode('utf-8')
#     pdf_display = F'<iframe src="data:application/pdf;base64,{base64_pdf}" width="700" height="1000" type="application/pdf"> </iframe>'
#     st.markdown(pdf_display, unsafe_allow_html=True)


def _parse_key_parameters(response):
    """Convert the model's ``key: value`` lines into a placeholder mapping.

    Every non-empty line of the form ``<key>: <value>`` becomes an entry
    ``"[<KEY>]" -> "<value>"`` (key upper-cased and wrapped in brackets).
    A leading ``"<n>. "`` list number on the key is removed. Lines without a
    ``": "`` separator, or whose key is empty, are ignored.

    Args:
        response: Raw text returned by the model.

    Returns:
        dict mapping bracketed, upper-cased keys to their stripped values.
        When a key occurs more than once, the last value wins.
    """
    placeholders = {}
    for raw_line in response.replace('Key Parameters:', '').split('\n'):
        # Split at the first ": " only, so values may contain colons.
        key, separator, value = raw_line.strip().partition(': ')
        if not separator:
            continue
        key = key.strip()
        # Remove the list number only when the "<n>. " pattern is really
        # present; a key such as "1) Name" or "2nd party" is kept unchanged
        # instead of raising IndexError.
        if key[:1].isdigit():
            _, dot, remainder = key.partition('. ')
            if dot:
                key = remainder.strip()
        if not key:
            continue
        placeholders[f'[{key.upper()}]'] = value.strip()
    return placeholders


def Contract_Generation(email,model_with_limit_and_backoff):
    """Generate a contract document from an email using the MSA template.

    The model is asked to extract key parameters from ``email``; each
    extracted ``[KEY]`` placeholder found in a paragraph of
    ``MSA template.docx`` is replaced by its value, and the result is saved
    as ``Repo/<first word of supplier name>.docx``.

    Args:
        email: Text of the email to extract the parameters from.
        model_with_limit_and_backoff: Callable accepting ``prompt`` and
            ``max_output_tokens`` keyword arguments and returning an object
            with a ``.text`` attribute.

    Returns:
        The filled-in ``Document`` object (already saved to disk).

    Raises:
        KeyError: If the model response contains no usable supplier name,
            which is required to build the output file name.
    """
    prompt= f"""
    Your task is to extract key parameters from email doc.\
    These key parameters are following:
    1. Supplier Address
    2. Date
    3. TERM
    4. SOW
    5. Deliverables
    6. Supplier Name
    \"\"\"{email}\"\"\"
    """
    response = model_with_limit_and_backoff(prompt=prompt,max_output_tokens=1024).text
    output_dict = _parse_key_parameters(response)

    # The supplier name comes from model output over an uploaded document, so
    # treat it as untrusted: keep only the final path component of its first
    # word so it cannot point outside the Repo directory.
    supplier_words = output_dict.get('[SUPPLIER NAME]', '').split()
    file_stem = ''
    if supplier_words:
        file_stem = os.path.basename(supplier_words[0].replace('\\', '/'))
    if not file_stem:
        raise KeyError(
            "Supplier Name was not found in the model response; "
            "cannot name the generated contract"
        )

    file_path = "MSA template.docx"
    document = Document(file_path)

    for paragraph in document.paragraphs:
        for key, value in output_dict.items():
            if key in paragraph.text:
                # Assigning to .text rewrites the whole paragraph content.
                paragraph.text = paragraph.text.replace(key, value)

    os.makedirs("Repo", exist_ok=True)
    path = f"Repo/{file_stem}.docx"
    document.save(path)

    return document



def _build_rate_limited_model(generation_model):
    """Wrap ``generation_model.predict`` with rate limiting and retries.

    The returned callable allows at most 20 calls per 60 seconds and retries
    with exponential backoff, for up to five minutes, when the call raises
    ``ResourceExhausted`` or ``RateLimitException``.

    Args:
        generation_model: Object exposing ``predict(**kwargs)``.

    Returns:
        A callable forwarding its keyword arguments to
        ``generation_model.predict``.
    """
    CALL_LIMIT = 20  # Number of calls to allow within a period
    ONE_MINUTE = 60  # One minute in seconds
    FIVE_MINUTE = 5 * ONE_MINUTE

    def backoff_hdlr(details):
        # Report each retry so stalls are visible in the console.
        print(
            "Backing off {} seconds after {} tries".format(
                details["wait"], details["tries"]
            )
        )

    # backoff is the outer decorator so that a RateLimitException raised by
    # the inner limiter is itself retried.
    @backoff.on_exception(
        backoff.expo,
        (
            exceptions.ResourceExhausted,
            ratelimit.RateLimitException,
        ),
        max_time=FIVE_MINUTE,
        on_backoff=backoff_hdlr,
    )
    @ratelimit.limits(calls=CALL_LIMIT, period=ONE_MINUTE)
    def model_with_limit_and_backoff(**kwargs):
        return generation_model.predict(**kwargs)

    return model_with_limit_and_backoff


def _render_summarization_page():
    """Show the stored summary text file for the vendor chosen in a dropdown."""
    st.markdown("## Summarization")
    opt = st.selectbox(
        '',
        ('--SELECT FROM DROPDOWN--','Broadcom', 'Google', 'Ikea','Nextiva')
    )

    # vendor -> (file name, encoding). None keeps the platform default
    # encoding, exactly as the individual branches did before.
    summary_files = {
        "Broadcom": ("Broadcom.txt", None),
        "Google": ("Google.txt", "utf-8"),
        "Ikea": ("ikea.txt", None),
        "Nextiva": ("Nextiva.txt", None),
    }
    if opt not in summary_files:
        return

    file_name, encoding = summary_files[opt]
    file_path = os.path.join("Repo/summaries", file_name)
    try:
        with open(file_path, "r", encoding=encoding) as file:
            content = file.read()
    except (OSError, UnicodeError) as e:
        st.write("Failed to read file:", e)
    else:
        st.write(content)


def _summarize_pages(pages, model_with_limit_and_backoff):
    """Summarise pages one by one, then condense the result to bullet points.

    Each page is summarised with the previous page's summary as context; the
    distinct page summaries are then joined in page order and summarised
    once more.

    Args:
        pages: Iterable of PDF page objects exposing ``extract_text()``.
        model_with_limit_and_backoff: Callable accepting ``prompt`` and
            ``max_output_tokens`` and returning an object with ``.text``.

    Returns:
        The final summary text.
    """
    # The indentation inside these literals is part of the prompt text and is
    # deliberately left exactly as it was.
    initial_prompt_template = """
                    Taking the following context delimited by triple backquotes into consideration:
                    ```{context}```
                    Write a concise summary of the following text delimited by triple backquotes which also covers the key points of the text including Scope of Services, Deliverables if available.

                    ```{text}```
                    CONCISE SUMMARY:
                    """
    final_prompt_template = """
                        Write a concise summary of the following text delimited by triple backquotes.
                        Return your response in 10 bullet points.

                        ```{text}```
                        BULLET POINT SUMMARY:
                    """

    page_summaries = []
    previous_summary = ""  # the first page has no context
    for page in tqdm(pages):
        text = (page.extract_text() or "").strip()
        prompt = initial_prompt_template.format(context=previous_summary, text=text)
        previous_summary = model_with_limit_and_backoff(prompt=prompt, max_output_tokens=1024).text
        page_summaries.append(previous_summary)

    # Drop duplicate summaries while keeping page order; a plain set() would
    # shuffle the order of the text handed to the final prompt.
    unique_summaries = dict.fromkeys(page_summaries)
    final_prompt = final_prompt_template.format(text="\n".join(unique_summaries))
    return model_with_limit_and_backoff(prompt=final_prompt, max_output_tokens=1024).text


def _answer_from_pages(pages, query):
    """Answer ``query`` from the text of ``pages`` via an in-memory FAISS index.

    Returns:
        The answer produced by the QA chain, or ``None`` when no text could
        be extracted from the pages.
    """
    raw_text = "".join(page.extract_text() or "" for page in tqdm(pages))

    text_splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=5000,
        chunk_overlap=200,
        length_function=len,
    )
    texts = text_splitter.split_text(raw_text)
    if not texts:
        # Nothing to index (e.g. no extractable text in the document).
        return None

    embeddings = VertexAIEmbeddings(model_name="textembedding-gecko@001")
    document_search = FAISS.from_texts(texts, embeddings)
    chain = load_qa_chain(VertexAI(), chain_type="stuff")
    docs = document_search.similarity_search(query)
    return chain.run(input_documents=docs, question=query)


def _render_upload_page(model_with_limit_and_backoff):
    """Upload a contract PDF, store it under ``Repo`` and offer summary / QnA tabs."""
    st.markdown("## Insights from New Contract")
    # Explicit key: the other uploader in this app has identical arguments,
    # and a distinct key keeps the two widgets' state separate.
    pdf_file = st.file_uploader(".", label_visibility="hidden", key="new_contract_upload")
    if pdf_file is None:
        st.write("Upload the Contract document for Insights")
        return

    # Build the path portably (the previous "Repo\<name>" literal only works
    # on Windows) and keep just the base name of the client-supplied name.
    os.makedirs("Repo", exist_ok=True)
    path = os.path.join("Repo", os.path.basename(pdf_file.name))
    with open(path, "wb") as f:
        f.write(pdf_file.getbuffer())

    reader = PyPDF2.PdfReader(path)
    pages = reader.pages

    t1, t2, t3 = st.tabs([":point_right:", "Summarize", "QnA"])
    with t1:
        st.write(" ")

    with t2:
        st.header("Summarization of Contract")
        if st.button("Click to Summarize"):
            st.write(_summarize_pages(pages, model_with_limit_and_backoff))

    with t3:
        st.header("Ask anything from your Contract")
        query = st.text_input(
            "",
            placeholder="What would you like to ask?",
            value="",
            max_chars=None,
            key="single_contract_query",
        )
        if query:
            # The index is built only once a question is asked, so reruns
            # without a query do not trigger embedding calls.
            answer = _answer_from_pages(pages, query)
            if answer is None:
                st.write("No extractable text was found in the uploaded document.")
            else:
                st.markdown(answer)


def _render_multi_contract_qna_page():
    """Answer questions against the FAISS index stored in ``Repo/db``."""
    st.markdown("## QnA on Multiple Contracts")

    # NOTE(security): allow_dangerous_deserialization=True lets load_local
    # unpickle the stored index; only load index files from a trusted source.
    vectordb = FAISS.load_local(
        "Repo/db",
        VertexAIEmbeddings(model_name="textembedding-gecko@003"),
        allow_dangerous_deserialization=True,
    )
    # A new memory object is created on every call of this function.
    memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
    pdf_qa = ConversationalRetrievalChain.from_llm(
        llm=VertexAI(temperature=0.0, max_output_tokens=1024, model_name="text-unicorn@001"),
        retriever=vectordb.as_retriever(),
        memory=memory,
        verbose=False,
    )

    query = st.text_input(
        "Ask anything from the saved Contracts",
        placeholder="What would you like to ask?",
        value="",
        max_chars=None,
        key="multi_contract_query",
    )
    if query:
        response = pdf_qa({"question": query})
        st.write(response["answer"])


def _render_contract_generation_page(model_with_limit_and_backoff):
    """Upload an email document and display the contract generated from it."""
    st.markdown("## Contract Generation from Email Chain")
    pdf_file = st.file_uploader(".", label_visibility="hidden", key="email_upload")
    if pdf_file is None:
        st.write("Please upload an Email")
        return

    # Store the upload under <cwd>/email, keeping only the base name of the
    # client-supplied file name.
    os.makedirs("email", exist_ok=True)
    path = os.path.join(Path.cwd(), "email", os.path.basename(pdf_file.name))
    with open(path, "wb") as f:
        f.write(pdf_file.getbuffer())

    email = str(docx2txt.process(path))
    document = Contract_Generation(email, model_with_limit_and_backoff)

    for para in document.paragraphs:
        st.write(para.text)

    st.write(" The Generated Contract is Saved in the Centralized Repository")


def _render_faq_page():
    """Display the static list of frequently asked questions and answers."""
    st.markdown("## Frequently Asked Questions")
    faq_data = {
            "List out all the contract/vendor names?": "The contract is between Mediaone and Broadcom, Nextiva and Google.",
            "For Mediaone, what are the payment terms?": "Mediaone shall pay all undisputed invoices net ninety (90) days of receipt of a correct invoice.",
            "What is the value of professional liability?": "The value of professional liability is $10,000.",
            "What is the value of employer employment practices liability insurance?": "The employer employment practices liability insurance is $2,000,000 per occurrence.",
            "The original contract amount for Broadcom is?": "The original contract amount is $15,000.",
            "Where are the companies located?": "The companies are located in the United States, the European Union, and all other applicable jurisdictions.",
            "Will Google and Broadcom charge Mediaone for traveling from their location?": "No, Google and Broadcom will not charge Mediaone for traveling from their location.",
            "Who would manage responsibilities from Mediaone and Google side?": "Google and Mediaone will each assign a person to act as their respective project manager for the purposes of this SOW.",
            "Can supplier subcontract services?": "Yes, the supplier can subcontract some contract services only but only with prior written consent of the customer."
        }

    st.write("")
    for question, answer in faq_data.items():
        st.write("")
        st.markdown("##### {}".format(question))
        st.write(answer)


def main():
    """Entry point of the Streamlit app: configure the page and render the
    view selected in the sidebar menu."""
    st.set_page_config(page_title='Contract Wizard', layout='centered', initial_sidebar_state='auto')

    option = get_option()
    generation_model = TextGenerationModel.from_pretrained("text-bison@001")
    model_with_limit_and_backoff = _build_rate_limited_model(generation_model)

    if option == "Home":
        add_bg_from_local('bg2.png')
    elif option == "Summarization":
        _render_summarization_page()
    elif option == "Upload New Contract":
        _render_upload_page(model_with_limit_and_backoff)
    elif option == "QnA on Multiple Contracts":
        _render_multi_contract_qna_page()
    elif option == "Contract Generation":
        _render_contract_generation_page(model_with_limit_and_backoff)
    elif option == "FAQs":
        _render_faq_page()


# Start the app only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == "__main__":
    main()