<a href="https://colab.research.google.com/github/karjalpp/genai-chatbot/blob/main/contarct_copilot_using__fastapi.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import os
import warnings
from pathlib import Path
import backoff
import PyPDF2
import ratelimit
from google.api_core import exceptions
from tqdm import tqdm
from vertexai.preview.language_models import TextGenerationModel
from langchain.embeddings import VertexAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.llms import VertexAI
from langchain.chains.question_answering import load_qa_chain
from langchain.memory import ConversationBufferWindowMemory
from langchain.vectorstores import Chroma
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory

import docx2txt
from docx import Document

from fastapi import FastAPI
from fastapi import FastAPI, UploadFile, File
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import FileResponse

# Service-account key file exposed through GOOGLE_APPLICATION_CREDENTIALS.
# The path is relative, so it is resolved against the process working directory.
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = r'data-engineering-gcp-practice-a400a0302437.json'
app = FastAPI()

# CORS is fully open: every origin, method and header is accepted, with credentials allowed.
# NOTE(review): presumably intended for development only -- confirm before deploying.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)


# LangChain wrappers for the Vertex AI text and embedding models.
vertex_llm_text = VertexAI(model_name="text-bison@001")
vertex_embeddings = VertexAIEmbeddings(model_name="textembedding-gecko@001")

# Silence every Python warning for the rest of the process.
warnings.filterwarnings("ignore")
# Vertex AI SDK model called by model_with_limit_and_backoff.
generation_model = TextGenerationModel.from_pretrained("text-bison@001")

CALL_LIMIT = 20  # Number of calls to allow within a period
ONE_MINUTE = 60  # One minute in seconds
FIVE_MINUTE = 5 * ONE_MINUTE  # Five minutes in seconds; used as the retry time budget

# Callback that reports each retry scheduled by the backoff decorator
def backoff_hdlr(details):
    """Print the upcoming wait time and the number of tries made so far.

    Args:
        details: Mapping that provides the ``"wait"`` and ``"tries"`` entries.
    """
    wait_seconds = details["wait"]
    attempts = details["tries"]
    print(f"Backing off {wait_seconds} seconds after {attempts} tries")
@backoff.on_exception(
    backoff.expo,  # exponential wait between attempts
    (exceptions.ResourceExhausted, ratelimit.RateLimitException),  # errors that trigger a retry
    max_time=FIVE_MINUTE,  # stop retrying once this many seconds have elapsed
    on_backoff=backoff_hdlr,  # report every retry
)
@ratelimit.limits(calls=CALL_LIMIT, period=ONE_MINUTE)
def model_with_limit_and_backoff(**kwargs):
    """Call ``generation_model.predict`` under a rate limit, retrying with backoff.

    The inner decorator allows at most ``CALL_LIMIT`` calls per ``ONE_MINUTE``
    seconds; the outer one retries when the rate limiter or the API signals
    exhaustion, for up to ``FIVE_MINUTE`` seconds.

    Args:
        **kwargs: Forwarded unchanged to ``generation_model.predict``.

    Returns:
        Whatever ``generation_model.predict`` returns.
    """
    prediction = generation_model.predict(**kwargs)
    return prediction



def _parse_key_parameters(response):
    """Convert the model's ``key: value`` lines into a placeholder mapping.

    Each non-empty line is split at the first ``': '``. A leading list number
    (``"1. "``) is removed from the key, and the key is upper-cased and wrapped
    in square brackets so it matches the template placeholders.

    Args:
        response: Raw text returned by the model.

    Returns:
        dict mapping ``"[KEY]"`` placeholders to their extracted values.
        Lines without ``': '`` and lines whose key is empty are skipped.
    """
    placeholders = {}
    for raw_line in response.replace('Key Parameters:', '').split('\n'):
        key, separator, value = raw_line.strip().partition(': ')
        if not separator:
            continue
        key = key.strip()
        if key[:1].isdigit():
            # Strip the list number only when a ". " really follows it;
            # otherwise keep the key as it is instead of failing.
            _, dot, remainder = key.partition('. ')
            if dot:
                key = remainder
        if not key:
            continue
        placeholders[f'[{key.upper()}]'] = value.strip()
    return placeholders


def Contract_Generation(email,model_with_limit_and_backoff):
    """Fill the MSA template with parameters extracted from an email.

    The model is asked to extract the key parameters from ``email``; every
    ``[KEY]`` placeholder found in a paragraph of ``"MSA template.docx"`` is
    replaced by the extracted value, and the result is saved to
    ``"email/docx/response.docx"``.

    Args:
        email: Text of the email to extract parameters from.
        model_with_limit_and_backoff: Callable accepting ``prompt`` and
            ``max_output_tokens`` keyword arguments and returning an object
            with a ``text`` attribute.

    Returns:
        The filled-in ``Document`` object.
    """
    prompt= f"""
    Your task is to extract key parameters from email doc.\
    These key parameters are following:
    1. Supplier Address
    2. Date
    3. TERM
    4. SOW
    5. Deliverables
    6. Supplier Name
    \"\"\"{email}\"\"\"
    """
    response = model_with_limit_and_backoff(prompt=prompt,max_output_tokens=1024).text
    output_dict = _parse_key_parameters(response)

    document = Document("MSA template.docx")
    for paragraph in document.paragraphs:
        for key, value in output_dict.items():
            if key in paragraph.text:
                paragraph.text = paragraph.text.replace(key, value)

    # The output folder is not guaranteed to exist on a fresh deployment.
    os.makedirs("email/docx", exist_ok=True)
    document.save("email/docx/response.docx")
    return document


@app.get("/")
def read_root():
    """Return a fixed greeting payload for the root route."""
    greeting = {"Hello": "World"}
    return greeting

# @app.post('/genai/api/v1/summarization')
# async def summarization(file: UploadFile = File(...)):
#     file_location = f"Repo/{file.filename}"
#     with open(file_location, "wb+") as file_object:
#         file_object.write(file.file.read())

#     reader = PyPDF2.PdfReader(os.path.join("Repo",file.filename))
#     pages = reader.pages

#     initial_prompt_template = """
#     Taking the following context delimited by triple backquotes into consideration:
#     ```{context}```
#     Write a concise summary of the following text delimited by triple backquotes which also covers the key points of the text including Scope of Services, Deliverables if available.

#     ```{text}```
#     CONCISE SUMMARY:
#     """
#     final_prompt_template = """
#         Write a concise summary of the following text delimited by triple backquotes.
#         Return your response in 10 bullet points.

#         ```{text}```
#         BULLET POINT SUMMARY:
#     """
#     initial_summary = []
#     for idx, page in enumerate(tqdm(pages)):
#         text = page.extract_text().strip()
#         if idx == 0:
#             prompt = initial_prompt_template.format(context="", text=text)
#         else:
#             prompt = initial_prompt_template.format(
#                 context=initial_summary[idx - 1], text=text
#             )
#         summary = model_with_limit_and_backoff(prompt=prompt, max_output_tokens=1024).text
#         initial_summary.append(summary)

#     def reduce(initial_summary, prompt_template):
#         concat_summary = "\n".join(initial_summary)
#         prompt = prompt_template.format(text=concat_summary)
#         summary = model_with_limit_and_backoff(prompt=prompt, max_output_tokens=1024).text
#         return summary

#     initial_summary = set(initial_summary)
#     summary = reduce(initial_summary, final_prompt_template)
#     return {"summary": summary}



@app.post('/genai/api/v1/singQandA')
async def singQandA(input,file: UploadFile = File(...)):
    """Answer a question about a single uploaded PDF.

    The upload is stored under ``Repo``, its text is extracted and split into
    overlapping chunks, the chunks are indexed with FAISS, and the chunks most
    similar to the question are passed to a "stuff" question-answering chain.

    Args:
        input: The question to answer.
        file: The uploaded PDF.

    Returns:
        ``{"response": <answer>}``, or ``None`` when ``input`` is empty.
    """
    # The client controls the upload name: keep only its final path component
    # so values such as "../../x" cannot write outside "Repo".
    safe_name = os.path.basename((file.filename or "").replace("\\", "/"))
    if safe_name in ("", ".", ".."):
        safe_name = "upload.pdf"
    os.makedirs("Repo", exist_ok=True)
    file_location = os.path.join("Repo", safe_name)
    with open(file_location, "wb") as file_object:
        file_object.write(file.file.read())

    query = input
    if not query:
        # No question to answer: skip the parsing and embedding work.
        return None

    reader = PyPDF2.PdfReader(file_location)
    # extract_text() may yield nothing for a page; treat that as empty text.
    raw_text = "".join(page.extract_text() or "" for page in tqdm(reader.pages))

    text_splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=800,
        chunk_overlap=200,
        length_function=len,
    )
    texts = text_splitter.split_text(raw_text)
    if not texts:
        # An index cannot be built from zero chunks (e.g. an image-only PDF).
        return {"response": "No extractable text was found in the uploaded PDF."}

    embeddings = VertexAIEmbeddings()
    document_search = FAISS.from_texts(texts, embeddings)
    chain = load_qa_chain(VertexAI(), chain_type="stuff")
    docs = document_search.similarity_search(query)
    response = chain.run(input_documents=docs, question=query)
    return {"response":response}





@app.post('/genai/api/v1/contgen')
async def contgen(file: UploadFile = File(...)):
    """Generate a contract from an uploaded email document.

    The upload is stored under ``email``, its text is extracted with
    ``docx2txt`` and handed to ``Contract_Generation``.

    Args:
        file: The uploaded email document.

    Returns:
        ``{"res": <text>}`` where the text is every paragraph of the generated
        document, each one preceded by a newline.
    """
    # The client controls the upload name: keep only its final path component
    # so values such as "../../x" cannot write outside "email".
    safe_name = os.path.basename((file.filename or "").replace("\\", "/"))
    if safe_name in ("", ".", ".."):
        safe_name = "upload.docx"
    os.makedirs("email", exist_ok=True)
    file_location = os.path.join("email", safe_name)
    with open(file_location, "wb") as file_object:
        file_object.write(file.file.read())

    email = str(docx2txt.process(file_location))
    document=Contract_Generation(email,model_with_limit_and_backoff)
    res = "".join("\n" + para.text for para in document.paragraphs)
    return {'res':res}

@app.get("/genai/api/v1/download-file")
def download_file():
    """Serve the most recently generated contract as a .docx download.

    Returns:
        A ``FileResponse`` for ``email/docx/response.docx``.

    Raises:
        HTTPException: 404 when the file has not been generated yet.
    """
    # Local import: HTTPException is not among the file-level imports.
    from fastapi import HTTPException

    file_name ='response'
    folder_path = r"email/docx"
    file_location = f'{folder_path}{os.sep}{file_name}.docx'
    if not os.path.isfile(file_location):
        raise HTTPException(status_code=404, detail="No generated contract is available yet.")
    return FileResponse(
        file_location,
        media_type='application/vnd.openxmlformats-officedocument.wordprocessingml.document',
        # Include the extension so the downloaded file opens as a Word document.
        filename=f'{file_name}.docx',
    )

@app.post('/genai/api/v1/multipleQandA')
async def multipleQandA(input):
    """Answer a question against the persisted Chroma store in ``Repo/db``.

    A conversational retrieval chain is assembled on every call from the
    store's retriever, a fresh conversation memory and a Vertex AI LLM.

    Args:
        input: The question to answer.

    Returns:
        ``{"response": <chain output>}``, or ``None`` when ``input`` is empty.
    """
    store = Chroma(persist_directory='Repo/db', embedding_function=VertexAIEmbeddings())
    history = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
    llm = VertexAI(temperature=0.0, max_output_tokens=1024, model_name="text-bison@001")
    qa_chain = ConversationalRetrievalChain.from_llm(
        llm=llm,
        retriever=store.as_retriever(),
        memory=history,
        verbose=False,
    )

    if not input:
        return None
    answer = qa_chain({"question": input})
    return {"response": answer}


@app.get('/genai/api/v1/list_files')
async def list_files():
    """List the entries of ``Repo/summaries``.

    Returns:
        ``{"files": [...]}`` on success, or ``{"error": <message>}`` when the
        directory cannot be listed.
    """
    summaries_dir = "Repo/summaries"
    try:
        entries = os.listdir(summaries_dir)
    except Exception as err:
        return {"error" : str(err)}
    else:
        return {'files': entries}


@app.post('/genai/api/v1/display_summary')
async def display_summary(file_name:str):
    """Return the text of a stored summary document.

    Args:
        file_name: Name of the summary inside ``Repo/summaries``, with or
            without the ``.docx`` extension. Directory components are ignored.

    Returns:
        ``{"res": <text>}`` where the text is every paragraph of the document,
        each one preceded by a newline.
    """
    # file_name comes from the client: keep only the final path component so
    # the lookup cannot leave "Repo/summaries".
    safe_name = os.path.basename(file_name.replace("\\", "/"))
    path = f"Repo/summaries/{safe_name}.docx"
    if not os.path.isfile(path) and safe_name.lower().endswith(".docx"):
        # Names returned by list_files already carry the extension.
        path = f"Repo/summaries/{safe_name}"
    document = Document(path)
    res = "".join("\n" + para.text for para in document.paragraphs)
    return {'res':res}





In [1]:
import os
# Switch the working directory to the docker folder under sample_data.
# NOTE(review): the recorded output of this cell shows the call failing with NotADirectoryError.
os.chdir('/content/sample_data/docker/docker')

NotADirectoryError: [Errno 20] Not a directory: '/content/sample_data/docker/docker'

In [2]:
import os

# Create 'new_folder' relative to the current working directory;
# exist_ok=True keeps the call from failing when the folder is already there.
os.makedirs('new_folder', exist_ok=True)


In [3]:
!mkdir -p new_folder


In [4]:
# Work inside 'new_folder' so the relative paths written by later cells land there.
os.chdir('new_folder')



In [6]:
# Write the container build recipe (Dockerfile syntax) to 'docker.py'.
# NOTE(review): the target keeps a .py name although the content is a Dockerfile;
# `docker build` only uses it when it is passed explicitly with -f -- confirm this is intended.
with open('docker.py', 'w') as file:
    # HNSWLIB_NO_NATIVE is set with ENV: a variable exported inside one RUN
    # step is gone by the time the later `pip install` step runs.
    file.write('''FROM python:3.10.10-slim-bullseye

WORKDIR /

COPY docker/requirements.txt ./

ENV HNSWLIB_NO_NATIVE=1
RUN apt-get update && apt-get install build-essential -y
RUN pip install -r requirements.txt

COPY . .

ENTRYPOINT ["uvicorn", "test2:app" , "--host" , "0.0.0.0" , "--port" , "8080"]

''')


In [7]:
# Write the Python dependencies that the image installs with `pip install -r`.
# `backoff` is listed explicitly because the service imports it directly.
with open('requirements.txt','w') as file:
    file.write('''python-multipart
deep_translator
fastapi
uvicorn
pyyaml
happytransformer
google-api-core==2.11.0
google-auth==2.16.3
google-cloud==0.34.0
google-cloud-core==2.3.2
googleapis-common-protos==1.59.0
scikit-learn
pandas
chromadb
langchain==0.0.205
google-cloud-aiplatform==1.26.0
pypdf2
docx2txt
tqdm
requests
ratelimit
backoff
python-docx
faiss-cpu==1.7.4''')

In [8]:
# Write the list of paths to leave out of the Docker build context.
# NOTE(review): the file is saved as 'dockerignore' (no leading dot) while Docker
# looks for '.dockerignore' -- confirm whether a later step renames it.
with open('dockerignore','w') as file:
  file.write('''.git
.gitignore
Dockerfile
README.md
node_modules
npm-debug.log
build/
tmp/
.env
''')