In [None]:
#install the required libraries/dependencies
!pip install langchain_community pypdf langchain transformers sentence-transformers chromadb gradio accelerate fpdf

In [None]:
#Importing the required libraries/dependencies
import os
import uuid

import accelerate
import gradio as gr
import torch
from fpdf import FPDF
from langchain.document_loaders import PyPDFLoader
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer

In [None]:
# Initialize the language model
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model_name = "mistralai/Mistral-7B-Instruct-v0.2"

# Never hardcode credentials in a notebook: read the Hugging Face token from the
# HF_TOKEN environment variable. When it is unset, None is passed on to from_pretrained.
# NOTE(review): with token=None the Hugging Face libraries presumably fall back to a
# locally cached login — confirm for the installed version.
access_token = os.environ.get("HF_TOKEN")

# Weight placement is delegated to device_map="auto"; trust_remote_code is intentionally
# not enabled so that no code shipped inside the model repository gets executed.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    token=access_token,
    torch_dtype=torch.bfloat16,
    low_cpu_mem_usage=True,
    device_map="auto",
)

# Tokenizers have no device placement, so no `device` argument is passed here.
tokenizer = AutoTokenizer.from_pretrained(model_name, token=access_token)

# Text-generation pipeline used by the SOP generators (low temperature, small top_k).
text_pipeline = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    do_sample=True,
    top_k=3,
    temperature=0.1,
    max_new_tokens=1000,
    num_return_sequences=1,
)

# Turn off the memory-efficient and flash scaled-dot-product attention backends.
torch.backends.cuda.enable_mem_efficient_sdp(False)
torch.backends.cuda.enable_flash_sdp(False)

# Vector store of the most recently uploaded PDF; stays None until a PDF has been processed.
db = None


In [None]:
generated_sop_text = None  # Last generated SOP text: set by on_generate_sop, read by on_save_sop; None until an SOP has been generated

# Function to load and process the uploaded PDF
def process_pdf(pdf_file):
    """Load a PDF, chunk its text, and index the chunks in a fresh Chroma collection.

    Args:
        pdf_file: The uploaded PDF, given either as a path string or as an object
            exposing the path through a ``name`` attribute (what the upload
            widget hands over depends on the Gradio version).

    Returns:
        The Chroma vector store holding the embedded chunks of this PDF only.

    Raises:
        ValueError: If ``pdf_file`` is None.
    """
    if pdf_file is None:
        raise ValueError("No PDF file provided.")

    # Accept both a plain path and a file-like wrapper carrying the path in `.name`
    pdf_path = getattr(pdf_file, "name", pdf_file)

    # Load one document per page; the chunking happens exactly once below
    # (load_and_split() would already split with default settings before our splitter runs)
    loader = PyPDFLoader(pdf_path)
    documents = loader.load()

    # Split the documents into manageable chunks
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1024, chunk_overlap=128)
    texts = text_splitter.split_documents(documents)

    # Create embeddings for the text chunks
    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

    # Use a unique collection per upload: the store is persisted in "db2", so reusing the
    # default collection would keep adding chunks from earlier uploads and notebook runs,
    # and "all documents" would no longer mean "this PDF".
    collection_name = f"sop_{uuid.uuid4().hex}"
    db = Chroma.from_documents(
        texts,
        embeddings,
        collection_name=collection_name,
        persist_directory="db2",
    )
    return db

# Helper shared by both SOP generators: run a prompt and keep only the completion
def _generate_sop(prompt):
    """Run ``prompt`` through ``text_pipeline`` and return only the newly generated text.

    The first result's ``'generated_text'`` is used. If that text starts with the
    prompt (i.e. the pipeline echoed the input), the prompt is cut off by its exact
    length, so a completion that continues on the prompt's last line keeps its first
    line. If the prompt is not echoed, the text is returned as it is.

    Args:
        prompt: The full prompt string sent to the model.

    Returns:
        The completion with leading/trailing whitespace stripped.
    """
    generated_text = text_pipeline(prompt)[0]['generated_text']
    if generated_text.startswith(prompt):
        generated_text = generated_text[len(prompt):]
    return generated_text.strip()  # Strip any leading/trailing whitespace


# Function to generate SOP based on the PDF data
def generate_sop_from_pdf(db):
    """Generate an SOP from every chunk stored in the vector store.

    Args:
        db: Vector store whose ``get()`` returns a mapping with a ``'documents'``
            list of chunk strings.

    Returns:
        The generated SOP text without the prompt.
    """
    # Retrieve all documents from the database (public accessor instead of db._collection)
    # NOTE(review): the whole PDF goes into one prompt; very large PDFs may exceed
    # the model's context window — confirm acceptable input sizes.
    all_texts = db.get()['documents']
    combined_text = " ".join(all_texts)

    # Define the prompt with the specific structure and combined content
    prompt = f"""You are an expert in creating Standard Operating Procedures (SOP). Based on the following context, generate a detailed and structured SOP:

    Context:
    {combined_text}

    Ensure that the SOP is specific to the information provided in the context. Include relevant details and steps as outlined in the PDF content. The SOP should cover the following sections:

    1. Purpose: [State the purpose of this SOP]
    2. Scope: [Describe the scope of the SOP]
    3. Responsibilities: [List the responsibilities of individuals involved]
    4. Procedure:
       a. Step 1: [Detail the first step]
       b. Step 2: [Detail the second step]
       c. Step 3: [Detail the third step]
       d. Step 4: [Detail the fourth step]
       e. Step 5: [Detail the fifth step]

    Ensure the SOP is clear, concise, and easy to follow."""

    # Generate the SOP and drop the echoed prompt
    return _generate_sop(prompt)

# Function to generate SOP based on a specific question
def generate_sop_for_question(question, db):
    """Generate an SOP that answers ``question`` from the most relevant chunks.

    Args:
        question: The user's query; also embedded verbatim in the prompt.
        db: Vector store providing ``similarity_search(question, k=...)`` whose
            results expose the chunk text as ``page_content``.

    Returns:
        The generated SOP text without the prompt.
    """
    # Retrieve relevant chunks based on the question
    relevant_texts = db.similarity_search(question, k=5)  # Retrieve more chunks for better context
    combined_text = " ".join(text.page_content for text in relevant_texts)

    # Define the prompt with the specific structure
    prompt = f"""You are an expert in creating Standard Operating Procedures (SOP). Based on the following context, generate a detailed and structured SOP for the question '{question}':

    {combined_text}

    Ensure that the SOP is specific to the information provided in the context. Include relevant details and steps as outlined in the PDF content. The SOP should cover the following sections:

    1. Purpose: [State the purpose of this SOP]
    2. Scope: [Describe the scope of the SOP]
    3. Responsibilities: [List the responsibilities of individuals involved]
    4. Procedure:
       a. Step 1: [Detail the first step]
       b. Step 2: [Detail the second step]
       c. Step 3: [Detail the third step]
       d. Step 4: [Detail the fourth step]
       e. Step 5: [Detail the fifth step]

    Ensure the SOP is clear, concise, and easy to follow."""

    # Generate the SOP and drop the echoed prompt
    return _generate_sop(prompt)

# Typographic characters that models commonly emit, mapped to plain equivalents
# so they survive the Latin-1 conversion in save_sop_as_pdf
_PDF_SAFE_PUNCTUATION = str.maketrans({
    "\u2018": "'",
    "\u2019": "'",
    "\u201c": '"',
    "\u201d": '"',
    "\u2013": "-",
    "\u2014": "-",
    "\u2022": "*",
    "\u2026": "...",
})


# Function to save SOP as PDF
def save_sop_as_pdf(sop_text, file_path="generated_sop.pdf"):
    """Write ``sop_text`` to a PDF file, one ``multi_cell`` per input line.

    The text is first reduced to Latin-1: common typographic punctuation is mapped
    to ASCII equivalents and any remaining unsupported character becomes ``?``.
    NOTE(review): this assumes the built-in "Arial" font of the installed ``fpdf``
    package only handles Latin-1 (true for classic PyFPDF) — confirm if the
    package is swapped for a Unicode-capable one.

    Args:
        sop_text: The SOP text; ``None`` is treated as an empty string.
        file_path: Destination path of the PDF.

    Returns:
        ``file_path``, unchanged.
    """
    pdf = FPDF()
    pdf.add_page()
    pdf.set_font("Arial", size=12)

    for line in (sop_text or "").split("\n"):
        # Without this, characters outside Latin-1 make pdf.output() raise UnicodeEncodeError
        printable = (
            line.translate(_PDF_SAFE_PUNCTUATION)
            .encode("latin-1", "replace")
            .decode("latin-1")
        )
        pdf.multi_cell(0, 10, txt=printable, align='L')

    pdf.output(file_path)
    return file_path



In [None]:
# Gradio interface
def chat(chat_history, user_input, db):
    """Append the answer to ``user_input`` to the chat history and return the new history.

    Args:
        chat_history: List of ``(user_message, bot_message)`` tuples; ``None`` is
            treated as an empty history.
        user_input: The text the user submitted.
        db: Vector store to query, or ``None`` if no PDF has been processed yet.

    Returns:
        A new list: the previous history plus one ``(user_input, reply)`` tuple.
        The reply is a reminder to upload a PDF when ``db`` is ``None``, otherwise
        the result of ``generate_sop_for_question``.
    """
    # Work on a copy so the caller's list is never mutated
    history = list(chat_history or [])
    if db is None:
        # The reminder belongs in the bot slot, next to the user's own message
        reply = "Please upload a PDF file first."
    else:
        reply = generate_sop_for_question(user_input, db)
    history.append((user_input, reply))
    return history

# Two-tab UI: one tab to upload a PDF and generate/save an SOP, one to ask questions.
# NOTE(review): `db` and `generated_sop_text` are module-level globals, so they are
# presumably shared by everyone using the same running app — confirm this is acceptable.
with gr.Blocks(css=".gradio-container {background-color: lightblue}") as demo:
    gr.Markdown('# AutoSOP')

    with gr.Tab("Upload PDF and Generate SOP"):
        pdf_upload = gr.File(label="Upload your PDF file")
        upload_status = gr.Markdown()
        generate_button = gr.Button("Generate SOP", visible=False)
        save_button = gr.Button("Save SOP as PDF", visible=False)
        sop_output = gr.Markdown()
        download_output = gr.File(visible=False)  # Gradio file component for downloading the PDF

        def on_pdf_upload(pdf):
            """Index the uploaded PDF into the global `db` and reveal the generate button."""
            global db
            db = process_pdf(pdf)
            return "PDF uploaded and processed successfully.", gr.update(visible=True)

        pdf_upload.upload(on_pdf_upload, pdf_upload, [upload_status, generate_button])

        def on_generate_sop():
            """Generate the SOP from the indexed PDF, remember it, and reveal the save button."""
            global generated_sop_text  # Use global variable
            generated_sop_text = generate_sop_from_pdf(db)
            return generated_sop_text, gr.update(visible=True)

        generate_button.click(on_generate_sop, None, [sop_output, save_button])

        def on_save_sop():
            """Write the last generated SOP to a PDF and expose it for download."""
            file_path = save_sop_as_pdf(generated_sop_text)
            # gr.update is used like everywhere else in this UI; the per-component
            # gr.File.update helper is not available in newer Gradio releases
            return gr.update(value=file_path, visible=True)

        save_button.click(on_save_sop, None, download_output)  # Update download_output instead of save_button

    with gr.Tab("Ask AutoSOP"):
        chatbot = gr.Chatbot(height=300)
        message = gr.Textbox(label='Please type your query and press Enter.')
        clear = gr.ClearButton([message])

        # Define the submit action for the message input
        def on_message_submit(chat_history, user_input):
            """Answer the query and clear the textbox in the same event.

            Returns the updated chat history for the chatbot and an empty string
            for the textbox.
            """
            return chat(chat_history, user_input, db), ""

        # A single handler updates both outputs; the former second handler was a
        # one-argument lambda registered without inputs, so it could not be called
        message.submit(on_message_submit, [chatbot, message], [chatbot, message])

In [None]:
# Launch the Gradio app defined in `demo`.
# NOTE(review): debug=True presumably keeps the cell running and surfaces handler
# errors in the cell output — confirm against the Gradio launch() documentation.
demo.launch(debug=True)