In [None]:

import streamlit as st

# Placeholder function for processing the uploaded documents
def process_documents(uploaded_files):
    """Stand-in for the real document-processing step.

    The uploaded files are ignored for now; a fixed placeholder string is
    returned so the rest of the page flow can be exercised.
    """
    placeholder_result = "Placeholder processed output"
    return placeholder_result

# Placeholder function for postprocessing into PPT template
def postprocess_to_ppt(processed_output, selected_template):
    """Stand-in for the real PPT post-processing step.

    Both arguments are ignored for now; a fixed placeholder string is
    returned in place of generated PPT content.
    """
    placeholder_ppt = "Placeholder PPT content"
    return placeholder_ppt

def main():
    """Render the three-step page: upload, template selection, download.

    Results are kept in ``st.session_state`` (``processed_output`` and
    ``ppt_content``) so that they are still available on the rerun that
    follows a later button click.
    """
    st.title("Document Processing and PPT Generation App")

    # Step 1: Document Upload
    st.header("Step 1: Upload Documents")
    uploaded_files = st.file_uploader("Upload your documents", accept_multiple_files=True, type=["pdf"])
    process_button = st.button("Process Documents")

    if process_button:
        if uploaded_files:
            # Call the processing function on the uploaded documents
            processed_output = process_documents(uploaded_files)
            st.header("Processed Output")
            st.write(processed_output)

            # Store the processed output in session state
            st.session_state.processed_output = processed_output
        else:
            # Tell the user why nothing happened instead of silently ignoring the click.
            st.warning("Please upload at least one PDF before processing.")

    # Step 2: PPT Template Selection
    st.header("Step 2: Select PPT Template")
    selected_template = st.radio("Select PPT Template", options=["Template 1", "Template 2", "Template 3"])
    generate_ppt_button = st.button("Generate PPT")

    if generate_ppt_button:
        # The key only exists after a successful processing run; reading the
        # attribute unconditionally would raise if "Generate PPT" is clicked first.
        if "processed_output" in st.session_state:
            processed_output = st.session_state.processed_output
        else:
            processed_output = None

        if processed_output:
            # Call the postprocessing function to generate PPT content
            ppt_content = postprocess_to_ppt(processed_output, selected_template)
            # Display the PPT content
            st.markdown(ppt_content, unsafe_allow_html=True)

            # Store the PPT content in session state
            st.session_state.ppt_content = ppt_content
        else:
            st.warning("Please process your documents before generating the PPT.")

    # Step 3: PPT Download
    if "ppt_content" in st.session_state:
        ppt_content = st.session_state.ppt_content
        # Add a download button to download the PPT
        st.download_button("Download PPT", data=ppt_content, file_name="output.pptx", mime="application/vnd.openxmlformats-officedocument.presentationml.presentation")

# Call main() only when this module is executed as the entry script, not when it is imported.
if __name__ == "__main__":
    main()


In [1]:
%%writefile app8.py
#this is working smooth and perfect in the streamlit app. only thing left is the API call now
import os
import streamlit as st
from pptx import Presentation
from datetime import datetime  # Import the 'datetime' class from the 'datetime' module
from io import BytesIO
from PIL import Image


# Placeholder function for processing the uploaded documents
def process_documents(uploaded_files, tense, pls_grade):
    """Build the placeholder -> text mapping used to fill the PPT template.

    Every template placeholder is filled by one GPTAPIcall for its section
    key. ``uploaded_files`` is not used by this demonstration version.
    """
    # (template placeholder, section key passed to GPTAPIcall), in template order
    placeholder_sections = (
        ("<Title>", "title"),
        ("<Subtitle>", "subtitle"),
        ("<Introduction>", "introduction"),
        ("<Phonetics>", "phonetics"),
        ("<Key takeaway>", "keytakeaway"),
        ("<Results>", "results"),
        ("<Intro summary>", "conclusion"),
    )
    return {
        placeholder: GPTAPIcall(section, tense, pls_grade)
        for placeholder, section in placeholder_sections
    }

# Placeholder function for GPT API call
def GPTAPIcall(key, tense, pls_grade):
    """Return a placeholder string standing in for a GPT API response.

    The study status ("on-going" / "completed" / "upcoming") is translated to
    a grammatical tense; any other value is passed through unchanged.
    """
    # Study status -> grammatical tense
    status_to_tense = {
        "on-going": "present",
        "completed": "past",
        "upcoming": "future",
    }
    grammatical_tense = status_to_tense.get(tense, tense)

    return f"Placeholder value for {key} (Tense: {grammatical_tense}, PLS Grade: {pls_grade})"

# Placeholder function for postprocessing into PPT template
def postprocess_to_ppt(replacements, selected_template):
    """Fill the placeholder runs of a PPT template with generated text.

    Loads ``<selected_template>.pptx`` from the project directory. Every text
    run whose text is exactly equal to a key of ``replacements`` gets the
    mapped value as its new text; the font settings of the paragraph's first
    run (size, name, bold, italic and, when explicitly set, RGB colour) are
    then applied to all runs of that paragraph.

    Args:
        replacements: mapping of placeholder text (e.g. ``"<Title>"``) to the
            replacement text.
        selected_template: template file name without the ``.pptx`` extension.

    Returns:
        The modified ``Presentation`` object (nothing is written to disk here).
    """
    # Local import keeps this function self-contained; pptx is already a
    # dependency of this file.
    from pptx.enum.dml import MSO_COLOR_TYPE

    #rootdir = os.path.realpath('./')
    rootdir = "/home/cdsw/experimentation_project1/PLS_project"
    ppt_file = f"{selected_template}.pptx"
    prs = Presentation(os.path.join(rootdir, ppt_file))

    for slide in prs.slides:
        for shape in slide.shapes:
            if not shape.has_text_frame:
                continue
            for paragraph in shape.text_frame.paragraphs:
                paragraph_runs = paragraph.runs
                for run in paragraph_runs:
                    if run.text not in replacements:
                        continue

                    # Capture the formatting of the first run in the paragraph
                    reference_font = paragraph_runs[0].font
                    font_size = reference_font.size
                    font_name = reference_font.name
                    font_bold = reference_font.bold
                    font_italic = reference_font.italic

                    # color.type is an MSO_COLOR_TYPE member (or None), never the
                    # string "rgb", so the comparison has to use the enum.
                    reference_color = reference_font.color
                    if reference_color.type == MSO_COLOR_TYPE.RGB:
                        font_color = reference_color.rgb
                    else:
                        font_color = None

                    # Replace the placeholder text
                    run.text = replacements[run.text]

                    # Apply the captured formatting to the entire paragraph.
                    # A separate loop variable is used so the outer `run` is
                    # not overwritten.
                    for target_run in paragraph_runs:
                        target_font = target_run.font
                        target_font.size = font_size
                        target_font.name = font_name
                        target_font.bold = font_bold
                        target_font.italic = font_italic
                        if font_color is not None:
                            target_font.color.rgb = font_color

    # Return the modified presentation object
    return prs

def main():
    """Render the PLS generator page: upload, user inputs, template, download.

    The replacements dictionary and the generated presentation are kept in
    ``st.session_state`` (``replacements`` and ``ppt_content``) so they are
    still available on the rerun that follows a later button click.
    """
    #Custom icons
    img = Image.open('/home/cdsw/experimentation_project1/PLS_project/pfizer.png')
    st.set_page_config(page_title = 'Pfizer PLS Generator', page_icon = img)

    st.title("RCT Document Processing and PLS Generation App")

    # Step 1: Document Upload
    st.header("Step 1: Upload Documents")
    uploaded_files = st.file_uploader("Upload Clinical trial documents", accept_multiple_files=True, type=["pdf"])

    # Step 2: User Inputs
    st.header("Step 2: User Inputs")
    # Set default values for radio button and slider
    default_tense = "completed"
    default_pls_grade = 10

    # Radio button for tense selection
    tense_options = ["on-going", "completed", "upcoming"]
    tense = st.radio("Current status of the study", options=tense_options, key="tense", index=tense_options.index(default_tense))

    # Slider for PLS grade selection
    pls_grade = st.slider("Select the Grade of Plain language summary", min_value=1, max_value=10, step=1, key="pls_grade", value=default_pls_grade)

    process_button = st.button("Process Documents")

    if process_button:
        if uploaded_files:
            # Call the processing function on the uploaded documents with user inputs
            replacements = process_documents(uploaded_files, tense, pls_grade)
            st.header("Processed Output")
            st.write(replacements)

            # Store the replacements dictionary in session state
            st.session_state.replacements = replacements
        else:
            # Tell the user why nothing happened instead of silently ignoring the click.
            st.warning("Please upload at least one PDF before processing.")

    # Step 3: PPT Template Selection and Download
    st.header("Step 3: Select PLS PPT Template and Download")

    default_template = "PLS_PPT_Template"
    template_options = ["PLS_PPT_Template", "Template2", "Template3"]
    selected_template = st.radio("Select PPT Template", options=template_options, index=template_options.index(default_template))
    generate_ppt_button = st.button("Generate PLS")

    if generate_ppt_button:
        # The key only exists after a successful processing run; reading the
        # attribute unconditionally would raise if "Generate PLS" is clicked first.
        if "replacements" in st.session_state:
            replacements = st.session_state.replacements
        else:
            replacements = None

        if replacements:
            with st.spinner('Generating awesome slides for you...⏳'):
                # postprocess_to_ppt returns a Presentation object, not markdown
                # text, so it is stored for download rather than rendered.
                ppt_content = postprocess_to_ppt(replacements, selected_template)

                # Store the PPT content in session state
                st.session_state.ppt_content = ppt_content
        else:
            st.warning("Please process the documents before generating the PLS.")

    # Step 3: PPT Download
    if "ppt_content" in st.session_state:
        ppt_content = st.session_state.ppt_content
        ppt_output_file = "PLS_output.pptx"

        # save presentation as binary output
        binary_output = BytesIO()
        ppt_content.save(binary_output)

        # display success message and download button
        st.success('The slides have been generated! :tada:')

        # Provide the download link for the generated PPT
        st.download_button("Download PLS", data=binary_output.getvalue(), file_name=ppt_output_file, mime="application/vnd.openxmlformats-officedocument.presentationml.presentation")

# Call main() only when this module is executed as the entry script, not when it is imported.
if __name__ == "__main__":
    main()


Overwriting app8.py


In [10]:
# Just writing this code for debugging the above code - not using the web app. This works well.
from pptx import Presentation
from pptx.enum.dml import MSO_COLOR_TYPE
import os
rootdir = os.path.realpath('./')
selected_template = "PLS_PPT_Template"
ppt_file = f"{selected_template}.pptx"
prs = Presentation(os.path.join(rootdir, ppt_file))

# Literal stand-ins for the generated text, keyed by template placeholder
replacements = {
        "<Title>": 'GPTAPIcall("title", tense, pls_grade)',
        "<Subtitle>": 'GPTAPIcall("subtitle", tense, pls_grade)',
        "<Introduction>": 'GPTAPIcall("introduction", tense, pls_grade)',
        "<Phonetics>": 'GPTAPIcall("phonetics", tense, pls_grade)',
        "<Key takeaway>": 'GPTAPIcall("keytakeaway", tense, pls_grade)',
        "<Results>": 'GPTAPIcall("results", tense, pls_grade)',
        "<Intro summary>": 'GPTAPIcall("conclusion", tense, pls_grade)'
    }

for slide in prs.slides:
    for shape in slide.shapes:
        if not shape.has_text_frame:
            continue
        for paragraph in shape.text_frame.paragraphs:
            paragraph_runs = paragraph.runs
            for run in paragraph_runs:
                if run.text not in replacements:
                    continue

                # Capture the formatting of the first run in the paragraph
                reference_font = paragraph_runs[0].font
                font_size = reference_font.size
                font_name = reference_font.name
                font_bold = reference_font.bold
                font_italic = reference_font.italic

                # color.type is an MSO_COLOR_TYPE member (or None), never the
                # string "rgb", so the comparison has to use the enum.
                reference_color = reference_font.color
                if reference_color.type == MSO_COLOR_TYPE.RGB:
                    font_color = reference_color.rgb
                else:
                    font_color = None

                # Replace the placeholder text
                run.text = replacements[run.text]

                # Apply the captured formatting to the entire paragraph.
                # A separate loop variable is used so the outer `run` is
                # not overwritten.
                for target_run in paragraph_runs:
                    target_font = target_run.font
                    target_font.size = font_size
                    target_font.name = font_name
                    target_font.bold = font_bold
                    target_font.italic = font_italic
                    if font_color is not None:
                        target_font.color.rgb = font_color


prs.save('updated_presentation.pptx')
print("end")

end


In [6]:
import os

# Resolve the template path relative to the current working directory and print it.
selected_template = "PLS_PPT_Template"
rootdir = os.path.realpath('./')
ppt_file = "{}.pptx".format(selected_template)
template_path = os.path.join(rootdir, ppt_file)
print(template_path)

/home/cdsw/experimentation_project1/PLS_project/PLS_PPT_Template.pptx


In [15]:
# Same path check as above, but with the project directory hard-coded.
selected_template = "PLS_PPT_Template"
rootdir = "/home/cdsw/experimentation_project1/PLS_project"
ppt_file = "{}.pptx".format(selected_template)
template_path = os.path.join(rootdir, ppt_file)
print(template_path)

/home/cdsw/experimentation_project1/PLS_project/PLS_PPT_Template.pptx


In [9]:
%%writefile app8.py
#enhancing the above perfect code with GPT API call -- working smooth for title/intro/kt/results section; don't touch it
import os
import urllib
import base64
import json
import streamlit as st
from pptx import Presentation
from datetime import datetime  # Import the 'datetime' class from the 'datetime' module
from io import BytesIO
from PIL import Image
from shakti3_index import llama_listvectorkeyword_index

rootdir = "/home/cdsw/experimentation_project1/PLS_project"
datadir = "/home/cdsw/experimentation_project1/PLS_project/data"

#function to read prompt from corresponding text file
def prompt(file):
    """Return the entire text content of the prompt file at path ``file``."""
    with open(file) as prompt_file:
        contents = prompt_file.read()
    return contents
    
#function to save a file
def save_uploadedfile(uploaded_file):
    """Write the uploaded file into ``datadir`` under its own name.

    Shows a Streamlit success message and returns whatever ``st.success``
    returns.
    """
    destination = os.path.join(datadir, uploaded_file.name)
    with open(destination, "wb") as target:
        target.write(uploaded_file.getbuffer())
    return st.success(f"""Saved File:{uploaded_file.name} to directory""")

@st.cache_data
def displayPDF(file):
    """Render the PDF at path ``file`` inside a small inline iframe.

    The file is read, base64-encoded and embedded as a data URI.
    NOTE(review): with st.cache_data the result is presumably keyed on the
    path argument, so a different file saved under the same name may show
    stale content - confirm.
    """
    # Read the raw PDF bytes from disk
    with open(file, "rb") as pdf_file:
        pdf_bytes = pdf_file.read()
    base64_pdf = base64.b64encode(pdf_bytes).decode('utf-8')

    # Embed the encoded PDF in an HTML iframe and display it
    pdf_display = f'<iframe src="data:application/pdf;base64,{base64_pdf}" width="300" height="200" type="application/pdf"></iframe>'
    st.markdown(pdf_display, unsafe_allow_html=True)

# Placeholder function for processing the uploaded documents
def process_documents(uploaded_file, tense, pls_grade):
    """Generate the APLS section texts for one clinical trial document.

    For each enabled section a query is appended to the persona prompt and
    sent to ``llama_listvectorkeyword_index`` together with the document.

    Args:
        uploaded_file: document argument forwarded to
            ``llama_listvectorkeyword_index`` (the caller passes a file path).
        tense: study status ("on-going", "completed", "upcoming"); mapped to
            a grammatical tense, any other value is used as given.
        pls_grade: reading grade inserted into the query.

    Returns:
        dict mapping template placeholders ("<Title>", "<Introduction>",
        "<Key takeaway>", "<Results>") to the returned section content.
    """
    tense_mapping = {"on-going": "present", "completed": "past", "upcoming": "future"}
    tense = tense_mapping.get(tense, tense)

    # The persona prompt is identical for every section, so read it once
    # instead of once per section.
    persona_prompt = prompt(os.path.join(rootdir, 'apls_persona_2606.txt'))

    # (template placeholder, section name used in the query).
    # "<Subtitle>", "<Phonetics>" and "<Intro summary>" are not generated yet.
    sections = (
        ("<Title>", "Title"),
        ("<Introduction>", "Introduction"),
        ("<Key takeaway>", "Key Takeaway"),
        ("<Results>", "Results"),
    )

    replacements = {}
    for placeholder, section in sections:
        query = f"Strictly following the above instructions and the clinical trial document provided, write the content of '{section}' section of the APLS in {tense}, comprehendable by a grade {pls_grade} student. Do not violate the section-wise instructions provided in any case. The content should be strictly inferred from the clinical trial document provided only and not any other sources.\n"
        replacements[placeholder] = llama_listvectorkeyword_index(uploaded_file, persona_prompt + "\n\n\n" + query)
    return replacements

# Placeholder function for GPT API call
def GPTAPIcall(key, tense, pls_grade):
    """Return a placeholder string standing in for a GPT API response.

    The study status ("on-going" / "completed" / "upcoming") is translated to
    a grammatical tense; any other value is passed through unchanged.
    """
    # Study status -> grammatical tense
    status_to_tense = {
        "on-going": "present",
        "completed": "past",
        "upcoming": "future",
    }
    grammatical_tense = status_to_tense.get(tense, tense)

    return f"Placeholder value for {key} (Tense: {grammatical_tense}, PLS Grade: {pls_grade})"

# Placeholder function for postprocessing into PPT template
def postprocess_to_ppt(replacements, selected_template):
    """Fill the placeholder runs of a PPT template with generated text.

    Loads ``<selected_template>.pptx`` from ``rootdir``. Every text run whose
    text is exactly equal to a key of ``replacements`` gets the mapped value
    as its new text; the font settings of the paragraph's first run (size,
    name, bold, italic and, when explicitly set, RGB colour) are then applied
    to all runs of that paragraph.

    Args:
        replacements: mapping of placeholder text (e.g. ``"<Title>"``) to the
            replacement text.
        selected_template: template file name without the ``.pptx`` extension.

    Returns:
        The modified ``Presentation`` object (nothing is written to disk here).
    """
    # Local import keeps this function self-contained; pptx is already a
    # dependency of this file.
    from pptx.enum.dml import MSO_COLOR_TYPE

    ppt_file = f"{selected_template}.pptx"
    prs = Presentation(os.path.join(rootdir, ppt_file))

    for slide in prs.slides:
        for shape in slide.shapes:
            if not shape.has_text_frame:
                continue
            for paragraph in shape.text_frame.paragraphs:
                paragraph_runs = paragraph.runs
                for run in paragraph_runs:
                    if run.text not in replacements:
                        continue

                    # Capture the formatting of the first run in the paragraph
                    reference_font = paragraph_runs[0].font
                    font_size = reference_font.size
                    font_name = reference_font.name
                    font_bold = reference_font.bold
                    font_italic = reference_font.italic

                    # color.type is an MSO_COLOR_TYPE member (or None), never the
                    # string "rgb", so the comparison has to use the enum.
                    reference_color = reference_font.color
                    if reference_color.type == MSO_COLOR_TYPE.RGB:
                        font_color = reference_color.rgb
                    else:
                        font_color = None

                    # Replace the placeholder text
                    run.text = replacements[run.text]

                    # Apply the captured formatting to the entire paragraph.
                    # A separate loop variable is used so the outer `run` is
                    # not overwritten.
                    for target_run in paragraph_runs:
                        target_font = target_run.font
                        target_font.size = font_size
                        target_font.name = font_name
                        target_font.bold = font_bold
                        target_font.italic = font_italic
                        if font_color is not None:
                            target_font.color.rgb = font_color

    # Return the modified presentation object
    return prs

def main():
    """Render the PLS generator page: sidebar inputs, processing, template, download.

    The replacements dictionary and the generated presentation are kept in
    ``st.session_state`` (``replacements`` and ``ppt_content``) so they are
    still available on the rerun that follows a later button click.
    """
    #Custom icons
    img = Image.open(os.path.join(rootdir, 'pfizer.png'))
    st.set_page_config(page_title = 'Pfizer PLS Generator', page_icon = img)

    st.title("RCT Document Processing and PLS Generation App")
    with st.sidebar:
        # Step 1: Document Upload
        st.header("Step 1: Upload Documents")
        uploaded_file = st.file_uploader("Upload Clinical trial document", accept_multiple_files=False, type=["pdf"])

        # Step 2: User Inputs
        st.header("Step 2: User Inputs")
        # Set default values for radio button and slider
        default_tense = "completed"
        default_pls_grade = 5

        # Radio button for tense selection
        tense_options = ["on-going", "completed", "upcoming"]
        tense = st.radio("Current status of the study", options=tense_options, key="tense", index=tense_options.index(default_tense))
        # CSS override so the radio options are laid out in a row
        st.write('<style>div.row-widget.stRadio > div{flex-direction:row;}</style>', unsafe_allow_html=True)

        # Slider for PLS grade selection
        pls_grade = st.slider("Select the Grade of Plain language summary", min_value=1, max_value=10, step=1, key="pls_grade", value=default_pls_grade)

        process_button = st.button("Process Documents")

    if process_button:
        if uploaded_file:
            col1, col2 = st.columns([1,2])
            with col1:
                # Save the upload to datadir, then preview it from that path
                save_uploadedfile(uploaded_file)
                pdf_file = os.path.join(datadir, uploaded_file.name)
                displayPDF(pdf_file)
            with col2:
                with st.spinner(text='Processing trial doc...⏳'):
                    # Call the processing function on the saved document with user inputs
                    replacements = process_documents(pdf_file, tense, pls_grade)
                    st.success("Processed Output")
                    st.write(replacements)

            # Store the replacements dictionary in session state
            st.session_state.replacements = replacements
        else:
            # Tell the user why nothing happened instead of silently ignoring the click.
            st.warning("Please upload a clinical trial PDF before processing.")

    # Step 3: PPT Template Selection and Download
    st.header("Step 3: Select PLS PPT Template and Download")

    default_template = "PLS_PPT_Template"
    template_options = ["PLS_PPT_Template", "Template2", "Template3"]
    selected_template = st.radio("Select PPT Template", options=template_options, index=template_options.index(default_template))
    st.write('<style>div.row-widget.stRadio > div{flex-direction:row;}</style>', unsafe_allow_html=True)

    generate_ppt_button = st.button("Generate PLS")

    if generate_ppt_button:
        # The key only exists after a successful processing run; reading the
        # attribute unconditionally would raise if "Generate PLS" is clicked first.
        if "replacements" in st.session_state:
            replacements = st.session_state.replacements
        else:
            replacements = None

        if replacements:
            with st.spinner('Generating awesome slides for you...⏳'):
                # postprocess_to_ppt returns a Presentation object, not markdown
                # text, so it is stored for download rather than rendered.
                ppt_content = postprocess_to_ppt(replacements, selected_template)

                # Store the PPT content in session state
                st.session_state.ppt_content = ppt_content
        else:
            st.warning("Please process the document before generating the PLS.")

    # Step 3: PPT Download
    if "ppt_content" in st.session_state:
        ppt_content = st.session_state.ppt_content
        ppt_output_file = "PLS_output.pptx"

        # save presentation as binary output
        binary_output = BytesIO()
        ppt_content.save(binary_output)

        # display success message and download button
        st.success('The slides have been generated! :tada:')

        # Provide the download link for the generated PPT
        st.download_button("Download PLS", data=binary_output.getvalue(), file_name=ppt_output_file, mime="application/vnd.openxmlformats-officedocument.presentationml.presentation")

# Call main() only when this module is executed as the entry script, not when it is imported.
if __name__ == "__main__":
    main()


Overwriting app8.py


In [12]:
%%writefile app8.py
#enhance the GPT API call function here..and put couple of banner/logo -- do not touch
import os
import urllib
import base64
import json
import streamlit as st
from pptx import Presentation
from datetime import datetime  # Import the 'datetime' class from the 'datetime' module
from io import BytesIO
from PIL import Image
from shakti3_index import llama_listvectorkeyword_index

rootdir = "/home/cdsw/experimentation_project1/PLS_project"
datadir = "/home/cdsw/experimentation_project1/PLS_project/data"

#function to set background image
def set_bg_hack(main_bg):
    '''
    Embed an image file as the background of the Streamlit app.

    The file at ``main_bg`` is read, base64-encoded and injected through
    ``st.markdown`` as a CSS rule for ``.stApp``.

    Parameters
    ----------
    main_bg : str
        Path of the image file. The data URI is always labelled as PNG.

    Returns
    -------
    None
    '''
    # set bg name
    main_bg_ext = "png"

    # Read inside a context manager so the file handle is closed promptly.
    with open(main_bg, "rb") as image_file:
        encoded_bg = base64.b64encode(image_file.read()).decode()

    st.markdown(
         f"""
         <style>
         .stApp {{
             background: url(data:image/{main_bg_ext};base64,{encoded_bg});
             background-size: cover
         }}
         </style>
         """,
         unsafe_allow_html=True
    )

#function to read prompt from corresponding text file
def prompt(file):
    """Return the entire text content of the prompt file at path ``file``."""
    with open(file) as prompt_file:
        contents = prompt_file.read()
    return contents
    
#function to save a file
def save_uploadedfile(uploaded_file):
    """Write the uploaded file into ``datadir`` under its own name.

    Shows a Streamlit success message and returns whatever ``st.success``
    returns.
    """
    destination = os.path.join(datadir, uploaded_file.name)
    with open(destination, "wb") as target:
        target.write(uploaded_file.getbuffer())
    return st.success(f"""Saved File:{uploaded_file.name} to directory""")

@st.cache_data
def displayPDF(file):
    """Render the PDF at path ``file`` inside a small inline iframe.

    The file is read, base64-encoded and embedded as a data URI.
    NOTE(review): with st.cache_data the result is presumably keyed on the
    path argument, so a different file saved under the same name may show
    stale content - confirm.
    """
    # Read the raw PDF bytes from disk
    with open(file, "rb") as pdf_file:
        pdf_bytes = pdf_file.read()
    base64_pdf = base64.b64encode(pdf_bytes).decode('utf-8')

    # Embed the encoded PDF in an HTML iframe and display it
    pdf_display = f'<iframe src="data:application/pdf;base64,{base64_pdf}" width="300" height="200" type="application/pdf"></iframe>'
    st.markdown(pdf_display, unsafe_allow_html=True)

# Placeholder function for processing the uploaded documents
def process_documents(uploaded_file, tense, pls_grade):
    """Generate the replacement text for each enabled template placeholder.

    Each enabled placeholder is filled by one GPTAPIcall for its section;
    the tense mapping and query construction happen inside GPTAPIcall.
    """
    # Placeholder -> section label passed to GPTAPIcall. Only the title is
    # enabled at the moment. Currently disabled entries:
    #   "<Subtitle>"      -> "subtitle"
    #   "<Introduction>"  -> "'Introduction'"
    #   "<Phonetics>"     -> "phonetics"
    #   "<Key takeaway>"  -> "'Key Takeaway'"
    #   "<Results>"       -> "'Results'"
    #   "<Intro summary>" -> "conclusion"
    enabled_sections = (
        ("<Title>", "'Title'"),
    )

    replacements = {}
    for placeholder, section_label in enabled_sections:
        replacements[placeholder] = GPTAPIcall(uploaded_file, section_label, tense, pls_grade)
    return replacements

# Placeholder function for GPT API call
def GPTAPIcall(uploaded_file, key, tense, pls_grade):
    """Ask the index for the content of one APLS section.

    The study status is translated to a grammatical tense (unknown values
    pass through unchanged), a section-specific query is appended to the
    persona prompt read from ``rootdir``, and the combined prompt is sent
    with the document to ``llama_listvectorkeyword_index``, whose result is
    returned as is.
    """
    # Study status -> grammatical tense
    status_to_tense = {
        "on-going": "present",
        "completed": "past",
        "upcoming": "future",
    }
    tense = status_to_tense.get(tense, tense)

    query = f"Strictly following the above instructions and the clinical trial document provided, write the content of {key} section of the APLS in {tense}, comprehendable by a grade {pls_grade} student. Do not violate the section-wise instructions provided in any case. The content should be strictly inferred from the clinical trial document provided only and not any other sources.\n"

    persona_path = os.path.join(rootdir, 'apls_persona_2606.txt')
    full_prompt = prompt(persona_path) + "\n\n\n" + query
    return llama_listvectorkeyword_index(uploaded_file, full_prompt)

# Placeholder function for postprocessing into PPT template
def postprocess_to_ppt(replacements, selected_template):
    """Fill a PPT template with the generated section texts.

    Loads ``<selected_template>.pptx`` from ``rootdir``. Every text run whose
    text is exactly one of the placeholder keys in ``replacements`` gets the
    mapped text. The font settings of the paragraph's first run (size, name,
    bold, italic and, when explicitly set as RGB, colour) are then applied to
    every run of that paragraph.

    Args:
        replacements: Mapping of placeholder text (e.g. ``"<Title>"``) to the
            text that replaces it.
        selected_template: Template file name without the ``.pptx`` suffix.

    Returns:
        The modified ``Presentation`` object; it is not saved here.
    """
    # Local import keeps this function self-contained; pptx is already a
    # dependency of this file.
    from pptx.enum.dml import MSO_COLOR_TYPE

    ppt_file = f"{selected_template}.pptx"
    prs = Presentation(os.path.join(rootdir, ppt_file))

    for slide in prs.slides:
        for shape in slide.shapes:
            if not shape.has_text_frame:
                continue
            for paragraph in shape.text_frame.paragraphs:
                for run in paragraph.runs:
                    if run.text not in replacements:
                        continue

                    # Capture the formatting of the first run in the paragraph
                    first_font = paragraph.runs[0].font
                    font_size = first_font.size
                    font_name = first_font.name
                    font_bold = first_font.bold
                    font_italic = first_font.italic

                    # ColorFormat.type is an MSO_COLOR_TYPE member (or None),
                    # never a string, so it must be compared with the enum.
                    first_color = first_font.color
                    if first_color.type == MSO_COLOR_TYPE.RGB:
                        font_color = first_color.rgb
                    else:
                        font_color = None

                    # Replace the placeholder text
                    run.text = replacements[run.text]

                    # Apply the captured formatting to the entire paragraph.
                    # A separate loop name avoids clobbering the outer `run`.
                    for sibling in paragraph.runs:
                        sibling.font.size = font_size
                        sibling.font.name = font_name
                        sibling.font.bold = font_bold
                        sibling.font.italic = font_italic
                        if font_color is not None:
                            sibling.font.color.rgb = font_color

    # Return the modified presentation object
    return prs

def main():
    """Render the Streamlit PLS generator page and drive its workflow.

    The sidebar collects the clinical trial PDF, the study status and the
    health literacy grade. "Process Documents" saves the PDF into ``datadir``,
    displays it, runs ``process_documents`` and stores the resulting dict in
    ``st.session_state.replacements``. "Generate PLS" passes that dict and the
    selected template to ``postprocess_to_ppt`` and offers the resulting
    presentation as a download.
    """
    # Page icon and tab name on the browser tab
    img = Image.open(os.path.join(rootdir, 'pfizer.png'))
    st.set_page_config(page_title = 'Pfizer PLS Generator', page_icon = img)

    # Banner image
    st.image(Image.open(os.path.join(rootdir, 'Pfizer-AI.jpg')))

    # Input components live on the sidebar
    with st.sidebar:
        st.image(Image.open(os.path.join(rootdir, 'iqvia-pls-generator.png')))
        # Step 1: Document Upload
        st.subheader("Step 1: Upload Clinical trial document")
        uploaded_file = st.file_uploader("Upload document", accept_multiple_files=False, type=["pdf"])

        # Step 2: User Inputs
        st.subheader("Step 2: Define the tone and Grade of PLS")
        # Default values for the radio button and slider
        tense_options = ["on-going", "completed", "upcoming"]
        default_tense = "completed"
        default_pls_grade = 5

        # Radio button for tense selection
        tense = st.radio("Current status of the study", options=tense_options, key="tense", index=tense_options.index(default_tense))
        st.write('<style>div.row-widget.stRadio > div{flex-direction:row;}</style>', unsafe_allow_html=True)

        # Slider for PLS grade selection
        pls_grade = st.slider("Select the Health Literacy Grade Reading level", min_value=1, max_value=10, step=1, key="pls_grade", value=default_pls_grade)

        process_button = st.button("Process Documents")

    if process_button:
        if uploaded_file:
            col1, col2 = st.columns([1,2])
            with col1:
                save_uploadedfile(uploaded_file)
                pdf_file = os.path.join(datadir, uploaded_file.name)
                displayPDF(pdf_file)
            with col2:
                with st.spinner(text='Processing trial doc...⏳'):
                    # Run the processing function on the saved document with the user inputs
                    replacements = process_documents(pdf_file, tense, pls_grade)
                    st.success("Processed Output")
                    st.write(replacements)

            # Keep the result across reruns so Step 3 can use it
            st.session_state.replacements = replacements
        else:
            # Tell the user why nothing happened instead of silently ignoring the click
            st.warning("Please upload a clinical trial document before processing.")

    # Step 3: PPT Template Selection and Download
    st.subheader("Step 3: Select PLS PPT Template and Download")

    template_options = ["PLS_PPT_Template", "Template2", "Template3"]
    default_template = "PLS_PPT_Template"
    selected_template = st.radio("Select PPT Template", options=template_options, index=template_options.index(default_template))
    st.write('<style>div.row-widget.stRadio > div{flex-direction:row;}</style>', unsafe_allow_html=True)

    generate_ppt_button = st.button("Generate PLS")

    if generate_ppt_button:
        # .get() avoids an AttributeError when "Generate PLS" is clicked
        # before any document has been processed in this session.
        replacements = st.session_state.get("replacements")

        if replacements:
            with st.spinner('Generating awesome slides for you...⏳'):
                # Fill the selected template with the generated texts
                ppt_content = postprocess_to_ppt(replacements, selected_template)

                st.markdown(list(replacements.keys()))

                # Store the presentation object in session state
                st.session_state.ppt_content = ppt_content

                # Step 4: PPT Download
                ppt_output_file = "PLS_output.pptx"

                # Serialise the presentation into memory for the download button
                binary_output = BytesIO()
                ppt_content.save(binary_output)

                st.success(':tada: The PLS template has been filled with above sections ')

                st.download_button("Download PLS", data=binary_output.getvalue(), file_name=ppt_output_file, mime="application/vnd.openxmlformats-officedocument.presentationml.presentation")
        else:
            st.warning("Please process a document (Steps 1 and 2) before generating the PLS.")

# Build the page only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()


Overwriting app8.py


In [7]:
from datetime import datetime 
# Preview the timestamped file-name pattern for a generated deck
print(f"PLS_{datetime.now():%Y%m%d%H%M%S}.pptx")

PLS_20230704074221.pptx


In [6]:
%%writefile app8.py
#implemented all 3 ppt templates (streaming didnt work)-- baselined version - 6Jul -2:30pm - dont touch
import os
import urllib
import base64
import json
import streamlit as st
from pptx import Presentation
from pptx.util import Inches
from pptx.dml.color import RGBColor
from datetime import datetime  # Import the 'datetime' class from the 'datetime' module
import time
from io import BytesIO
from PIL import Image
from shakti3_index import llama_listvectorkeyword_index
from streamlit_pills import pills

# Base directory from which the app loads its images, the persona prompt file and the PPT templates.
rootdir = "/home/cdsw/experimentation_project1/PLS_project"
# Directory uploaded PDFs are written to and read back from.
datadir = "/home/cdsw/experimentation_project1/PLS_project/data"

#function to set background image
def set_bg_hack(main_bg):
    '''
    Set the image stored at ``main_bg`` as the app background.

    The file is read, base64-encoded and injected through ``st.markdown``
    as a CSS ``background`` rule on ``.stApp``. The data URI is always
    labelled ``image/jpg``, whatever the extension of ``main_bg`` is.

    Parameters
    ----------
    main_bg : path of the image file to embed.

    Returns
    -------
    None
    '''
    # set bg name
    main_bg_ext = "jpg"

    # Read through a context manager so the file handle is closed promptly
    with open(main_bg, "rb") as image_file:
        encoded_bg = base64.b64encode(image_file.read()).decode()

    st.markdown(
         f"""
         <style>
         .stApp {{
             background: url(data:image/{main_bg_ext};base64,{encoded_bg});
             background-size: cover
         }}
         </style>
         """,
         unsafe_allow_html=True
     )

def sidebar_bg(side_bg):
    """Set the image stored at ``side_bg`` as the sidebar background.

    The file is read, base64-encoded and injected through ``st.markdown`` as
    a CSS ``background`` rule on the first child of the ``stSidebar``
    element. The data URI is always labelled ``image/png``.
    """
    side_bg_ext = 'png'

    # Read through a context manager so the file handle is closed promptly
    with open(side_bg, "rb") as image_file:
        encoded_bg = base64.b64encode(image_file.read()).decode()

    st.markdown(
      f"""
      <style>
      [data-testid="stSidebar"] > div:first-child {{
          background: url(data:image/{side_bg_ext};base64,{encoded_bg});
      }}
      </style>
      """,
      unsafe_allow_html=True,
      )
    
def header_bg(side_bg):
    """Set the image stored at ``side_bg`` as the page header background.

    The file is read, base64-encoded and injected through ``st.markdown`` as
    a CSS ``background`` rule on ``header.css-1avcm0n``. The data URI is
    always labelled ``image/png``.
    """
    # NOTE(review): the class name in the selector looks generated by
    # Streamlit and may differ between versions; confirm after upgrades.
    side_bg_ext = 'png'

    # Read through a context manager so the file handle is closed promptly
    with open(side_bg, "rb") as image_file:
        encoded_bg = base64.b64encode(image_file.read()).decode()

    st.markdown(
      f"""
      <style>
      header.css-1avcm0n {{
          background: url(data:image/{side_bg_ext};base64,{encoded_bg});
      }}
      </style>
      """,
      unsafe_allow_html=True,
      )
    
#function to read prompt from corresponding text file
def prompt(file):
    """Return the full text of the prompt file at ``file``.

    The file is decoded as UTF-8 explicitly so the result does not depend on
    the locale's default encoding.
    """
    with open(file, encoding="utf-8") as f:
        return f.read()
    
#function to save a file
def save_uploadedfile(uploaded_file):
    """Write an uploaded file into ``datadir`` and show a success message.

    Args:
        uploaded_file: Object exposing ``name`` and ``getbuffer()``, as
            returned by ``st.file_uploader``.

    Returns:
        The result of the ``st.success`` call that reports the saved file.
    """
    # Create the target directory on first use so the write below cannot
    # fail with FileNotFoundError.
    os.makedirs(datadir, exist_ok=True)
    # NOTE(review): uploaded_file.name is used as-is for the destination;
    # consider os.path.basename() if names may contain path separators.
    # Callers rebuild the same path, so change both places together.
    with open(os.path.join(datadir, uploaded_file.name), "wb") as f:
        f.write(uploaded_file.getbuffer())
    return st.success(f"""Saved File:{uploaded_file.name} to directory""")

@st.cache_data
def _pdf_iframe_html(file, modified_ns):
    """Return the iframe HTML that embeds ``file`` as a base64 data URI.

    ``modified_ns`` is not used in the body. It only takes part in the cache
    key, so a different file saved under the same path is read again instead
    of being served from the cache.
    """
    # Opening file from file path
    with open(file, "rb") as f:
        base64_pdf = base64.b64encode(f.read()).decode('utf-8')

    # Embedding PDF in HTML
    return F'<iframe src="data:application/pdf;base64,{base64_pdf}" width="700" height="900" type="application/pdf"></iframe>'


#function to display the PDF of a given file
def displayPDF(file):
    """Render the PDF at path ``file`` inline on the page.

    The encoded HTML is cached per (path, modification time), so re-uploading
    a different document with the same file name shows the new content.
    Returns None.
    """
    pdf_display = _pdf_iframe_html(file, os.stat(file).st_mtime_ns)

    # Displaying File
    st.markdown(pdf_display, unsafe_allow_html=True)

# Placeholder function for processing the uploaded documents
def process_documents(uploaded_file, tense, pls_grade):
    """Generate the text for every PLS placeholder of one document.

    Each placeholder listed below is passed, in order, to ``GPTAPIcall``
    together with ``uploaded_file``, ``tense`` and ``pls_grade``; the result
    is converted with ``str``.

    Returns a dict mapping each placeholder to its generated text.
    """
    # Placeholder strings are passed on unchanged as both dict key and section name
    section_placeholders = (
        "<Title>",
        "<Subtitle>",
        "<Key takeaway>",
        "<Phonetics>",
        "<Introduction>",
        "<Intro summary>",
        "<Inclusion criteria>",
        "<Exclusion crtieria>",
        "<Results>",
        "<Aims>",
        "<Conclusions>",
        "<Sponsor>",
        "<More Information>",
    )

    return {
        placeholder: str(GPTAPIcall(uploaded_file, placeholder, tense, pls_grade))
        for placeholder in section_placeholders
    }

# Placeholder function for GPT API call
def GPTAPIcall(uploaded_file, key, tense, pls_grade):
    """Build the query for one APLS section and send it to the document index.

    The study status in ``tense`` is translated to a grammatical tense
    (values that are not a known status are used unchanged). The query is
    appended to the persona prompt read from ``apls_persona_2606.txt`` under
    ``rootdir`` and handed to ``llama_listvectorkeyword_index`` together with
    ``uploaded_file``.

    Returns whatever ``llama_listvectorkeyword_index`` returns.
    """
    # Study status -> grammatical tense used in the query
    status_to_tense = {
        "on-going": "present",
        "completed": "past",
        "upcoming": "future",
    }
    tense = status_to_tense.get(tense, tense)

    query = f"Strictly following the above instructions and the clinical trial document provided, write the content of {key} section of the APLS in {tense}, comprehendable by a {pls_grade} health literacy grade person. Do not violate the section-wise instructions provided in any case. The content should be strictly inferred from the clinical trial document provided only and not any other sources."

    persona_path = os.path.join(rootdir, 'apls_persona_2606.txt')
    full_prompt = prompt(persona_path) + "\n" + query
    return llama_listvectorkeyword_index(uploaded_file, full_prompt)

# Placeholder function for postprocessing into PPT template
def postprocess_to_ppt(replacements, selected_template):
    """Fill a PPT template with the generated section texts.

    Loads ``<selected_template>.pptx`` from ``rootdir``. Every text run whose
    text is exactly one of the placeholder keys in ``replacements`` gets the
    mapped text. The font settings of the paragraph's first run (size, name,
    bold, italic and, when explicitly set as RGB, colour) are then applied to
    every run of that paragraph.

    Args:
        replacements: Mapping of placeholder text (e.g. ``"<Title>"``) to the
            text that replaces it.
        selected_template: Template file name without the ``.pptx`` suffix.

    Returns:
        The modified ``Presentation`` object; it is not saved here.
    """
    # Local import keeps this function self-contained; pptx is already a
    # dependency of this file.
    from pptx.enum.dml import MSO_COLOR_TYPE

    ppt_file = f"{selected_template}.pptx"
    prs = Presentation(os.path.join(rootdir, ppt_file))

    for slide in prs.slides:
        for shape in slide.shapes:
            if not shape.has_text_frame:
                continue
            for paragraph in shape.text_frame.paragraphs:
                for run in paragraph.runs:
                    if run.text not in replacements:
                        continue

                    # Capture the formatting of the first run in the paragraph
                    first_font = paragraph.runs[0].font
                    font_size = first_font.size
                    font_name = first_font.name
                    font_bold = first_font.bold
                    font_italic = first_font.italic

                    # ColorFormat.type is an MSO_COLOR_TYPE member (or None),
                    # never a string, so it must be compared with the enum.
                    first_color = first_font.color
                    if first_color.type == MSO_COLOR_TYPE.RGB:
                        font_color = first_color.rgb
                    else:
                        font_color = None

                    # Replace the placeholder text
                    run.text = replacements[run.text]

                    # Apply the captured formatting to the entire paragraph.
                    # A separate loop name avoids clobbering the outer `run`.
                    for sibling in paragraph.runs:
                        sibling.font.size = font_size
                        sibling.font.name = font_name
                        sibling.font.bold = font_bold
                        sibling.font.italic = font_italic
                        if font_color is not None:
                            sibling.font.color.rgb = font_color

    # Return the modified presentation object
    return prs

def main():
    """Render the Streamlit MAIA page and drive the PLS workflow.

    The sidebar collects the clinical trial PDF, the study status and the
    health literacy level. "Process Documents" saves the PDF into ``datadir``,
    displays it, runs ``process_documents`` and stores the resulting dict in
    ``st.session_state.replacements``. "Generate PLS" passes that dict and the
    selected template to ``postprocess_to_ppt`` and offers the resulting
    presentation as a download.
    """
    # Page icon and tab name on the browser tab
    st.set_page_config(page_title = 'MAIA', page_icon = ":robot_face:", layout="wide")

    # Background images for the page, the sidebar and the header
    set_bg_hack(os.path.join(rootdir, 'iqvia-bg.jpg'))
    sidebar_bg(os.path.join(rootdir, 'iqvia-blue.png'))
    header_bg(os.path.join(rootdir, 'iqvia-dark-blue.png'))

    # Banner image
    st.image(Image.open(os.path.join(rootdir, 'Pfizer-AI.jpg')))

    # Input components live on the sidebar
    with st.sidebar:
        st.image(Image.open(os.path.join(rootdir, 'iqvia-logo.png')))
        # Title
        st.markdown("""<h3 style='text-align: center; color:red'>MAIA - Medical Affairs Intelligence Assistant</h3>""", unsafe_allow_html=True)

        # Step 1: Document Upload
        st.subheader("Step 1: Upload Clinical trial document")
        uploaded_file = st.file_uploader("Upload document", accept_multiple_files=False, type=["pdf"])

        # Step 2: User Inputs
        st.subheader("Step 2: Define the tone and Grade of PLS")
        # Default values for the radio button and slider
        tense_options = ["on-going", "completed", "upcoming"]
        default_tense = "completed"
        default_pls_grade = "Moderate"

        # Radio button for tense selection
        tense = st.radio("Current status of the study", options=tense_options, key="tense", index=tense_options.index(default_tense))
        st.write('<style>div.row-widget.stRadio > div{flex-direction:row;}</style>', unsafe_allow_html=True)

        # Slider for PLS grade selection
        pls_grade = st.select_slider("Health Literacy Grade Reading level", options=["Low", "Moderate", "High"], key="pls_grade", value = default_pls_grade)

        process_button = st.button("Process Documents")

    if process_button:
        if uploaded_file:
            col1, col2 = st.columns([0.3,0.7])
            with col1:
                save_uploadedfile(uploaded_file)
                pdf_file = os.path.join(datadir, uploaded_file.name)
                displayPDF(pdf_file)
            with col2:
                with st.spinner(text='Processing trial doc...⏳'):
                    # Run the processing function on the saved document with the user inputs
                    replacements = process_documents(pdf_file, tense, pls_grade)
                    st.success("Processed Output")

                    # Display processed output
                    st.write(replacements)

            # Keep the result across reruns so Step 3 can use it
            st.session_state.replacements = replacements
        else:
            # Tell the user why nothing happened instead of silently ignoring the click
            st.warning("Please upload a clinical trial document before processing.")

    # Step 3: PPT Template Selection and Download
    st.subheader("Step 3: Select PLS Template and Download")

    template_options = ["Pfizer_Blue_PLS_Template", "Pfizer_Red_PLS_Template", "Pfizer_Long_PLS_Template"]
    default_template = "Pfizer_Blue_PLS_Template"
    selected_template = st.radio("Select PPT Template", options=template_options, index=template_options.index(default_template))
    st.write('<style>div.row-widget.stRadio > div{flex-direction:row;}</style>', unsafe_allow_html=True)

    generate_ppt_button = st.button("Generate PLS")

    if generate_ppt_button:
        # .get() avoids an AttributeError when "Generate PLS" is clicked
        # before any document has been processed in this session.
        replacements = st.session_state.get("replacements")

        if replacements:
            with st.spinner('Generating awesome slides for you...⏳'):
                # Fill the selected template with the generated texts
                ppt_content = postprocess_to_ppt(replacements, selected_template)

                st.markdown(list(replacements.keys()))

                # Store the presentation object in session state
                st.session_state.ppt_content = ppt_content

                # Step 4: PPT Download
                ppt_output_file = "PLS_output.pptx"

                # Serialise the presentation into memory for the download button
                binary_output = BytesIO()
                ppt_content.save(binary_output)

                st.success(':tada: The PLS template has been filled with above sections in ' + selected_template)

                st.download_button("Download PLS", data=binary_output.getvalue(), file_name=ppt_output_file, mime="application/vnd.openxmlformats-officedocument.presentationml.presentation")
        else:
            st.warning("Please process a document (Steps 1 and 2) before generating the PLS.")

# Build the page only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()


Overwriting app8.py


In [None]:
#####################################################version for Pfizer demo###############################################################

In [4]:
%%writefile app9.py
#enhanced for taking csv input and producing graphs - baselined version
import os
import pandas as pd
import urllib
import base64
import json
import streamlit as st
from pptx import Presentation
from datetime import datetime  # Import the 'datetime' class from the 'datetime' module
import time
from io import BytesIO
from PIL import Image
from shakti3_index import llama_listvectorkeyword_index
from shakti_pptreport import postprocess_to_ppt
from streamlit_pills import pills

# Base directory from which the app loads its images and the persona prompt file.
rootdir = "/home/cdsw/experimentation_project1/PLS_project"
# Directory uploaded PDFs are written to.
datadir = "/home/cdsw/experimentation_project1/PLS_project/data"

#function to set background image
def set_bg_hack(main_bg):
    '''
    Set the image stored at ``main_bg`` as the app background.

    The file is read, base64-encoded and injected through ``st.markdown``
    as a CSS ``background`` rule on ``.stApp``. The data URI is always
    labelled ``image/jpg``, whatever the extension of ``main_bg`` is.

    Parameters
    ----------
    main_bg : path of the image file to embed.

    Returns
    -------
    None
    '''
    # set bg name
    main_bg_ext = "jpg"

    # Read through a context manager so the file handle is closed promptly
    with open(main_bg, "rb") as image_file:
        encoded_bg = base64.b64encode(image_file.read()).decode()

    st.markdown(
         f"""
         <style>
         .stApp {{
             background: url(data:image/{main_bg_ext};base64,{encoded_bg});
             background-size: cover
         }}
         </style>
         """,
         unsafe_allow_html=True
     )

def sidebar_bg(side_bg):
    """Set the image stored at ``side_bg`` as the sidebar background.

    The file is read, base64-encoded and injected through ``st.markdown`` as
    a CSS ``background`` rule on the first child of the ``stSidebar``
    element. The data URI is always labelled ``image/png``.
    """
    side_bg_ext = 'png'

    # Read through a context manager so the file handle is closed promptly
    with open(side_bg, "rb") as image_file:
        encoded_bg = base64.b64encode(image_file.read()).decode()

    st.markdown(
      f"""
      <style>
      [data-testid="stSidebar"] > div:first-child {{
          background: url(data:image/{side_bg_ext};base64,{encoded_bg});
      }}
      </style>
      """,
      unsafe_allow_html=True,
      )
    
def header_bg(side_bg):
    """Set the image stored at ``side_bg`` as the page header background.

    The file is read, base64-encoded and injected through ``st.markdown`` as
    a CSS ``background`` rule on ``header.css-1avcm0n``. The data URI is
    always labelled ``image/png``.
    """
    # NOTE(review): the class name in the selector looks generated by
    # Streamlit and may differ between versions; confirm after upgrades.
    side_bg_ext = 'png'

    # Read through a context manager so the file handle is closed promptly
    with open(side_bg, "rb") as image_file:
        encoded_bg = base64.b64encode(image_file.read()).decode()

    st.markdown(
      f"""
      <style>
      header.css-1avcm0n {{
          background: url(data:image/{side_bg_ext};base64,{encoded_bg});
      }}
      </style>
      """,
      unsafe_allow_html=True,
      )
    
#function to read prompt from corresponding text file
def prompt(file):
    """Return the full text of the prompt file at ``file``.

    The file is decoded as UTF-8 explicitly so the result does not depend on
    the locale's default encoding.
    """
    with open(file, encoding="utf-8") as f:
        return f.read()
    
#function to save a file
def save_uploadedfile(uploaded_file):
    """Write an uploaded file into ``datadir`` and show a success message.

    Args:
        uploaded_file: Object exposing ``name`` and ``getbuffer()``, as
            returned by ``st.file_uploader``.

    Returns:
        The result of the ``st.success`` call that reports the saved file.
    """
    # Create the target directory on first use so the write below cannot
    # fail with FileNotFoundError.
    os.makedirs(datadir, exist_ok=True)
    # NOTE(review): uploaded_file.name is used as-is for the destination;
    # consider os.path.basename() if names may contain path separators.
    with open(os.path.join(datadir, uploaded_file.name), "wb") as f:
        f.write(uploaded_file.getbuffer())
    return st.success(f"""Saved File:{uploaded_file.name} to directory""")

@st.cache_data
def _pdf_iframe_html(file, modified_ns):
    """Return the iframe HTML that embeds ``file`` as a base64 data URI.

    ``modified_ns`` is not used in the body. It only takes part in the cache
    key, so a different file saved under the same path is read again instead
    of being served from the cache.
    """
    # Opening file from file path
    with open(file, "rb") as f:
        base64_pdf = base64.b64encode(f.read()).decode('utf-8')

    # Embedding PDF in HTML
    return F'<iframe src="data:application/pdf;base64,{base64_pdf}" width="300" height="200" type="application/pdf"></iframe>'


#function to display the PDF of a given file
def displayPDF(file):
    """Render the PDF at path ``file`` inline on the page.

    The encoded HTML is cached per (path, modification time), so re-uploading
    a different document with the same file name shows the new content.
    Returns None.
    """
    pdf_display = _pdf_iframe_html(file, os.stat(file).st_mtime_ns)

    # Displaying File
    st.markdown(pdf_display, unsafe_allow_html=True)

# Placeholder function for processing the uploaded documents
def process_documents(uploaded_file, tense, pls_grade):
    """Generate the text for every PLS placeholder of one document.

    Each placeholder listed below is passed, in order, to ``GPTAPIcall``
    together with ``uploaded_file``, ``tense`` and ``pls_grade``; the result
    is converted with ``str``.

    Returns a dict mapping each placeholder to its generated text.
    """
    # Placeholder strings are passed on unchanged as both dict key and section name
    section_placeholders = (
        "<Title>",
        "<Introduction>",
    )

    return {
        placeholder: str(GPTAPIcall(uploaded_file, placeholder, tense, pls_grade))
        for placeholder in section_placeholders
    }

# Placeholder function for GPT API call
def GPTAPIcall(uploaded_file, key, tense, pls_grade):
    """Build the query for one APLS section and send it to the document index.

    The study status in ``tense`` is translated to a grammatical tense
    (values that are not a known status are used unchanged). The query is
    appended to the persona prompt read from ``apls_persona_2606.txt`` under
    ``rootdir`` and handed to ``llama_listvectorkeyword_index`` together with
    ``uploaded_file``.

    Returns whatever ``llama_listvectorkeyword_index`` returns.
    """
    # Study status -> grammatical tense used in the query
    status_to_tense = {
        "on-going": "present",
        "completed": "past",
        "upcoming": "future",
    }
    tense = status_to_tense.get(tense, tense)

    query = f"Strictly following the above instructions and the clinical trial document provided, write the content of {key} section of the APLS in {tense}, comprehendable by a {pls_grade} health literacy grade person. Do not violate the section-wise instructions provided in any case. The content should be strictly inferred from the clinical trial document provided only and not any other sources.\n"

    persona_path = os.path.join(rootdir, 'apls_persona_2606.txt')
    full_prompt = prompt(persona_path) + "\n\n\n" + query
    return llama_listvectorkeyword_index(uploaded_file, full_prompt)

# postprocess_to_ppt is imported from shakti_pptreport in this version; the earlier inline implementation is kept below, commented out, for reference.
# def postprocess_to_ppt(replacements, selected_template):
#     # Implement the postprocessing logic here
#     # For demonstration purposes, we'll load a presentation object and copy the text from replacements dictionary
    
#     #rootdir = os.path.realpath('./')
    
#     #selected_template = "PLS_PPT_Template"
#     ppt_file = f"{selected_template}.pptx"
#     prs = Presentation(os.path.join(rootdir, ppt_file))

#     for slide in prs.slides:
#         for shape in slide.shapes:
#             if shape.has_text_frame:
#                 text_frame = shape.text_frame
#                 for paragraph in text_frame.paragraphs:
#                     for run in paragraph.runs:
#                         for placeholder, new_text in replacements.items():
#                             if run.text == placeholder:
#                                 # Preserve formatting of the first run in the paragraph
#                                 first_run = paragraph.runs[0]
#                                 font_size = first_run.font.size
#                                 font_name = first_run.font.name
#                                 font_bold = first_run.font.bold
#                                 font_italic = first_run.font.italic

#                                 # Check if font color is explicitly defined
#                                 if first_run.font.color.type == "rgb":
#                                     font_color = first_run.font.color.rgb
#                                 else:
#                                     font_color = None

#                                 # Replace text while preserving formatting
#                                 run.text = new_text

#                                 # Apply preserved formatting to the entire paragraph
#                                 for run in paragraph.runs:
#                                     run.font.size = font_size
#                                     run.font.name = font_name
#                                     run.font.bold = font_bold
#                                     run.font.italic = font_italic
#                                     if font_color:
#                                         run.font.color.rgb = font_color

#     # Return the modified presentation object
#     return prs

def main():
    """Render the PLS Generator page: inputs, processing, template choice and download.

    Steps 1-2 (sidebar) collect a trial PDF, a results CSV, the study status and
    the health-literacy grade. "Process Documents" saves and previews the PDF,
    shows the CSV and stores the output of ``process_documents`` in
    ``st.session_state.replacements``. Step 3 fills the selected PPT template via
    ``postprocess_to_ppt`` and offers the result as a download.
    """
    # Page icon and title shown on the browser tab
    img = Image.open(os.path.join(rootdir, 'pfizer.png'))
    st.set_page_config(page_title = 'Pfizer PLS Generator', page_icon = img, layout="wide")

    # Background images for the page, the sidebar and the header
    set_bg_hack(os.path.join(rootdir, 'iqvia-bg.jpg'))
    sidebar_bg(os.path.join(rootdir, 'iqvia-blue.png'))
    header_bg(os.path.join(rootdir, 'iqvia-dark-blue.png'))

    # Banner image
    st.image(Image.open(os.path.join(rootdir, 'Pfizer-AI.jpg')))

    # Input components live on the sidebar
    with st.sidebar:
        st.image(Image.open(os.path.join(rootdir, 'iqvia-logo.png')))
        st.header("PLS Generator")

        # Step 1: Document Upload
        st.subheader("Step 1: Upload Clinical trial document")
        uploaded_file = st.file_uploader("Upload document", accept_multiple_files=False, type=["pdf"])
        csv_file = st.file_uploader("Upload csv file for Results related numericals", accept_multiple_files=False, type=["csv"])

        # Step 2: User Inputs
        st.subheader("Step 2: Define the tone and Grade of PLS")
        tense_options = ["on-going", "completed", "upcoming"]
        tense = st.radio("Current status of the study", options=tense_options, key="tense", index=tense_options.index("completed"))
        # CSS override that lays the radio options out in a row
        st.write('<style>div.row-widget.stRadio > div{flex-direction:row;}</style>', unsafe_allow_html=True)

        pls_grade = st.select_slider("Health Literacy Grade Reading level", options=["Low", "Moderate", "High"], key="pls_grade", value="Moderate")

        process_button = st.button("Process Documents")

    if process_button and uploaded_file and csv_file:
        col1, col2 = st.columns([1,2])
        with col1:
            save_uploadedfile(uploaded_file)
            pdf_file = os.path.join(datadir, uploaded_file.name)
            displayPDF(pdf_file)

            df = pd.read_csv(csv_file)
            st.write(df)

        with col2:
            with st.spinner(text='Processing trial doc...⏳'):
                # Call the processing function on the saved document with user inputs
                replacements = process_documents(pdf_file, tense, pls_grade)
                st.success("Processed Output")
                st.write(replacements)

        # Keep the result across reruns so Step 3 can use it
        st.session_state.replacements = replacements

    # Step 3: PPT Template Selection and Download
    st.subheader("Step 3: Select PLS PPT Template and Download")
    selected_template = pills("", ["PLS_PPT_Template", "PLS_Red_PPT_Template", "Table_Chart_PPT_Template"], ["🍀", "🎈", "🌈"])

    generate_ppt_button = st.button("Generate PLS")

    if generate_ppt_button:
        # .get() avoids an AttributeError when nothing has been processed yet
        replacements = st.session_state.get("replacements")

        if not replacements:
            st.warning("Please upload and process a document before generating the PLS.")
        else:
            with st.spinner('Generating awesome slides for you...⏳'):
                # Fill the selected template with the processed sections
                ppt_content = postprocess_to_ppt(replacements, selected_template)
                st.markdown(list(replacements.keys()))
                st.session_state.ppt_content = ppt_content

                # Step 4: serialise the presentation in memory and offer it for download
                ppt_output_file = "PLS_output.pptx"
                binary_output = BytesIO()
                ppt_content.save(binary_output)

                st.success(':tada: The PLS template has been filled with above sections in ' + selected_template)
                st.download_button("Download PLS", data=binary_output.getvalue(), file_name=ppt_output_file, mime="application/vnd.openxmlformats-officedocument.presentationml.presentation")

# Call main() only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()


Overwriting app9.py


In [None]:
###############################yet to add authenticator page and streaming text###############################################

In [None]:
##########################adding one word answers and doc download#############################################

In [15]:
%%writefile app8.py
# Enhancing: baselined version with the QnA feature added. Don't touch -- Pfizer version.
import os
import urllib
import base64
import json
import streamlit as st
from pptx import Presentation
from pptx.util import Inches
from pptx.dml.color import RGBColor
from docx import Document
from docx.shared import Pt
from docx.enum.text import WD_PARAGRAPH_ALIGNMENT
from datetime import datetime  # Import the 'datetime' class from the 'datetime' module
import time
from io import BytesIO
from PIL import Image
from shakti_stream_index import llama_vector_index
from streamlit_pills import pills
import streamlit_authenticator as stauth
from streamlit_option_menu import option_menu
from PyPDF2 import PdfReader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.chains.question_answering import load_qa_chain
from langchain.llms import OpenAI
from langchain.callbacks import get_openai_callback


# Project root: images, PPT templates and the prompt file are resolved against it.
rootdir = "/home/cdsw/experimentation_project1/PLS_project"
# Uploaded documents are written to and read from the "data" folder under the root.
datadir = f"{rootdir}/data"

#function to set background image
def set_bg_hack(main_bg):
    '''
    Set the image at ``main_bg`` as the app background.

    The file is read, base64-encoded and injected as a CSS ``data:`` URL on the
    ``.stApp`` element via ``st.markdown``.

    Parameters
    ----------
    main_bg : path of the image file to embed.

    Returns
    -------
    None
    '''
    # Extension used in the data URL's media type
    main_bg_ext = "jpg"

    # Read through a context manager so the file handle is always closed
    with open(main_bg, "rb") as image_file:
        encoded_bg = base64.b64encode(image_file.read()).decode()

    st.markdown(
         f"""
         <style>
         .stApp {{
             background: url(data:image/{main_bg_ext};base64,{encoded_bg});
             background-size: cover
         }}
         </style>
         """,
         unsafe_allow_html=True
     )

def sidebar_bg(side_bg):
   """Set the image at ``side_bg`` as the sidebar background.

   The file is base64-encoded and injected as a CSS ``data:`` URL on the first
   child of the ``stSidebar`` element via ``st.markdown``.
   """
   # Extension used in the data URL's media type
   side_bg_ext = 'png'

   # Read through a context manager so the file handle is always closed
   with open(side_bg, "rb") as image_file:
      encoded_bg = base64.b64encode(image_file.read()).decode()

   st.markdown(
      f"""
      <style>
      [data-testid="stSidebar"] > div:first-child {{
          background: url(data:image/{side_bg_ext};base64,{encoded_bg});
      }}
      </style>
      """,
      unsafe_allow_html=True,
      )
    
def header_bg(side_bg):
   """Set the image at ``side_bg`` as the page header background.

   The file is base64-encoded and injected as a CSS ``data:`` URL via
   ``st.markdown``.
   """
   # Extension used in the data URL's media type
   side_bg_ext = 'png'

   # Read through a context manager so the file handle is always closed
   with open(side_bg, "rb") as image_file:
      encoded_bg = base64.b64encode(image_file.read()).decode()

   # NOTE(review): "css-1avcm0n" looks like a generated class name and is
   # presumably tied to one Streamlit release - confirm after upgrades.
   st.markdown(
      f"""
      <style>
      header.css-1avcm0n {{
          background: url(data:image/{side_bg_ext};base64,{encoded_bg});
      }}
      </style>
      """,
      unsafe_allow_html=True,
      )
    
#function to read prompt from corresponding text file
def prompt(file):
    """Return the entire text content of the prompt file at ``file``."""
    with open(file) as prompt_handle:
        prompt_text = prompt_handle.read()
    return prompt_text
    
#function to save a file
def save_uploadedfile(uploaded_file):
    """Write the uploaded file into ``datadir`` under its own name.

    Returns the result of the ``st.success`` call that reports the save.
    """
    destination = os.path.join(datadir, uploaded_file.name)
    with open(destination, "wb") as out_file:
        out_file.write(uploaded_file.getbuffer())
    confirmation = f"""Saved File:{uploaded_file.name} to directory"""
    return st.success(confirmation)

# NOTE(review): st.cache_data is applied with the path as the only argument, so a
# re-uploaded file with the same name presumably reuses the cached render - confirm.
@st.cache_data
def displayPDF(file):
    """Show the PDF stored at path ``file`` inline in the page.

    The file is base64-encoded and embedded as the ``data:`` source of a fixed
    size iframe, which is rendered through ``st.markdown``.
    """
    # Load the raw bytes of the PDF
    with open(file, "rb") as pdf_handle:
        raw_pdf = pdf_handle.read()
    base64_pdf = base64.b64encode(raw_pdf).decode('utf-8')

    # Build the iframe markup around the encoded document
    pdf_display = f'<iframe src="data:application/pdf;base64,{base64_pdf}" width="300" height="1100" type="application/pdf"></iframe>'

    # Render it
    st.markdown(pdf_display, unsafe_allow_html=True)

# Placeholder function for processing the uploaded documents
def process_documents(uploaded_file, tense, pls_grade):
    """Build the placeholder -> text mapping used to fill the PLS templates.

    For each narrative placeholder a sub-header is shown and ``GPTAPIcall`` is
    asked for the section text, which is stored as a string. A fixed set of
    study facts plus today's date (``<Summary date>``) is then appended.

    Args:
        uploaded_file: Forwarded unchanged to ``GPTAPIcall``.
        tense: Forwarded unchanged to ``GPTAPIcall``.
        pls_grade: Forwarded unchanged to ``GPTAPIcall``.

    Returns:
        dict mapping placeholder strings such as ``"<Title>"`` to text.
    """
    # Placeholders whose text is generated, in the order they are requested
    narrative_placeholders = (
        "<Title>",
        "<Subtitle>",
        "<Key takeaway>",
        "<Phonetics>",
        "<Introduction>",
        "<Intro summary>",
        "<Inclusion criteria>",
        "<Exclusion crtieria>",
        "<Results>",
        "<Aims>",
        "<Conclusions>",
        "<Sponsor>",
        "<More Information>",
    )

    filled = {}
    for placeholder in narrative_placeholders:
        # Heading shows the placeholder name without its angle brackets
        st.subheader(f""":red[{placeholder[1:-1]} :]""")
        section_text = GPTAPIcall(uploaded_file, placeholder, tense, pls_grade)
        filled[placeholder] = str(section_text)

    # Hard-coded study facts appended after the generated sections
    study_facts = {
        "<Participants>": "274",
        "<Disease condition>": "Sickle cell disease",
        "<Demographics>": "Aged 12 to 65 years",
        "<treatment arm>": "182",
        "<control arm>": "92",
        "<Study number>": "NCT03036813",
        "<Start date>": "April 2018",
        "<End date>": "April 2021",
        "<clinical trials gov link>": "https://clinicaltrials.gov/ct2/show/NCT03036813",
        "<Summary date>": datetime.now().strftime('%d-%b-%Y'),
    }
    filled.update(study_facts)

    return filled

# Placeholder function for GPT API call
def GPTAPIcall(uploaded_file, key, tense, pls_grade):
    """Request the text of one APLS section from the document index.

    Args:
        uploaded_file: Passed unchanged to ``llama_vector_index``.
        key: Section name inserted into the query (e.g. ``"<Title>"``).
        tense: Study status. ``on-going`` / ``completed`` / ``upcoming`` (any
            letter case) map to present / past / future; any other value is
            inserted into the query as given.
        pls_grade: Health-literacy grade label inserted into the query.

    Returns:
        Whatever ``llama_vector_index`` returns for the persona prompt followed
        by the section query.
    """
    tense_mapping = {"on-going": "present", "completed": "past", "upcoming": "future"}
    # The sidebar radio supplies capitalised labels ("On-going", "Completed",
    # "Upcoming"), so normalise before the lookup or the mapping never applies.
    tense = tense_mapping.get(str(tense).strip().lower(), tense)

    query = f"Strictly following the above instructions and the clinical trial document provided, write the content of {key} section of the APLS in {tense}, comprehendable by a {pls_grade} health literacy grade person. Do not violate the section-wise instructions provided in any case. The content should be strictly inferred from the clinical trial document provided only and not any other sources."

    persona = prompt(os.path.join(rootdir, 'apls_persona_2606.txt'))
    return llama_vector_index(uploaded_file, persona + "\n" + query)

# Placeholder function for postprocessing into PPT template
def postprocess_to_ppt(replacements, selected_template):
    """Fill a PPT template by swapping placeholder runs for their text.

    The template ``<selected_template>.pptx`` is loaded from ``rootdir``. Every
    text run whose text equals a key of ``replacements`` is replaced by the
    matching value, and the formatting of the paragraph's first run (size,
    name, bold, italic, explicit RGB colour) is applied to all runs of that
    paragraph.

    Args:
        replacements: Mapping of placeholder text to replacement text.
        selected_template: Template file name without the ``.pptx`` suffix.

    Returns:
        The modified ``Presentation`` object (not saved to disk).
    """
    # Imported here so the block is self-contained; pptx is already a dependency.
    from pptx.enum.dml import MSO_COLOR_TYPE

    ppt_file = f"{selected_template}.pptx"
    prs = Presentation(os.path.join(rootdir, ppt_file))

    for slide in prs.slides:
        for shape in slide.shapes:
            if not shape.has_text_frame:
                continue
            for paragraph in shape.text_frame.paragraphs:
                for run in paragraph.runs:
                    if run.text not in replacements:
                        continue

                    # Preserve formatting of the first run in the paragraph
                    first_font = paragraph.runs[0].font
                    font_size = first_font.size
                    font_name = first_font.name
                    font_bold = first_font.bold
                    font_italic = first_font.italic

                    # ColorFormat.type is an MSO_COLOR_TYPE member (or None), never
                    # the string "rgb", so compare against the enum.
                    color = first_font.color
                    font_color = color.rgb if color.type == MSO_COLOR_TYPE.RGB else None

                    # Replace text while preserving formatting
                    run.text = str(replacements[run.text])

                    # Apply preserved formatting to the entire paragraph; a distinct
                    # loop name keeps the outer `run` intact.
                    for styled_run in paragraph.runs:
                        styled_run.font.size = font_size
                        styled_run.font.name = font_name
                        styled_run.font.bold = font_bold
                        styled_run.font.italic = font_italic
                        if font_color is not None:
                            styled_run.font.color.rgb = font_color

    # Return the modified presentation object
    return prs


# Placeholder function for postprocessing into DOC template
def postprocess_to_doc(replacements, max_sections=12):
    """Build a Word document from the replacements mapping.

    Layout: the title (key ``"<Title>"``, or ``"Title"``) as a heading, then the
    next ``max_sections`` entries as heading + paragraph sections, then every
    remaining entry as a row of an "Additional Information" table. Angle
    brackets around placeholder names are dropped in headings and table labels.

    Args:
        replacements: Ordered mapping of placeholder name to text.
        max_sections: Number of entries after the title rendered as sections.

    Returns:
        The ``Document`` object (not saved to disk).
    """
    document = Document()

    # Base font size for the document body
    document.styles['Normal'].font.size = Pt(11)

    # The mapping uses "<Title>"; plain "Title" is also accepted
    title_key = next((k for k in ("<Title>", "Title") if k in replacements), None)
    if title_key is not None and replacements[title_key]:
        heading = document.add_heading(str(replacements[title_key]), level=1)
        # Bold is a run-level property; setting it on the paragraph has no effect
        for run in heading.runs:
            run.bold = True

    # Everything except the title, in insertion order, split once into the two parts
    body_items = [(k, v) for k, v in replacements.items() if k != title_key]
    section_items = body_items[:max_sections]
    table_items = body_items[max_sections:]

    for key, value in section_items:
        document.add_heading(str(key).strip("<>"), level=1)
        if value:
            document.add_paragraph(str(value))

    if table_items:
        document.add_heading("Additional Information", level=1)

        table = document.add_table(rows=1, cols=2)
        table.style = 'Table Grid'

        # Fixed column widths
        table.autofit = False
        table.columns[0].width = Pt(200)
        table.columns[1].width = Pt(300)

        # Header row: centred and bold
        header_cells = table.rows[0].cells
        header_cells[0].text = "Variable"
        header_cells[1].text = "Value"
        for cell in header_cells:
            header_paragraph = cell.paragraphs[0]
            header_paragraph.alignment = WD_PARAGRAPH_ALIGNMENT.CENTER
            for run in header_paragraph.runs:
                run.bold = True

        # One row per remaining entry
        for key, value in table_items:
            row_cells = table.add_row().cells
            row_cells[0].text = str(key).strip("<>")
            row_cells[1].text = str(value)

    return document
    
    
def _render_qna_tab(uploaded_file):
    """Render the "RCT QnA" tab: answer a free-text question about the uploaded PDF."""
    st.subheader("Ask your PDF 💬")
    user_question = st.text_input("Ask a question about your PDF:")

    # Extraction and embedding only run once there is a question to answer,
    # so a rerun without a question does no embedding work.
    if uploaded_file is None or not user_question:
        return

    # extract the text; a page without extractable text contributes nothing
    pdf_reader = PdfReader(uploaded_file)
    text = "".join(page.extract_text() or "" for page in pdf_reader.pages)

    # split into chunks
    text_splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=1000,
        chunk_overlap=200,
        length_function=len
    )
    chunks = text_splitter.split_text(text)
    if not chunks:
        st.warning("No extractable text was found in the uploaded PDF.")
        return

    # create embeddings and search the chunks closest to the question
    embeddings = OpenAIEmbeddings()
    knowledge_base = FAISS.from_texts(chunks, embeddings)
    docs = knowledge_base.similarity_search(user_question)

    llm = OpenAI()
    chain = load_qa_chain(llm, chain_type="stuff")
    with get_openai_callback() as cb:
        response = chain.run(input_documents=docs, question=user_question)
        print(cb)

    st.write(response)


def _render_pls_generator_tab(process_button, uploaded_file, tense, pls_grade):
    """Render the "PLS Generator" tab: process the upload, then build the PPT/DOC downloads."""
    if process_button and uploaded_file:
        col1, col2 = st.columns([0.2,0.8], gap="large")
        with col1:
            save_uploadedfile(uploaded_file)
            pdf_file = os.path.join(datadir, uploaded_file.name)
            displayPDF(pdf_file)
        with col2:
            with st.spinner(text='Processing research document you gave on the left to generate Plain Language Summary for you...⏳'):
                # Call the processing function on the saved document with user inputs
                replacements = process_documents(pdf_file, tense, pls_grade)
                st.success("Processed Output to be filled up in the preferred PLS template")

        # Keep the result across reruns so Step 3 can use it
        st.session_state.replacements = replacements

    # Step 3: PPT Template Selection and Download
    st.subheader("Step 3: Select PLS Template and Download")
    template_options = ["Pfizer_Blue_PLS_Template", "Pfizer_Red_PLS_Template", "Pfizer_Long_PLS_Template"]
    selected_template = st.radio("Select PPT Template", options=template_options, index=template_options.index("Pfizer_Blue_PLS_Template"), horizontal=True)

    if not st.button("Generate PLS"):
        return

    st.session_state.process_button = False

    # .get() avoids an AttributeError when nothing has been processed yet
    replacements = st.session_state.get("replacements")
    if not replacements:
        st.warning("Please upload and process a document before generating the PLS.")
        return

    with st.spinner('Generating PLS slides for you...⏳'):
        # Fill the selected PPT template and build the Word version
        ppt_content = postprocess_to_ppt(replacements, selected_template)
        doc_content = postprocess_to_doc(replacements)

        st.markdown(list(replacements.keys()))

        # Store the generated objects in session state
        st.session_state.ppt_content = ppt_content
        st.session_state.doc_content = doc_content

        # Step 4: serialise both documents in memory for download
        binary_output = BytesIO()
        ppt_content.save(binary_output)

        binary_output_doc = BytesIO()
        doc_content.save(binary_output_doc)

        st.success(':tada: The PLS template has been filled with above sections in ' + selected_template)

        st.download_button("Download PLS PPT", data=binary_output.getvalue(), file_name="PLS_PPT.pptx", mime="application/vnd.openxmlformats-officedocument.presentationml.presentation")
        st.download_button("Download PLS Doc", data=binary_output_doc.getvalue(), file_name="PLS_DOC.docx", mime="application/vnd.openxmlformats-officedocument.wordprocessingml.document")


def main():
    """Run the MAIA app: login, tab menu, sidebar inputs and the selected tab.

    Nothing beyond the login form is rendered until
    ``st.session_state["authentication_status"]`` is truthy. After login the
    backgrounds, banner, option menu and sidebar are drawn and the chosen tab
    ("PLS Generator", "RCT QnA" or "RCT Chatbot") is rendered.
    """
    # Page title and icon on the browser tab
    st.set_page_config(page_title = 'MAIA', page_icon = ":robot_face:", layout="wide")

    # Hide Streamlit's default menu and footer
    hide_default_format = """
       <style>
       #MainMenu {visibility: hidden; }
       footer {visibility: hidden;}
       </style>
       """
    st.markdown(hide_default_format, unsafe_allow_html=True)

    # SECURITY: credentials and the cookie signing key are hard-coded in source.
    # They should be loaded from a secrets store / environment instead.
    names = ["admin","shakti"]
    usernames = ["adm", "shrp"]
    passwords = ["abc123", "def456"]
    hashed_passwords = stauth.Hasher(passwords).generate()

    credentials = {
        "usernames": {
            uname: {"name": name, "password": pwd}
            for uname, name, pwd in zip(usernames, names, hashed_passwords)
        }
    }

    # Cookie stored on the client browser keeps the login for 30 days
    authenticator = stauth.Authenticate(credentials, "pls_generator", "abcdef", cookie_expiry_days = 30)

    # The login form is placed in the main body
    authenticator.login("Login", "main")

    auth_status = st.session_state["authentication_status"]
    if auth_status is False:
        st.error("Username/password is incorrect")
    elif auth_status is None:
        st.warning("Please enter your username and password")
    if not auth_status:
        return

    # logout button on main container
    authenticator.logout('Logout', 'main')
    st.subheader(f'Welcome *{st.session_state["name"]}*')

    # Background images for the page, the sidebar and the header
    set_bg_hack(os.path.join(rootdir, 'iqvia-bg.jpg'))
    sidebar_bg(os.path.join(rootdir, 'iqvia-blue.png'))
    header_bg(os.path.join(rootdir, 'iqvia-dark-blue.png'))

    # Banner image
    st.image(Image.open(os.path.join(rootdir, 'Pfizer-AI.jpg')))

    selected_tab = option_menu(
        menu_title=None,  # required
        options=["PLS Generator", "RCT QnA", "RCT Chatbot"],  # required
        icons=["house", "book", "envelope"],  # optional
        menu_icon="cast",  # optional
        default_index=0,  # optional
        orientation="horizontal",
    )

    # Input components live on the sidebar
    with st.sidebar:
        st.image(Image.open(os.path.join(rootdir, 'iqvia-logo.png')))
        st.markdown("""<h3 style='text-align: center'>MAIA - Medical Affairs Intelligence Assistant</h3>""", unsafe_allow_html=True)

        # Step 1: Document Upload
        st.subheader("Step 1: Upload Clinical trial document")
        uploaded_file = st.file_uploader("Upload document", accept_multiple_files=False, type=["pdf"])

        # Step 2: User Inputs
        st.subheader("Step 2: Define the tone and Grade of PLS")
        tense_options = ["On-going", "Completed", "Upcoming"]
        tense = st.radio("Current status of the study for writing tense", options=tense_options, key="tense", index=tense_options.index("Completed"), horizontal=True)
        pls_grade = st.select_slider("Health Literacy Grade of audience", options=["Low", "High"], key="pls_grade", value="Low")

        process_button = st.button("Process Documents")
        st.session_state.process_button = process_button
        st.session_state.uploaded_file = uploaded_file
        st.session_state.selected_tab = selected_tab

    if selected_tab == "RCT QnA":
        _render_qna_tab(uploaded_file)
    elif selected_tab == "RCT Chatbot":
        st.subheader(f"You have selected {selected_tab}")
    elif selected_tab == "PLS Generator":
        _render_pls_generator_tab(process_button, uploaded_file, tense, pls_grade)
                    
# Call main() only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()


Overwriting app8.py


In [1]:
%%writefile app9.py
# Enhancing: anonymized version for Chandresh; removed the Pfizer banner and changed the bg color; hid the running header and the footer;
# anonymized the PPT generation buttons - added image selector -- add ctgov API call for some fields with streaming -- baselined 7/26
import os
import urllib
import urllib.request
import base64
import json
import streamlit as st
from pptx import Presentation
from pptx.util import Inches
from pptx.dml.color import RGBColor
from docx import Document
from docx.shared import Pt
from docx.enum.text import WD_PARAGRAPH_ALIGNMENT
from datetime import datetime  # Import the 'datetime' class from the 'datetime' module
import time
from io import BytesIO
from PIL import Image
from shakti_stream_index import llama_vector_index
from streamlit_pills import pills
import streamlit_authenticator as stauth
from streamlit_option_menu import option_menu
from PyPDF2 import PdfReader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.chains.question_answering import load_qa_chain
from langchain.llms import OpenAI
from langchain.callbacks.base import BaseCallbackHandler
from langchain.callbacks import get_openai_callback
from langchain.agents import create_json_agent, AgentExecutor
from langchain.agents.agent_toolkits import JsonToolkit
from langchain.chains import LLMChain
from langchain.chat_models import ChatOpenAI
from langchain.requests import TextRequestsWrapper
from langchain.tools.json.tool import JsonSpec
from streamlit_image_select import image_select
from wordcloud import WordCloud
import matplotlib.pyplot as plt
from time import sleep
from stqdm import stqdm
import pickle

# Project root used to resolve the app's asset files.
rootdir = "/home/cdsw/experimentation_project1/PLS_project"
# Uploaded documents are written to the "data" folder under the root.
datadir = f"{rootdir}/data"

# def progress_bar_method(secs):
#     # Code for your second asynchronous method goes here
#     for i in stqdm(range(secs), backend=True, frontend=True):
#         sleep(0.5)

class StreamHandler(BaseCallbackHandler):
    """Callback handler that streams LLM tokens into a display container.

    Each new token is appended to the accumulated text, followed by
    ``token_separator``, and the whole text is re-rendered by calling the
    container method named by ``display_method``.

    Args:
        container: Object exposing the display method (e.g. ``markdown``).
        initial_text: Text shown before the first token arrives.
        display_method: Name of the container method used for rendering.
        token_separator: String appended after every token. Defaults to ``"/"``;
            pass ``""`` to render the tokens as continuous text.
    """

    def __init__(self, container, initial_text="", display_method='markdown', token_separator="/"):
        self.container = container
        self.text = initial_text
        self.display_method = display_method
        self.token_separator = token_separator

    def on_llm_new_token(self, token: str, **kwargs) -> None:
        """Append ``token`` to the running text and redraw it in the container.

        Raises:
            ValueError: if the container has no method named ``display_method``.
        """
        self.text += token + self.token_separator
        display_function = getattr(self.container, self.display_method, None)
        if display_function is None:
            raise ValueError(f"Invalid display_method: {self.display_method}")
        display_function(self.text)

#function to set background image
def set_bg_hack(main_bg):
    '''
    Embed the image at path ``main_bg`` as the CSS background of the app.

    The file is read, base64-encoded and injected through ``st.markdown`` as a
    ``data:image/png`` URL on the ``.stApp`` selector.

    Parameters
    ----------
    main_bg : path of the image file; it is opened in binary mode.

    Returns
    -------
    None
    '''
    # MIME subtype written into the data URL; the file itself is not inspected.
    main_bg_ext = "png"

    # Read inside a context manager so the file handle is always closed.
    with open(main_bg, "rb") as image_file:
        encoded_image = base64.b64encode(image_file.read()).decode()

    st.markdown(
         f"""
         <style>
         .stApp {{
             background: url(data:image/{main_bg_ext};base64,{encoded_image});
             background-size: cover
         }}
         </style>
         """,
         unsafe_allow_html=True
     )

def sidebar_bg(side_bg):
    """Embed the image at path ``side_bg`` as the sidebar's CSS background.

    The file is read, base64-encoded and injected through ``st.markdown`` as a
    ``data:image/png`` URL on the ``[data-testid="stSidebar"]`` selector.
    Returns None.
    """
    # MIME subtype written into the data URL; the file itself is not inspected.
    side_bg_ext = 'png'

    # Read inside a context manager so the file handle is always closed.
    with open(side_bg, "rb") as image_file:
        encoded_image = base64.b64encode(image_file.read()).decode()

    st.markdown(
      f"""
      <style>
      [data-testid="stSidebar"] > div:first-child {{
          background: url(data:image/{side_bg_ext};base64,{encoded_image});
      }}
      </style>
      """,
      unsafe_allow_html=True,
      )
    
def header_bg(side_bg):
    """Embed the image at path ``side_bg`` as the page header's CSS background.

    The file is read, base64-encoded and injected through ``st.markdown`` as a
    ``data:image/png`` URL. Returns None.
    """
    # MIME subtype written into the data URL; the file itself is not inspected.
    side_bg_ext = 'png'

    # Read inside a context manager so the file handle is always closed.
    with open(side_bg, "rb") as image_file:
        encoded_image = base64.b64encode(image_file.read()).decode()

    # NOTE(review): "css-1avcm0n" looks like a generated class name that may
    # differ between Streamlit releases - confirm the selector still matches.
    st.markdown(
      f"""
      <style>
      header.css-1avcm0n {{
          background: url(data:image/{side_bg_ext};base64,{encoded_image});
      }}
      </style>
      """,
      unsafe_allow_html=True,
      )
    
#function to read prompt from corresponding text file
def prompt(file):
    """Return the entire text content of the file at path ``file``."""
    with open(file) as prompt_file:
        contents = prompt_file.read()
    return contents
    
#function to save a file
def save_uploadedfile(uploaded_file):
    """Write an uploaded file into ``datadir`` and show a success message.

    Only the base name of ``uploaded_file.name`` is used for the destination,
    so a client-supplied name containing directory components cannot place
    the file outside ``datadir``.

    Returns whatever ``st.success`` returns.
    """
    safe_name = os.path.basename(uploaded_file.name)
    with open(os.path.join(datadir, safe_name), "wb") as f:
        f.write(uploaded_file.getbuffer())
    return st.success(f"""Saved File:{uploaded_file.name} to directory""")

@st.cache_data
def displayPDF(file):
    """Render the PDF stored at path ``file`` inline as a base64 ``<iframe>``."""
    # Read the raw bytes and encode them for use in a data URL
    with open(file, "rb") as pdf_handle:
        raw_bytes = pdf_handle.read()
    encoded_pdf = base64.b64encode(raw_bytes).decode('utf-8')

    # Build the embedding markup and hand it to Streamlit
    iframe_html = f'<iframe src="data:application/pdf;base64,{encoded_pdf}" width="300" height="1100" type="application/pdf"></iframe>'
    st.markdown(iframe_html, unsafe_allow_html=True)

# Placeholder function for processing the uploaded documents
def process_documents(NCT, uploaded_file, tense, pls_grade):
    """Build the placeholder -> text mapping used to fill a PLS template.

    For every summary section a Streamlit sub-header is emitted and
    ``GPTAPIcall`` is asked to write that section from the document; for every
    registry field a sub-header is emitted and ``CTGovAPIcall`` is asked the
    corresponding question about study ``NCT``.

    Parameters:
        NCT: study identifier forwarded to ``CTGovAPIcall`` and embedded in
            the study-number and link placeholders.
        uploaded_file: forwarded unchanged to ``GPTAPIcall``.
        tense, pls_grade: writing tense and literacy grade forwarded to
            ``GPTAPIcall``.

    Returns:
        dict whose keys are ``"<...>"`` placeholders and whose values are
        strings, in this order: summary sections, registry fields, then study
        number, link and summary date.
    """
    # Sections written from the uploaded document. The order matters: it is
    # the insertion order of the returned dict.
    summary_sections = [
        "<Title>",
        "<Subtitle>",
        "<Key takeaway>",
        "<Phonetics>",
        "<Introduction>",
        "<Intro summary>",
        # "<Inclusion criteria>", "<Exclusion criteria>", "<Results>" and
        # "<Sponsor>" are answered from the registry record below instead.
        "<Aims>",
        "<Conclusions>",
    ]

    summary_replacements = {}
    for section_name in summary_sections:
        # Heading first, so streamed output appears under its section name.
        st.subheader(f""":red[{section_name[1:-1]} :]""")
        text = GPTAPIcall(uploaded_file, section_name, tense, pls_grade)
        summary_replacements[section_name] = str(text)

    # Placeholder -> natural-language question asked about the registry record.
    ctgov_queries = {
        "<Start date>": "Answer the Study Start date in ```MMM-YYYY``` format",
        "<End date>": "Answer the Study End date in ```MMM-YYYY``` format",
        "<Participants>": "Total number of Participants in the study including drug arms, placebo arm, soc arm. Give one number answer",
        "<Arms count>": "Number of arms in the study including the drug arms, placebo arm, soc arm. Give one number answer",
        "<Disease condition>": "What is the disease condition for which drug is undergoing trials on patients in the study. Give answer as one disease",
        "<Demographics>": "What are the Demographics of participants in the study",
        "<treatment arm>": "Number of participants only in the drug arms of the study, do not count the participants from placebo arm or soc arm. Give one number answer",
        "<control arm>": "Number of participants in the placebo arm or soc arm. Give one number answer",
        "<Inclusion criteria>": "Inclusion criteria in EligibilityCriteria",
        "<Exclusion criteria>": "Exclusion criteria in EligibilityCriteria",
        "<Results>": "list all outcome measure results in bullets interms of outcome measure type, outcome measure title, outcome measure description, outcome measure value",
        "<Sponsor>": "Lead Sponsor Name",
    }

    ctgov_replacements = {}
    for section_name, query in ctgov_queries.items():
        st.subheader(f""":red[{section_name[1:-1]} :]""")
        text = CTGovAPIcall(NCT, query)
        ctgov_replacements[section_name] = str(text)

    replacements = {
        **summary_replacements,
        **ctgov_replacements,
        "<Study number>": f"{NCT}",
        "<clinical trials gov link>": f"https://clinicaltrials.gov/ct2/show/{NCT}",
        "<Summary date>": datetime.now().strftime('%d-%b-%Y'),
    }

    return replacements

def CTGovAPIcall(NCT, query):
    """Answer ``query`` about a study using its ClinicalTrials.gov JSON record.

    The full-study record for expression ``NCT`` is downloaded, wrapped in a
    LangChain JSON toolkit and queried with a streaming chat-model agent. The
    final answer is written to the page and returned.

    Parameters:
        NCT: search expression, normally an NCT number typed by the user.
        query: natural-language question for the JSON agent.

    Returns:
        The agent's answer as returned by ``run``.

    Raises:
        LookupError: when the response contains no study for ``NCT``.
        urllib.error.URLError: when the download fails.
    """
    # Local import keeps this block self-contained.
    from urllib.parse import quote

    file_format = '&fmt=JSON'
    # NOTE(review): this is the "classic" API host - confirm it is still
    # served before relying on this function.
    ctgov = 'https://classic.clinicaltrials.gov/api/query/full_studies?expr='

    # The expression comes from a free-text box, so percent-encode it. '+' is
    # left literal because '+'-joined titles are also used as expressions.
    expr = quote(str(NCT).strip(), safe='+')
    study_url = ctgov + expr + file_format

    with urllib.request.urlopen(study_url) as response:
        ini_dict = json.loads(response.read().decode())

    # Fail with a readable message instead of an opaque KeyError/IndexError
    # when nothing matched the expression.
    studies = ini_dict.get("FullStudiesResponse", {}).get("FullStudies") or []
    if not studies:
        raise LookupError(f"No ClinicalTrials.gov study found for expression {NCT!r}")

    json_spec = JsonSpec(dict_=studies[0]["Study"], max_value_length=31000)
    json_toolkit = JsonToolkit(spec=json_spec)

    # Placeholder that the stream handler keeps overwriting with partial text.
    chat_box = st.empty()
    stream_handler = StreamHandler(chat_box, display_method='write')

    json_agent_executor = create_json_agent(
        llm=ChatOpenAI(temperature=0, model_name="gpt-4-32k", streaming=True, callbacks=[stream_handler],),
        toolkit=json_toolkit,
        verbose=True,
    )
    resp = json_agent_executor.run(query)
    st.write(resp)
    return resp

# Placeholder function for GPT API call
def GPTAPIcall(uploaded_file, key, tense, pls_grade):
    """Ask the document index to write one PLS section.

    Parameters:
        uploaded_file: forwarded unchanged to ``llama_vector_index``.
        key: section name inserted into the instruction text.
        tense: study status ("On-going", "Completed", "Upcoming", any
            letter case) or an already-resolved tense word.
        pls_grade: health-literacy grade inserted into the instruction text.

    Returns:
        Whatever ``llama_vector_index`` returns for the persona prompt followed
        by the section instruction.
    """
    # Study status -> grammatical tense. The lookup is case-insensitive
    # because the UI options are capitalised while these keys are lowercase;
    # unknown values pass through unchanged.
    tense_mapping = {"on-going": "present", "completed": "past", "upcoming": "future"}
    tense = tense_mapping.get(str(tense).lower(), tense)

    query = f"Strictly following the above instructions and the clinical trial document provided, write the content of {key} section of the APLS in {tense}, comprehendable by a {pls_grade} health literacy grade person. Do not violate the section-wise instructions provided in any case. The content should be strictly inferred from the clinical trial document provided only and not any other sources."

    persona = prompt(os.path.join(rootdir, 'apls_persona_2606.txt'))
    return llama_vector_index(uploaded_file, persona + "\n" + query)

# Placeholder function for postprocessing into PPT template
def postprocess_to_ppt(replacements, selected_template):
    """Fill a PowerPoint template with the generated section texts.

    Opens ``<rootdir>/<selected_template>.pptx`` and, for every text run whose
    text is exactly one of the keys of ``replacements``, substitutes the
    mapped text. The first run's size, name, bold, italic and (when it is an
    explicit RGB colour) colour are then applied to every run of that
    paragraph.

    Parameters:
        replacements: dict mapping placeholder text to replacement text.
        selected_template: template file name without the ``.pptx`` suffix.

    Returns:
        The modified ``Presentation`` object (not saved to disk).
    """
    # Local import: the enum is only needed for the colour check below.
    from pptx.enum.dml import MSO_COLOR_TYPE

    ppt_file = f"{selected_template}.pptx"
    prs = Presentation(os.path.join(rootdir, ppt_file))

    for slide in prs.slides:
        for shape in slide.shapes:
            if not shape.has_text_frame:
                continue
            for paragraph in shape.text_frame.paragraphs:
                for run in paragraph.runs:
                    # Direct lookup: a run is replaced at most once and the
                    # inserted text is never re-matched against other keys.
                    if run.text not in replacements:
                        continue
                    new_text = replacements[run.text]

                    # Capture the first run's formatting before editing.
                    first_font = paragraph.runs[0].font
                    font_size = first_font.size
                    font_name = first_font.name
                    font_bold = first_font.bold
                    font_italic = first_font.italic

                    # ``color.type`` is an MSO_COLOR_TYPE member (or None),
                    # so it must be compared with the enum, not a string.
                    if first_font.color.type == MSO_COLOR_TYPE.RGB:
                        font_color = first_font.color.rgb
                    else:
                        font_color = None

                    run.text = new_text

                    # Distinct loop name so the outer ``run`` is not shadowed.
                    for styled_run in paragraph.runs:
                        styled_run.font.size = font_size
                        styled_run.font.name = font_name
                        styled_run.font.bold = font_bold
                        styled_run.font.italic = font_italic
                        if font_color is not None:
                            styled_run.font.color.rgb = font_color

    return prs


# Placeholder function for postprocessing into DOC template
def postprocess_to_doc(replacements, max_sections=12):
    """Build a Word document from the generated section texts.

    Layout: the title (key ``"<Title>"``, or ``"Title"`` if that is what the
    mapping uses) as a bold level-1 heading, then the next ``max_sections``
    entries as heading + paragraph, then every remaining entry as a row of an
    "Additional Information" table. Angle brackets are stripped from the keys
    when they are shown as headings or table labels.

    Parameters:
        replacements: dict mapping placeholder keys to text; its insertion
            order decides which entries become sections and which table rows.
        max_sections: number of non-title entries rendered as sections.

    Returns:
        The new ``Document`` object (not saved to disk).
    """
    def _label(key):
        # "<Start date>" -> "Start date"
        return str(key).strip("<>")

    def _make_bold(paragraph):
        # Bold is a run-level property; assigning it on the paragraph object
        # itself has no effect.
        for run in paragraph.runs:
            run.bold = True

    document = Document()
    document.styles['Normal'].font.size = Pt(11)

    # The placeholder mapping uses "<Title>"; plain "Title" is still accepted.
    title_key = next((k for k in ("<Title>", "Title") if k in replacements), None)
    title = replacements.get(title_key) if title_key is not None else None
    if title:
        _make_bold(document.add_heading(str(title), level=1))

    # Everything except the title, in insertion order, split once so each
    # entry lands in exactly one of "sections" or "table".
    body_items = [(k, v) for k, v in replacements.items() if k != title_key]
    section_items = body_items[:max_sections]
    table_items = body_items[max_sections:]

    for key, value in section_items:
        document.add_heading(_label(key), level=1)
        if value:
            document.add_paragraph(str(value))

    if table_items:
        document.add_heading("Additional Information", level=1)

        table = document.add_table(rows=1, cols=2)
        table.style = 'Table Grid'

        # Fixed column widths instead of automatic fitting.
        table.autofit = False
        table.columns[0].width = Pt(200)
        table.columns[1].width = Pt(300)

        header_cells = table.rows[0].cells
        header_cells[0].text = "Variable"
        header_cells[1].text = "Value"
        for cell in header_cells:
            header_paragraph = cell.paragraphs[0]
            header_paragraph.alignment = WD_PARAGRAPH_ALIGNMENT.CENTER
            _make_bold(header_paragraph)

        for key, value in table_items:
            row_cells = table.add_row().cells
            row_cells[0].text = _label(key)
            row_cells[1].text = str(value)

    return document
    
    
def main():
    """Render the MAIA Streamlit page.

    Flow: page configuration, login, sidebar inputs (PDF upload, NCT number,
    tense, literacy grade), then the tab chosen in the horizontal menu:

    * "RCT QnA" - question answering over the uploaded PDF using a FAISS
      store that is cached as a pickle in ``datadir``.
    * "RCT WordCloud" - word cloud of the uploaded PDF's text.
    * "PLS Generator" - runs ``process_documents`` when "Process Documents"
      is pressed, then lets the user pick a template and download the filled
      PPT or Word file.

    Values shared across reruns are kept in ``st.session_state``
    (``replacements``, ``ppt_content``, ``doc_content``, ``select_format``,
    ``process_button``, ``uploaded_file``, ``selected_tab``). Returns None.
    """
    # Page icon and tab name; Streamlit requires this to be the first command.
    st.set_page_config(page_title = 'MAIA', page_icon = ":robot_face:", layout="wide")

    # Hide Streamlit's hamburger menu and footer.
    hide_default_format = """
       <style>
       #MainMenu {visibility: hidden; }
       footer {visibility: hidden;}
       </style>
       """
    st.markdown(hide_default_format, unsafe_allow_html=True)

    # SECURITY NOTE(review): the passwords and the cookie signing key
    # ("abcdef") are hard-coded in source. Move them to a secret store before
    # any deployment outside a demo.
    names = ["admin","shakti"]
    usernames = ["adm", "shrp"]
    passwords = ["abc123", "def456"]

    hashed_passwords = stauth.Hasher(passwords).generate()
    credentials = {
        "usernames": {
            uname: {"name": name, "password": pwd}
            for uname, name, pwd in zip(usernames, names, hashed_passwords)
        }
    }

    # The cookie keeps the login on the client browser for 30 days.
    authenticator = stauth.Authenticate(credentials, "pls_generator", "abcdef", cookie_expiry_days = 30)

    # The login outcome is read from session state below, so the return value
    # is not needed here.
    authenticator.login("Login", "main")

    auth_status = st.session_state["authentication_status"]
    if auth_status is False:
        st.error("Username/password is incorrect")
        return
    if auth_status is None:
        st.warning("Please enter your username and password")
        return
    if not auth_status:
        return

    # ---- authenticated from here on ----
    authenticator.logout('Logout', 'main')
    st.subheader(f'Welcome *{st.session_state["name"]}*')

    sidebar_bg(os.path.join(rootdir, 'iqvia-blue.png'))

    selected_tab = option_menu(
        menu_title=None,  # required
        options=["PLS Generator", "RCT QnA", "RCT WordCloud"],  # required
        icons=["house", "book", "envelope"],  # optional
        menu_icon="cast",  # optional
        default_index=0,  # optional
        orientation="horizontal",
    )

    # Input components on the sidebar.
    with st.sidebar:
        st.image(Image.open(os.path.join(rootdir, 'iqvia-logo.png')))
        st.markdown("""<h3 style='text-align: center'>*MAIA - Medical Affairs Intelligence Assistant*</h3>""", unsafe_allow_html=True)

        # Step 1: Document Upload
        st.subheader("Step 1: Upload Clinical trial document")
        uploaded_file = st.file_uploader("Upload document", accept_multiple_files=False, type=["pdf"])

        NCT = st.text_input("Enter the NCT number:", "NCT", key="NCT")

        # Step 2: User Inputs
        st.subheader("Step 2: Define the tone and Grade of PLS")
        default_tense = "Completed"
        default_pls_grade = "Low"

        tense_options = ["On-going", "Completed", "Upcoming"]
        tense = st.radio("Current status of the study for writing tense", options=tense_options, key="tense", index=tense_options.index(default_tense), horizontal=True)

        pls_grade = st.select_slider("Health Literacy Grade of audience", options=["Low", "High"], key="pls_grade", value = default_pls_grade)

        process_button = st.button("Process Documents")
        st.session_state.process_button = process_button
        st.session_state.uploaded_file = uploaded_file
        st.session_state.selected_tab = selected_tab

    # ------------------------------------------------------------- RCT QnA
    if selected_tab == "RCT QnA":
        st.subheader("Ask your PDF 💬")
        user_question = st.text_input("Ask a question about your PDF:", placeholder="Number of participants? ", disabled=not uploaded_file,)

        if uploaded_file is not None:
            # The vector store is cached on disk under the upload's name.
            # NOTE(review): the cache key is the file name only, so a
            # different PDF with the same name reuses the old embeddings.
            store_name = os.path.splitext(uploaded_file.name)[0]
            store_path = os.path.join(datadir, f"{store_name}.pkl")

            if os.path.exists(store_path):
                # SECURITY NOTE(review): unpickling can execute arbitrary
                # code; make sure only this app can write to datadir.
                with open(store_path, "rb") as f:
                    knowledge_base = pickle.load(f)
                st.write('Embeddings loaded from the Disk:')
            else:
                # Extract and chunk the text only when embeddings must be
                # built; a cached store makes this work unnecessary.
                pdf_reader = PdfReader(uploaded_file)
                text = "".join(page.extract_text() or "" for page in pdf_reader.pages)

                text_splitter = CharacterTextSplitter(
                    separator="\n",
                    chunk_size=1000,
                    chunk_overlap=200,
                    length_function=len
                )
                chunks = text_splitter.split_text(text)

                embeddings = OpenAIEmbeddings()
                knowledge_base = FAISS.from_texts(chunks, embeddings)
                with open(store_path, "wb") as f:
                    pickle.dump(knowledge_base, f)
                st.write('Embeddings newly created')

            if user_question:
                docs = knowledge_base.similarity_search(user_question, k=3)

                # Placeholder overwritten with the partial answer as it streams.
                chat_box = st.empty()
                stream_handler = StreamHandler(chat_box, display_method='write')

                llm = ChatOpenAI(temperature=0, callbacks=[stream_handler], streaming=True)
                chain = load_qa_chain(llm, chain_type="stuff")

                response = chain.run(input_documents=docs, question=user_question)
                st.write(response)

    # ------------------------------------------------------- RCT WordCloud
    if selected_tab == "RCT WordCloud":
        if uploaded_file is not None:
            pdf_reader = PdfReader(uploaded_file)
            text = "".join(page.extract_text() or "" for page in pdf_reader.pages)

            wordcloud = WordCloud().generate(text)

            # Draw on an explicit figure and pass it to Streamlit, instead of
            # relying on pyplot's global figure and a deprecation switch.
            fig, ax = plt.subplots()
            ax.imshow(wordcloud, interpolation='bilinear')
            ax.axis("off")
            st.pyplot(fig)
            plt.close(fig)  # do not accumulate figures across reruns

    # ------------------------------------------------------- PLS Generator
    if selected_tab == "PLS Generator":
        if process_button and uploaded_file and st.session_state.NCT != 'NCT':
            col1, col2 = st.columns([0.2,0.8], gap="large")
            with col1:
                save_uploadedfile(uploaded_file)
                pdf_file = os.path.join(datadir, uploaded_file.name)
                displayPDF(pdf_file)
            with col2:
                with st.spinner(text='Processing research document you gave on the left to generate Plain Language Summary for you...⏳'):
                    replacements = process_documents(NCT, pdf_file, tense, pls_grade)
                    st.success("Processed Output to be filled up in the preferred PLS template")
                    st.snow()
                    st.balloons()

            # Keep the result for the rerun triggered by "Generate PLS".
            st.session_state.replacements = replacements

        # Step 3: Template Selection and Download
        st.subheader("Step 3: Select PLS Template and Download")

        default_format = "PPT format"
        format_options = ["PPT format", "Word format"]
        st.session_state.select_format = pills("Select PPT or Word format", format_options, ["🎈", "🌈"], index=format_options.index(default_format))
        select_format = st.session_state.select_format

        # Fallback so the name is always bound, whichever branch runs.
        selected_template = "Blue_PLS_Template"

        if select_format == "PPT format":
            default_template = "Blue_PLS_Template"
            template_names = ["Blue_PLS_Template", "Red_PLS_Template", "Long_PLS_Template"]
            template_images = [
                os.path.join(rootdir, 'Pfizer_Blue_PLS_Template.png'),
                os.path.join(rootdir, 'Pfizer_Red_PLS_Template.png'),
                os.path.join(rootdir, 'Pfizer_Long_PLS_Template.png'),
            ]
            selected_image = image_select(
                label="Select PPT Template",
                images=template_images,
                captions=template_names,
                index=template_names.index(default_template),
                use_container_width = False,
            )
            # image_select hands back the chosen image (here its path), so
            # translate it to the template name the .pptx file is named after.
            selected_template = dict(zip(template_images, template_names)).get(selected_image, selected_image)

        if select_format == "Word format":
            default_template = "Word_PLS_Template"
            # The PPT is still generated alongside the Word file, so it needs
            # a valid template name.
            selected_template = "Blue_PLS_Template"
            image_select(
                label="Select Word Template",
                images=[
                    os.path.join(rootdir, 'Pfizer_Word_PLS_Template.png'),
                ],
                captions=["Word_PLS_Template"],
                index=["Word_PLS_Template"].index(default_template),
                use_container_width = False,
            )

        generate_ppt_button = st.button("Generate PLS")

        if generate_ppt_button:
            # ``.get`` avoids an AttributeError when nothing was processed yet.
            replacements = st.session_state.get("replacements")
            st.session_state.process_button = False

            if not replacements:
                st.warning("Please process a document before generating the PLS.")
            else:
                with st.spinner('Generating PLS slides for you...⏳'):
                    ppt_content = postprocess_to_ppt(replacements, selected_template)
                    doc_content = postprocess_to_doc(replacements)

                    # Show which placeholders were filled.
                    st.markdown(list(replacements.keys()))

                    st.session_state.ppt_content = ppt_content
                    st.session_state.doc_content = doc_content

                    # Step 4: Download. Both files are serialised in memory.
                    ppt_output_file = "PLS_PPT.pptx"
                    binary_output = BytesIO()
                    ppt_content.save(binary_output)

                    binary_output_doc = BytesIO()
                    doc_content.save(binary_output_doc)

                    st.success(':tada: The PLS template has been filled with above sections in ' + selected_template)

                    if select_format == "PPT format":
                        st.download_button("Download PLS PPT", data=binary_output.getvalue(), file_name=ppt_output_file, mime="application/vnd.openxmlformats-officedocument.presentationml.presentation")
                    if select_format == "Word format":
                        st.download_button("Download PLS Doc", data=binary_output_doc.getvalue(), file_name="PLS_DOC.docx", mime="application/vnd.openxmlformats-officedocument.wordprocessingml.document")
                    
# Run the app only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()


Overwriting app9.py


In [1]:
%%writefile app8.py
#enhancing - working version, adding independent prompts to compare outputs; fixed ppt call error .png, fixed word formatting; 
#compare this file with previous file and incorporate changes in prev file; added regex for participants
#added chatbot on docs and sentiment analyzer; removed stop words and query words in word cloud
#added wordfreq bars also as Word Analytics --- baselining it and creating a separate version below for adding pubmed/ctgov
import os
import re
import urllib
import urllib.request
import base64
import json
import streamlit as st
from pptx import Presentation
from pptx.util import Inches
from pptx.dml.color import RGBColor
from docx import Document
from docx.shared import Pt
from docx.enum.text import WD_PARAGRAPH_ALIGNMENT
from datetime import datetime  # Import the 'datetime' class from the 'datetime' module
import time
from io import BytesIO
from PIL import Image
from shakti_stream_index import llama_vector_index
from streamlit_pills import pills
import streamlit_authenticator as stauth
from streamlit_option_menu import option_menu
from PyPDF2 import PdfReader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.chains.question_answering import load_qa_chain
from langchain.llms import OpenAI
from langchain.callbacks.base import BaseCallbackHandler
from langchain.callbacks import get_openai_callback
from langchain.agents import create_json_agent, AgentExecutor
from langchain.agents.agent_toolkits import JsonToolkit
from langchain.chains import LLMChain
from langchain.chat_models import ChatOpenAI
from langchain.requests import TextRequestsWrapper
from langchain.tools.json.tool import JsonSpec
from streamlit_image_select import image_select
from streamlit_chat import message
from llama_index import GPTVectorStoreIndex, SimpleDirectoryReader
from llama_index import download_loader, StorageContext, load_index_from_storage
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from wordcloud import WordCloud
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
# Load the NLTK stopwords to remove articles, prepositions and other words that are not actionable
from nltk.corpus import stopwords
# This allows us to create individual objects from a bag of words
from nltk.tokenize import word_tokenize
# Lemmatizer helps to reduce words to the base form
from nltk.stem import WordNetLemmatizer
# Ngrams allows to group words in common pairs or trigrams..etc
from nltk import ngrams
# We can use counter to count the objects
from collections import Counter
# This is our visual library
import seaborn as sns
from time import sleep
from stqdm import stqdm
import itertools
import pickle
import glob

# Absolute locations used by the app.
# NOTE(review): saved_path and promptdir are not referenced in the part of the
# file visible here; presumably they hold chatbot data and prompt text files -
# confirm against their users further down.
saved_path = "/home/cdsw/experimentation_project1/PLS_project/bot_data"
rootdir = "/home/cdsw/experimentation_project1/PLS_project"
# Directory that save_uploadedfile writes uploaded files into.
datadir = "/home/cdsw/experimentation_project1/PLS_project/data"
promptdir = "/home/cdsw/experimentation_project1/PLS_project/prompts"

# def progress_bar_method(secs):
#     # Code for your second asynchronous method goes here
#     for i in stqdm(range(secs), backend=True, frontend=True):
#         sleep(0.5)

class StreamHandler(BaseCallbackHandler):
    """Callback handler that re-renders the streamed LLM text into a container.

    ``container`` is any object exposing the method named by
    ``display_method`` (e.g. the placeholder returned by ``st.empty()``).
    """

    def __init__(self, container, initial_text="", display_method='markdown'):
        self.container = container
        self.text = initial_text
        self.display_method = display_method

    def on_llm_new_token(self, token: str, **kwargs) -> None:
        """Append ``token`` plus a "/" separator and redraw the whole text.

        Raises ValueError when the container has no attribute named
        ``display_method``.
        """
        # NOTE(review): the "/" after every token looks like a debugging aid
        # for visualising token boundaries - confirm it is wanted in the UI.
        self.text += token + "/"
        render = getattr(self.container, self.display_method, None)
        if render is None:
            raise ValueError(f"Invalid display_method: {self.display_method}")
        render(self.text)

#function to set background image
def set_bg_hack(main_bg):
    '''
    Embed the image at path ``main_bg`` as the CSS background of the app.

    The file is read, base64-encoded and injected through ``st.markdown`` as a
    ``data:image/png`` URL on the ``.stApp`` selector.

    Parameters
    ----------
    main_bg : path of the image file; it is opened in binary mode.

    Returns
    -------
    None
    '''
    # MIME subtype written into the data URL; the file itself is not inspected.
    main_bg_ext = "png"

    # Read inside a context manager so the file handle is always closed.
    with open(main_bg, "rb") as image_file:
        encoded_image = base64.b64encode(image_file.read()).decode()

    st.markdown(
         f"""
         <style>
         .stApp {{
             background: url(data:image/{main_bg_ext};base64,{encoded_image});
             background-size: cover
         }}
         </style>
         """,
         unsafe_allow_html=True
     )

def sidebar_bg(side_bg):
    """Embed the image at path ``side_bg`` as the sidebar's CSS background.

    The file is read, base64-encoded and injected through ``st.markdown`` as a
    ``data:image/png`` URL on the ``[data-testid="stSidebar"]`` selector.
    Returns None.
    """
    # MIME subtype written into the data URL; the file itself is not inspected.
    side_bg_ext = 'png'

    # Read inside a context manager so the file handle is always closed.
    with open(side_bg, "rb") as image_file:
        encoded_image = base64.b64encode(image_file.read()).decode()

    st.markdown(
      f"""
      <style>
      [data-testid="stSidebar"] > div:first-child {{
          background: url(data:image/{side_bg_ext};base64,{encoded_image});
      }}
      </style>
      """,
      unsafe_allow_html=True,
      )
    
def header_bg(side_bg):
    """Embed an image file as the background of the Streamlit page header.

    Parameters
    ----------
    side_bg : str
        Path of the image file to embed. The data URI is always labelled
        as ``image/png``.

    Returns
    -------
    None
    """
    side_bg_ext = 'png'

    # Context manager closes the file handle as soon as the bytes are read.
    with open(side_bg, "rb") as image_file:
        encoded_bg = base64.b64encode(image_file.read()).decode()

    # NOTE(review): "css-1avcm0n" looks like a generated class name and may differ
    # between Streamlit versions — confirm the selector still matches the header.
    st.markdown(
      f"""
      <style>
      header.css-1avcm0n {{
          background: url(data:image/{side_bg_ext};base64,{encoded_bg});
      }}
      </style>
      """,
      unsafe_allow_html=True,
      )

def get_sentiment(polarity):
    """Translate a numeric polarity score into a sentiment label.

    Scores above 0.2 are 'Positive', scores below 0.0 are 'Negative' and
    everything in between (both bounds included) is 'Neutral'.
    """
    if polarity > 0.2:
        return 'Positive'
    if polarity < 0.0:
        return 'Negative'
    return 'Neutral'
    
def word_frequency(sentence):
    """Count single words, word pairs and word triples in a text.

    The text is tokenized, lower-cased, stripped of English stop words and of
    tokens that are not purely alphabetic, and lemmatized before counting.

    Parameters
    ----------
    sentence : str
        The text to analyse.

    Returns
    -------
    tuple of three pandas.DataFrame
        ``(word_freq, word_pairs, trigrams)`` with columns
        ``['word', 'frequency']``, ``['pairs', 'frequency']`` and
        ``['trigrams', 'frequency']``, each sorted by descending frequency.
    """
    # Build the stop-word lookup once as a set: this avoids calling
    # stopwords.words() again for every single token and makes each
    # membership test a hash lookup.
    english_stopwords = set(stopwords.words('english'))
    lemmatizer = WordNetLemmatizer()

    # lower-case -> drop stop words -> keep alphabetic tokens -> lemmatize
    lowered = (raw_token.lower() for raw_token in word_tokenize(sentence))
    new_tokens = [
        lemmatizer.lemmatize(token)
        for token in lowered
        if token not in english_stopwords and token.isalpha()
    ]

    # counts the words, pairs and trigrams
    counted = Counter(new_tokens)
    counted_2 = Counter(ngrams(new_tokens, 2))
    counted_3 = Counter(ngrams(new_tokens, 3))

    # one frequency table per n-gram size, most frequent first
    word_freq = pd.DataFrame(counted.items(), columns=['word', 'frequency']).sort_values(by='frequency', ascending=False)
    word_pairs = pd.DataFrame(counted_2.items(), columns=['pairs', 'frequency']).sort_values(by='frequency', ascending=False)
    trigrams = pd.DataFrame(counted_3.items(), columns=['trigrams', 'frequency']).sort_values(by='frequency', ascending=False)
    return word_freq, word_pairs, trigrams
    
#function to read prompt from corresponding text file
def prompt(file):
    """Return the entire text content of the prompt file at path ``file``."""
    with open(file) as prompt_file:
        contents = prompt_file.read()
    return contents
    
#function to save a file
def save_uploadedfile(uploaded_file):
    """Write an uploaded file into ``datadir`` and show a success banner.

    The file is stored under its own ``name``; the value returned is whatever
    ``st.success`` returns.
    """
    # NOTE(review): the client-supplied file name is joined onto datadir as-is —
    # confirm it can never contain path separators.
    destination = os.path.join(datadir, uploaded_file.name)
    with open(destination, "wb") as target:
        target.write(uploaded_file.getbuffer())
    return st.success(f"""Saved File:{uploaded_file.name} to directory""")

def create_vector():
    """Index every document in ``saved_path`` and persist the index to disk.

    The documents are loaded with ``SimpleDirectoryReader``, turned into a
    ``GPTVectorStoreIndex`` and the index's storage context is persisted to
    ``./vectordatabase`` (relative to the current working directory), which is
    where ``generate_response`` loads it from.
    """
    documents = SimpleDirectoryReader(saved_path).load_data()
    index = GPTVectorStoreIndex.from_documents(documents)

    # The index carries its own storage context; the separately created default
    # context the function used to build was never used and has been dropped.
    index.storage_context.persist("./vectordatabase")

def generate_response(prompt):
    """Answer ``prompt`` using the vector index persisted in ``./vectordatabase``.

    The index is reloaded from disk on every call, queried through its default
    query engine, and the response is returned as a string.
    """
    persisted_context = StorageContext.from_defaults(persist_dir="./vectordatabase")
    query_engine = load_index_from_storage(persisted_context).as_query_engine()
    answer = query_engine.query(prompt)
    return str(answer)
    #print ("\n", response)

@st.cache_data
def displayPDF(file):
    """Render the PDF stored at path ``file`` inside a 300x1100 iframe.

    The file is read in binary mode, base64-encoded and embedded as a data URI
    in an ``<iframe>`` emitted through ``st.markdown``.
    """
    # NOTE(review): the call is cached on its argument (the path), so a different
    # PDF saved under the same file name may show the earlier content — confirm.
    with open(file, "rb") as pdf_handle:
        pdf_bytes = pdf_handle.read()
    base64_pdf = base64.b64encode(pdf_bytes).decode('utf-8')

    # Inline the document as a data URI so no separate file serving is needed
    pdf_display = f'<iframe src="data:application/pdf;base64,{base64_pdf}" width="300" height="1100" type="application/pdf"></iframe>'

    st.markdown(pdf_display, unsafe_allow_html=True)

# Placeholder function for processing the uploaded documents
def process_documents(NCT, uploaded_file, tense, pls_grade):
    """Produce the text for every placeholder of the PLS templates.

    Two groups of placeholders are filled while progress is written to the
    Streamlit page (one sub-header per section):

    * summary sections, generated by ``llama_vector_index`` from the research
      document using the persona prompt, the section prompt and a fixed query;
    * study facts, answered by ``CTGovAPIcall`` for the given study.

    Parameters
    ----------
    NCT : str
        Study identifier passed to ``CTGovAPIcall`` and used in the study link.
    uploaded_file :
        The research document handed to ``llama_vector_index`` (the caller
        passes the path of the saved PDF).
    tense : str
        Study status ("On-going", "Completed", "Upcoming" in any letter case)
        or a tense word; statuses are mapped to present/past/future.
    pls_grade : str
        Literacy grade used to pick ``apls_persona_<grade>_literacy.txt``.

    Returns
    -------
    dict
        Placeholder (e.g. ``"<Title>"``) -> replacement text (str).
    """
    # Map the study status onto a grammatical tense. The lookup is done on the
    # lower-cased value because the sidebar radio supplies capitalised labels
    # ("Completed"), which never matched the lower-case keys before.
    tense_mapping = {"on-going": "present", "completed": "past", "upcoming": "future"}
    tense = tense_mapping.get(str(tense).lower(), tense)

    # Section placeholder -> prompt file with the section-wise instructions
    section_prompt_files = {
        "<Title>": 'title.txt',
        "<Subtitle>": 'subtitle.txt',
        "<Key takeaway>": 'key_takeaway.txt',
        "<Phonetics>": 'phonetics.txt',
        "<Introduction>": 'introduction.txt',
        "<Intro summary>": 'intro_summary.txt',
        "<Aims>": 'aims.txt',
        "<Conclusions>": 'conclusions.txt',
    }
    # Read all section prompts up front, as before, so a missing file fails
    # before any LLM call is made.
    section_prompts = {
        section_name: prompt(os.path.join(promptdir, file_name))
        for section_name, file_name in section_prompt_files.items()
    }

    # The persona prompt is the same for every section: read it once.
    persona_prompt = prompt(os.path.join(promptdir, f'apls_persona_{pls_grade}_literacy.txt'))

    # Generate the text of each summary section from the research document
    summary_replacements = {}
    for section_name, summary_prompt in section_prompts.items():

        #prompt for pls grade and tense
        query = f"Strictly following the above instructions and the research document provided, write the content of {section_name} section of the plain language summary in {tense} tense.\
        Do not violate the section-wise instructions provided in any case. The content should be strictly inferred from the research document provided and not any other sources."

        st.subheader(f""":red[{section_name[1:-1]} :]""")
        text = llama_vector_index(uploaded_file, persona_prompt + "\n" + summary_prompt + "\n" + query)
        summary_replacements[section_name] = str(text)

    # Placeholder -> question asked about the study record
    ctgov_prompts = {
                    "<Start date>": "Answer the Study Start date in ```MMM-YYYY``` format",
                    "<End date>": "Answer the Study End date in ```MMM-YYYY``` format",
                    "<Participants>": "Total number of Participants in the study including drug arms, placebo arm, soc arm. Give one number answer",
                    "<Arms count>": "Number of arms in the study including the drug arms, placebo arm, soc arm. Give one number answer",
                    "<Disease condition>": "What is the disease condition for which drug is undergoing trials on patients in the study. Give answer as one disease",
                    "<Demographics>": "What are the Demographics of participants in the study",
                    "<treatment arm>": "Number of participants only in the drug arms of the study, do not count the participants from placebo arm or soc arm. Give one number answer",
                    "<control arm>": "Number of participants in the placebo arm or soc arm. Give one number answer",
                    "<Inclusion criteria>": "Inclusion criteria in EligibilityCriteria",
                    "<Exclusion criteria>": "Exclusion criteria in EligibilityCriteria",
                    "<Results>": "list all outcome measure results in bullets interms of outcome measure type, outcome measure title, outcome measure description, outcome measure value",
                    "<Sponsor>": "Lead Sponsor Name",
                   }

    ctgov_replacements = {}
    for section_name, ctgov_prompt in ctgov_prompts.items():

        st.subheader(f""":red[{section_name[1:-1]} :]""")
        text = str(CTGovAPIcall(NCT, ctgov_prompt))
        if section_name == "<Participants>":
            # Keep only the first number of the answer (thousands separators
            # removed). Storing str(re.findall(...)) used to put a list repr
            # such as "['274']" into the document. If the answer holds no
            # digits at all, the raw answer is kept.
            number_match = re.search(r'\d[\d,]*', text)
            if number_match:
                text = number_match.group().replace(",", "")
        ctgov_replacements[section_name] = text

    replacements = {**summary_replacements,
                    **ctgov_replacements,
                    "<Study number>": f"{NCT}",
                    "<clinical trials gov link>": f"https://clinicaltrials.gov/ct2/show/{NCT}",
                    "<Summary date>": datetime.now().strftime('%d-%b-%Y'),
                   }

    return replacements

def CTGovAPIcall(NCT, query):
    """Answer a question about a study using its ClinicalTrials.gov record.

    The full study record is downloaded as JSON from the classic
    ClinicalTrials.gov ``full_studies`` endpoint, wrapped in a LangChain JSON
    toolkit, and a JSON agent backed by ``gpt-4-32k`` is asked ``query``.
    Streamed tokens and the final answer are written to the Streamlit page.

    Parameters
    ----------
    NCT : str
        Search expression for the endpoint, normally an NCT number. A
        pre-encoded expression using ``+`` between words is also accepted.
    query : str
        The question the agent should answer from the study record.

    Returns
    -------
    str
        The agent's answer.

    Raises
    ------
    LookupError
        If the response contains no study for the expression.
    """
    # Local import keeps the block self-contained.
    from urllib.parse import quote

    file_format = '&fmt=JSON'
    ctgov = 'https://classic.clinicaltrials.gov/api/query/full_studies?expr='

    # Percent-encode the user-supplied expression so spaces or other reserved
    # characters cannot break the URL; "+" stays literal so expressions that
    # already use it as a word separator keep working.
    expr = quote(str(NCT).strip(), safe='+')
    your_url = ctgov + expr + file_format

    with urllib.request.urlopen(your_url) as url:
        ini_dict = json.loads(url.read().decode())

    # Fail with a readable message instead of a bare KeyError/IndexError when
    # the expression matched nothing.
    studies = ini_dict.get("FullStudiesResponse", {}).get("FullStudies") or []
    if not studies:
        raise LookupError(f"No ClinicalTrials.gov study found for {NCT!r}")

    # Only the first matching study is used
    json_spec = JsonSpec(dict_=studies[0]["Study"], max_value_length=31000)
    json_toolkit = JsonToolkit(spec=json_spec)

    # Placeholder element that the stream handler keeps overwriting with the tokens
    chat_box = st.empty()
    stream_handler = StreamHandler(chat_box, display_method='write')

    json_agent_executor = create_json_agent(
        llm=ChatOpenAI(temperature=0, model_name="gpt-4-32k", streaming=True, callbacks=[stream_handler],), toolkit=json_toolkit, verbose=True
    )
    resp = json_agent_executor.run(query)
    st.write(resp)
    return resp
    
# Placeholder function for postprocessing into PPT template
def postprocess_to_ppt(replacements, selected_template):
    """Fill a PowerPoint template with the generated PLS content.

    The presentation ``<template name>.pptx`` is opened relative to
    ``rootdir`` and every text run whose text is exactly one of the
    placeholders is replaced by the corresponding text. The formatting of the
    paragraph's first run (size, name, bold, italic and, when explicitly
    defined as RGB, colour) is then applied to all runs of that paragraph.

    Parameters
    ----------
    replacements : dict
        Placeholder (e.g. ``"<Title>"``) -> replacement text.
    selected_template : str
        Name or path of the template preview image (e.g. ``..._Template.png``);
        its extension is swapped for ``.pptx`` to locate the presentation.

    Returns
    -------
    pptx.presentation.Presentation
        The modified presentation object (not saved to disk).
    """
    # Local import keeps the block self-contained; pptx is already a dependency.
    from pptx.enum.dml import MSO_COLOR_TYPE

    # Swap the image extension for .pptx. splitext gives the same result as the
    # previous [:-4] slice for ".png" names and also copes with other lengths.
    template_stem = os.path.splitext(selected_template)[0]
    prs = Presentation(os.path.join(rootdir, f"{template_stem}.pptx"))

    for slide in prs.slides:
        for shape in slide.shapes:
            if not shape.has_text_frame:
                continue
            for paragraph in shape.text_frame.paragraphs:
                paragraph_runs = paragraph.runs
                for run in paragraph_runs:
                    # A run is replaced only when its whole text is a placeholder
                    if run.text not in replacements:
                        continue

                    # Capture the formatting of the first run in the paragraph
                    first_font = paragraph_runs[0].font
                    font_size = first_font.size
                    font_name = first_font.name
                    font_bold = first_font.bold
                    font_italic = first_font.italic

                    # color.type is an MSO_COLOR_TYPE member (or None); comparing
                    # it with the string "rgb" was always False, so the colour
                    # was never carried over.
                    if first_font.color.type == MSO_COLOR_TYPE.RGB:
                        font_color = first_font.color.rgb
                    else:
                        font_color = None

                    run.text = replacements[run.text]

                    # Apply the captured formatting to the entire paragraph. A
                    # distinct loop variable keeps the outer `run` intact.
                    for sibling in paragraph_runs:
                        sibling.font.size = font_size
                        sibling.font.name = font_name
                        sibling.font.bold = font_bold
                        sibling.font.italic = font_italic
                        if font_color is not None:
                            sibling.font.color.rgb = font_color

    # Return the modified presentation object
    return prs


# Placeholder function for postprocessing into DOC template
def postprocess_to_doc(replacements):
    """Build a Word document from the generated PLS content.

    The document contains the title as a heading, one headed paragraph per
    narrative section that has content, and an "Additional Information"
    two-column table listing the study facts.

    Parameters
    ----------
    replacements : dict
        Placeholder (e.g. ``"<Title>"``) -> replacement text. Missing or empty
        narrative sections are skipped; missing table values are left blank.

    Returns
    -------
    docx.document.Document
        The new document object (not saved to disk).
    """
    para_variable_list = ["<Subtitle>", "<Key takeaway>", "<Phonetics>", "<Introduction>", "<Intro summary>", "<Demographics>", "<Inclusion criteria>", "<Exclusion criteria>", "<Results>", "<Aims>", "<Conclusions>"]
    table_variable_list = ["<Study number>", "<Start date>", "<End date>", "<Participants>", "<Arms count>", "<treatment arm>", "<control arm>", "<Sponsor>", "<Summary date>", "<clinical trials gov link>"]

    def _make_bold(paragraph):
        # Bold is a property of runs; assigning `.bold` on the paragraph object
        # itself (as was done before) has no effect on the document.
        for run in paragraph.runs:
            run.bold = True

    # Create a new document with an 11pt base font
    document = Document()
    document.styles['Normal'].font.size = Pt(11)

    # Set the title
    title = replacements.get("<Title>")
    if title:
        _make_bold(document.add_heading(title, level=1))

    # One "Heading 1" line plus a body paragraph per narrative section
    for variable in para_variable_list:
        value = replacements.get(variable)
        if not value:
            continue
        # variable[1:-1] strips the surrounding angle brackets
        section_heading = document.add_paragraph(variable[1:-1], style='Heading 1')
        _make_bold(section_heading)
        document.add_paragraph(value)

    # Study facts table
    document.add_heading("Additional Information", level=1)

    table = document.add_table(rows=1, cols=2)
    table.style = 'Table Grid'

    # Set table column widths
    table.autofit = False
    table.columns[0].width = Pt(200)
    table.columns[1].width = Pt(300)

    # Header row: centred, bold captions
    table_header_cells = table.rows[0].cells
    table_header_cells[0].text = "Variable"
    table_header_cells[1].text = "Value"
    for cell in table_header_cells:
        header_paragraph = cell.paragraphs[0]
        header_paragraph.alignment = WD_PARAGRAPH_ALIGNMENT.CENTER
        _make_bold(header_paragraph)

    # One row per study fact
    for variable in table_variable_list:
        value = replacements.get(variable)
        row_cells = table.add_row().cells
        row_cells[0].text = variable[1:-1]
        # Cell text must be a string; a missing value becomes an empty cell
        # instead of raising.
        row_cells[1].text = "" if value is None else str(value)

    return document
    
    
def main():
    """Render the MAIA Streamlit page.

    The page shows a login form and, once authenticated, a sidebar with the
    inputs (PDF upload, NCT number, tense, literacy grade) and a horizontal
    menu with four tabs:

    * "PLS Generator"    - generate the summary sections and download them as
      a filled PPT or Word file;
    * "RCT QnA"          - ask questions about the uploaded PDF;
    * "RCT WordAnalytics"- word cloud, sentiment and word-frequency charts;
    * "RCT Chatbot"      - chat over a vector index built from the PDF.

    State shared between reruns is kept in ``st.session_state``.
    """
    # Page icon and tab name on the browser tab
    st.set_page_config(page_title = 'MAIA', page_icon = ":robot_face:", layout="wide")

    # Hide the hamburger menu on the top right and the footer of streamlit
    hide_default_format = """
       <style>
       #MainMenu {visibility: hidden; }
       footer {visibility: hidden;}
       </style>
       """
    st.markdown(hide_default_format, unsafe_allow_html=True)

    # NOTE(review): user names, plaintext passwords and the cookie signing key
    # are hard-coded below — move them to a secrets store before real use.
    names = ["admin","shakti"]
    usernames = ["adm", "shrp"]
    passwords = ["abc123", "def456"]

    credentials = {"usernames":{}}
    hashed_passwords = stauth.Hasher(passwords).generate()

    for uname, name, pwd in zip(usernames, names, hashed_passwords):
        user_dict = {"name": name, "password": pwd}
        credentials["usernames"].update({uname: user_dict})

    # Cookie stored on the client browser keeps the login for 30 days
    authenticator = stauth.Authenticate(credentials, "pls_generator", "abcdef", cookie_expiry_days = 30)

    # The login form can be placed in the main body or the sidebar
    name, authentication_status, username = authenticator.login("Login", "main")

    # Identity checks: the status is True, False or None (not yet submitted)
    if st.session_state["authentication_status"] is False:
        st.error("Username/password is incorrect")

    if st.session_state["authentication_status"] is None:
        st.warning("Please enter your username and password")

    if st.session_state["authentication_status"]:

        # logout button on main container
        authenticator.logout('Logout', 'main')
        st.subheader(f'Welcome *{st.session_state["name"]}*')

        # sidebar background image
        sidebar_bg(os.path.join(rootdir, 'iqvia-blue.png'))

        selected_tab = option_menu(
            menu_title=None,  # required
            options=["PLS Generator", "RCT QnA", "RCT WordAnalytics", "RCT Chatbot",],  # required
            icons=["house", "book", "envelope", "bot"],  # optional
            menu_icon="cast",  # optional
            default_index=0,  # optional
            orientation="horizontal",
        )

        # setting input components on sidebar
        with st.sidebar:

            st.image(Image.open(os.path.join(rootdir, 'iqvia-logo.png')))
            # setting title
            st.markdown("""<h3 style='text-align: center'>*MAIA - Medical Affairs Intelligence Assistant*</h3>""", unsafe_allow_html=True)

            # Step 1: Document Upload
            st.subheader("Step 1: Upload Clinical trial document")
            uploaded_file = st.file_uploader("Upload document", accept_multiple_files=False, type=["pdf"])

            NCT = st.text_input("Enter the NCT number:", "NCT", key="NCT")

            # Step 2: User Inputs
            st.subheader("Step 2: Define the tone and Grade of PLS")
            # Default values for radio button and slider
            default_tense = "Completed"
            default_pls_grade = "Low"

            # Radio button for tense selection
            tense = st.radio("Current status of the study for writing tense", options=["On-going", "Completed", "Upcoming"], key="tense", index=["On-going", "Completed", "Upcoming"].index(default_tense), horizontal=True)

            # Slider for PLS grade selection
            pls_grade = st.select_slider("Health Literacy Grade of audience", options=["Low", "High"], key="pls_grade", value = default_pls_grade)

            # Publish the current inputs so the tab code below can read them
            st.session_state.process_button = False
            process_button = st.button("Process Documents")
            st.session_state.process_button = process_button
            st.session_state.uploaded_file = uploaded_file
            st.session_state.selected_tab = selected_tab

        if st.session_state.selected_tab == "RCT QnA":
            st.subheader("Ask your PDF 💬")
            # show user input
            user_question = st.text_input("Ask a question about your PDF:", placeholder="Number of participants? ", disabled=not uploaded_file,)

            if st.session_state.uploaded_file:

                if uploaded_file is not None:
                    # extract the text; a page without extractable text
                    # contributes an empty string instead of raising
                    pdf_reader = PdfReader(uploaded_file)
                    text = ""
                    for page in pdf_reader.pages:
                        text += page.extract_text() or ""

                    # split into chunks
                    text_splitter = CharacterTextSplitter(
                        separator="\n",
                        chunk_size=1000,
                        chunk_overlap=200,
                        length_function=len
                    )
                    chunks = text_splitter.split_text(text)

                    # create embeddings, or reuse the ones pickled for this file name
                    store_name = uploaded_file.name[:-4]
                    store_path = os.path.join(datadir, f"{store_name}.pkl")
                    if os.path.exists(store_path):
                        # NOTE(review): pickle.load executes code embedded in the
                        # file — make sure datadir is writable by trusted users only.
                        with open(store_path, "rb") as f:
                            knowledge_base = pickle.load(f)
                            st.write('Embeddings loaded from the Disk:')
                    else:
                        embeddings = OpenAIEmbeddings()
                        knowledge_base = FAISS.from_texts(chunks, embeddings)
                        with open(store_path, "wb") as f:
                            pickle.dump(knowledge_base, f)
                            st.write('Embeddings newly created')

                    if user_question:
                        docs = knowledge_base.similarity_search(user_question, k=3)

                        chat_box = st.empty()
                        stream_handler = StreamHandler(chat_box, display_method='write')

                        llm = ChatOpenAI(temperature=0, callbacks=[stream_handler], streaming=True)
                        chain = load_qa_chain(llm, chain_type="stuff")

                        response = chain.run(input_documents=docs, question=user_question)
                        st.write(response)

        if st.session_state.selected_tab == "RCT Chatbot":
            # Ensure the directory exists
            os.makedirs(saved_path, exist_ok=True)

            if uploaded_file is not None:
                # To read file as bytes:
                bytes_data = uploaded_file.getvalue()

                # Save the uploaded file to the chatbot data directory
                with open(os.path.join(saved_path, uploaded_file.name), 'wb') as out_file:
                    out_file.write(bytes_data)

                st.success('PDF file saved in data directory')
                create_vector()
                st.success('Vector created')

            # Initialise session state variables
            if 'generated' not in st.session_state:
                st.session_state['generated'] = []
            if 'past' not in st.session_state:
                st.session_state['past'] = []
            if 'messages' not in st.session_state:
                st.session_state['messages'] = [
                    {"role": "system", "content": "You are a helpful assistant."}
                ]

            response_container = st.container()
            # container for text box
            container = st.container()

            with container:
                with st.form(key='my_form', clear_on_submit=True):
                    user_input = st.text_area("You:", key='input', height=50)
                    submit_button = st.form_submit_button(label='Send')

                if submit_button and user_input:
                    output = generate_response(user_input)
                    st.session_state['past'].append(user_input)
                    st.session_state['generated'].append(output)

            if st.session_state['generated']:
                with response_container:
                    # Questions and answers are appended together, so walk them in lockstep
                    for i, (question, answer) in enumerate(zip(st.session_state["past"], st.session_state["generated"])):
                        message(question, is_user=True, key=str(i) + '_user1')
                        message(answer, key=str(i))

        if st.session_state.selected_tab == "RCT WordAnalytics":

            if uploaded_file is not None:
                # extract the text; a page without extractable text
                # contributes an empty string instead of raising
                pdf_reader = PdfReader(uploaded_file)
                text = ""
                for page in pdf_reader.pages:
                    text += page.extract_text() or ""

                # Remove un-important words:
                stop_words = set(stopwords.words('english'))
                query_words={'participants', 'Participants' }
                stop_words.update(query_words)
                for word in query_words:
                    text = text.replace(word, '')

                # Create and generate a word cloud image:
                wordcloud = WordCloud(stopwords=stop_words, collocations=False, max_font_size=55, max_words=25, background_color="black").generate(text)

                # Display the generated image:
                plt.figure(figsize=(10,12))
                plt.imshow(wordcloud, interpolation='bilinear')
                plt.axis("off")
                plt.show()
                st.set_option('deprecation.showPyplotGlobalUse', False)
                st.pyplot()

                col1, col2 = st.columns(2)

                with col1:
                    # overall doc sentiment
                    analyzer=SentimentIntensityAnalyzer()
                    polarity = analyzer.polarity_scores(text)['compound']
                    st.caption(f"\nOverall Research document sentiment is: {get_sentiment(polarity)}", )

                    # split the text on single spaces into the pieces scored individually
                    grouped_strings = ["".join(g) for k, g in itertools.groupby(text, lambda x: x == " ") if not k]

                    df = pd.DataFrame()
                    df['polarity']=[analyzer.polarity_scores(piece)['compound'] for piece in grouped_strings]
                    df['sentiment']=df.polarity.apply(get_sentiment)
                    plt.figure(figsize=(3,3))
                    df.sentiment.value_counts().plot.bar()
                    st.pyplot()

                with col2:
                    st.caption("\nWord frequency of the words in the research doc is: ")
                    data2,data3,data4 = word_frequency(text)
                    fig, axes = plt.subplots(3,1,figsize=(8,20))
                    sns.barplot(ax=axes[0],x='frequency',y='word',data=data2.head(30))
                    sns.barplot(ax=axes[1],x='frequency',y='pairs',data=data3.head(30))
                    sns.barplot(ax=axes[2],x='frequency',y='trigrams',data=data4.head(30))
                    st.pyplot(fig)

        if st.session_state.selected_tab == "PLS Generator":
            # Processing runs only when the button was pressed, a file is
            # uploaded and the NCT field was changed from its default text
            if st.session_state.process_button and st.session_state.uploaded_file and st.session_state.NCT!='NCT':
                col1, col2 = st.columns([0.2,0.8], gap="large")
                with col1:
                    save_uploadedfile(uploaded_file)
                    pdf_file = os.path.join(datadir, uploaded_file.name)
                    displayPDF(pdf_file)
                with col2:
                    with st.spinner(text='Processing research document you gave on the left to generate Plain Language Summary for you...⏳'):

                        # Call the processing function on the uploaded documents with user inputs
                        replacements = process_documents(NCT, pdf_file, tense, pls_grade)
                        st.success("Processed Output to be filled up in the preferred PLS template")

                        st.snow()
                        st.balloons()

                # Store the replacements dictionary in session state
                st.session_state.replacements = replacements

            # Step 3: PPT Template Selection and Download
            st.subheader("Step 3: Select PLS Template and Download")

            default_format = "PPT format"
            st.session_state.select_format = pills("Select PPT or Word format", ["PPT format", "Word format"], ["🎈", "🌈"], index=["PPT format", "Word format"].index(default_format))

            if st.session_state.select_format == "PPT format":
                # Template picker for the PPT output
                default_template = "Blue_PLS_Template.png"
                selected_template = image_select(
                    label="Select PPT Template",
                    images=[
                        os.path.join(rootdir, 'Pfizer_Blue_PLS_Template.png'),
                        os.path.join(rootdir, 'Pfizer_Red_PLS_Template.png'),
                        os.path.join(rootdir, 'Pfizer_Long_PLS_Template.png'),
                    ],
                    captions=["Blue_PLS_Template", "Red_PLS_Template", "Long_PLS_Template"],
                    index=["Blue_PLS_Template.png", "Red_PLS_Template.png", "Long_PLS_Template.png"].index(default_template),
                    use_container_width = False,
                )

            if st.session_state.select_format == "Word format":
                default_template = "Word_PLS_Template"
                # The PPT is still generated below, so a PPT template name is kept
                selected_template = "Blue_PLS_Template.png"
                image_select(
                    label="Select Word Template",
                    images=[
                        os.path.join(rootdir, 'Pfizer_Word_PLS_Template.png'),
                    ],
                    captions=["Word_PLS_Template"],
                    index=["Word_PLS_Template"].index(default_template),
                    use_container_width = False,
                )

            generate_ppt_button = st.button("Generate PLS")

            if generate_ppt_button:
                # .get() avoids an AttributeError when "Generate PLS" is pressed
                # before any document has been processed in this session
                replacements = st.session_state.get("replacements")
                st.session_state.process_button = False

                if not replacements:
                    st.warning("Please process a document first before generating the PLS.")
                else:
                    with st.spinner('Generating PLS slides for you...⏳'):
                        # Call the postprocessing functions to generate PPT and DOC content
                        ppt_content = postprocess_to_ppt(replacements, selected_template)

                        doc_content = postprocess_to_doc(replacements)

                        # Show which sections were filled
                        st.markdown(list(replacements.keys()))

                        # Store the generated content in session state
                        st.session_state.ppt_content = ppt_content
                        st.session_state.doc_content = doc_content

                        # Step 4: Download. Both keys are tested explicitly; the
                        # previous `"ppt_content" and "doc_content" in ...` only
                        # checked the second one.
                        if "ppt_content" in st.session_state and "doc_content" in st.session_state:
                            ppt_content = st.session_state.ppt_content
                            doc_content = st.session_state.doc_content

                            st.session_state.replacements = replacements
                            st.session_state.process_button = False

                            ppt_output_file = "PLS_PPT.pptx"

                            # save presentation and document as in-memory binary output
                            binary_output = BytesIO()
                            ppt_content.save(binary_output)

                            binary_output_doc = BytesIO()
                            doc_content.save(binary_output_doc)

                            # display success message and download button
                            st.success(':tada: The PLS template has been filled with above sections in ' + selected_template)

                            # Provide the download link for the generated PPT and DOC
                            if st.session_state.select_format == "PPT format":
                                st.download_button("Download PLS PPT", data=binary_output.getvalue(), file_name=ppt_output_file, mime="application/vnd.openxmlformats-officedocument.presentationml.presentation")
                            if st.session_state.select_format == "Word format":
                                st.download_button("Download PLS Doc", data=binary_output_doc.getvalue(), file_name="PLS_DOC.docx", mime="application/vnd.openxmlformats-officedocument.wordprocessingml.document")
                    
# Script entry point: render the page only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()


Overwriting app8.py


In [3]:
%%writefile app8.py
#enhancing - working version of the above app; adding the pubmed/ctgov and word freq graph
import os
import re
import urllib
import urllib.request
import base64
import json
import nltk
import streamlit as st
from pptx import Presentation
from pptx.util import Inches
from pptx.dml.color import RGBColor
from docx import Document
from docx.shared import Pt
from docx.enum.text import WD_PARAGRAPH_ALIGNMENT
from datetime import datetime  # Import the 'datetime' class from the 'datetime' module
import requests
from Bio import Entrez
from Bio import Medline
import time
from io import BytesIO
from PIL import Image
from shakti_stream_index import llama_vector_index
from streamlit_pills import pills
import streamlit_authenticator as stauth
from streamlit_option_menu import option_menu
from PyPDF2 import PdfReader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.chains.question_answering import load_qa_chain
from langchain.llms import OpenAI
from langchain.callbacks.base import BaseCallbackHandler
from langchain.callbacks import get_openai_callback
from langchain.agents import create_json_agent, AgentExecutor
from langchain.agents import create_pandas_dataframe_agent
from langchain.agents.agent_types import AgentType
from streamlit_chat import message
from langchain.agents.agent_toolkits import JsonToolkit
from langchain.chains import LLMChain
from langchain.chat_models import ChatOpenAI
from langchain.requests import TextRequestsWrapper
from langchain.tools.json.tool import JsonSpec
from streamlit_image_select import image_select
from streamlit_chat import message
from llama_index import GPTVectorStoreIndex, SimpleDirectoryReader
from llama_index import download_loader, StorageContext, load_index_from_storage
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from wordcloud import WordCloud
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
# Load the NLTK stopwords to remove articles, prepositions and other words that are not actionable
from nltk.corpus import stopwords
# This allows creating individual tokens from a bag of words
from nltk.tokenize import word_tokenize
# Lemmatizer helps to reduce words to the base form
from nltk.stem import WordNetLemmatizer
# Ngrams allows to group words in common pairs or trigrams..etc
from nltk import ngrams
# We can use counter to count the objects
from collections import Counter
# This is our word freq distribution library
from nltk import FreqDist
import seaborn as sns
from time import sleep
from stqdm import stqdm
import itertools
import pickle
import glob

# Directory scanned by create_vector() to build the chatbot's vector index.
saved_path = "/home/cdsw/experimentation_project1/PLS_project/bot_data"
# Project root; the PPT templates are loaded from here.
rootdir = "/home/cdsw/experimentation_project1/PLS_project"
# Directory where save_uploadedfile() writes uploaded files.
datadir = "/home/cdsw/experimentation_project1/PLS_project/data"
# Directory holding the prompt text files read by process_documents().
promptdir = "/home/cdsw/experimentation_project1/PLS_project/prompts"
# Contact address set on Biopython's Entrez module, which the PubMed search uses.
Entrez.email = "shakti20889@gmail.com"

# def progress_bar_method(secs):
#     # Code for your second asynchronous method goes here
#     for i in stqdm(range(secs), backend=True, frontend=True):
#         sleep(0.5)

def generate_response1(input_text, df):
    """Run ``input_text`` through a pandas-dataframe agent built over ``df``.

    Args:
        input_text: Question or instruction handed to the agent.
        df: Dataframe the agent is created with.

    Returns:
        Whatever the agent's ``run`` call returns for ``input_text``.
    """
    chat_model = ChatOpenAI(temperature =0, model_name="gpt-4", streaming = True)
    dataframe_agent = create_pandas_dataframe_agent(chat_model, df, verbose=False)
    return dataframe_agent.run(input_text)


def search_pubmed(article_title, retmax=5):
    """Search PubMed for ``article_title`` and summarise the matching articles.

    Args:
        article_title: Search term passed to the ``esearch`` call.
        retmax: Maximum number of articles to retrieve.

    Returns:
        A list of dicts with the keys ``"PMID"``, ``"Title"``, ``"Abstract"``
        and ``"Citations"`` (the article's PubMed URL). The list is empty
        when the search matches nothing.
    """
    # Step 1: look up the ids of the articles matching the search term.
    handle = Entrez.esearch(db="pubmed", term=article_title, retmax=retmax)
    try:
        search_result = Entrez.read(handle)
    finally:
        # Close the handle even when parsing the response fails.
        handle.close()

    id_list = search_result["IdList"]
    if not id_list:
        # No hits: there is nothing to fetch, so skip the efetch request.
        return []

    # Step 2: download the matching records in MEDLINE text format.
    handle = Entrez.efetch(db="pubmed", id=id_list, rettype="medline", retmode="text")
    try:
        records = list(Medline.parse(handle))
    finally:
        handle.close()

    # Step 3: keep only the fields the UI displays.
    articles = []
    for record in records:
        pmid = record.get("PMID")
        if not pmid:
            # Without an identifier no citation URL can be built; skip it.
            continue
        articles.append({
            "PMID": pmid,
            # Title and abstract are optional in a record; default to "".
            "Title": record.get("TI", ""),
            "Abstract": record.get("AB", ""),
            "Citations": f"https://pubmed.ncbi.nlm.nih.gov/{pmid}/",
        })

    return articles

def search_ctgov(article_title, retmax=5):
    """Search ClinicalTrials.gov for ``article_title`` and summarise the studies.

    Args:
        article_title: Search expression sent as the ``expr`` query parameter.
        retmax: Highest rank to request (sent as ``max_rnk``; ranks start at 1).

    Returns:
        A list of dicts with the keys ``"PMID"`` (holding the study's NCT id),
        ``"Title"``, ``"Abstract"`` and ``"Citations"`` (the study URL).
        Fields missing from a study are returned as empty strings.
    """
    api_url = "https://clinicaltrials.gov/api/query/full_studies"
    params = {
        "expr": article_title,
        "min_rnk": 1,
        "max_rnk": retmax,
        "fmt": "json",
    }
    response = requests.get(api_url, params=params)
    data = response.json()

    articles = []
    for study in data.get("FullStudiesResponse", {}).get("FullStudies", []):
        # Resolve the nested modules once and reuse them for every field.
        protocol = study.get("Study", {}).get("ProtocolSection", {})
        identification = protocol.get("IdentificationModule", {})
        # Read the id with a default so a study lacking it no longer raises
        # KeyError while the citation URL is built.
        nct_id = identification.get("NCTId", "")
        articles.append({
            "PMID": nct_id,
            "Title": identification.get("OfficialTitle", ""),
            "Abstract": protocol.get("DescriptionModule", {}).get("BriefSummary", ""),
            "Citations": f"https://clinicaltrials.gov/ct2/show/{nct_id}",
        })

    return articles

# Function to display the article details in the main container
def display_articles(articles):
    """Write each article's title, an abstract preview and a link to the page.

    Args:
        articles: Iterable of dicts with the keys ``"Title"``, ``"Abstract"``
            and ``"Citations"``.
    """
    for article in articles:
        title = article["Title"]
        # Preview: keep at most the first three sentences of the abstract.
        sentences = article["Abstract"].strip().split(". ")
        abstract = ". ".join(sentences[:3]) + "..."
        citations_url = article["Citations"]
        st.write(f"**Title:** {title}")
        st.write(f"**Abstract:** {abstract}")
        st.write(f"[Read More]({citations_url})")
        st.write("--------")

class StreamHandler(BaseCallbackHandler):
    """Callback handler that re-renders accumulated LLM tokens in a container.

    Attributes are set in ``__init__``:
      container       object whose method named ``display_method`` is called
      text            text accumulated so far (starts as ``initial_text``)
      display_method  name of the container method used for rendering
    """

    def __init__(self, container, initial_text="", display_method='markdown'):
        self.container = container
        self.text = initial_text
        self.display_method = display_method

    def on_llm_new_token(self, token: str, **kwargs) -> None:
        """Append ``token`` to the buffer and redraw the whole buffer.

        Raises:
            ValueError: if the container has no attribute named
                ``display_method``.
        """
        # NOTE(review): every token is followed by "/" in the rendered text;
        # confirm this separator is intentional.
        self.text += token + "/"
        render = getattr(self.container, self.display_method, None)
        if render is None:
            raise ValueError(f"Invalid display_method: {self.display_method}")
        render(self.text)

#function to set background image
def set_bg_hack(main_bg):
    '''
    Embed the image at ``main_bg`` as the background of the ``.stApp`` element.

    The file is read, base64-encoded and injected through an inline
    ``<style>`` block. The data URI is always labelled ``image/png``.

    Parameters
    ----------
    main_bg : path of the image file to embed.

    Returns
    -------
    None
    '''
    # set bg name
    main_bg_ext = "png"

    # Read through a context manager so the file handle is closed right away.
    with open(main_bg, "rb") as image_file:
        encoded_image = base64.b64encode(image_file.read()).decode()

    st.markdown(
         f"""
         <style>
         .stApp {{
             background: url(data:image/{main_bg_ext};base64,{encoded_image});
             background-size: cover
         }}
         </style>
         """,
         unsafe_allow_html=True
     )

def sidebar_bg(side_bg):
    """Embed the image at ``side_bg`` as the sidebar background.

    The file is read, base64-encoded and injected through an inline
    ``<style>`` block targeting ``[data-testid="stSidebar"] > div:first-child``.
    The data URI is always labelled ``image/png``.

    Args:
        side_bg: Path of the image file to embed.
    """
    side_bg_ext = 'png'

    # Read through a context manager so the file handle is closed right away.
    with open(side_bg, "rb") as image_file:
        encoded_image = base64.b64encode(image_file.read()).decode()

    st.markdown(
      f"""
      <style>
      [data-testid="stSidebar"] > div:first-child {{
          background: url(data:image/{side_bg_ext};base64,{encoded_image});
      }}
      </style>
      """,
      unsafe_allow_html=True,
      )
    
def header_bg(side_bg):
    """Embed the image at ``side_bg`` as the page header background.

    The file is read, base64-encoded and injected through an inline
    ``<style>`` block targeting ``header.css-1avcm0n``. The data URI is
    always labelled ``image/png``.

    Args:
        side_bg: Path of the image file to embed.
    """
    side_bg_ext = 'png'

    # Read through a context manager so the file handle is closed right away.
    with open(side_bg, "rb") as image_file:
        encoded_image = base64.b64encode(image_file.read()).decode()

    # NOTE(review): "css-1avcm0n" looks like a generated class name that may
    # differ between Streamlit versions - confirm the selector still matches.
    st.markdown(
      f"""
      <style>
      header.css-1avcm0n {{
          background: url(data:image/{side_bg_ext};base64,{encoded_image});
      }}
      </style>
      """,
      unsafe_allow_html=True,
      )

def get_sentiment(polarity):
    """Translate a polarity score into a sentiment label.

    Scores above 0.2 are ``'Positive'``, scores below 0.0 are ``'Negative'``
    and everything else (0.0 up to and including 0.2) is ``'Neutral'``.
    """
    if polarity > 0.2:
        return 'Positive'
    if polarity < 0.0:
        return 'Negative'
    return 'Neutral'
    
def word_frequency(sentence):
    """Count words, word pairs and trigrams in ``sentence``.

    The text is tokenized and lower-cased; English stopwords and tokens that
    are not purely alphabetic are dropped; the remaining tokens are lemmatized.

    Args:
        sentence: The text to analyse, as a single string.

    Returns:
        A tuple ``(word_freq, word_pairs, trigrams)`` of DataFrames, each with
        a ``frequency`` column plus ``word``, ``pairs`` or ``trigrams``
        respectively, sorted by descending frequency.
    """
    # Load the stopword list once; calling stopwords.words() inside the
    # filter would rebuild the list for every single token.
    english_stopwords = set(stopwords.words('english'))
    lemmatizer = WordNetLemmatizer()

    lowered_tokens = (token.lower() for token in word_tokenize(sentence))
    new_tokens = [
        lemmatizer.lemmatize(token)
        for token in lowered_tokens
        if token not in english_stopwords and token.isalpha()
    ]

    # Count the words, pairs and trigrams.
    counted = Counter(new_tokens)
    counted_2 = Counter(ngrams(new_tokens, 2))
    counted_3 = Counter(ngrams(new_tokens, 3))

    # Create the three data frames and return them.
    word_freq = pd.DataFrame(counted.items(), columns=['word', 'frequency']).sort_values(by='frequency', ascending=False)
    word_pairs = pd.DataFrame(counted_2.items(), columns=['pairs', 'frequency']).sort_values(by='frequency', ascending=False)
    trigrams = pd.DataFrame(counted_3.items(), columns=['trigrams', 'frequency']).sort_values(by='frequency', ascending=False)
    return word_freq, word_pairs, trigrams
    
#function to read prompt from corresponding text file
def prompt(file):
    """Return the entire text content of the file at path ``file``."""
    with open(file) as prompt_file:
        contents = prompt_file.read()
    return contents
    
#function to save a file
def save_uploadedfile(uploaded_file):
    """Write an uploaded file into ``datadir`` and show a success message.

    Args:
        uploaded_file: Object exposing ``name`` and ``getbuffer()``
            (presumably a Streamlit uploaded file - confirm against callers).

    Returns:
        The result of the ``st.success`` call.
    """
    # The name comes from the upload itself, so keep only its final path
    # component; otherwise a name such as "../x" would escape ``datadir``.
    safe_name = os.path.basename(uploaded_file.name)
    with open(os.path.join(datadir, safe_name), "wb") as f:
        f.write(uploaded_file.getbuffer())
    return st.success(f"""Saved File:{safe_name} to directory""")

def create_vector():
    """Index every document found in ``saved_path`` and persist the index.

    The documents are loaded with ``SimpleDirectoryReader``, indexed with
    ``GPTVectorStoreIndex`` and the index's storage context is persisted to
    ``./vectordatabase`` (relative to the current working directory).
    """
    documents = SimpleDirectoryReader(saved_path).load_data()
    index = GPTVectorStoreIndex.from_documents(documents)

    # Persist the storage context owned by the index itself; no separate
    # StorageContext needs to be created for this.
    index.storage_context.persist("./vectordatabase")

def generate_response(prompt):
    """Query the index persisted in ``./vectordatabase`` with ``prompt``.

    Args:
        prompt: The question passed to the query engine.

    Returns:
        The query engine's response converted with ``str``.
    """
    persisted_context = StorageContext.from_defaults(persist_dir="./vectordatabase")
    loaded_index = load_index_from_storage(persisted_context)
    answer = loaded_index.as_query_engine().query(prompt)
    return str(answer)

@st.cache_data
def displayPDF(file):
    """Render the PDF at path ``file`` inline through a base64 ``<iframe>``.

    The function is wrapped in ``st.cache_data``. It reads the file,
    base64-encodes the bytes and writes an iframe (300 wide, 1100 high)
    with ``st.markdown``.
    """
    # Read the raw bytes of the PDF from disk.
    with open(file, "rb") as pdf_handle:
        raw_bytes = pdf_handle.read()
    base64_pdf = base64.b64encode(raw_bytes).decode('utf-8')

    # Wrap the encoded document in an iframe and hand it to Streamlit.
    pdf_display = f'<iframe src="data:application/pdf;base64,{base64_pdf}" width="300" height="1100" type="application/pdf"></iframe>'
    st.markdown(pdf_display, unsafe_allow_html=True)

# Placeholder function for processing the uploaded documents
def process_documents(NCT, uploaded_file, tense, pls_grade):
    """Generate the replacement text for every placeholder of the PLS template.

    Summary sections are produced by ``llama_vector_index`` from the uploaded
    document; study facts are produced by ``CTGovAPIcall`` for the given
    trial. A sub-header is written to the page before each section is
    generated.

    Args:
        NCT: Trial identifier passed to ``CTGovAPIcall`` and used in the
            ClinicalTrials.gov link.
        uploaded_file: Document handed to ``llama_vector_index``.
        tense: Study status ("on-going", "completed" or "upcoming", in any
            letter case) or a tense name that is used as given.
        pls_grade: Literacy grade; selects the persona prompt file
            ``apls_persona_<pls_grade>_literacy.txt``.

    Returns:
        A dict mapping each ``<placeholder>`` to its replacement string.
    """
    # Convert the study status into a grammatical tense. The lookup is
    # case-insensitive so capitalised labels such as "Completed" map as well.
    tense_mapping = {"on-going": "present", "completed": "past", "upcoming": "future"}
    tense = tense_mapping.get(str(tense).lower(), tense)

    # Section-specific instructions, one prompt file per placeholder.
    section_prompts = {
        "<Title>": prompt(os.path.join(promptdir, 'title.txt')),
        "<Subtitle>": prompt(os.path.join(promptdir, 'subtitle.txt')),
        "<Key takeaway>": prompt(os.path.join(promptdir, 'key_takeaway.txt')),
        "<Phonetics>": prompt(os.path.join(promptdir, 'phonetics.txt')),
        "<Introduction>": prompt(os.path.join(promptdir, 'introduction.txt')),
        "<Intro summary>": prompt(os.path.join(promptdir, 'intro_summary.txt')),
        "<Aims>": prompt(os.path.join(promptdir, 'aims.txt')),
        "<Conclusions>": prompt(os.path.join(promptdir, 'conclusions.txt')),
    }

    # The persona prompt depends only on the literacy grade, so read it once
    # instead of once per section.
    persona_prompt = prompt(os.path.join(promptdir, f'apls_persona_{pls_grade}_literacy.txt'))

    summary_replacements = {}
    for section_name, summary_prompt in section_prompts.items():

        #prompt for pls grade and tense
        query = f"Strictly following the above instructions and the research document provided, write the content of {section_name} section of the plain language summary in {tense} tense.\
        Do not violate the section-wise instructions provided in any case. The content should be strictly inferred from the research document provided and not any other sources."

        st.subheader(f""":red[{section_name[1:-1]} :]""")
        text = llama_vector_index(uploaded_file, persona_prompt + "\n" + summary_prompt + "\n" + query)
        summary_replacements[section_name] = str(text)

    # Questions answered from the trial's ClinicalTrials.gov record.
    ctgov_questions = {
        "<Start date>": "Answer the Study Start date in ```MMM-YYYY``` format",
        "<End date>": "Answer the Study End date in ```MMM-YYYY``` format",
        "<Participants>": "Total number of Participants in the study including drug arms, placebo arm, soc arm. Give one number answer",
        "<Arms count>": "Number of arms in the study including the drug arms, placebo arm, soc arm. Give one number answer",
        "<Disease condition>": "What is the disease condition for which drug is undergoing trials on patients in the study. Give answer as one disease",
        "<Demographics>": "What are the Demographics of participants in the study",
        "<treatment arm>": "Number of participants only in the drug arms of the study, do not count the participants from placebo arm or soc arm. Give one number answer",
        "<control arm>": "Number of participants in the placebo arm or soc arm. Give one number answer",
        "<Inclusion criteria>": "Inclusion criteria in EligibilityCriteria",
        "<Exclusion criteria>": "Exclusion criteria in EligibilityCriteria",
        "<Results>": "list all outcome measure results in bullets interms of outcome measure type, outcome measure title, outcome measure description, outcome measure value",
        "<Sponsor>": "Lead Sponsor Name",
    }

    ctgov_replacements = {}
    for section_name, ctgov_prompt in ctgov_questions.items():

        st.subheader(f""":red[{section_name[1:-1]} :]""")
        text = CTGovAPIcall(NCT, ctgov_prompt)
        if section_name == "<Participants>":
            # Reduce the answer to the bare number when exactly one number is
            # present; otherwise keep the full answer rather than storing the
            # repr of a list such as "['300']".
            numbers = re.findall(r'\d+', str(text))
            if len(numbers) == 1:
                text = numbers[0]
        ctgov_replacements[section_name] = str(text)

    replacements = {**summary_replacements,
                    **ctgov_replacements,
                    "<Study number>": f"{NCT}",
                    "<clinical trials gov link>": f"https://clinicaltrials.gov/ct2/show/{NCT}",
                    "<Summary date>": datetime.now().strftime('%d-%b-%Y'),
                   }

    return replacements

def CTGovAPIcall(NCT, query):
    """Answer ``query`` about a study using its ClinicalTrials.gov JSON record.

    The first study returned for the search expression ``NCT`` is wrapped in
    a JSON toolkit and queried through a JSON agent backed by a streaming
    chat model. The final answer is written to the page and returned.

    Args:
        NCT: Search expression appended to the API URL (an NCT number, or a
            search phrase already prepared for use in a URL).
        query: Question passed to the agent.

    Returns:
        The agent's answer.
    """
    # NOTE(review): NCT is concatenated into the URL without encoding -
    # confirm callers never pass text containing spaces or reserved characters.
    base_url = 'https://classic.clinicaltrials.gov/api/query/full_studies?expr='
    format_suffix = '&fmt=JSON'
    request_url = base_url + NCT + format_suffix

    with urllib.request.urlopen(request_url) as http_response:
        payload = json.loads(http_response.read().decode())

    # Only the first matching study is exposed to the agent.
    first_study = payload["FullStudiesResponse"]["FullStudies"][0]["Study"]
    toolkit = JsonToolkit(spec=JsonSpec(dict_=first_study, max_value_length=31000))

    # Empty placeholder that the stream handler redraws as tokens arrive.
    placeholder = st.empty()
    token_streamer = StreamHandler(placeholder, display_method='write')
    streaming_llm = ChatOpenAI(temperature=0, model_name="gpt-4-32k", streaming=True, callbacks=[token_streamer],)

    executor = create_json_agent(llm=streaming_llm, toolkit=toolkit, verbose=True)
    resp = executor.run(query)
    st.write(resp)
    return resp
    
# Placeholder function for postprocessing into PPT template
def postprocess_to_ppt(replacements, selected_template):
    """Fill the placeholders of a PPT template with the generated text.

    Every run whose text is exactly one of the keys of ``replacements`` gets
    that key's value as its new text. The formatting of the paragraph's first
    run (size, name, bold, italic and, when explicitly RGB, colour) is then
    applied to every run of that paragraph.

    Args:
        replacements: Dict mapping placeholder text to replacement text.
        selected_template: Template identifier; its last four characters are
            dropped and ``.pptx`` is appended to get the file name under
            ``rootdir``.

    Returns:
        The modified ``Presentation`` object (not saved to disk here).
    """
    # Local import: the enum is needed only for the colour-type check below.
    from pptx.enum.dml import MSO_COLOR_TYPE

    # NOTE(review): the last four characters are presumably a file extension
    # such as ".png" - confirm against the caller.
    template_name = selected_template[:-4]
    prs = Presentation(os.path.join(rootdir, f"{template_name}.pptx"))

    for slide in prs.slides:
        for shape in slide.shapes:
            if not shape.has_text_frame:
                continue
            for paragraph in shape.text_frame.paragraphs:
                for run in paragraph.runs:
                    if run.text not in replacements:
                        continue

                    # Capture the formatting of the first run in the paragraph.
                    first_font = paragraph.runs[0].font
                    font_size = first_font.size
                    font_name = first_font.name
                    font_bold = first_font.bold
                    font_italic = first_font.italic

                    # color.type is an MSO_COLOR_TYPE member (or None), so it
                    # must be compared with the enum, not with a string.
                    if first_font.color.type == MSO_COLOR_TYPE.RGB:
                        font_color = first_font.color.rgb
                    else:
                        font_color = None

                    run.text = replacements[run.text]

                    # Apply the captured formatting to the whole paragraph.
                    # A distinct loop name keeps the outer ``run`` intact.
                    for styled_run in paragraph.runs:
                        styled_run.font.size = font_size
                        styled_run.font.name = font_name
                        styled_run.font.bold = font_bold
                        styled_run.font.italic = font_italic
                        if font_color is not None:
                            styled_run.font.color.rgb = font_color

    # Return the modified presentation object
    return prs


# Placeholder function for postprocessing into DOC template
def postprocess_to_doc(replacements):
    """Build a Word document from the generated PLS content.

    The document contains the title as a heading, one headed paragraph per
    narrative section that has content, and an "Additional Information"
    table with one row per study-fact variable.

    Args:
        replacements: Dict mapping ``<placeholder>`` names to their text.

    Returns:
        The new ``Document`` object (not saved to disk here).
    """
    para_variable_list = ["<Subtitle>", "<Key takeaway>", "<Phonetics>", "<Introduction>", "<Intro summary>", "<Demographics>", "<Inclusion criteria>", "<Exclusion criteria>", "<Results>", "<Aims>", "<Conclusions>"]
    table_variable_list = ["<Study number>", "<Start date>", "<End date>", "<Participants>", "<Arms count>", "<treatment arm>", "<control arm>", "<Sponsor>", "<Summary date>", "<clinical trials gov link>"]

    # Create a new document and set its base font size.
    document = Document()
    document.styles['Normal'].font.size = Pt(11)

    def add_bold_heading(text):
        # Bold is a run-level property; assigning ``.bold`` on the paragraph
        # object itself has no effect on the document.
        heading = document.add_heading(text, level=1)
        for run in heading.runs:
            run.bold = True
        return heading

    # Set the title
    title = replacements.get("<Title>")
    if title:
        add_bold_heading(str(title))

    # One heading plus body paragraph per narrative section with content.
    for variable in para_variable_list:
        value = replacements.get(variable)
        if value:
            add_bold_heading(variable[1:-1])
            document.add_paragraph(str(value))

    # Table of study facts; the variable list is fixed, so it is always added.
    document.add_heading("Additional Information", level=1)

    table = document.add_table(rows=1, cols=2)
    table.style = 'Table Grid'

    # Set table column widths
    table.autofit = False
    table.columns[0].width = Pt(200)
    table.columns[1].width = Pt(300)

    # Header row: centred, bold labels.
    for cell, label in zip(table.rows[0].cells, ("Variable", "Value")):
        cell.text = label
        header_paragraph = cell.paragraphs[0]
        header_paragraph.alignment = WD_PARAGRAPH_ALIGNMENT.CENTER
        for run in header_paragraph.runs:
            run.bold = True

    # One row per variable; a missing value becomes an empty cell because
    # cell text must be a string.
    for variable in table_variable_list:
        value = replacements.get(variable)
        row_cells = table.add_row().cells
        row_cells[0].text = variable[1:-1]
        row_cells[1].text = "" if value is None else str(value)

    return document
    
    
def main():
    
    #Page icons n tab name on browser tab
    #img = Image.open(os.path.join(rootdir, 'pfizer.png'))
    st.set_page_config(page_title = 'MAIA', page_icon = ":robot_face:", layout="wide")
    
    #to hide the hamburger running on top right and footer of streamlit
    hide_default_format = """
       <style>
       #MainMenu {visibility: hidden; }
       footer {visibility: hidden;}
       </style>
       """
    st.markdown(hide_default_format, unsafe_allow_html=True)
    
    names = ["admin","shakti"]
    usernames = ["adm", "shrp"]
    passwords = ["abc123", "def456"]

    credentials = {"usernames":{}}
    hashed_passwords = stauth.Hasher(passwords).generate()
    
    for uname, name, pwd in zip(usernames, names, hashed_passwords):
        user_dict = {"name": name, "password": pwd}
        credentials["usernames"].update({uname: user_dict})

    
    #add a cookie which will be stored on client browser to save credentials till 30days
    authenticator = stauth.Authenticate(credentials, "pls_generator", "abcdef", cookie_expiry_days = 30)

    #u can locate the authenticator in the main body or the sidebar
    name, authentication_status, username = authenticator.login("Login", "main")
    
    if st.session_state["authentication_status"] == False:
        st.error("Username/password is incorrect")
        
    if st.session_state["authentication_status"] == None:
        st.warning("Please enter your username and password")
        
    if st.session_state["authentication_status"]:
        
        #logout button on main container
        authenticator.logout('Logout', 'main')
        st.subheader(f'Welcome *{st.session_state["name"]}*')
        
        #set bg image cover
        #set_bg_hack(os.path.join(rootdir, 'iqvia-dark-blue.png'))
        sidebar_bg(os.path.join(rootdir, 'iqvia-blue.png'))
        #header_bg(os.path.join(rootdir, 'iqvia-dark-blue.png'))

        #setting banner image
        #st.image(Image.open(os.path.join(rootdir, 'Pfizer-AI.jpg')))
        
        selected_tab = option_menu(
            menu_title=None,  # required
            options=["PLS Generator", "RCT QnA", "RCT WordAnalytics", "RCT Chatbot", "Search PubMed/CTGov"],  # required
            icons=["house", "book", "envelope", "book", "envelope"],  # optional
            menu_icon="cast",  # optional
            default_index=0,  # optional
            orientation="horizontal",
            # styles={
            #     "container": {"padding": "0!important"},
            #     "icon": {"color": "orange", "font-size": "25px"},
            #     "nav-link": {
            #         "font-size": "25px",
            #         "text-align": "left",
            #         "margin": "0px",
            #         "--hover-color": "#eee",
            #     },
            #     "nav-link-selected": {"background-color": "green"},
            # },
        )
        
        #setting input components on sidebar
        with st.sidebar:

            st.image(Image.open(os.path.join(rootdir, 'iqvia-logo.png')))
            #setting title
            st.markdown("""<h3 style='text-align: center'>*MAIA - Medical Affairs Intelligence Assistant*</h3>""", unsafe_allow_html=True)

            # Step 1: Document Upload
            st.subheader("Step 1: Upload Clinical trial document")
            uploaded_file = st.file_uploader("Upload document", accept_multiple_files=False, type=["pdf"])
            
            NCT = st.text_input("Enter the NCT number:", "NCT", key="NCT")
            
            # Step 2: User Inputs
            st.subheader("Step 2: Define the tone and Grade of PLS")
            # Set default values for radio button and slider
            default_tense = "Completed"
            default_pls_grade = "Low"

            # Radio button for tense selection
            tense = st.radio("Current status of the study for writing tense", options=["On-going", "Completed", "Upcoming"], key="tense", index=["On-going", "Completed", "Upcoming"].index(default_tense), horizontal=True)
            #st.write('<style>div.row-widget.stRadio > div{flex-direction:row;}</style>', unsafe_allow_html=True)

            # Slider for PLS grade selection
            #pls_grade = st.slider("Health Literacy Grade Reading level", min_value=0, max_value=10, step=5, key="pls_grade", value=default_pls_grade)
            pls_grade = st.select_slider("Health Literacy Grade of audience", options=["Low", "High"], key="pls_grade", value = default_pls_grade)

            st.session_state.process_button = False
            process_button = st.button("Process Documents")
            st.session_state.process_button = process_button
            st.session_state.uploaded_file = uploaded_file
            st.session_state.selected_tab = selected_tab
        
        #if st.session_state.process_button and st.session_state.uploaded_file:
        #if process_button and uploaded_file:
            
            # Retrieve user inputs if you haven't initialized them to any variable, then retrieve from streamlit session state
            # st.session_state.tense = tense
            # st.session_state.pls_grade = pls_grade
            
        if st.session_state.selected_tab == "RCT QnA":
            st.subheader("Ask your PDF 💬")
            # show user input
            user_question = st.text_input("Ask a question about your PDF:", placeholder="Number of participants? ", disabled=not uploaded_file,)
            
            if st.session_state.uploaded_file:              
                
                # extract the text
                if uploaded_file is not None:
                  pdf_reader = PdfReader(uploaded_file)
                  text = ""
                  for page in pdf_reader.pages:
                    text += page.extract_text()

                  # split into chunks
                  text_splitter = CharacterTextSplitter(
                    separator="\n",
                    chunk_size=1000,
                    chunk_overlap=200,
                    length_function=len
                  )
                  chunks = text_splitter.split_text(text)
                    
                  # create embeddings
                  store_name = uploaded_file.name[:-4]
                  if os.path.exists(os.path.join(datadir, f"{store_name}.pkl")):
                    with open(os.path.join(datadir, f"{store_name}.pkl"), "rb") as f:
                        knowledge_base = pickle.load(f)
                        st.write('Embeddings loaded from the Disk:')
                  else:
                    embeddings = OpenAIEmbeddings()
                    knowledge_base = FAISS.from_texts(chunks, embeddings)
                    with open(os.path.join(datadir, f"{store_name}.pkl"), "wb") as f:
                        pickle.dump(knowledge_base, f)
                        st.write('Embeddings newly created')

                  if user_question:
                    docs = knowledge_base.similarity_search(user_question, k=3)

                    chat_box = st.empty()
                    stream_handler = StreamHandler(chat_box, display_method='write')
        
                    llm = ChatOpenAI(temperature=0, callbacks=[stream_handler], streaming=True)
                    chain = load_qa_chain(llm, chain_type="stuff")
                    
                    #get_openai_callback() gives the cost on console
                    # with get_openai_callback() as cb:
                    #   response = chain.run(input_documents=docs, question=user_question)
                    #   print(cb)
                    response = chain.run(input_documents=docs, question=user_question)
                    st.write(response)    
        
        if st.session_state.selected_tab == "RCT Chatbot":
            # Ensure the directory exists
            if not os.path.exists(saved_path):
                os.makedirs(saved_path)
        
            if uploaded_file is not None:
                # To read file as bytes:
                bytes_data = uploaded_file.getvalue()

                # Save the uploaded file to the 'data' directory
                with open(os.path.join(saved_path, uploaded_file.name), 'wb') as out_file:
                    out_file.write(bytes_data)

                st.success('PDF file saved in data directory')
                create_vector()
                #remove_all_files(saved_path)
                st.success('Vector created')

            # Initialise session state variables
            if 'generated' not in st.session_state:
                st.session_state['generated'] = []
            if 'past' not in st.session_state:
                st.session_state['past'] = []
            if 'messages' not in st.session_state:
                st.session_state['messages'] = [
                    {"role": "system", "content": "You are a helpful assistant."}
                ]

            response_container = st.container()
            # container for text box
            container = st.container()

            with container:
                with st.form(key='my_form', clear_on_submit=True):
                    user_input = st.text_area("You:", key='input', height=50)
                    submit_button = st.form_submit_button(label='Send')

                if submit_button and user_input:
                    output = generate_response(user_input)
                    st.session_state['past'].append(user_input)
                    st.session_state['generated'].append(output)
                    #st.session_state['model_name'].append(model_name)

            if st.session_state['generated']:
                with response_container:
                    for i in range(len(st.session_state['generated'])):
                        message(st.session_state["past"][i], is_user=True, key=str(i) + '_user1')
                        message(st.session_state["generated"][i], key=str(i))
                        
        if st.session_state.selected_tab == "RCT WordAnalytics":    
            #st.subheader(f"You have selected {selected_tab}")
            
            # extract the text
            if uploaded_file is not None:
                pdf_reader = PdfReader(uploaded_file)
                text = ""
                for page in pdf_reader.pages:
                    text += page.extract_text()
                
                #Remove un-important words:
                stop_words = set(stopwords.words('english'))
                query_words={'participants', 'Participants' }
                stop_words.update(query_words)
                for word in query_words:
                    text = text.replace(word, '')
                    
                # Create and generate a word cloud image:
                wordcloud = WordCloud(stopwords=stop_words, collocations=False, max_font_size=55, max_words=25, background_color="black").generate(text)

                # Display the generated image:
                plt.figure(figsize=(10,12))
                plt.imshow(wordcloud, interpolation='bilinear')
                plt.axis("off")
                plt.show()
                st.set_option('deprecation.showPyplotGlobalUse', False)
                st.pyplot()
                
                col1, col2 = st.columns(2)
                
                with col1:
                    #overall doc sentiment
                    analyzer=SentimentIntensityAnalyzer()
                    polarity = analyzer.polarity_scores(text)['compound']
                    st.subheader(f"\nOverall Research document sentiment is: {get_sentiment(polarity)}", )
            
                    # splitting modified single string into list of strings using groupby() function
                    grouped_strings = ["".join(g) for k, g in itertools.groupby(text, lambda x: x == " ") if not k]
                    
                    #word-wise sentiment
                    df = pd.DataFrame()
                    df['polarity']=[analyzer.polarity_scores(text)['compound'] for text in grouped_strings]
                    df['sentiment']=df.polarity.apply(get_sentiment)
                    plt.figure(figsize=(3,3))
                    df.sentiment.value_counts().plot.bar()
                    st.pyplot()
                    
                with col2:
                    st.subheader("\nWord frequency of the words in the research doc is: ")
                    
                    #Tokenization
                    tokens = nltk.word_tokenize(text)

                    #compute freq distribution
                    freq_dist = FreqDist(tokens)

                    #plot the freq distribution
                    freq_dist.plot(50, cumulative=True)

                    #set labels and title
                    # plt.xlabel('Words')
                    # plt.ylabel('Frequency')
                    plt.title('Frequency distribution of words')
                    st.pyplot()
                    
                    data2,data3,data4 = word_frequency(text)
                    fig, axes = plt.subplots(3,1,figsize=(8,20))
                    sns.barplot(ax=axes[0],x='frequency',y='word',data=data2.head(30))
                    sns.barplot(ax=axes[1],x='frequency',y='pairs',data=data3.head(30))
                    sns.barplot(ax=axes[2],x='frequency',y='trigrams',data=data4.head(30))
                    st.pyplot(fig)
                
        if st.session_state.selected_tab == "PLS Generator":
            if st.session_state.process_button and st.session_state.uploaded_file and st.session_state.NCT!='NCT':
                col1, col2 = st.columns([0.2,0.8], gap="large")
                with col1:
                    input_file = save_uploadedfile(uploaded_file)
                    pdf_file = os.path.join(datadir, uploaded_file.name) #rootdir + "/" + uploaded_file.name
                    pdf_view = displayPDF(pdf_file)
                with col2:
                    with st.spinner(text='Processing research document you gave on the left to generate Plain Language Summary for you...⏳'):

                        # Progress bar
                        #progress_bar_method(50) or st.progress(0, "text")
                        
                        # Call the processing function on the uploaded documents with user inputs
                        replacements = process_documents(NCT, pdf_file, tense, pls_grade)
                        st.success("Processed Output to be filled up in the preferred PLS template")

                        #Display processed output
                        #st.write(replacements)
                        st.snow()
                        st.balloons()
                        
                # Store the replacements dictionary in session state
                st.session_state.replacements = replacements

            # Step 3: PPT Template Selection and Download
            st.subheader("Step 3: Select PLS Template and Download")
            
            default_format = "PPT format"
            st.session_state.select_format = pills("Select PPT or Word format", ["PPT format", "Word format"], ["🎈", "🌈"], index=["PPT format", "Word format"].index(default_format))
            
            if st.session_state.select_format == "PPT format":
                # Add radio buttons for template selection here    
                default_template = "Blue_PLS_Template.png"
                selected_template = image_select(
                    label="Select PPT Template",
                    images=[
                        os.path.join(rootdir, 'Pfizer_Blue_PLS_Template.png'),
                        os.path.join(rootdir, 'Pfizer_Red_PLS_Template.png'),
                        os.path.join(rootdir, 'Pfizer_Long_PLS_Template.png'),
                    ],
                    captions=["Blue_PLS_Template", "Red_PLS_Template", "Long_PLS_Template"],
                    index=["Blue_PLS_Template.png", "Red_PLS_Template.png", "Long_PLS_Template.png"].index(default_template),
                    use_container_width = False,
                )
                #selected_template = st.radio("Select PPT Template", options=["Blue_PLS_Template", "Red_PLS_Template", "Long_PLS_Template"], index=["Blue_PLS_Template", "Red_PLS_Template", "Long_PLS_Template"].index(default_template), horizontal=True)
                #selected_template = pills("", ["Pfizer_Blue_PLS_Template", "Pfizer_Red_PLS_Template", "Pfizer_Long_PLS_Template"], ["🍀", "🎈", "🌈"])
            
            if st.session_state.select_format == "Word format":
                default_template = "Word_PLS_Template"
                selected_template = "Blue_PLS_Template.png"
                image_select(
                    label="Select Word Template",
                    images=[
                        os.path.join(rootdir, 'Pfizer_Word_PLS_Template.png'),
                    ],
                    captions=["Word_PLS_Template"],
                    index=["Word_PLS_Template"].index(default_template),
                    use_container_width = False,
                )
            
            generate_ppt_button = st.button("Generate PLS")

            if generate_ppt_button:
                # Retrieve the replacements dictionary from session state
                replacements = st.session_state.replacements
                st.session_state.process_button = False

                if replacements:
                    with st.spinner('Generating PLS slides for you...⏳'):
                        # Call the postprocessing function to generate PPT content
                        ppt_content = postprocess_to_ppt(replacements, selected_template)

                        doc_content = postprocess_to_doc(replacements)

                        # Display the PPT content using st.markdown or st.write
                        #st.markdown(ppt_content, unsafe_allow_html=True)
                        st.markdown(list(replacements.keys()))

                        # Store the PPT content in session state
                        st.session_state.ppt_content = ppt_content
                        st.session_state.doc_content = doc_content

                         # Step 4: PPT Download
                        if "ppt_content" and "doc_content" in st.session_state:
                            ppt_content = st.session_state.ppt_content
                            doc_content = st.session_state.doc_content

                            st.session_state.replacements = replacements
                            st.session_state.process_button = False

                            # Save the modified presentation object to a temporary file
                            #ppt_output_file = f"PLS_{replacements['<Title>']}_{datetime.now().strftime('%Y%m%d%H%M%S')}.pptx"                    
                            ppt_output_file = "PLS_PPT.pptx"
                            #ppt_content.save(ppt_output_file)

                            # save presentation as binary output
                            binary_output = BytesIO()
                            ppt_content.save(binary_output)

                            binary_output_doc = BytesIO()
                            doc_content.save(binary_output_doc)

                            # display success message and download button
                            st.success(':tada: The PLS template has been filled with above sections in ' + selected_template)

                            # Provide the download link for the generated PPT and DOC
                            if st.session_state.select_format == "PPT format":
                                st.download_button("Download PLS PPT", data=binary_output.getvalue(), file_name=ppt_output_file, mime="application/vnd.openxmlformats-officedocument.presentationml.presentation")
                            if st.session_state.select_format == "Word format":
                                st.download_button("Download PLS Doc", data=binary_output_doc.getvalue(), file_name="PLS_DOC.docx", mime="application/vnd.openxmlformats-officedocument.wordprocessingml.document")

                                
        if st.session_state.selected_tab == "Search PubMed/CTGov":
            st.subheader("Search PubMed and ClinicalTrials.gov")
            query = st.text_input("Enter your query:")
            search_option = st.radio("Search Option", ["Search PubMed", "Search ClinicalTrials.gov", "Search Both"], horizontal=True)

            col1, col2 = st.columns(2)  # Split the main container into two columns
            if query:
                if search_option == "Search PubMed":
                    pubmed_articles = search_pubmed(query)[:5]
                    st.session_state.articles = pubmed_articles
                    with col1:
                        st.write("### PubMed Results")
                        display_articles(pubmed_articles)
                    # Create a DataFrame in the second column with PubMed articles
                    with col2:
                        df_pubmed = pd.DataFrame(pubmed_articles)
                        st.write("### PubMed DataFrame")
                        st.write(df_pubmed)

                elif search_option == "Search ClinicalTrials.gov":
                    ctgov_articles = search_ctgov(query)[:5]
                    st.session_state.articles = ctgov_articles
                    with col1:
                        st.write("### ClinicalTrials.gov Results")
                        display_articles(ctgov_articles)
                    # Create a DataFrame in the second column with ClinicalTrials.gov articles
                    with col2:
                        df_ctgov = pd.DataFrame(ctgov_articles)
                        st.write("### ClinicalTrials.gov DataFrame")
                        st.write(df_ctgov)

                else:  # Search Both
                    pubmed_articles = search_pubmed(query)[:5]
                    ctgov_articles = search_ctgov(query)[:5]
                    st.session_state.articles = pubmed_articles + ctgov_articles
                    with col1:
                        st.write("### PubMed Results")
                        display_articles(pubmed_articles)
                    with col1:  # Use the same column for ClinicalTrials.gov Results
                        st.write("### ClinicalTrials.gov Results")
                        display_articles(ctgov_articles)
                    # Concatenate both sets of articles and create a DataFrame in the second column
                    with col2:
                        combined_articles = pubmed_articles + ctgov_articles
                        df_combined = pd.DataFrame(combined_articles)
                        st.write("### Combined DataFrame")
                        st.write(df_combined)
            # Placeholder for chatbot implementation in the second column
            with col2:
                st.write("Chatbot - Ask questions from only these Pubmed/CTGov articles")
                if 'articles' in st.session_state:      
                    df = pd.DataFrame(st.session_state.articles)
                    #user_prompt = st.text_area(label="prompt:",placeholder="Number of patients..",)
                    #if st.button("Generate"):
                ########################################################LangChain CSV Agent (with Pandas)

                    # langchain_pandas_agent = create_pandas_dataframe_agent(
                    #     ChatOpenAI(temperature=0, model="gpt-4-32k", streaming=True, ),
                    #     df,
                    #     verbose=True,
                    #     agent_type=AgentType.OPENAI_FUNCTIONS,
                    # )

                    #st.write("Langchain pandas agent: ", langchain_pandas_agent.run(user_prompt))

                    # Initialise session state variables
                    if 'generated1' not in st.session_state:
                        st.session_state['generated1'] = []
                    if 'past1' not in st.session_state:
                        st.session_state['past1'] = []
                    if 'messages1' not in st.session_state:
                        st.session_state['messages1'] = [
                            {"role": "system", "content": "You are a helpful assistant."}
                        ]


                    # container for chat history
                    response_container = st.container()

                    # container for text box
                    input_container = st.container()

                    with input_container:
                        # Create a form for user input
                        with st.form(key='my_form', clear_on_submit=True):
                            user_input = st.text_area("You:", key='input', height=100)
                            submit_button = st.form_submit_button(label='Send')

                        if submit_button and user_input:
                            # If user submits input, generate response and store input and response in session state variables
                            try:
                                query_response = generate_response1(user_input, df)
                                st.session_state['past1'].append(user_input)
                                st.session_state['generated1'].append(query_response)
                            except Exception as e:
                                st.error("An error occurred: {}".format(e))

                    if st.session_state['generated1']:
                        # Display chat history in a container
                        with response_container:
                            for i in range(len(st.session_state['generated1'])):
                                message(st.session_state["past1"][i], is_user=True, key=str(i) + '_user')
                                message(st.session_state["generated1"][i], key=str(i))
                        
                        
                        # Add a download button for the chat conversation
                        #if st.button("Download Chat Conversation"):
                            #download_chat_conversation(st.session_state['past'], st.session_state['generated'])
                                
# Script entry point: call main() only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()


Overwriting app8.py
