In [12]:
import os
from dotenv import load_dotenv

load_dotenv(override=True)

OPENAI_API_TYPE = os.getenv('OPENAI_API_TYPE')
AZURE_OPENAI_ENDPOINT = os.getenv('AZURE_OPENAI_ENDPOINT')
OPENAI_API_VERSION = os.getenv('OPENAI_API_VERSION')
AZURE_OPENAI_API_KEY = os.getenv('AZURE_OPENAI_API_KEY')
AZURE_OPENAI_LLM_DEPLOYMENT_NAME=os.getenv("AZURE_OPENAI_LLM_DEPLOYMENT_NAME")
AZURE_OPENAI_LLM_MODEL_NAME=os.getenv("AZURE_OPENAI_LLM_MODEL_NAME")
CONFLUENCE_TOKEN = os.getenv('CONFLUENCE_TOKEN')


In [13]:
from langchain.document_loaders import ConfluenceLoader

loader = ConfluenceLoader(
    url="https://strive.devops.t-systems.net/confluence/",
    token=CONFLUENCE_TOKEN
)

confluence_documents = loader.load(
    space_key="SDV", 
    include_attachments=True, #confluence_docuemnts  becomes a list of dictionaries
    limit=100,
    max_pages=1000,
)

print(confluence_documents[1])
print(f'{len(confluence_documents)} documents read from Confluence.')

Received runtime arguments {'space_key': 'SDV', 'include_attachments': True, 'limit': 100, 'max_pages': 1000}. Passing runtime args to `load` is deprecated. Please pass arguments during initialization instead.


page_content="true 40% Content Owner 60% 1 In this article you can find the information about domain Telemetry and Connectivity: Mission Vision Commitment Objective For more information & our current SDV Service portfolio click here . You will find a detailed description of our business area in this PowerPoint. Please request access if you don't have it yet. 30 White #e20074 Our Mission Telemetry and Connectivity Our mission is to be the nerve center of Software-Defined Vehicles, enabling them to perceive, connect, and communicate with the world around them. 60 White #e20074 Our Vision We imagine a world where all vehicles are defined by software to be secure, connected, entertaining, sustainable, automated and user centric. White #e20074 Our Commitment We focus on breaking silos and building foundations for our partners to have access to all type of vehicle data and creating insights into hidden SDV secrets for scaling, performance optimization and achieving excellence. White #e20074 

In [14]:
from openai import AzureOpenAI
client = AzureOpenAI(
    api_key=AZURE_OPENAI_API_KEY,
    api_version=OPENAI_API_VERSION,
    azure_endpoint=AZURE_OPENAI_ENDPOINT, 
)


In [15]:
from azure.core.exceptions import HttpResponseError
def evaluate_confluence_page(page_content):
    prompt = f"""
    You are an AI assistant tasked with evaluating the quality of Confluence page content to ensure it is optimized for use with a chatbot. The following recommendations outline the criteria for high-quality content that is effective for chatbot responses.

    Quality Guidelines for Confluence Content:
    1. Content Structure: Ensure the text is clear and well-structured to allow the chatbot to understand and extract information accurately.
    2. Simplicity: Use simple language and avoid complex sentences. Provide necessary details without unnecessary complexity.
    3. Context: Avoid short bullet points that lack context. Use meaningful, coherent sentences that provide complete information.
    4. Consistency: Avoid undefined abbreviations and acronyms that can confuse the chatbot. Use consistent terminology throughout the content.
    5. Headers: Use headers and sub-headers to include page titles in the context, which helps the chatbot understand the structure better.

    Image Handling:
    6. Prioritize Text: Whenever possible, prioritize text over images. Images should be used to complement text, not replace it.
    7. Text Descriptions: Provide text descriptions for each image to ensure the chatbot can interpret visual information accurately.
    8. Quality: Use high-resolution images to facilitate accurate OCR.

    Attachments (PDFs, PPTs, World, Excel):
    9. Prioritize Text: Prioritize text over attachments whenever possible. If attachments are essential, it is crucial to Provide concise summaries of key points from attachments, including descriptive titles and relevant keywords.
    10. Selectable Text: Use PDFs with selectable, searchable text to ensure compatibility with the chatbot.

    Overall Recommendations:
    12. Text Over Attachments (crucial): Prioritize textual content over attachments. If attachments are essential, it is crucial to Provide concise summaries of key points from attachments, including descriptive titles and relevant keywords.
    13. FAQs and Scenarios (optional, not considered in the overall grade when it's absent but considered as a plus when available): When relevant, incorporate FAQs and common scenarios related to the topic when possible.
    14. Tagging and Categorization: Tag attachments (PDF, PPT, word, excel, images) with relevant keywords and categorize them appropriately.
    15. Summaries (optional, not considered in the overall grade when it's absent but considered as a plus when available):  Consider including a brief summary or abstract at the beginning of long or complex documents. This allows the chatbot to quickly grasp the main points. However, summaries may not be necessary for concise, well-structured documents or those utilizing tables effectively to organize information.

    Here is the content of the Confluence page and its attachments for evaluation:

    Confluence Page Content:
    {page_content}

    Based on the above guidelines, please provide a detailed evaluation and grade as Red (needs improvement), Yellow (acceptable but could be better), or Green (meets or exceeds expectations). Provide specific comments and suggestions for improvement where applicable.
    Consider the following when grading:
    - If images are used within a table structure along with relevant text descriptions, consider them appropriately described.
    - If the document is short, concise, and well-structured, it may not require a separate summary.

    Additionally, provide an overall grade and overall recommendations based on the individual criteria evaluations.
    """
    try:
        response = client.chat.completions.create(
            model = AZURE_OPENAI_LLM_DEPLOYMENT_NAME,
            messages = [{"role": "user", "content": prompt}]
        )
    
        return response.choices[0].message.content.strip() 
    except HttpResponseError as e:
        print(f"OpenAI API Error: {e}")
        return "Error"

In [17]:
specific_page_title = "ConMob_Responsibilities"
specific_page = next((page for page in confluence_documents if page.metadata['title'] == specific_page_title), None)

if specific_page:
    print(f"Evaluating Page: {specific_page.metadata['title']}")

page_content_with_metadata = f"Page Title: {specific_page.metadata['title']}\n\n{specific_page.page_content}"

evaluation_results = evaluate_confluence_page(page_content_with_metadata)
if evaluation_results != "Error":
        print(evaluation_results)
else:
    print(f"Page with title '{specific_page_title}' not found.")

Evaluating Page: ConMob_Responsibilities
Based on the content provided, my evaluation is as follows:

**Confluence Page Content Evaluation**

1. Content Structure: Green. The page contains well-structured information with a clear layout. Information is neatly organized into tables, making it easy for a chatbot to extract and use.
   
2. Simplicity: Yellow. Although the document uses simple language, the extensive use of acronyms and project codes could be hard for the bot to understand. Adding a project code and acronym glossary could help the bot recognize these terms.

3. Context: Red. The page uses numerous short bullet points without providing a clear context or explanation, especially towards the end. Try to add more information to these points to provide complete information.
   
4. Consistency: Yellow. Many abbreviations and project codes are used without definitions. While some are defined at the beginning, consistency in this practice is lacking. Adding a glossary could fix th