In [646]:
from mistralai.client import MistralClient
from mistralai.models.chat_completion import ChatMessage
from dotenv import load_dotenv
from docx import Document
from OpenRAG.src.openrag.chunk_vectorization.chunk_vectorization import get_vectorizer
from OpenRAG.src.openrag.vectordb.milvus_adapter import init_milvus_connection
from pymilvus import Collection
import os
import cohere
import json
import re
import helper_functions as hf
import create_prompts as prompts

load_dotenv()  # take environment variables from .env.

True

In [647]:
def send_request_to_mistral_ai(model, messages):
    """
    Send a chat request to the Mistral AI API and return the reply text.

    Args:
        model (str): The Mistral AI model to use.
        messages (List[ChatMessage]): The messages to send in the request.

    Returns:
        str | None: The content of the first choice of the response, or None
        when the API call raised (the error is printed, not re-raised).

    Raises:
        ValueError: If MISTRAL_API_KEY is missing from the environment or empty.
    """
    # .get() instead of [...]: a missing variable must reach the explicit check
    # below and raise the documented ValueError rather than a bare KeyError.
    api_key = os.environ.get("MISTRAL_API_KEY")
    if not api_key:
        raise ValueError("MISTRAL_API_KEY not found in environment variables.")
    client = MistralClient(api_key=api_key)

    try:
        chat_response = client.chat(
            model=model,
            messages=messages,
        )
        # Only the first choice is used.
        return chat_response.choices[0].message.content
    except Exception as e:
        # Best-effort by design: callers receive None and decide what to do.
        print(f"Error in Mistral AI request: {e}")
        return None
    

def load_company_knowledge(doc_paths=None):
    """
    Load company knowledge from company documents.

    Args:
        doc_paths (Iterable[str], optional): Paths of the .docx files to read.
            Defaults to the three internal StIT documents (business model,
            long-term strategy, products and services).

    Returns:
        str: The text of all documents. Paragraphs within a document and the
        documents themselves are separated by a single space.
    """
    if doc_paths is None:
        doc_paths = [
            'Data/Internal/Business Model de StIT.docx',
            'Data/Internal/Plan de développement stratégique sur 8 ans pour StIT.docx',
            'Data/Internal/Produits et services de StIT.docx',
        ]

    document_texts = []
    for doc_path in doc_paths:
        docx_document = Document(doc_path)
        document_texts.append(
            ' '.join(paragraph.text for paragraph in docx_document.paragraphs)
        )

    # Join with a space so the last word of one document is not fused with the
    # first word of the next one.
    return ' '.join(document_texts)

def find_chunks(id, path = "Data/Internal/HR/"):
    """
    Find the chunk based on the given global id.

    The file "global_indexing.json" (read from the current working directory)
    maps each document key to the inclusive range of global ids it owns; the
    chunk itself is read from "<path><key>_chunks.json".

    Args:
        id (int): The global id of the chunk to find.
        path (str, optional): The path to the chunk files. Defaults to "Data/Internal/HR/".

    Returns:
        dict: The chunk entry, extended with "document" (file name) and
        "fullpath" (path + file name), or None if no range contains the id.

    Raises:
        KeyError: If the id falls in a document's range but the chunk file has
            no matching "chunk_<n>" entry.
    """
    # Context managers so the file handles are closed deterministically.
    with open("global_indexing.json", "r") as index_file:
        global_indexing = json.load(index_file)

    for key, value in global_indexing.items():
        start_idx = value["start"]
        end_idx = value["end"]
        if start_idx <= id <= end_idx:
            with open(path + key + "_chunks.json", "r") as chunks_file:
                data_dict_file = json.load(chunks_file)
            # Chunks are numbered from 0 inside each document.
            chunk = data_dict_file["chunk_" + str(id - start_idx)]
            chunk["document"] = key + ".docx"
            chunk["fullpath"] = path + key + ".docx"
            return chunk
    return None

In [648]:
def create_prompt_analyst_agent(context, company_knowledge, type):
    """
    Create the analyst prompt for the Mistral AI model.

    Args:
        context (str): The text of the document to analyze.
        company_knowledge (str): The company knowledge to include in the prompt.
        type (str): The kind of document (e.g. "news article", "law"); it is
            interpolated into the instructions and used as the tag name that
            wraps the document text.

    Returns:
        str: The prompt for the Mistral AI model. It asks for an <output>
        section containing <priority_level>, <justification> and <main_topic>.
    """
    # The <main_topic> line of the template ends with a proper closing tag
    # (</main_topic>) so the requested output format is well-formed.
    prompt = f"""
                You are an experienced business analyst tasked with determining the priority level of {type}s based on their relevance to your company, StIT.

                Here is some crucial information about the company to consider during your analysis:
                <company_knowledge>{company_knowledge}</company_knowledge>

                Please thoroughly read and analyze the following {type}:

                <{type}>{context}</{type}>

                After completing your analysis, provide your final assessment in the <output> section, using the following format:

                <output>
                <priority_level>High OR Medium OR Low</priority_level>
                <justification>A detailed explanation of your priority rating, including how the {type}'s main points and key details relate to the company's goals, operations, or industry, and the potential implications and impact of the {type} on the company</justification>
                <main_topic>A one-sentence summary highlighting the {type}'s main topic</main_topic>
                </output>

                Remember, your goal is to help company management quickly identify and prioritize important {type}s, so be sure to consider the key implications and potential impact of the {type} on the company in your priority rating and justification.
            """
    return prompt

In [649]:
def analyst_agent(context, type):
    """
    Analyze a document and dispatch the result to the dispatch agent.

    The .docx file at `context` is read, rated by the Mistral AI model against
    the company knowledge, the parsed rating is printed, and the main topic and
    justification are handed to `dispatch_agent`.

    Args:
        context (str): Path of the .docx document to analyze.
        type (str): The kind of document (e.g. "news article", "law",
            "social media post"), used to word the prompt.

    Returns:
        None
    """
    docx_document = Document(context)
    context_content = ' '.join(paragraph.text for paragraph in docx_document.paragraphs)

    company_knowledge = load_company_knowledge()

    prompt = create_prompt_analyst_agent(context_content, company_knowledge, type)

    model = "mistral-large-latest"

    messages = [
        ChatMessage(role="system", content=prompt),
        ChatMessage(role="user", content=f"Content of the {type}  : " + context_content)
    ]

    xml_response = send_request_to_mistral_ai(model, messages)
    if xml_response is None:
        # The request helper returns None when the API call failed; there is
        # nothing to parse or dispatch in that case.
        print(f"No analysis available for {context}; skipping dispatch.")
        return None

    priority_level = hf.xml_retriever(xml_response, 'priority_level')
    justification = hf.xml_retriever(xml_response, 'justification')
    main_topic = hf.xml_retriever(xml_response, 'main_topic')

    print("Priority Level: ", priority_level)
    print("Justification: ", justification)
    print("Main Topic: ", main_topic)

    dispatch_agent(main_topic, justification)
    return None

In [650]:
def create_prompt_dispatch_agent(main_topic, justification, CVs):
    """
    Create the dispatch prompt for the Mistral AI model.

    Args:
        main_topic (str): The main topic of the matter to analyze.
        justification (str): The reason why the matter might be relevant to the company.
        CVs (str | Iterable[str]): The CVs of the employees, either as one
            string or as a collection with one entry per employee.

    Returns:
        str: The prompt for the Mistral AI model. It asks for an <output>
        section naming up to five employees (<employee1> ... <employee5>).
    """
    # A collection of CVs is rendered as plain text, one CV per paragraph;
    # interpolating the list directly would embed its Python repr (brackets,
    # quotes, escape sequences) in the prompt.
    if isinstance(CVs, str):
        cvs_text = CVs
    else:
        cvs_text = "\n\n".join(str(cv) for cv in CVs)

    prompt = f"""
                You are a senior executive at StIT, and you have been tasked with identifying the employee who should be informed about a specific matter based on their expertise and role within the company.
                Please thoroughly read and analyze the following matter:

                <matter>{main_topic} {justification}</matter>
                Now read carefully the CVs of the following employees and rank them in order of relevance to the matter:
                <CVs>{cvs_text}</CVs>
                In the <output> section, write down the names of the 5 people who are the most relevant to contact for this matter. Carefully consider how the main points and key details of this matter. Relate to the provided CVs and job titles at StIT to select the relevant employees. 
                
                After completing your analysis, provide your final assessment in the <output> section, using the following format:

                <output>
                <employee1>NAME_EMPLOYEE1</employee1>
                <employee2>NAME_EMPLOYEE2</employee2>  
                <employee3>NAME_EMPLOYEE3</employee3>
                <employee4>NAME_EMPLOYEE4</employee4>
                <employee5>NAME_EMPLOYEE5</employee5>
                </output>

                If no Employee is relevant, please write "None".
                Remember, your goal is to help company management quickly identify and prioritize the employees to inform about the matter, so be sure to consider the key implications and potential impact of the matter on the company in your selection of relevant profiles to inform about it.
            """
    return prompt

def _employee_name_from_cv_path(fullpath):
    """Return the employee name encoded in a CV path ("…/CV <NAME>.docx"), else the file stem."""
    match = re.search(r'/CV\s*(.+?)\.docx', fullpath)
    if match:
        # The name is in between 'CV' and '.docx'
        return match.group(1)
    # Path does not follow the "CV <NAME>.docx" convention: use the file name
    # without its extension instead of failing on a None match.
    return os.path.splitext(os.path.basename(fullpath))[0]


def dispatch_agent(main_topic, justification):
    """
    Determine the employee(s) who should be informed about the given topic and justification.

    The main topic is used to retrieve candidate CVs through
    `internal_retriever_agent`; each retrieved CV file is read in full and sent,
    together with the matter, to the Mistral AI model. The model's answer is
    printed.

    Args:
        main_topic (str): The main topic of the news.
        justification (str): The justification for the priority level of the news.

    Returns:
        None
    """
    results = internal_retriever_agent(main_topic)

    CVs = []
    for result in results:
        cv_path = result['fullpath']
        docx_document = Document(cv_path)
        content_cv = ' '.join(paragraph.text for paragraph in docx_document.paragraphs)
        name = _employee_name_from_cv_path(cv_path)
        CVs.append("Name: " + name + " " + content_cv)

    prompt = create_prompt_dispatch_agent(main_topic, justification, CVs)
    messages = [
        ChatMessage(role="system", content=prompt),
        ChatMessage(role="user", content="The matter : " + main_topic + justification)
    ]

    model = "mistral-large-latest"

    print(send_request_to_mistral_ai(model, messages))

    return None

In [651]:
def internal_retriever_agent(text, filter='HR'):
    """
    Retrieve internal information related to the given text.

    The text is vectorized with the 'mistral' vectorizer and searched (L2
    metric, up to 20 hits) in the Milvus collection "mistral_collection",
    restricted to rows whose `source` equals `filter`. For filters other than
    'HR' the chunks directly before and after each hit are added as well; for
    'HR' only the first chunk of each document is kept. The full path of every
    returned chunk is printed.

    Args:
        text (str): The text to search for.
        filter (str, optional): The filter to apply to the search results. Defaults to 'HR'.

    Returns:
        list: A list of chunk dictionaries as returned by `find_chunks`
        (including their "document" and "fullpath" keys).
    """
    vectorizer = get_vectorizer('mistral')
    query_vector = vectorizer.vectorize(text)

    init_milvus_connection()

    collection_name = "mistral_collection"
    collection = Collection(name=collection_name)

    n_neighbors = 20
    # `filter` is interpolated verbatim into the Milvus expression; it is
    # expected to be a trusted source label.
    search_results = collection.search(
        [query_vector],
        "vector",
        param={"metric_type": "L2", "params": {}},
        limit=n_neighbors,
        expr="source == '" + filter + "'",
    )

    include_neighbours = filter != 'HR'
    candidates = []      # (chunk id, distance) in retrieval order
    seen_ids = set()     # ids already queued; the list above holds pairs, so
                         # membership has to be tracked separately

    def _queue(chunk_id, distance):
        # Add a chunk once, and never more than n_neighbors in total.
        if chunk_id not in seen_ids and len(candidates) < n_neighbors:
            seen_ids.add(chunk_id)
            candidates.append((chunk_id, distance))

    for result in search_results[0]:
        if result.id in seen_ids or len(candidates) >= n_neighbors:
            continue
        _queue(result.id, result.distance)
        if include_neighbours:
            if result.id - 1 >= 0:
                _queue(result.id - 1, result.distance)
            _queue(result.id + 1, result.distance)

    answer_chunks = []
    seen_documents = set()
    for chunk_id, _distance in candidates:
        # NOTE(review): find_chunks is called with its default HR path even
        # for other filters — confirm this is intended.
        answer_chunk = find_chunks(chunk_id)
        if answer_chunk is None:
            # The id is not covered by the global index (e.g. a neighbour past
            # the last chunk): nothing to return for it.
            continue
        if filter == 'HR' and answer_chunk['document'] in seen_documents:
            continue
        seen_documents.add(answer_chunk['document'])
        answer_chunks.append(answer_chunk)
        print(answer_chunk['fullpath'])

    return answer_chunks

In [652]:
# Sample external documents used to exercise the pipeline, grouped by kind.
news_article1, news_article2, news_article3, news_article4 = (
    'Data/External/News Articles/ New tax law in France aims to encourage and support the growth of startups and small businesses copy.docx',
    'Data/External/News Articles/Local bakery in Paris wins award for best croissant in the city copy.docx',
    'Data/External/News Articles/New survey finds that the majority of French people prefer to shop online rather than in-store copy.docx',
    'Data/External/News Articles/Massive cyberattack exposes the vulnerabilities of businesses and organizations copy.docx',
)

new_law1, new_law2, new_law3, new_law4 = (
    'Data/External/Laws/New animal welfare law in France.docx',
    'Data/External/Laws/New data privacy law in France.docx',
    'Data/External/Laws/New environmental protection law in France.docx',
    'Data/External/Laws/New cybersecurity and technology law in France.docx',
)

new_post1, new_post2, new_post3, new_post4 = (
    'Data/External/Social Media/cybersecurity.docx',
    'Data/External/Social Media/space_travel.docx',
    'Data/External/Social Media/startup_culture.docx',
    'Data/External/Social Media/coffee_and_snacks_lover.docx',
)

if __name__ == "__main__":
    # Two documents of each kind, analyzed one after the other in this order.
    demo_runs = [
        (news_article2, "news article"),
        (news_article4, "news article"),
        (new_law1, "law"),
        (new_law4, "law"),
        (new_post1, "social media post"),
        (new_post4, "social media post"),
    ]
    for document_path, document_kind in demo_runs:
        analyst_agent(document_path, document_kind)

Priority Level:  Low
Justification:  The news article discusses a local bakery in Paris winning an award for the best croissant in the city. While this is a positive development for the bakery and the local community, it has no direct relevance or impact on StIT, a technology company focused on developing software solutions for businesses and individuals. The article's main points and key details do not relate to StIT's goals, operations, or industry, and there are no potential implications or impact on the company.
Main Topic:  A local bakery in Paris, "Le Croissant Doré", wins an award for the best croissant in the city.
Data/Internal/HR/CV ISABELLE LECLERC.docx
Data/Internal/HR/CV ALEXANDRE DURAND.docx
Data/Internal/HR/CV JEANNE D'ARC.docx
Data/Internal/HR/CV THOMAS LEFÈVRE.docx
Data/Internal/HR/CV LAURENT BERNARD.docx
Data/Internal/HR/CV SOPHIE MOREAU.docx
Data/Internal/HR/CV JEAN MASSART.docx
Data/Internal/HR/CV PIERRE LECLERC.docx
Data/Internal/HR/CV FRANÇOIS ROUSSEAU.docx
<outpu