In [411]:
from mistralai.client import MistralClient
from mistralai.models.chat_completion import ChatMessage
from dotenv import load_dotenv
from docx import Document
import os
import cohere
import json
import re

load_dotenv()  # take environment variables from .env.

True

In [412]:
def xml_retriever(xml_response, tag, default=None):
    """
    Retrieve the content between the first pair of the specified XML tags.

    This is a plain substring search, not an XML parser: it looks for the
    literal ``<tag>`` and the first literal ``</tag>`` that follows it.

    Args:
        xml_response (str or None): The text to search. ``None`` (or any
            non-string value) is treated as "tag not found".
        tag (str): The XML tag to retrieve the content from.
        default (optional): The value to return if the tag is not found.

    Returns:
        str or default: The content between the specified XML tags, or the
        default value when the opening or closing tag is missing.
    """
    # The Mistral request helper returns None on failure; without this guard
    # the lookup below would raise AttributeError instead of returning default.
    if not isinstance(xml_response, str):
        return default

    start_tag = f'<{tag}>'
    end_tag = f'</{tag}>'

    start_index = xml_response.find(start_tag)
    if start_index == -1:
        return default
    start_index += len(start_tag)

    # Only accept a closing tag located after the opening tag.
    end_index = xml_response.find(end_tag, start_index)
    if end_index == -1:
        return default

    return xml_response[start_index:end_index]

In [413]:
def load_company_knowledge():
    """
    Build the company knowledge text from the internal StIT documents.

    Each document's paragraphs are joined with a single space; the resulting
    per-document strings are then concatenated in order with no separator.

    Returns:
        str: The combined text from all company documents.
    """
    document_paths = (
        'Data/Internal/Business Model de StIT.docx',
        'Data/Internal/Plan de développement stratégique sur 8 ans pour StIT.docx',
        'Data/Internal/Produits et services de StIT.docx',
    )

    per_document_text = []
    for document_path in document_paths:
        paragraphs = Document(document_path).paragraphs
        per_document_text.append(' '.join(p.text for p in paragraphs))

    return ''.join(per_document_text)

def create_prompt_analyst_agent(article, company_knowledge):
    """
    Create the analyst prompt for the Mistral AI model.

    The prompt asks the model to answer inside an ``<output>`` section that
    contains ``<priority_level>``, ``<justification>`` and ``<main_topic>``
    elements, which are later extracted by tag name.

    Args:
        article (str): The news article to analyze.
        company_knowledge (str): The company knowledge to include in the prompt.

    Returns:
        str: The prompt for the Mistral AI model.
    """
    # The <main_topic> example must end with a real closing tag: the response
    # is parsed by searching for '</main_topic>', so an unclosed example tag
    # invites answers whose main topic cannot be extracted.
    prompt = f"""
                You are an experienced business analyst tasked with determining the priority level of news articles based on their relevance to your company, StIT.

                Here is some crucial information about the company to consider during your analysis:
                <company_knowledge>{company_knowledge}</company_knowledge>

                Please thoroughly read and analyze the following news article:

                <article>{article}</article>

                ...

                After completing your analysis, provide your final assessment in the <output> section, using the following format:

                <output>
                <priority_level>High OR Medium OR Low</priority_level>
                <justification>A detailed explanation of your priority rating, including how the article's main points and key details relate to the company's goals, operations, or industry, and the potential implications and impact of the article on the company</justification>
                <main_topic>A one-sentence summary highlighting the article's main topic</main_topic>
                </output>

                Remember, your goal is to help company management quickly identify and prioritize important news, so be sure to consider the key implications and potential impact of the article on the company in your priority rating and justification.
            """
    return prompt

def send_request_to_mistral_ai(model, messages):
    """
    Send a chat request to the Mistral AI model and return the response text.

    Args:
        model (str): The Mistral AI model to use.
        messages (List[ChatMessage]): The messages to send in the request.

    Returns:
        str or None: The content of the first choice in the response, or
        ``None`` if the request (or reading its result) raised an exception.

    Raises:
        ValueError: If ``MISTRAL_API_KEY`` is missing or empty in the
            environment.
    """
    # Use .get() so that a missing variable reaches the explicit check below
    # instead of failing earlier with a bare KeyError.
    api_key = os.environ.get("MISTRAL_API_KEY")
    if not api_key:
        raise ValueError("MISTRAL_API_KEY not found in environment variables.")
    client = MistralClient(api_key=api_key)

    try:
        chat_response = client.chat(
            model=model,
            messages=messages,
        )
        return chat_response.choices[0].message.content
    except Exception as e:
        # Best-effort call: report the failure and let the caller handle None.
        print(f"Error in Mistral AI request: {e}")
        return None

def news_analyst_agent(article):
    """
    Analyze the given news article and determine its priority level,
    justification and main topic.

    Args:
        article (str): The news article to analyze.

    Returns:
        tuple: ``(priority_level, justification, main_topic)``. Each element
        is a str, or ``None`` when the corresponding tag is absent from the
        model's answer. All three are ``None`` when the request to the model
        returned no response.
    """
    company_knowledge = load_company_knowledge()

    prompt = create_prompt_analyst_agent(article, company_knowledge)

    model = "mistral-large-latest"

    # NOTE: the article is sent twice — embedded in the system prompt and
    # again as the user message.
    messages = [
        ChatMessage(role="system", content=prompt),
        ChatMessage(role="user", content="Content of the article : " + article)
    ]

    xml_response = send_request_to_mistral_ai(model, messages)

    # The request helper returns None on failure; there is nothing to parse.
    if xml_response is None:
        return None, None, None

    priority_level = xml_retriever(xml_response, 'priority_level')
    justification = xml_retriever(xml_response, 'justification')
    main_topic = xml_retriever(xml_response, 'main_topic')

    return priority_level, justification, main_topic


In [414]:
# news_article1 = 'Data/External/News Articles/ New tax law in France aims to encourage and support the growth of startups and small businesses copy.docx'
# news_article2 = 'Data/External/News Articles/Local bakery in Paris wins award for best croissant in the city copy.docx'
# news_article3 = 'Data/External/News Articles/New survey finds that the majority of French people prefer to shop online rather than in-store copy.docx'
# news_article4 = 'Data/External/News Articles/Global economic recession expected to impact the tech sector copy.docx'
# news_article5 = 'Data/External/News Articles/Massive cyberattack exposes the vulnerabilities of businesses and organizations copy.docx'
# news_articles = [news_article1, news_article2, news_article3, news_article4, news_article5]
# for news_article in news_articles:
#     docx_document = Document(news_article)
#     paragraphs_text = ' '.join([paragraph.text for paragraph in docx_document.paragraphs])
#     print(news_analyst_agent(paragraphs_text))

In [415]:
def news_agent(article):
    """
    Analyze a news article and dispatch it to the relevant employees.

    The analysis currently always runs on one fixed sample article loaded
    from disk. The result of the analysis is printed, then forwarded to
    ``dispatch_agent``.

    Args:
        article: Accepted for interface compatibility; this function does
            not read it.

    Returns:
        None
    """
    news_article_path = 'Data/External/News Articles/Massive cyberattack exposes the vulnerabilities of businesses and organizations copy.docx'

    docx_document = Document(news_article_path)
    paragraphs_text = ' '.join(paragraph.text for paragraph in docx_document.paragraphs)
    priority_level, justification, main_topic = news_analyst_agent(paragraphs_text)

    print("Priority Level: ", priority_level)
    print("Justification: ", justification)
    print("Main Topic: ", main_topic)

    # The analysis yields None for any tag missing from the model's answer;
    # dispatch_agent concatenates both values as strings, so skip the
    # dispatch rather than crash on a None.
    if main_topic is None or justification is None:
        print("Skipping dispatch: the analysis did not return a main topic and a justification.")
        return

    dispatch_agent(main_topic, justification)

In [416]:
def _employee_name_from_cv_path(cv_path):
    """Return the employee name embedded in a ``.../CV <name>.docx`` path."""
    # The name is whatever sits between 'CV' and '.docx' in the file name.
    match = re.search(r'/CV\s*(.+?)\.docx', cv_path)
    if match:
        return match.group(1)
    # Path does not follow the 'CV <name>.docx' convention: fall back to the
    # bare file name instead of failing on match.group().
    return os.path.splitext(os.path.basename(cv_path))[0]


def dispatch_agent(main_topic, justification):
    """
    Determine the employee(s) who should be informed about the given topic and justification.

    Retrieves candidate CVs via ``internal_retriever_agent``, reads each CV
    document, and asks the Mistral model to name the five most relevant
    employees. The model's raw answer is printed.

    Args:
        main_topic (str): The main topic of the news.
        justification (str): The justification for the priority level of the news.

    Returns:
        None

    Raises:
        ValueError: If ``MISTRAL_API_KEY`` is missing or empty in the
            environment.
    """
    results = internal_retriever_agent(main_topic)

    to_re_rank = []
    for result in results:
        cv_path = result['fullpath']
        cv_document = Document(cv_path)
        cv_text = ' '.join(paragraph.text for paragraph in cv_document.paragraphs)
        name = _employee_name_from_cv_path(cv_path)
        to_re_rank.append("Name: " + name + " " + cv_text)

    # NOTE: a Cohere rerank ("rerank-english-v3.0") variant of this step used
    # to sit here as a disabled string block; ranking is done by the Mistral
    # prompt below.
    api_key = os.environ.get("MISTRAL_API_KEY")
    if not api_key:
        raise ValueError("MISTRAL_API_KEY not found in environment variables.")
    model = "mistral-large-latest"

    client = MistralClient(api_key=api_key)
    prompt = """
                You are a senior executive at StIT, and you have been tasked with identifying the employee who should be informed about a specific matter based on their expertise and role within the company.
                Please thoroughly read and analyze the following matter:

                <matter>"""+main_topic + justification+"""</matter>
                Now read carefully the CVs of the following employees and rank them in order of relevance to the matter:
                <CVs>"""+str(to_re_rank)+"""</CVs>
                In the <output> section, write down the names of the 5 people who are the most relevant to contact for this matter. Carefully consider how the main points and key details of this matter. Relate to the provided CVs and job titles at StIT to select the relevant employees. 
                
                After completing your analysis, provide your final assessment in the <output> section, using the following format:

                <output>
                <employee1>NAME_EMPLOYEE1</employee1>
                <employee2>NAME_EMPLOYEE2</employee2>  
                <employee3>NAME_EMPLOYEE3</employee3>
                <employee4>NAME_EMPLOYEE4</employee4>
                <employee5>NAME_EMPLOYEE5</employee5>
                </output>

                If no Employee is relevant, please write "None".
                Remember, your goal is to help company management quickly identify and prioritize the employees to inform about the matter, so be sure to consider the key implications and potential impact of the matter on the company in your selection of relevant profiles to inform about it.

    """

    messages = [
        ChatMessage(role="system", content=prompt),
        ChatMessage(role="user", content="The matter : " + main_topic + justification)
    ]

    chat_response = client.chat(
        model=model,
        messages=messages,
    )
    print(chat_response.choices[0].message.content)
    return None

In [417]:
from OpenRAG.src.openrag.chunk_vectorization.chunk_vectorization import get_vectorizer
from OpenRAG.src.openrag.vectordb.milvus_adapter import init_milvus_connection
from pymilvus import Collection


def internal_retriever_agent(text, filter='HR'):
    """
    Retrieve internal information related to the given text.

    Vectorizes ``text``, searches the Milvus collection ``mistral_collection``
    for the nearest chunks whose ``source`` equals ``filter``, and resolves
    each selected chunk id with ``find_chunks``. Every returned chunk is also
    printed, followed by its ``fullpath``.

    For any filter other than ``'HR'``, the chunks immediately before and
    after each hit are selected as well. For ``'HR'``, at most one chunk per
    document is returned.

    Args:
        text (str): The text to search for.
        filter (str, optional): The filter to apply to the search results. Defaults to 'HR'.

    Returns:
        list: A list of dictionaries containing the search results, as
        returned by ``find_chunks``.
    """
    vectorizer = get_vectorizer('mistral')
    query_vector = vectorizer.vectorize(text)

    init_milvus_connection()

    collection = Collection(name="mistral_collection")

    n_neighbors = 20
    hits = collection.search(
        [query_vector],
        "vector",
        param={"metric_type": "L2", "params": {}},
        limit=n_neighbors,
        expr="source == '" + filter + "'",
    )

    include_adjacent = filter != 'HR'

    # Track selected ids in a set: testing an id against a list of
    # [id, distance] pairs never matches, so duplicates were never filtered.
    selected_ids = []
    seen_ids = set()
    for hit in hits[0]:
        if hit.id in seen_ids or len(selected_ids) >= n_neighbors:
            continue
        selected_ids.append(hit.id)
        seen_ids.add(hit.id)

        if not include_adjacent:
            continue

        # Previous chunk only when its id is non-negative, then the next one.
        adjacent_ids = []
        if hit.id - 1 >= 0:
            adjacent_ids.append(hit.id - 1)
        adjacent_ids.append(hit.id + 1)
        for adjacent_id in adjacent_ids:
            if adjacent_id not in seen_ids and len(selected_ids) < n_neighbors:
                selected_ids.append(adjacent_id)
                seen_ids.add(adjacent_id)

    chunks = []
    seen_documents = set()
    for chunk_id in selected_ids:
        # Resolve each id once; every find_chunks call re-reads JSON files.
        chunk = find_chunks(chunk_id)
        if chunk is None:
            # The id is not covered by the global index (e.g. an adjacent id
            # past the last chunk): nothing to return for it.
            continue
        if filter == 'HR' and chunk['document'] in seen_documents:
            continue
        seen_documents.add(chunk['document'])
        chunks.append(chunk)
        print(chunk)
        print(chunk['fullpath'])

    return chunks

In [418]:
def find_chunks(id, path="Data/Internal/HR/"):
    """
    Find the chunk based on the given id.

    Reads ``global_indexing.json`` from the current working directory, which
    maps each document key to the inclusive ``start``/``end`` range of global
    chunk ids it owns. The matching chunk is then read from
    ``<path><key>_chunks.json`` and annotated with ``document`` and
    ``fullpath`` entries.

    Args:
        id (int): The id of the chunk to find.
        path (str, optional): The path to the chunk files. Defaults to "Data/Internal/HR/".

    Returns:
        dict: A dictionary containing the chunk details, or None if no
        document range in the global index contains the id.

    Raises:
        KeyError: If the id falls inside a document's range but the chunk
            file has no matching ``chunk_<n>`` entry.
    """
    # Context managers close both files promptly; this function is called
    # once per search hit, so leaked handles would accumulate.
    with open("global_indexing.json", "r") as index_file:
        global_indexing = json.load(index_file)

    for key, value in global_indexing.items():
        start_idx = value["start"]
        if not start_idx <= id <= value["end"]:
            continue

        # Chunk keys inside a document file are numbered from zero.
        chunk_key = "chunk_" + str(id - start_idx)
        with open(path + key + "_chunks.json", "r") as chunks_file:
            chunk = json.load(chunks_file)[chunk_key]

        chunk["document"] = key + ".docx"
        chunk["fullpath"] = path + key + ".docx"
        return chunk

    return None

In [419]:
if __name__ == "__main__":
    # Script entry point: run the full analyse-and-dispatch pipeline once.
    news_agent(article="test")

Priority Level:  High
Justification:  The article describes a massive cyberattack that has affected a wide range of businesses and organizations, including those in the finance, health, retail, and education sectors, which are also the target segments of StIT. The cyberattack has resulted in the theft and destruction of sensitive information, highlighting the vulnerabilities and weaknesses of businesses in the area of cybersecurity. This article is highly relevant to StIT as it emphasizes the importance of a strong cybersecurity framework, which is a key aspect of StIT's value proposition to its clients. The potential implications of this article for StIT include the need to review and possibly enhance its own cybersecurity measures, as well as those of its clients, to prevent similar attacks in the future. The article also underscores the importance of regular cybersecurity assessments, training, and collaborations, which could be potential areas for StIT to offer additional services 