In [366]:
from mistralai.client import MistralClient
from mistralai.models.chat_completion import ChatMessage
import os
import cohere
import json
import re
from dotenv import load_dotenv

from docx import Document

load_dotenv()  # take environment variables from .env.

True

In [367]:
def xml_retriever(xml_response, tag):
    """Return the text enclosed by the first ``<tag>`` in *xml_response*.

    This is plain string slicing, not XML parsing: the result is whatever
    follows the first opening tag, cut at the next opening tag of the same
    name or at the first closing tag, whichever comes first. If the closing
    tag is absent, everything after the opening tag is returned.

    Raises:
        IndexError: if ``<tag>`` does not occur in *xml_response*.
    """
    opening = f"<{tag}>"
    closing = f"</{tag}>"
    # Index 1 is the segment right after the first opening tag.
    after_opening = xml_response.split(opening)[1]
    inner, _, _ = after_opening.partition(closing)
    return inner

In [368]:
# Agent for News Analysis
# Returns:
# 1. The significance level of the news (High, Medium, Low)
# 2. The model's justification for that level
# 3. The main topic of the article
def news_analyst_agent(article):
    """Ask a Mistral chat model how relevant a news article is to StIT.

    The text of three internal StIT .docx documents is loaded and embedded in
    the system prompt as company knowledge, together with the article. The
    model is asked to reply with ``<priority_level>``, ``<justification>`` and
    ``<main_topic>`` tags, which are extracted with ``xml_retriever``.

    Args:
        article: Full text of the news article (a string).

    Returns:
        A ``(priority_level, justification, main_topic)`` tuple of strings,
        exactly as found between the corresponding tags of the model reply.

    Raises:
        KeyError: if the ``MISTRAL_API_KEY`` environment variable is not set.
        IndexError: if the model reply lacks one of the expected tags.
    """
    # TODO : Change the prompt
    business_model = 'Data/Internal/Business Model de StIT.docx'
    long_term_strategy = 'Data/Internal/Plan de développement stratégique sur 8 ans pour StIT.docx'
    products_and_services = 'Data/Internal/Produits et services de StIT.docx'
    company_docs = [business_model, long_term_strategy, products_and_services]

    # Paragraphs of one document are joined with spaces; the documents
    # themselves are concatenated without a separator.
    company_knowledge = ''.join(
        ' '.join(paragraph.text for paragraph in Document(doc).paragraphs)
        for doc in company_docs
    )

    prompt = """
                You are an experienced business analyst tasked with determining the priority level of news articles based on their relevance to your company, StIT.

                Here is some crucial information about the company to consider during your analysis:
                <company_knowledge>"""+company_knowledge +"""</company_knowledge>

                Please thoroughly read and analyze the following news article:

                <article>"""+article+"""</article>

                In the <output> section, document your thoughts on the article's relevance to the company. Carefully consider how the main points and key details in the article relate to the provided company knowledge. Here are some guidelines on how to rank the priority level of the article:

                * High priority: The article contains information that is highly relevant to the company's goals, operations, or industry, and has the potential to significantly impact the company in the near term. The article may discuss a major new development, a significant change in the regulatory or competitive landscape, or a major opportunity or threat for the company. The company's management and other stakeholders should be alerted to the article and its implications as soon as possible.
                * Medium priority: The article contains information that is moderately relevant to the company's goals, operations, or industry, and has the potential to impact the company in the medium term. The article may discuss a new trend or development that is likely to affect the company's business, a change in the competitive landscape that is likely to have a moderate impact, or an opportunity or threat that is not yet fully formed. The company's management and other stakeholders should be aware of the article and its implications, and may want to monitor the situation or take some initial actions to address it.
                * Low priority: The article contains information that is not highly relevant to the company's goals, operations, or industry, or is unlikely to have a significant impact on the company. The article may discuss a development that is not directly related to the company's business, a change in the competitive landscape that is unlikely to have a significant impact, or an opportunity or threat that is unlikely to materialize. The company's management and other stakeholders may want to be aware of the article, but it is not necessary to take any immediate actions to address it.

                After completing your analysis, provide your final assessment in the <output> section, using the following format:

                <output>
                <priority_level>High OR Medium OR Low</priority_level>
                <justification>A detailed explanation of your priority rating, including how the article's main points and key details relate to the company's goals, operations, or industry, and the potential implications and impact of the article on the company</justification>
                <main_topic>A one-sentence summary highlighting the article's main topic</main_topic>
                </output>

                Remember, your goal is to help company management quickly identify and prioritize important news, so be sure to consider the key implications and potential impact of the article on the company in your priority rating and justification.
            """
    # Read the key from the environment instead of embedding a secret in the
    # source; this is the same variable dispatch_agent uses.
    api_key = os.environ["MISTRAL_API_KEY"]
    model = "mistral-large-latest"

    client = MistralClient(api_key=api_key)

    messages = [
        ChatMessage(role="system", content=prompt),
        ChatMessage(role="user", content="Content of the article : " + article)
    ]

    chat_response = client.chat(
        model=model,
        messages=messages,
    )

    xml_response = chat_response.choices[0].message.content
    priority_level = xml_retriever(xml_response, 'priority_level')
    justification = xml_retriever(xml_response, 'justification')
    main_topic = xml_retriever(xml_response, 'main_topic')

    return priority_level, justification, main_topic


In [369]:
# news_article1 = 'Data/External/News Articles/ New tax law in France aims to encourage and support the growth of startups and small businesses copy.docx'
# news_article2 = 'Data/External/News Articles/Local bakery in Paris wins award for best croissant in the city copy.docx'
# news_article3 = 'Data/External/News Articles/New survey finds that the majority of French people prefer to shop online rather than in-store copy.docx'
# news_article4 = 'Data/External/News Articles/Global economic recession expected to impact the tech sector copy.docx'
# news_article5 = 'Data/External/News Articles/Massive cyberattack exposes the vulnerabilities of businesses and organizations copy.docx'
# news_articles = [news_article1, news_article2, news_article3, news_article4, news_article5]
# for news_article in news_articles:
#     docx_document = Document(news_article)
#     paragraphs_text = ' '.join([paragraph.text for paragraph in docx_document.paragraphs])
#     print(news_analyst_agent(paragraphs_text))

In [370]:
def news_agent(article):
    """Analyse a fixed sample news article and dispatch it to employees.

    Loads one hard-coded .docx article, rates it with ``news_analyst_agent``,
    prints the three returned fields, then hands the main topic and the
    justification to ``dispatch_agent``.

    Args:
        article: Currently unused; the article analysed is always the
            hard-coded sample below.
            NOTE(review): confirm whether this should be the article text or
            a path before wiring it in, since existing callers pass
            placeholder values.

    Returns:
        None.
    """
    sample_article_path = 'Data/External/News Articles/New survey finds that the majority of French people prefer to shop online rather than in-store copy.docx'
    docx_document = Document(sample_article_path)
    paragraphs_text = ' '.join(paragraph.text for paragraph in docx_document.paragraphs)
    priority_level, justification, main_topic = news_analyst_agent(paragraphs_text)

    print("Priority Level: ", priority_level)
    print("Justification: ", justification)
    print("Main Topic: ", main_topic)

    dispatch_agent(main_topic, justification)

In [371]:
import json

# Agent that identifies which employees should be informed about the given matter
def dispatch_agent(main_topic, justification):
    """Ask a Mistral chat model which employees should be told about a matter.

    Candidate CVs are fetched with ``internal_retriever_agent(main_topic)``.
    Each hit's ``fullpath`` .docx is read in full and prefixed with the
    employee name derived from the file name. The CV list and the matter
    (``main_topic + justification``) are sent to the model, and its raw reply
    is printed.

    Args:
        main_topic: One-sentence topic of the news item.
        justification: Explanation of why the item matters to the company.

    Returns:
        None. The model's answer is only printed.

    Raises:
        KeyError: if the ``MISTRAL_API_KEY`` environment variable is not set.
    """
    # Now go look for the attribute full_path of each response, then rerank the full CVs.
    results = internal_retriever_agent(main_topic)

    # The employee name sits between '/CV' and '.docx' in the file path.
    name_pattern = re.compile(r'/CV\s*(.+?)\.docx')

    to_re_rank = []
    for result in results:
        filename = result['fullpath']
        docx_document = Document(filename)
        content_cv = ' '.join(paragraph.text for paragraph in docx_document.paragraphs)

        match = name_pattern.search(filename)
        if match:
            name = match.group(1)
        else:
            # File does not follow the 'CV <name>.docx' convention: fall back
            # to the bare file name instead of failing on match.group().
            name = os.path.splitext(os.path.basename(filename))[0]
        to_re_rank.append("Name: "+name + " " + content_cv)

    # NOTE: earlier variant that re-ranked the CVs with Cohere instead of
    # asking the chat model. Kept for reference only; it is not executed.
    #
    # co = cohere.Client(os.environ["COHERE_API_KEY"])
    # rerank_prompt = "Which of our employees should be informed about this matter ? The matter to be informed is about " + main_topic + justification
    #
    # response = co.rerank(
    #                 model="rerank-english-v3.0",
    #                 query=' '.join(rerank_prompt),
    #                 documents=to_re_rank,
    #                 top_n=7,
    #             )
    # print("############")
    # print("############")
    # print("############")
    # for el in response:
    #     print(el)

    api_key = os.environ["MISTRAL_API_KEY"]
    model = "mistral-large-latest"

    client = MistralClient(api_key=api_key)
    prompt = """
                You are a senior executive at StIT, and you have been tasked with identifying the employee who should be informed about a specific matter based on their expertise and role within the company.
                Please thoroughly read and analyze the following matter:

                <matter>"""+main_topic + justification+"""</matter>
                Now read carefully the CVs of the following employees and rank them in order of relevance to the matter:
                <CVs>"""+str(to_re_rank)+"""</CVs>
                In the <output> section, write down the names of the 5 people who are the most relevant to contact for this matter. Carefully consider how the main points and key details of this matter. Relate to the provided CVs and job titles at StIT to select the relevant employees. 
                
                After completing your analysis, provide your final assessment in the <output> section, using the following format:

                <output>
                <employee1>NAME_EMPLOYEE1</employee1>
                <employee2>NAME_EMPLOYEE2</employee2>  
                <employee3>NAME_EMPLOYEE3</employee3>
                <employee4>NAME_EMPLOYEE4</employee4>
                <employee5>NAME_EMPLOYEE5</employee5>
                </output>

                If no Employee is relevant, please write "None".
                Remember, your goal is to help company management quickly identify and prioritize the employees to inform about the matter, so be sure to consider the key implications and potential impact of the matter on the company in your selection of relevant profiles to inform about it.

    """

    messages = [
        ChatMessage(role="system", content=prompt),
        ChatMessage(role="user", content="The matter : " + main_topic + justification)
    ]

    chat_response = client.chat(
        model=model,
        messages=messages,
    )
    print(chat_response.choices[0].message.content)
    return None

In [372]:
from OpenRAG.src.openrag.chunk_vectorization.chunk_vectorization import get_vectorizer
from OpenRAG.src.openrag.vectordb.milvus_adapter import init_milvus_connection
from pymilvus import Collection

# Agent to retrieve internal information
# Returns:
# 1. Several contexts linked to the provided text
def internal_retriever_agent(text, filter='HR'):
    """Retrieve internal document chunks related to *text* from Milvus.

    *text* is embedded with the 'mistral' vectorizer and searched against the
    ``mistral_collection`` collection, restricted to rows whose ``source``
    field equals *filter*.

    * ``filter == 'HR'``: only the hits themselves are kept, and at most one
      chunk per source document is returned.
    * any other filter: each hit is widened with its previous and next chunk
      ids, and several chunks of the same document may be returned.

    At most 20 chunk ids are considered. Ids that ``find_chunks`` cannot
    resolve are skipped. Each returned chunk and its ``fullpath`` are printed.

    Args:
        text: Query text to embed and search for.
        filter: Value matched against the ``source`` field. The name shadows
            the builtin but is kept for caller compatibility.

    Returns:
        A list of chunk dicts as produced by ``find_chunks``.
    """
    vectorizer = get_vectorizer('mistral')
    query_vector = vectorizer.vectorize(text)

    init_milvus_connection()

    collection_name = "mistral_collection"
    collection = Collection(name=collection_name)

    n_neighbors = 20
    results = collection.search(
        [query_vector],
        "vector",
        param={"metric_type": "L2", "params": {}},
        limit=n_neighbors,
        expr="source == '" + filter + "'",
    )

    expand_neighbours = filter != 'HR'
    # A set of plain ids makes the "already selected" test effective; testing
    # an id against a list of [id, distance] pairs never matches.
    seen_ids = set()
    selected_ids = []

    def select(chunk_id):
        # Keep chunk_id unless it is a duplicate or the cap has been reached.
        if chunk_id in seen_ids or len(selected_ids) >= n_neighbors:
            return
        seen_ids.add(chunk_id)
        selected_ids.append(chunk_id)

    for result in results[0]:
        if result.id in seen_ids or len(selected_ids) >= n_neighbors:
            continue
        select(result.id)
        if expand_neighbours:
            if result.id - 1 >= 0:
                select(result.id - 1)
            select(result.id + 1)

    seen_documents = set()
    answer_chunks = []
    for chunk_id in selected_ids:
        # Resolve each id once; find_chunks re-reads JSON files on every call.
        answer_chunk = find_chunks(chunk_id)
        if answer_chunk is None:
            # The id (e.g. an added neighbour) falls outside every indexed range.
            continue
        if filter == 'HR' and answer_chunk['document'] in seen_documents:
            continue
        seen_documents.add(answer_chunk['document'])
        answer_chunks.append(answer_chunk)
        print(answer_chunk)
        print(answer_chunk['fullpath'])

    return answer_chunks

In [373]:
def find_chunks(id, path = "Data/Internal/HR/"):
    """Find the chunk with the given global id.

    ``global_indexing.json`` (read from the current working directory) maps
    each document key to an inclusive ``start``/``end`` range of global ids.
    The chunk is then read from ``<path><key>_chunks.json`` under the key
    ``chunk_<id - start>``.

    Args:
        id: Global chunk id. The name shadows the builtin but is kept for
            caller compatibility.
        path: Directory prefix (with trailing slash) that holds the
            ``*_chunks.json`` files and the source .docx files.

    Returns:
        The chunk dict with two keys added, ``document`` (``<key>.docx``) and
        ``fullpath`` (``<path><key>.docx``), or None if no range contains *id*.

    Raises:
        KeyError: if the chunks file has no entry for the computed index.
    """
    # Context managers close the files promptly instead of leaking handles.
    with open("global_indexing.json", "r") as index_file:
        global_indexing = json.load(index_file)

    for key, value in global_indexing.items():
        start_idx = value["start"]
        end_idx = value["end"]
        if start_idx <= id <= end_idx:
            with open(path + key + "_chunks.json", "r") as chunks_file:
                data_dict_file = json.load(chunks_file)
            chunk = data_dict_file["chunk_" + str(id - start_idx)]
            chunk["document"] = key + ".docx"
            chunk["fullpath"] = path + key + ".docx"
            return chunk
    return None

In [374]:
news_agent("test")

Priority Level:  Medium
Justification:  The article discusses a trend that is likely to affect StIT's business in the medium term, as the preference for online shopping among French people continues to increase. This trend could impact StIT's retail clients and potentially create new opportunities for the company to develop and offer e-commerce and digital marketing solutions. The article also highlights the need for a more proactive, innovative, and responsible approach to e-commerce and digital marketing, which aligns with StIT's focus on innovation and commitment to protecting its clients' data and information. However, the article does not discuss any major new developments or significant changes in the regulatory or competitive landscape that would require immediate attention from StIT's management and other stakeholders.
Main Topic:  A new survey finds that the majority of French people prefer to shop online rather than in-store, and this trend is expected to continue and increas