In [29]:
from mistralai.client import MistralClient
from mistralai.models.chat_completion import ChatMessage
import os
from dotenv import load_dotenv

from docx import Document

load_dotenv()  # take environment variables from .env.

In [30]:
def xml_retriever(xml_response, tag):
    """Return the text enclosed by the first ``<tag>`` ... ``</tag>`` pair.

    The text after the first opening tag is cut at the first closing tag; if
    no closing tag follows, everything up to the next opening tag (or the end
    of the string) is returned.

    Raises:
        IndexError: if ``xml_response`` does not contain ``<tag>``.
    """
    opening = '<' + tag + '>'
    closing = '</' + tag + '>'
    # Index 1 is the segment right after the first opening tag.
    after_opening = xml_response.split(opening)[1]
    inner, _, _ = after_opening.partition(closing)
    return inner

In [31]:
# Agent for News Analysis
# Returns:
# 1. The significance level of the news (High, Medium, Low)
# 2. The justification given by the model for that level
# 3. The main topic of the article
def news_analyst_agent(article):
    """Ask a Mistral chat model how relevant a news article is to StIT.

    The text of three internal StIT .docx documents is embedded in the system
    prompt as company knowledge, together with the article. The model is asked
    to answer with <priority_level>, <justification> and <main_topic> tags,
    which are then extracted from the reply.

    Args:
        article: Plain text of the news article.

    Returns:
        Tuple ``(priority_level, justification, main_topic)`` of strings taken
        verbatim from the model reply.

    Raises:
        RuntimeError: if the ``MISTRAL_API_KEY`` environment variable is not set.
        IndexError: if the reply lacks one of the expected opening tags.
    """
    # TODO : Change the prompt
    business_model = 'Data/Internal/Company StIT/Business Model de StIT.docx'
    long_term_strategy = 'Data/Internal/Company StIT/Plan de développement stratégique sur 8 ans pour StIT.docx'
    products_and_services = 'Data/Internal/Company StIT/Produits et services de StIT.docx'
    company_docs = [business_model, long_term_strategy, products_and_services]
    # Separate the documents with a blank line so the last word of one document
    # is not fused with the first word of the next.
    company_knowledge = '\n\n'.join(
        ' '.join(paragraph.text for paragraph in Document(doc).paragraphs)
        for doc in company_docs
    )
    prompt = """
                You are an experienced business analyst tasked with determining the priority level of news articles based on their relevance to your company, StIT.

                Here is some crucial information about the company to consider during your analysis:
                <company_knowledge>"""+company_knowledge +"""</company_knowledge>

                Please thoroughly read and analyze the following news article:

                <article>"""+article+"""</article>

                In the <output> section, document your thoughts on the article's relevance to the company. Carefully consider how the main points and key details in the article relate to the provided company knowledge. Here are some guidelines on how to rank the priority level of the article:

                * High priority: The article contains information that is highly relevant to the company's goals, operations, or industry, and has the potential to significantly impact the company in the near term. The article may discuss a major new development, a significant change in the regulatory or competitive landscape, or a major opportunity or threat for the company. The company's management and other stakeholders should be alerted to the article and its implications as soon as possible.
                * Medium priority: The article contains information that is moderately relevant to the company's goals, operations, or industry, and has the potential to impact the company in the medium term. The article may discuss a new trend or development that is likely to affect the company's business, a change in the competitive landscape that is likely to have a moderate impact, or an opportunity or threat that is not yet fully formed. The company's management and other stakeholders should be aware of the article and its implications, and may want to monitor the situation or take some initial actions to address it.
                * Low priority: The article contains information that is not highly relevant to the company's goals, operations, or industry, or is unlikely to have a significant impact on the company. The article may discuss a development that is not directly related to the company's business, a change in the competitive landscape that is unlikely to have a significant impact, or an opportunity or threat that is unlikely to materialize. The company's management and other stakeholders may want to be aware of the article, but it is not necessary to take any immediate actions to address it.

                After completing your analysis, provide your final assessment in the <output> section, using the following format:

                <output>
                <priority_level>High OR Medium OR Low</priority_level>
                <justification>A detailed explanation of your priority rating, including how the article's main points and key details relate to the company's goals, operations, or industry, and the potential implications and impact of the article on the company</justification>
                <main_topic>A one-sentence summary highlighting the article's main topic</main_topic>
                </output>

                Remember, your goal is to help company management quickly identify and prioritize important news, so be sure to consider the key implications and potential impact of the article on the company in your priority rating and justification.
            """
    # Never keep the key in the notebook: read it from the environment
    # (load_dotenv() in the first cell loads it from the .env file).
    api_key = os.environ.get("MISTRAL_API_KEY")
    if not api_key:
        raise RuntimeError(
            "MISTRAL_API_KEY is not set; define it in the environment or in the .env file"
        )
    model = "mistral-large-latest"

    client = MistralClient(api_key=api_key)

    messages = [
        ChatMessage(role="system", content=prompt),
        ChatMessage(role="user", content="Content of the article : " + article)
    ]

    chat_response = client.chat(
        model=model,
        messages=messages,
    )

    # The reply is expected to follow the tagged format requested in the prompt.
    xml_response = chat_response.choices[0].message.content
    priority_level = xml_retriever(xml_response, 'priority_level')
    justification = xml_retriever(xml_response, 'justification')
    main_topic = xml_retriever(xml_response, 'main_topic')

    return priority_level, justification, main_topic


In [32]:
# Sample news articles (.docx) used to exercise the analyst agent.
news_articles = [
    'Data/External/News Articles/ New tax law in France aims to encourage and support the growth of startups and small businesses copy.docx',
    'Data/External/News Articles/Local bakery in Paris wins award for best croissant in the city copy.docx',
    'Data/External/News Articles/New survey finds that the majority of French people prefer to shop online rather than in-store copy.docx',
    'Data/External/News Articles/Global economic recession expected to impact the tech sector copy.docx',
    'Data/External/News Articles/Massive cyberattack exposes the vulnerabilities of businesses and organizations copy.docx',
]
# Keep the individual names available to the rest of the notebook.
news_article1, news_article2, news_article3, news_article4, news_article5 = news_articles

# Flatten each document to a single string and print the
# (priority_level, justification, main_topic) tuple returned by the agent.
for news_article in news_articles:
    docx_document = Document(news_article)
    paragraphs_text = ' '.join(paragraph.text for paragraph in docx_document.paragraphs)
    print(news_analyst_agent(paragraphs_text))

('Medium', "The article discusses a new tax law in France aimed at encouraging and supporting the growth of startups and small businesses. As StIT is a technology company based in France, this tax law could potentially benefit the company, particularly if it is considered a startup or SME. The proposed reductions in corporate income tax and value-added tax (VAT) could improve StIT's financial position and cash flow. Additionally, the exemption from social security contributions for hiring new employees could reduce costs and encourage expansion. However, the article does not provide specific details on whether StIT meets the criteria for these benefits, and the law is still pending approval. Therefore, while the article is relevant and could have a positive impact on StIT, it is not an immediate high priority until more information is available.", 'The French government has proposed a new tax law aimed at supporting the growth of startups and small businesses through various tax incent

In [33]:
def news_agent(article):
    """Analyse the sample tax-law article, print the verdict and dispatch its topic.

    NOTE(review): ``article`` is not read by this function; the article that is
    analysed is always the hard-coded .docx path below.
    """
    news_article1 = 'Data/External/News Articles/ New tax law in France aims to encourage and support the growth of startups and small businesses copy.docx'

    # Flatten the document into one space-separated string.
    paragraphs_text = ' '.join(
        paragraph.text for paragraph in Document(news_article1).paragraphs
    )
    priority_level, justification, main_topic = news_analyst_agent(paragraphs_text)

    report = (
        ("Priority Level: ", priority_level),
        ("Justification: ", justification),
        ("Main Topic: ", main_topic),
    )
    for label, value in report:
        print(label, value)

    # Hand the one-sentence topic over to the dispatcher.
    dispatch_agent(main_topic)

In [34]:
import json

# Agent that tells the user responsible to the text given
def dispatch_agent(text):
    """Run the internal retriever on ``text``; always returns ``None``.

    The retrieved contexts are currently discarded.
    """
    internal_retriever_agent(text)
    return None

In [35]:
from OpenRAG.src.openrag.chunk_vectorization.chunk_vectorization import get_vectorizer
from OpenRAG.src.openrag.vectordb.milvus_adapter import init_milvus_connection
from pymilvus import Collection

def _expand_with_neighbors(hit_ids, limit):
    """Return at most ``limit`` unique chunk ids: each hit, then its previous and next chunk."""
    selected = []
    seen = set()

    def _take(chunk_id):
        # Refuse duplicates and anything beyond the requested number of chunks.
        if chunk_id in seen or len(selected) >= limit:
            return False
        seen.add(chunk_id)
        selected.append(chunk_id)
        return True

    for hit_id in hit_ids:
        # A hit that was already selected (e.g. as a neighbour of an earlier
        # hit) is not expanded again.
        if not _take(hit_id):
            continue
        if hit_id - 1 >= 0:
            _take(hit_id - 1)
        _take(hit_id + 1)
    return selected


# Agent to retrieve internal information
# Returns:
# 1. Several contexts linked to the provided text
def internal_retriever_agent(text):
    """Retrieve internal text chunks related to ``text``.

    The text is vectorized with the 'mistral' vectorizer and searched in the
    "mistral_collection" collection (L2 metric, ``source == 'Internal'``).
    Each hit is widened with the chunks immediately before and after it,
    without duplicates and capped at ``n_neighbors`` ids overall. Ids that
    ``find_chunks`` cannot resolve are skipped.

    Args:
        text: Query text.

    Returns:
        List with the ``'text'`` field of every resolved chunk, in selection order.
    """
    vectorizer = get_vectorizer('mistral')
    query_vector = vectorizer.vectorize(text)

    init_milvus_connection()

    collection_name = "mistral_collection"
    collection = Collection(name=collection_name)

    n_neighbors = 10
    results = collection.search([query_vector], "vector", param={"metric_type": "L2", "params":{}}, limit=n_neighbors, expr="source == 'Internal'")

    # results[0] holds the hits for the single query vector.
    chunk_ids = _expand_with_neighbors([result.id for result in results[0]], n_neighbors)

    answer_chunks = []
    for chunk_id in chunk_ids:
        # Look the chunk up once; the same value is printed and collected.
        answer_chunk = find_chunks(chunk_id)
        print(answer_chunk)
        if answer_chunk is None:
            # A neighbour id can fall outside every indexed range.
            continue
        answer_chunks.append(answer_chunk['text'])

    return answer_chunks

In [36]:
def find_chunks(id, path = "Data/Internal/Company StIT/"):
    """
    Find the chunk based on the given id.

    "global_indexing.json" (read from the current working directory) maps each
    document key to an inclusive ``start``/``end`` range of global ids. The
    first key whose range contains ``id`` is used to open
    ``path + key + "_chunks.json"`` and pick the entry
    ``"chunk_<id - start>"``.

    Args:
        id: Global chunk id.
        path: Directory prefix (including the trailing separator) of the
            per-document ``*_chunks.json`` files.

    Returns:
        The chunk dict with an added ``"document"`` entry set to
        ``key + ".docx"``, or ``None`` when no range contains ``id``.

    Raises:
        KeyError: if the matching chunks file has no entry for the computed index.
    """
    # Context managers make sure both files are closed after parsing.
    with open("global_indexing.json", "r") as index_file:
        global_indexing = json.load(index_file)
    for key, value in global_indexing.items():
        start_idx = value["start"]
        end_idx = value["end"]
        if start_idx <= id <= end_idx:
            index_in_file = id - start_idx
            with open(path + key + "_chunks.json", "r") as chunks_file:
                data_dict_file = json.load(chunks_file)
            chunk = data_dict_file["chunk_" + str(index_in_file)]
            chunk["document"] = key + ".docx"
            return chunk
    return None

In [37]:
# Function scanning the response from the LLM for a specific action call
def scan_response(response):
    """Placeholder for the action scanner: ignores ``response`` and returns ``None``."""
    # TODO : Implement the different cases for the different actions and run the corresponding agent
    return None

In [38]:
# Run the whole pipeline once. news_agent does not read its argument (it always
# loads its hard-coded sample article), so "test" is only a placeholder value.
news_agent("test")

Priority Level:  Medium
Justification:  The article discusses a new tax law proposal in France that aims to encourage and support the growth of startups and small businesses. As StIT is a technology company based in France, this tax law could potentially benefit the company if it is approved and enacted. The tax incentives, benefits, and reliefs mentioned in the article, such as a reduction in corporate income tax and value-added tax (VAT) rates, could positively impact StIT's financial performance and competitiveness. Additionally, the exemption from social security contributions for hiring new employees could encourage StIT to expand its workforce and invest more in research and development. However, it is important to note that the tax law is not yet enacted and its impact on StIT will depend on the final details and the company's eligibility for the tax benefits. Therefore, the priority level for this article is medium, and the company's management should monitor the progress of th