In [1]:
import openai
import os
import nest_asyncio
from dotenv import load_dotenv
from openai import OpenAI

# Allow for nested asyncio event loops
nest_asyncio.apply()

# Load environment variables from the .env file before reading the key below
load_dotenv()

# Build the OpenAI API client from the OPENAI_API_KEY environment variable
client = OpenAI(api_key=os.environ.get('OPENAI_API_KEY'))

In [2]:
import os
from pprint import pprint 

# Folder that holds the article text files
directory = r'D:\github-repo-tkhongsap\editor-in-chief\docs'

# Collect the names of the .txt files, sorted so the dated names come out in order
articles = sorted(
    entry for entry in os.listdir(directory) if entry.endswith('.txt')
)

pprint(articles)


['article_2024-08-18.txt',
 'article_2024-08-19.txt',
 'article_2024-08-20.txt',
 'article_2024-08-21.txt',
 'article_2024-08-22.txt',
 'article_2024-08-23.txt',
 'article_2024-08-24.txt']


In [5]:
from utils.get_doc_tools import get_doc_tools
from pathlib import Path

# Define the directory containing the documents
docs_directory = Path(r"D:\github-repo-tkhongsap\editor-in-chief\docs")

# Collect the article files as full Path objects (not bare filenames).
# Only .txt files are kept, matching the filter used when the directory was
# first listed, and the result is sorted because iterdir() makes no promise
# about the order in which it yields entries.
articles = sorted(
    file
    for file in docs_directory.iterdir()
    if file.is_file() and file.name.endswith(".txt")
)

# Maps each article Path to its [vector_query_tool, summary_tool] pair
paper_to_tools_dict = {}

for article in articles:
    print(f"Getting tools for article: {article}")

    # Pass the full file path plus the extension-less file name to get_doc_tools
    vector_query_tool, summary_tool = get_doc_tools(str(article), article.stem)
    paper_to_tools_dict[article] = [vector_query_tool, summary_tool]


Getting tools for article: D:\github-repo-tkhongsap\editor-in-chief\docs\article_2024-08-18.txt
Getting tools for article: D:\github-repo-tkhongsap\editor-in-chief\docs\article_2024-08-19.txt
Getting tools for article: D:\github-repo-tkhongsap\editor-in-chief\docs\article_2024-08-20.txt
Getting tools for article: D:\github-repo-tkhongsap\editor-in-chief\docs\article_2024-08-21.txt
Getting tools for article: D:\github-repo-tkhongsap\editor-in-chief\docs\article_2024-08-22.txt
Getting tools for article: D:\github-repo-tkhongsap\editor-in-chief\docs\article_2024-08-23.txt
Getting tools for article: D:\github-repo-tkhongsap\editor-in-chief\docs\article_2024-08-24.txt


In [6]:
from llama_index.llms.openai import OpenAI
from llama_index.embeddings.openai import OpenAIEmbedding

# Chat model used by the agent; only the model name is set here, every other
# option is left at the library default
llm = OpenAI(model="gpt-4o")

# NOTE(review): embed_model is not passed to anything in the cells visible
# here - confirm whether it is meant to be registered or handed to the index
embed_model = OpenAIEmbedding(model="text-embedding-3-large")

# Flatten the per-article tool lists into one list, keeping article order
all_tools = []
for article in articles:
    all_tools.extend(paper_to_tools_dict[article])
len(all_tools)

14

In [7]:
# Define an "object" index and retriever over these tools.
# We index the tools themselves so that the relevant tools can be retrieved first.
from llama_index.core import VectorStoreIndex
from llama_index.core.objects import ObjectIndex

# Build an object index over the tools, using VectorStoreIndex as the index class
obj_index = ObjectIndex.from_objects(all_tools, index_cls=VectorStoreIndex)

In [8]:
# Value passed as similarity_top_k when creating the tool retriever
SIMILARITY_TOP_K = 7
obj_retriever = obj_index.as_retriever(similarity_top_k=SIMILARITY_TOP_K)

In [10]:
from pprint import pprint

# Sanity-check the retriever with a sample query
tools = obj_retriever.retrieve("Any thing related to politics?")

# Inspect the metadata of the retrieved tool at index 4
ans = tools[4].metadata
pprint(ans)

ToolMetadata(description='Use ONLY IF you want to get a holistic summary of '
                         'the document. Do NOT use if you have specific '
                         'questions over the document.',
             name='summary_tool_article_2024-08-20',
             fn_schema=<class 'llama_index.core.tools.types.DefaultToolFnSchema'>,
             return_direct=False)


In [11]:
from llama_index.core.agent import FunctionCallingAgentWorker
from llama_index.core.agent import AgentRunner

# Build the function-calling worker. Tools are supplied through obj_retriever
# rather than as a fixed list, and verbose=True prints each tool call.
agent_worker = FunctionCallingAgentWorker.from_tools(
    llm=llm,
    tool_retriever=obj_retriever,
    verbose=True,
    system_prompt=""" \
You are an AI agent tasked with answering queries based on a set of provided news articles. 
Always utilize the tools provided to generate your responses. Answer queries in the same language as the user's input. 
Avoid relying on prior knowledge or external information not found within the provided tools.\

""",
)
# Wrap the worker in a runner; agent.query(...) is the entry point used below
agent = AgentRunner(agent_worker)

In [12]:
# Ask about a single day's news, with a follow-up on political coverage
response = agent.query(
    "Tell me about the news on 24th of August? "
    "Any thing related to politics?"
)

# print() applies str() to the response itself
print(response)

Added user message to memory: Tell me about the news on 24th of August? Any thing related to politics?
=== Calling Function ===
Calling function: summary_tool_article_2024-08-24 with args: {"input": "general"}
=== Function Output ===
The information provided covers a range of topics including government responses to natural disasters, support for small and medium enterprises (SMEs), initiatives in various industries like telecommunications and tourism, digital asset security, and challenges faced by companies like 7-Eleven in Japan. The content emphasizes the importance of community well-being, economic growth, business collaborations, and the need for companies to adapt to changing dynamics to remain competitive in the global market.
=== Calling Function ===
Calling function: summary_tool_article_2024-08-24 with args: {"input": "politics"}
=== Function Output ===
The information provided does not contain any details related to politics.
=== LLM Response ===
On the 24th of August, the 

In [13]:
# Ask a question that spans the whole week of articles; the two literals
# below concatenate into a single query string
response = agent.query(
    "what is the trend of the political news since last week? "
    "how is the sentiment over the week?  "
)

Added user message to memory: what is the trend of the political news since last week? how is the sentiment over the week?  
=== Calling Function ===
Calling function: summary_tool_article_2024-08-18 with args: {"input": "political news"}
=== Function Output ===
The political news covers recent developments such as the appointment of a new Prime Minister, the formation of the new cabinet, potential changes in ministerial positions within different political parties, discussions on the allocation of ministerial seats among various parties, and speculation about personnel changes within the government. There are also reports on the legality of party branch formations, the impact on the upcoming election of the Prime Minister, and the importance of adhering to legal processes in decision-making. Additionally, there are discussions on the roles of individuals in the government, the continuity of policies, and the need for collaboration within the ruling coalition.
=== Calling Function ===


In [15]:
# Show the concrete class of the object held in `response`
print(type(response))

<class 'llama_index.core.base.response.schema.Response'>


In [13]:
# Pretty-print the response text; pprint wraps the long string across lines
pprint(str(response))

('Over the past week, the political news has covered a wide range of topics, '
 'including:\n'
 '\n'
 '1. **Government Formation and Cabinet Appointments**: There has been '
 'significant focus on the appointment of a new Prime Minister and the '
 'formation of the new cabinet. Discussions have revolved around the '
 'allocation of ministerial seats among parties, adjustments in ministerial '
 'roles, and speculation about specific individuals who may be appointed to '
 'key positions.\n'
 '\n'
 '2. **Policy and Economic Discussions**: Topics such as government budget '
 'allocation, economic stimulus measures, digital wallet projects, and policy '
 'adjustments have been prominent. There have been discussions on economic '
 'strategies, societal policies, and the involvement of key political figures '
 'in shaping economic strategies.\n'
 '\n'
 '3. **Party Dynamics and Ethical Conduct**: Internal party issues, '
 'disciplinary actions, ethical investigations, and the dissolution of '


In [39]:
from llama_index.core.agent import FunctionCallingAgentWorker
from llama_index.core.agent import AgentRunner

# Rebuild the worker and runner with verbose=False for the interactive loop.
# Tools are again supplied through obj_retriever.
agent_worker = FunctionCallingAgentWorker.from_tools(
    llm=llm,
    tool_retriever=obj_retriever,
    verbose=False,
    system_prompt=""" \
You are an AI agent tasked with answering queries based on a set of provided news articles. 
Always utilize the tools provided to generate your responses. Answer queries in the same language as the user's input. 
Avoid relying on prior knowledge or external information not found within the provided tools.
""",
)
agent = AgentRunner(agent_worker)

# Interactive question loop: keep prompting until the user asks to stop.
while True:
    try:
        # Get the user query
        user_input = input("Ask a question (or type 'exit' to quit): ")
    except (EOFError, KeyboardInterrupt):
        # stdin was closed or the prompt was interrupted: leave the loop
        # cleanly instead of ending the cell with a traceback
        print("Exiting the query loop.")
        break

    # Ignore surrounding whitespace so inputs such as " exit " still quit
    user_input = user_input.strip()

    # Break the loop if the user wants to exit
    if user_input.lower() == "exit":
        print("Exiting the query loop.")
        break

    # Skip blank input rather than sending an empty query to the agent
    if not user_input:
        continue

    # Get the agent's response
    response = agent.query(user_input)

    # Print the response
    print("Agent response:", response)


Agent response: I can provide summaries for news articles from the following dates:

1. August 18, 2024
2. August 19, 2024
3. August 20, 2024
4. August 21, 2024
5. August 22, 2024
6. August 23, 2024
7. August 24, 2024

If you need information or summaries from any of these dates, please let me know!
Agent response: Aquí tienes los puntos destacados de las noticias para cada día:

- **18 de agosto de 2024**: Anutin Shinawatra expresó su apoyo al nuevo Primer Ministro y destacó la importancia del apoyo continuo. También mencionó que la formación del gabinete es responsabilidad exclusiva del Primer Ministro. Además, se especula sobre posibles cambios en la alineación del gabinete y la asignación de puestos ministeriales entre diferentes partidos políticos.

- **19 de agosto de 2024**: Los artículos abarcan una amplia gama de temas, incluyendo patrones de envejecimiento, registros de vehículos, desarrollos políticos y políticas gubernamentales en Tailandia. El gobierno enfrenta escrutinio 