# LlamaIndex Agent Testing Notebook

This notebook requires Python 3.11.11


## Setup env

In [None]:
from llama_index.llms.ollama import Ollama
from llama_index.core import Settings
from langchain_community.llms import Ollama as OllamaLLM
import os
from typing import Optional
import pandas as pd

import boto3


## Setup boto3 connection

In [9]:
# Read the (temporary) AWS credentials from the environment instead of pasting
# them into the notebook, so secrets never end up in the saved .ipynb or in git.
# Export AWS_ACCESS_KEY_ID / AWS_SECRET_ACCESS_KEY / AWS_SESSION_TOKEN before
# starting the kernel. Any variable that is not set resolves to None, in which
# case boto3 falls back to its default credential chain (profile, SSO, role...).
AWS_ACCESS_KEY_ID = os.environ.get("AWS_ACCESS_KEY_ID")
AWS_SECRET_ACCESS_KEY = os.environ.get("AWS_SECRET_ACCESS_KEY")
AWS_SESSION_TOKEN = os.environ.get("AWS_SESSION_TOKEN")

In [None]:
# One shared boto3 session for the dev account (eu-west-1), built from the
# AWS_* credential constants defined in the previous cell.
session_dev = boto3.Session(
    region_name="eu-west-1",
    aws_access_key_id=AWS_ACCESS_KEY_ID,
    aws_secret_access_key=AWS_SECRET_ACCESS_KEY,
    aws_session_token=AWS_SESSION_TOKEN,
)

# Service clients derived from that session; 'logs' is the CloudWatch Logs API.
sagemaker_client, s3_client, cloudwatch_client = (
    session_dev.client(service_name)
    for service_name in ("sagemaker", "s3", "logs")
)


## Setup ollama usage

In [2]:
# LlamaIndex-native Ollama client: used by the ReAct agent and registered as
# the LlamaIndex-wide default LLM. The generous timeout gives a locally served
# 8B model time to answer.
# (An earlier, inactive experiment swapped this for a BedrockLLM wired to the
# boto3 clients created above; it is not part of the current flow.)
llm = Ollama(
    model="llama3.1:8b",
    request_timeout=120.0,
)
Settings.llm = llm

# LangChain-flavoured client for the same local model; this is the LLM handed
# to SmartDataframe inside the data-query tool.
llm2 = OllamaLLM(model="llama3.1:8b")

  llm2 = OllamaLLM(model="llama3.1:8b")


## Define tools

In [3]:
def _latest_child_prefix(s3_client, bucket: str, prefix: str) -> Optional[str]:
    """Return the immediate child "folder" of `prefix` that holds the most recently modified object.

    Returns None when `prefix` has no child folders, or none of them contain objects.
    """
    paginator = s3_client.get_paginator('list_objects_v2')
    newest_prefix = None
    newest_modified = None
    for page in paginator.paginate(Bucket=bucket, Prefix=prefix, Delimiter='/'):
        for cp in page.get('CommonPrefixes', []):
            child = cp['Prefix']
            # Paginate here as well: a single list_objects_v2 call only returns
            # the first page of keys, which could miss the newest object.
            for child_page in paginator.paginate(Bucket=bucket, Prefix=child):
                for obj in child_page.get('Contents', []):
                    modified = obj['LastModified']
                    if newest_modified is None or modified > newest_modified:
                        newest_modified = modified
                        newest_prefix = child
    return newest_prefix


def find_latest_path(market:str, channel:str) -> Optional[str]:
    """
    Find the S3 location of the latest inference data for a market and channel.

    The lookup is two levels deep under ``dev/{market}/{channel}/``: first the
    child folder containing the most recently modified object is selected, then
    the most recently modified sub-folder inside it. If the selected folder has
    no sub-folders, the first-level folder itself is returned.

    Args:
        market: Market identifier used in the S3 key, e.g. "EU".
        channel: Sales channel used in the S3 key, e.g. "inline".

    Returns:
        An ``s3://bucket/prefix/`` URI ending in "/", or None when nothing
        exists for the given market and channel.
    """
    # NOTE: this function is registered as an agent tool, so this docstring is
    # presumably what the LLM sees as the tool description - keep it accurate.
    s3_client = session_dev.client('s3')
    bucket = "dynamic-pricing-inference-616469646173-dev"

    latest_folder = _latest_child_prefix(s3_client, bucket, f'dev/{market}/{channel}/')
    if latest_folder is None:
        # Nothing found: honour the Optional[str] contract instead of failing
        # on Prefix=None / str + None further down.
        return None

    latest_subfolder = _latest_child_prefix(s3_client, bucket, latest_folder)
    if latest_subfolder is not None:
        latest_folder = latest_subfolder

    return f"s3://{bucket}/{latest_folder}"




In [4]:
def query_data_give_market_channel(market: str, channel:str, query:str) -> Optional[str]:
    """
    Answer a natural-language question about the latest inference output for a market and channel.

    The parquet files under ``<latest path>/output/total/`` are downloaded once
    into a local cache folder (``./temp_folder_latest_{market}_{channel}``),
    loaded with pandas, restricted to rows where ``country == 'DE'`` and handed
    to a SmartDataframe, which answers `query` using the local LLM.

    Args:
        market: Market identifier, e.g. "EU".
        channel: Sales channel, e.g. "inline".
        query: The full question to ask about the data, written as a complete
            sentence (e.g. "what is the sum of pred_combined_sales for article
            IF8079 at discount 0.5?"), not just a list of values.

    Returns:
        Whatever ``SmartDataframe.chat`` produces for the question, or None when
        no inference data could be found for the market and channel.
    """
    # NOTE: this function is registered as an agent tool, so this docstring is
    # presumably what the LLM sees as the tool description - keep it accurate.
    import shutil

    # Download all files in the S3 folder to a local directory (cached per market/channel)
    temp_folder = f'./temp_folder_latest_{market}_{channel}'
    if not os.path.exists(temp_folder):
        latest_path = find_latest_path(market, channel)
        # Check for a missing path *before* appending the suffix; concatenating
        # first would raise on None and make this guard unreachable.
        if latest_path is None:
            print(f"No path found for market {market} and channel {channel}")
            return None
        path = latest_path + "output/total/"

        # Remove "s3://" and split bucket/key
        s3_path = path.replace("s3://", "")
        bucket, key = s3_path.split("/", 1)

        s3_client = session_dev.client('s3')
        os.makedirs(temp_folder, exist_ok=True)
        downloaded = 0
        try:
            paginator = s3_client.get_paginator('list_objects_v2')
            for page in paginator.paginate(Bucket=bucket, Prefix=key):
                for obj in page.get('Contents', []):
                    file_key = obj['Key']
                    # Files are flattened into one folder by base name.
                    file_name = os.path.basename(file_key)
                    if file_name:  # skip if it's a folder
                        local_path = os.path.join(temp_folder, file_name)
                        s3_client.download_file(bucket, file_key, local_path)
                        downloaded += 1
        except Exception:
            # Do not leave a half-filled cache behind: the existence check above
            # would otherwise skip the download on every later call.
            shutil.rmtree(temp_folder, ignore_errors=True)
            raise

        if downloaded == 0:
            # An empty cache folder would be just as sticky as a partial one.
            shutil.rmtree(temp_folder, ignore_errors=True)
            print(f"No files found under {path}")
            return None

    data = pd.read_parquet(temp_folder)
    # make data smaller
    # NOTE(review): the country filter is hard-coded to 'DE' regardless of `market` - confirm intended.
    data = data[data['country']=='DE']

    # NOTE(review): SmartDataframe is not imported anywhere in the visible
    # notebook; on a fresh kernel this line raises NameError until it is.
    df = SmartDataframe(data, config={"llm": llm2, "verbose": True})

    return df.chat(query)

    

In [9]:
# Smoke-test the data tool directly (no agent involved) with a fully spelled-out question.
query_data_give_market_channel("EU", "inline", "what is the sum of pred_combined_sales for IF8079 at different discount level?")

2025-05-16 16:24:39 [INFO] Question: what is the sum of pred_combined_sales for IF8079 at different discount level?
2025-05-16 16:24:39 [INFO] Running PandasAI with langchain_ollama-llm LLM...
2025-05-16 16:24:39 [INFO] Prompt ID: 4e2a7c52-f611-4dc5-9594-3f0d6e04a2fb
2025-05-16 16:24:39 [INFO] Executing Pipeline: GenerateChatPipeline
2025-05-16 16:24:39 [INFO] Executing Step 0: ValidatePipelineInput
2025-05-16 16:24:39 [INFO] Executing Step 1: CacheLookup
2025-05-16 16:24:39 [INFO] Executing Step 2: PromptGeneration
2025-05-16 16:24:40 [INFO] Using prompt: <dataframe>
dfs[0]:86955x16
country,date,article,discount,index,pred_causal_sales,pred_pred_sales,pred_combined_sales,pred_combined_probabilistic,pred_pred_probabilistic,pred_causal_probabilistic,campaign,voucher_campaign,voucher_markdown_proportion,voucher_redemption_rate,pred
DE,2025-05-20,IF7804,0.35,107,0.7335095764349758,2.1851785570358624,1.7003444528932363,0.7483764060848429,0.8141304716715491,0.059171953973552655,no_campaign,

Unnamed: 0,discount,pred_combined_sales
0,0.0,80.266004
1,0.05,93.720344
2,0.1,108.485711
3,0.15,124.493775
4,0.2,141.631276
5,0.25,159.737591
6,0.3,178.604143
7,0.35,197.975915
8,0.4,217.555277
9,0.45,237.008171


## Define agent

In [5]:
from llama_index.llms.openai import OpenAI
from llama_index.core.agent.workflow import ReActAgent
from llama_index.core.workflow import Context
import nest_asyncio
from llama_index.core.agent.workflow import AgentStream, ToolCallResult

# Patch asyncio so an event loop can be re-entered; needed because the notebook
# kernel (and later the Streamlit wrapper) already manage a loop of their own.
nest_asyncio.apply()

# ReAct agent that can pick between the two tool functions defined above.
agent = ReActAgent(
    tools=[find_latest_path, query_data_give_market_channel],
    llm=llm,
)

# Shared context holding the conversation history/session state across queries.
ctx = Context(agent)

async def run_agent_query(query):
    """Send `query` to the agent, echo its progress to stdout, and return the final result.

    While the run is in flight, every tool call (name, arguments, output) is
    printed and the model's streamed text is written as it arrives. The shared
    `ctx` is passed along so state carries over between calls.
    """
    run = agent.run(query, ctx=ctx)
    async for event in run.stream_events():
        if isinstance(event, ToolCallResult):
            print(f"\nCall {event.tool_name} with {event.tool_kwargs}\nReturned: {event.tool_output}")
        elif isinstance(event, AgentStream):
            # Streamed token chunk: print without newline so the text reads continuously.
            print(f"{event.delta}", end="", flush=True)
    # Awaiting the handler itself yields the completed run's result.
    result = await run
    return result

## Test agent

In [None]:
# Top-level `await` works inside a notebook cell, so no asyncio.run(...) wrapper is needed.
# This question should only require the find_latest_path tool.
response = await run_agent_query("What is latest result for EU inline?")

Thought: The current language of the user is English. I need to use a tool to help me answer the question.

Action: find_latest_path
Action Input: {"market": "EU", "channel": "inline"}
Call find_latest_path with {'market': 'EU', 'channel': 'inline'}
Returned: s3://dynamic-pricing-inference-616469646173-dev/dev/EU/inline/general_2025-05-01_to_2025-05-31/2025-05-12-11-06-13-694/
Thought: The observation indicates the path to the latest result for market=EU and channel=inline. I can use this information to answer the question.
Answer: The latest result is located at s3://dynamic-pricing-inference-616469646173-dev/dev/EU/inline/general_2025-05-01_to_2025-05-31/2025-05-12-11-06-13-694/

In [6]:
# Top-level `await` works inside a notebook cell, so no asyncio.run(...) wrapper is needed.
# This question should make the agent call the query_data_give_market_channel tool.
response = await run_agent_query("Can you give me prediction for combined sales for article IF8079 at discount 0.5 level in EU inline?")

Thought: The current language of the user is: English. I need to use a tool to help me answer the question.

Action: query_data_give_market_channel
Action Input: {"market": "EU", "channel": "inline", "query": "IF8079, 0.5"}2025-05-21 14:57:08 [INFO] Question: IF8079, 0.5
2025-05-21 14:57:09 [INFO] Running PandasAI with langchain_ollama-llm LLM...
2025-05-21 14:57:09 [INFO] Prompt ID: c3aecd87-8172-4e60-9bdd-80827ef6e92a
2025-05-21 14:57:09 [INFO] Executing Pipeline: GenerateChatPipeline
2025-05-21 14:57:09 [INFO] Executing Step 0: ValidatePipelineInput
2025-05-21 14:57:09 [INFO] Executing Step 1: CacheLookup
2025-05-21 14:57:09 [INFO] Executing Step 2: PromptGeneration
2025-05-21 14:57:09 [INFO] Using prompt: <dataframe>
dfs[0]:86955x16
country,date,article,discount,index,pred_causal_sales,pred_pred_sales,pred_combined_sales,pred_combined_probabilistic,pred_pred_probabilistic,pred_causal_probabilistic,campaign,voucher_campaign,voucher_markdown_proportion,voucher_redemption_rate,pred
DE

## Simple frontend

In [None]:
# TO RUN STREAMLIT HOST LOCALLY
# Export this entire notebook into app.py 
# !streamlit run app.py


import streamlit as st
import asyncio

def _get_or_create_event_loop():
    """Return the current asyncio event loop, creating and installing one if none is available."""
    try:
        return asyncio.get_event_loop()
    except RuntimeError:
        fresh_loop = asyncio.new_event_loop()
        asyncio.set_event_loop(fresh_loop)
        return fresh_loop


st.title("LlamaIndex Agent UI")

# The transcript lives in session state so it survives Streamlit's script reruns.
if "chat_history" not in st.session_state:
    st.session_state.chat_history = []

user_input = st.text_input("Ask a question to the agent:")
send_clicked = st.button("Send")

if send_clicked and user_input:
    st.session_state.chat_history.append(("User", user_input))

    # Use the same run_agent_query function directly, driving the coroutine
    # to completion synchronously.
    response_text = _get_or_create_event_loop().run_until_complete(run_agent_query(user_input))
    st.session_state.chat_history.append(("Agent", response_text))

# Render the whole conversation, oldest message first.
for entry in st.session_state.chat_history:
    speaker, text = entry
    st.markdown(f"**{speaker}:** {text}")