# Build functions to analyze the content of the raw response from the agentic app

In [1]:
# Import main packages
import pandas as pd
import os

from datetime import datetime
from tqdm import tqdm
import json
import numpy as np


# App package
from llm_agentic_app_for_price_sources import build_df_with_search_result_content, build_df_with_search_terms, search_term_performace, build_df_with_search_results, analytics_report

# Define directories
def find_directory_upwards(dir_name):
    """
    Locate a directory called ``dir_name`` in the current working directory
    or in the nearest ancestor directory that contains it.

    Parameters:
    dir_name (str): Name of the directory to look for.

    Returns:
    str: Path of the first match (working directory first, then each parent
    in turn), or None when the filesystem root is reached without a match.
    """
    # The search starts where the notebook kernel is running
    search_root = os.getcwd()
    candidate = os.path.join(search_root, dir_name)

    while not os.path.isdir(candidate):
        # Step one level up
        next_root = os.path.abspath(os.path.join(search_root, os.pardir))

        # The root directory is its own parent: nothing left to inspect
        if next_root == search_root:
            return None

        search_root = next_root
        candidate = os.path.join(search_root, dir_name)

    return candidate

# Resolve the project folders by searching upwards from the working directory
code_dir, raw_data_dir, int_data_dir, final_data_dir, output_dir = map(
    find_directory_upwards,
    ('codes', 'raw_data', 'int_data', 'final_data', 'output'),
)

# Show the resolved locations, one per line (raw_data_dir is not printed)
print(code_dir, int_data_dir, output_dir, sep='\n')

  from pandas.core import (


/home/sosajuanbautista/aeai-filestore/projects/agentic/codes
/home/sosajuanbautista/aeai-filestore/projects/agentic/int_data
/home/sosajuanbautista/aeai-filestore/projects/agentic/output


## Load a raw response

In [2]:
# Load the evaluated commodity search results. The file is opened explicitly as
# UTF-8 so that decoding does not depend on the platform's default locale encoding.
path = os.path.join(final_data_dir, 'commodity_price_sources', 'commodity_prices_search_results_evaluated_2024-11-19_12.16.json')
with open(path, 'r', encoding='utf-8') as file:
    raw_responses_commodities = json.load(file)

# Number of entries, then for each entry the searched item and the keys it carries
print(len(raw_responses_commodities))
for entry in raw_responses_commodities:
    print(entry['item_to_search'])
    print(entry.keys())
    print('\n')

20
Rotary tiller or power tiller
dict_keys(['item_to_search', 'item_type', 'search_results_to_evaluate', 'evaluated_search_results', 'min_approved_search_results', 'include_justification', 'use_raw_content', 'max_content_length', 'min_similarity_score', 'n_refined_search_terms', 'max_results_per_search_term', 'search_terms_temperature_iteration', 'iteration_number', 'max_iterations', 'n_input_tokens', 'n_output_tokens', 'n_tavily_api_calls'])


Farming excavator
dict_keys(['item_to_search', 'item_type', 'search_results_to_evaluate', 'evaluated_search_results', 'min_approved_search_results', 'include_justification', 'use_raw_content', 'max_content_length', 'min_similarity_score', 'n_refined_search_terms', 'max_results_per_search_term', 'search_terms_temperature_iteration', 'iteration_number', 'max_iterations', 'n_input_tokens', 'n_output_tokens', 'n_tavily_api_calls'])


Mowers
dict_keys(['item_to_search', 'item_type', 'search_results_to_evaluate', 'evaluated_search_results', 'min_appro

In [3]:
# Load the evaluated product search results. The file is opened explicitly as
# UTF-8 so that decoding does not depend on the platform's default locale encoding.
path = os.path.join(final_data_dir, 'product_price_sources', 'product_prices_search_results_evaluated_2024-11-18_00.20.json')
with open(path, 'r', encoding='utf-8') as file:
    raw_responses_products = json.load(file)

# Number of entries, then the searched item of each entry.
# After the loop, `entry` stays bound to the last product entry.
print(len(raw_responses_products))
for entry in raw_responses_products:
    print(entry['item_to_search'])

20
E-Z-GO RXV Elite Golf Cart
B. Braun Infusomat Space Infusion Pump
CLUB CAR DS Golf cart
Club Car IQ Electric Electric Golf Car
Yale GC050VX Forklift
Chevrolet Traverse SUV
Lenovo ThinkPad T14s G2 Notebook computer
Ford E-150 Cargo Van
Kubota LA525 Front Loader
Intek Strength Pro Steel Dumbbell Pair Dumbbells
Dell PowerEdge R740 Computer server
Park Cottage RV Recreational Vehicle
E-Z-GO RXV-G Golf cart
Xerox DocuMate 262 Document Scanner
Chrysler Voyager LX Minivan
Toyota 8FGCU25 Forklift
Land Pride BB1260 Box Scraper
Raymond 740R45TT Reach Forklift
EZGO TXT PDS Golf Cart
Land Pride RCR1260 Rotary Cutter


In [4]:
# Show the fields of one evaluated search result. The last product entry is
# indexed explicitly instead of relying on a loop variable left over from an
# earlier cell, so the output does not depend on which cell ran last.
print(raw_responses_products[-1]['evaluated_search_results'][0].keys())


dict_keys(['title', 'url', 'content', 'score', 'raw_content', 'query', 'evaluation_outcome', 'justification'])


In [5]:
# Show the search-term records stored for the last product entry. The entry is
# indexed explicitly instead of relying on a loop variable left over from an
# earlier cell, so the output does not depend on which cell ran last.
print(raw_responses_products[-1]['search_terms_temperature_iteration'])

[['Land Pride RCR1260 Rotary Cutter used prices', 0, None], ['Land Pride RCR1260 Rotary Cutter price list', 0, None], ['Land Pride RCR1260 Rotary Cutter blue book', 0, None], ['buy Land Pride RCR1260 Rotary Cutter', 1, 0.3], ['Land Pride RCR1260 Rotary Cutter for sale', 1, 0.3], ['Land Pride RCR1260 Rotary Cutter pricing information', 1, 0.3], ['Land Pride RCR1260 Rotary Cutter price comparison', 2, 0.5], ['Land Pride RCR1260 Rotary Cutter listings', 2, 0.5], ['Land Pride RCR1260 Rotary Cutter for sale near me', 2, 0.5]]


## Analysis

In [6]:
# Tabulate the content retrieved for the product search results; the displayed
# frame has `url` and `raw_content` columns.
build_df_with_search_result_content(raw_responses_products)

Unnamed: 0,url,raw_content
0,https://prestigegolfcars.com/,Click Here to view our Golf Cars currently on ...
1,https://ezgo.txtsv.com/personal/models/freedom...,Freedom RXV | E-Z-GO®\nSkip to main content\n\...
2,https://keweenawmountainlodge.com/wp-content/u...,UNBEATABLE ENERGY EFFICIENCY MAINTENANCE FREE ...
3,https://www.gowithgarretts.com/product/e-z-go-...,E-Z-GO Freedom RXV ELITE Lithium - Go With Gar...
4,https://www.windtreegolf.com/kelley-blue-book-...,Kelley Blue Book for Golf Carts? (How to Value...
...,...,...
465,https://www.tractorbynet.com/forums/threads/la...,
466,https://www.machinerypete.com/details/rotary-c...,\nComplete the form below and we'll contact yo...
467,https://www.tractorhouse.com/listing/for-sale/...,LAND PRIDE RCR1260\nSeller Information\nGerman...
468,https://www.tractorhouse.com/listings/for-sale...,


In [7]:
# Tabulate the search terms of each product; the displayed frame lists them
# alongside their `iteration` and `temperature` values.
build_df_with_search_terms(raw_responses_products, item_type='product')

  output_df = pd.concat([output_df, pd.DataFrame({item_type: item, 'iteration': sti[1],


Unnamed: 0,product,iteration,temperature,search_term
0,E-Z-GO RXV Elite Golf Cart,0,,E-Z-GO RXV Elite Golf Cart price list
1,E-Z-GO RXV Elite Golf Cart,0,,E-Z-GO RXV Elite Golf Cart used prices
2,E-Z-GO RXV Elite Golf Cart,0,,E-Z-GO RXV Elite Golf Cart blue book
3,E-Z-GO RXV Elite Golf Cart,1,0.3,E-Z-GO RXV Elite Golf Cart for sale
4,E-Z-GO RXV Elite Golf Cart,1,0.3,E-Z-GO RXV Elite Golf Cart pricing
...,...,...,...,...
175,Land Pride RCR1260 Rotary Cutter,1,0.3,Land Pride RCR1260 Rotary Cutter for sale
176,Land Pride RCR1260 Rotary Cutter,1,0.3,Land Pride RCR1260 Rotary Cutter pricing infor...
177,Land Pride RCR1260 Rotary Cutter,2,0.5,Land Pride RCR1260 Rotary Cutter price comparison
178,Land Pride RCR1260 Rotary Cutter,2,0.5,Land Pride RCR1260 Rotary Cutter listings


In [8]:
# Preview the first rows of the product search-results table (search term,
# title, url, evaluation outcome and justification per the displayed columns).
# The commodity variant below is kept disabled.
# build_df_with_search_results(raw_responses_commodities).head()
build_df_with_search_results(raw_responses_products, item_type='product').head()

Unnamed: 0,product,search_term,title,url,evaluation_outcome,justification
0,E-Z-GO RXV Elite Golf Cart,E-Z-GO RXV Elite Golf Cart blue book,Prestige Golf Cars | Golf Cart Dealer in Murri...,https://prestigegolfcars.com/,True,The webpage is from a golf cart dealer and inc...
1,E-Z-GO RXV Elite Golf Cart,E-Z-GO RXV Elite Golf Cart blue book,Freedom RXV | E-Z-GO®,https://ezgo.txtsv.com/personal/models/freedom...,True,"The webpage provides a starting MSRP of $8,674..."
2,E-Z-GO RXV Elite Golf Cart,E-Z-GO RXV Elite Golf Cart blue book,PDF,https://keweenawmountainlodge.com/wp-content/u...,False,The search result is a PDF document containing...
3,E-Z-GO RXV Elite Golf Cart,E-Z-GO RXV Elite Golf Cart blue book,E-Z-GO Freedom RXV ELITE Lithium - Go With Gar...,https://www.gowithgarretts.com/product/e-z-go-...,True,The webpage provides a product listing for the...
4,E-Z-GO RXV Elite Golf Cart,E-Z-GO RXV Elite Golf Cart blue book,Kelley Blue Book for Golf Carts? (How to Value...,https://www.windtreegolf.com/kelley-blue-book-...,False,The search result provides general information...


In [9]:
# Summarise each product search term; the displayed frame reports evaluated,
# approved and disapproved search-result counts per term.
search_term_performace(raw_responses_products, item_type='product')

  output_df = pd.concat([output_df, pd.DataFrame({item_type: item, 'iteration': sti[1],


Unnamed: 0,product,iteration,temperature,search_term,evaluated_searchs_results,approved_search_results,disapproved_search_results
0,E-Z-GO RXV Elite Golf Cart,0,,E-Z-GO RXV Elite Golf Cart price list,4.0,3.0,1.0
1,E-Z-GO RXV Elite Golf Cart,0,,E-Z-GO RXV Elite Golf Cart used prices,5.0,5.0,0.0
2,E-Z-GO RXV Elite Golf Cart,0,,E-Z-GO RXV Elite Golf Cart blue book,5.0,3.0,2.0
3,E-Z-GO RXV Elite Golf Cart,1,0.3,E-Z-GO RXV Elite Golf Cart for sale,3.0,3.0,0.0
4,E-Z-GO RXV Elite Golf Cart,1,0.3,E-Z-GO RXV Elite Golf Cart pricing,3.0,3.0,0.0
...,...,...,...,...,...,...,...
175,Land Pride RCR1260 Rotary Cutter,1,0.3,Land Pride RCR1260 Rotary Cutter for sale,3.0,3.0,0.0
176,Land Pride RCR1260 Rotary Cutter,1,0.3,Land Pride RCR1260 Rotary Cutter pricing infor...,2.0,2.0,0.0
177,Land Pride RCR1260 Rotary Cutter,2,0.5,Land Pride RCR1260 Rotary Cutter price comparison,3.0,2.0,1.0
178,Land Pride RCR1260 Rotary Cutter,2,0.5,Land Pride RCR1260 Rotary Cutter listings,2.0,2.0,0.0


In [10]:
# search_term_performace(raw_responses_commodities).to_csv(os.path.join(final_data_dir, 'commodity_price_sources', 'search_term_performance.csv'), index=False)

In [11]:
# Print the analytics report for the product run; its output begins with the
# number of approved search results per product.
# analytics_report(raw_responses_commodities, item_type='commodity')
analytics_report(raw_responses_products, item_type='product')

  output_df = pd.concat([output_df, pd.DataFrame({item_type: item, 'iteration': sti[1],


How many approved search results were found for each product?
+--------------------------------------------------+---------------------------+
| product                                          |   approved_search_results |
|--------------------------------------------------+---------------------------|
| Lenovo ThinkPad T14s G2 Notebook computer        |                        21 |
| Club Car IQ Electric Electric Golf Car           |                        20 |
| E-Z-GO RXV Elite Golf Cart                       |                        20 |
| CLUB CAR DS Golf cart                            |                        20 |
| Chevrolet Traverse SUV                           |                        19 |
| Park Cottage RV Recreational Vehicle             |                        19 |
| Ford E-150 Cargo Van                             |                        19 |
| Chrysler Voyager LX Minivan                      |                        18 |
| Raymond 740R45TT Reach Forklift              

In [12]:
# Print the analytics report for the commodity run; its output begins with the
# number of approved search results per commodity.
analytics_report(raw_responses_commodities, item_type='commodity')

  output_df = pd.concat([output_df, pd.DataFrame({item_type: item, 'iteration': sti[1],


How many approved search results were found for each commodity?
+------------------------------------------------------+---------------------------+
| commodity                                            |   approved_search_results |
|------------------------------------------------------+---------------------------|
| Notebook computer                                    |                        20 |
| Rotary tiller or power tiller                        |                        19 |
| Automobiles or cars                                  |                        17 |
| Inkjet printer for commercial printing applications  |                        17 |
| Multifunction machines                               |                        17 |
| Computer server                                      |                        17 |
| Forklifts                                            |                        16 |
| General tool kits                                    |                        16 |
|

In [None]:
# import matplotlib.pyplot as plt

# df = search_term_performace(raw_responses_commodities)

# # Grouping the data by 'commodity' and 'iteration' to aggregate the results
# grouped_data = df.groupby(['commodity', 'iteration'])[['approved_search_results', 'disapproved_search_results']].sum().reset_index()

# # Get the list of unique commodities
# commodities = grouped_data['commodity'].unique()

# for commodity in commodities:
#     # Filter data for the current commodity
#     data = df[df['commodity'] == commodity]

#     # Setting up the figure and axis
#     fig, ax = plt.subplots(figsize=(16, 10))

#     # Initialize variables for x positions and labels
#     x_positions = []
#     x_labels = []
#     current_position = 0

#     # Iterate over each iteration group
#     for iteration in sorted(data['iteration'].unique()):
#         # Filter data for this iteration
#         iteration_data = data[data['iteration'] == iteration]

#         # Define the x positions for this iteration's search terms
#         search_terms = iteration_data['search_term'].values
#         num_terms = len(search_terms)
#         positions = list(range(current_position, current_position + num_terms))

#         # Update x positions and labels
#         x_positions.extend(positions)
#         # Split long search terms into two lines for better readability
#         formatted_labels = [f"Iter {iteration}:\n{term}" if len(term) > 30 else f"Iter {iteration}:\n{term}"
#                             for term in search_terms]
#         x_labels.extend(formatted_labels)

#         # Plot approved and disapproved results
#         ax.bar(
#             positions,
#             iteration_data['approved_search_results'],
#             width=0.4,
#             color='green',
#         )
#         ax.bar(
#             positions,
#             iteration_data['disapproved_search_results'],
#             width=0.4,
#             bottom=iteration_data['approved_search_results'],
#             color='red',
#         )

#         # Update current position for the next iteration
#         current_position += num_terms + 1  # Adding a gap between iterations

#     # Customizing the plot
#     ax.set_xlabel('Search Terms (by Iteration)', fontsize=14)
#     ax.set_ylabel('Count of Results', fontsize=14)
#     ax.set_title(f'Search Term Results for {commodity}', fontsize=16, fontweight='bold')
#     ax.set_xticks(x_positions)
#     ax.set_xticklabels(x_labels, rotation=45, ha='right', fontsize=12)
    
#     # Adding a single legend
#     ax.legend(['Approved', 'Disapproved'], fontsize=12)

#     # Enhancing the plot appearance
#     ax.grid(axis='y', linestyle='--', alpha=0.7)
#     plt.tight_layout()
#     plt.show()
