# Build functions to analyze the content of the raw response from the agentic app

In [2]:
# Import main packages
import pandas as pd
import os

from datetime import datetime
from tqdm import tqdm
import time
import json
import numpy as np

# Define directories
def find_directory_upwards(dir_name):
    """
    Find the absolute path to a directory with the given name by searching upwards
    from the current notebook's directory.

    Parameters:
    dir_name (str): The name of the directory to find.

    Returns:
    str: The absolute path to the directory if found, otherwise None.
    """
    # Get the current notebook's directory
    current_dir = os.getcwd()
    
    while True:
        # Check if the directory exists in the current directory
        potential_path = os.path.join(current_dir, dir_name)
        if os.path.isdir(potential_path):
            return potential_path
        
        # Move to the parent directory
        parent_dir = os.path.abspath(os.path.join(current_dir, os.pardir))
        
        # If the current directory is the root directory, stop searching
        if current_dir == parent_dir:
            break
        
        current_dir = parent_dir
    
    # If the directory is not found, return None
    return None

code_dir = find_directory_upwards('codes')
raw_data_dir = find_directory_upwards('raw_data')
int_data_dir = find_directory_upwards('int_data')
final_data_dir = find_directory_upwards('final_data')
output_dir = find_directory_upwards('output')

print(code_dir)
# print(raw_data_dir)
print(int_data_dir)
print(output_dir)

/home/sosajuanbautista/aeai-filestore/projects/agentic/codes
/home/sosajuanbautista/aeai-filestore/projects/agentic/int_data
/home/sosajuanbautista/aeai-filestore/projects/agentic/output


## Load a raw response

In [3]:
path = os.path.join(final_data_dir, 'commodity_price_sources', 'commodity_prices_search_results_evaluated_2024-11-08_11.07.json')
with open(path, 'r') as file:
    raw_responses = json.load(file)
print(len(raw_responses))
for entry in raw_responses:
    print(entry['commodity'])
    print(entry.keys())
    print('\n')
raw_responses

2
Rotary tiller or power tiller
dict_keys(['commodity', 'search_results_to_evaluate', 'evaluated_search_results', 'min_approved_search_results', 'include_justification', 'use_raw_content', 'max_content_length', 'min_similarity_score', 'n_refined_search_terms', 'max_results_per_search_term', 'search_terms_temperature_iteration', 'iteration_number', 'max_iterations', 'n_input_tokens', 'n_output_tokens', 'n_tavily_api_calls'])


Farming excavator
dict_keys(['commodity', 'search_results_to_evaluate', 'evaluated_search_results', 'min_approved_search_results', 'include_justification', 'use_raw_content', 'max_content_length', 'min_similarity_score', 'n_refined_search_terms', 'max_results_per_search_term', 'search_terms_temperature_iteration', 'iteration_number', 'max_iterations', 'n_input_tokens', 'n_output_tokens', 'n_tavily_api_calls'])




[{'commodity': 'Rotary tiller or power tiller',
  'search_results_to_evaluate': [],
  'evaluated_search_results': [{'title': "Rototiller Buyer's Guide - Harvest to Table",
    'url': 'https://harvesttotable.com/rototiller-buyers-guide-best-rated-tillers/',
    'content': 'Rototiller Buyer’s Guide Rototiller manufacturers make tillers in all shapes and sizes, from small, light models to large machines with good-sized motors and big rotating tines. If you have a small home garden, an electric corded rototiller should be adequate. If you have a large garden or heavy clay soil, you’ll need a large rototiller with a powerful engine. No matter what size garden or yard you have, be sure to choose a rototiller that is powerful enough to do the job. Here’s our guide to help you buy a rototiller: What are rototillers used for? What is a rototiller? rototillers Rototiller-cultivator buying tips A. A rear-tine, gas-powered rototiller can dig 8 to 10 inches deep. Rototiller Hedge Trimmer Buyer’s Gu

In [4]:
print(entry['evaluated_search_results'][0].keys())


dict_keys(['title', 'url', 'content', 'score', 'raw_content', 'query', 'evaluation_outcome', 'justification'])


In [5]:
print(entry['search_terms_temperature_iteration'])

[['Farming excavator used prices', 0, None], ['Farming excavator price list', 0, None], ['Farming excavator blue book', 0, None], ['buy farming excavator online', 1, 0.3], ['farming excavator for sale', 1, 0.3], ['farming excavator price comparison', 1, 0.3], ['farming excavator pricing guide', 2, 0.5], ['buy farming excavator online', 2, 0.5], ['farming excavator listings with prices', 2, 0.5], ['farming excavator pricing guide', 3, 0.7], ['new and used farming excavators for sale', 3, 0.7], ['farming excavator sales listings with prices', 3, 0.7], ['farming excavator price listings', 4, 0.9], ['buy farming excavators online', 4, 0.9], ['farming excavator for sale with prices', 4, 0.9]]


## Analysis

In [None]:
def build_df_with_search_result_content(raw_responses):
    output_df = pd.DataFrame(columns=['url', 'raw_content'])

    # Fill the dataframe
    for entry in raw_responses:
        search_results = entry['evaluated_search_results']
        for sr in search_results:
            output_df = pd.concat([output_df, pd.DataFrame({'url': sr['url'], 'raw_content': sr['raw_content']}, index=[0])], ignore_index=True)
    
    # Convert None to NaN
    output_df = output_df.where(pd.notnull(output_df), np.NaN)

    return output_df

In [None]:
build_df_with_search_result_content(raw_responses)

In [6]:
def build_df_with_search_terms(raw_responses):
    output_df = pd.DataFrame(columns=['commodity', 'iteration', 'temperature', 'search_term'])

    # Fill the dataframe
    for entry in raw_responses:
        commodity = entry['commodity']
        search_terms_temperature_iteration = entry['search_terms_temperature_iteration']
        for sti in search_terms_temperature_iteration:
            output_df = pd.concat([output_df, pd.DataFrame({'commodity': commodity, 'iteration': sti[1], 'temperature': sti[2], 'search_term': sti[0]}, index=[0])], ignore_index=True)
    
    # Convert None to NaN
    output_df = output_df.where(pd.notnull(output_df), np.NaN)

    return output_df

In [7]:
build_df_with_search_terms(raw_responses)

Unnamed: 0,commodity,iteration,temperature,search_term
0,Rotary tiller or power tiller,0,,Rotary tiller or power tiller used prices
1,Rotary tiller or power tiller,0,,Rotary tiller or power tiller blue book
2,Rotary tiller or power tiller,0,,Rotary tiller or power tiller price list
3,Rotary tiller or power tiller,1,0.3,Rotary tiller price comparison
4,Rotary tiller or power tiller,1,0.3,Power tiller online prices
5,Rotary tiller or power tiller,1,0.3,Rotary tiller product listings with prices
6,Rotary tiller or power tiller,2,0.5,buy rotary tiller online
7,Rotary tiller or power tiller,2,0.5,rotary tiller price listings
8,Rotary tiller or power tiller,2,0.5,power tiller e-commerce sites
9,Rotary tiller or power tiller,3,0.7,buy rotary tiller with prices


In [8]:
def build_df_with_search_results(raw_responses):
    output_df = pd.DataFrame(columns=['commodity', 'search_term', 'title', 'url', 'evaluation_outcome', 'justification'])

    # Fill the dataframe
    for entry in raw_responses:
        commodity = entry['commodity']
        search_results = entry['evaluated_search_results']
        for sr in search_results:
            output_df = pd.concat([output_df, pd.DataFrame({'commodity': commodity, 'search_term': sr['query'], 
                'title': sr['title'], 'url': sr['url'], 'evaluation_outcome': sr['evaluation_outcome'], 
                'justification': sr['justification']}, index=[0])], ignore_index=True)
    
    # Convert None to NaN
    output_df = output_df.where(pd.notnull(output_df), np.NaN)

    # # Convert evaluation_outcome to boolean
    output_df['evaluation_outcome'] = output_df['evaluation_outcome'].astype(bool)


    return output_df

In [9]:
build_df_with_search_results(raw_responses).head()

Unnamed: 0,commodity,search_term,title,url,evaluation_outcome,justification
0,Rotary tiller or power tiller,Rotary tiller or power tiller blue book,Rototiller Buyer's Guide - Harvest to Table,https://harvesttotable.com/rototiller-buyers-g...,False,The search result is a buyer's guide that prov...
1,Rotary tiller or power tiller,Rotary tiller or power tiller blue book,PDF Section 5: Specifications & Capacities - L...,https://www.landpride.com/ari/attach/lp/public...,False,The search result provides specifications and ...
2,Rotary tiller or power tiller,Rotary tiller or power tiller blue book,What Is a Rotary Tiller and How Is It Used in ...,https://blog.machinefinder.com/26358/rotary-ti...,False,The webpage provides general information about...
3,Rotary tiller or power tiller,Rotary tiller or power tiller blue book,7 Best Tractor Tillers in 2022 (Full Reviews) ...,https://sandcreekfarm.net/best-rotary-tiller-f...,False,The webpage provides a list of the 'best' rota...
4,Rotary tiller or power tiller,Rotary tiller or power tiller blue book,Tarter Farm & Ranch 4 ft. Light-Duty Rotary Ti...,https://www.tractorsupply.com/tsc/product/farm...,True,The search result provides a specific product ...


In [20]:
def search_term_performace(raw_responses):
    search_terms = build_df_with_search_terms(raw_responses)
    search_results = build_df_with_search_results(raw_responses)

    # Calculate row count by group
    search_results['evaluated_searchs_results'] = search_results.groupby(['commodity', 'search_term'])['evaluation_outcome'].transform('size')

    # Calculate True count by group
    search_results['approved_search_results'] = search_results.groupby(['commodity', 'search_term'])['evaluation_outcome'].transform('sum')

    # Calculate False count by group
    search_results['disapproved_search_results'] = search_results['evaluated_searchs_results'] - search_results['approved_search_results']

    search_results_group = search_terms.merge(search_results[['commodity', 'search_term', 'evaluated_searchs_results', 'approved_search_results', 'disapproved_search_results']], 
                            on=['commodity', 'search_term'], how='left')

    return search_results_group

In [21]:
search_term_performace(raw_responses)

Unnamed: 0,commodity,iteration,temperature,search_term,evaluated_searchs_results,approved_search_results,disapproved_search_results
0,Rotary tiller or power tiller,0,,Rotary tiller or power tiller used prices,1.0,1.0,0.0
1,Rotary tiller or power tiller,0,,Rotary tiller or power tiller blue book,5.0,1.0,4.0
2,Rotary tiller or power tiller,0,,Rotary tiller or power tiller blue book,5.0,1.0,4.0
3,Rotary tiller or power tiller,0,,Rotary tiller or power tiller blue book,5.0,1.0,4.0
4,Rotary tiller or power tiller,0,,Rotary tiller or power tiller blue book,5.0,1.0,4.0
5,Rotary tiller or power tiller,0,,Rotary tiller or power tiller blue book,5.0,1.0,4.0
6,Rotary tiller or power tiller,0,,Rotary tiller or power tiller price list,1.0,1.0,0.0
7,Rotary tiller or power tiller,1,0.3,Rotary tiller price comparison,1.0,0.0,1.0
8,Rotary tiller or power tiller,1,0.3,Power tiller online prices,1.0,1.0,0.0
9,Rotary tiller or power tiller,1,0.3,Rotary tiller product listings with prices,3.0,3.0,0.0
