## 

# Ready, Set..

Set up: Imports, Document Loading

In [2]:
import json
import os
import pandas as pd
import time
import wandb
# from datetime import datetime, timedelta
from dotenv import load_dotenv
from openai import OpenAI
# from requests import JSONDecodeError, ConnectionError, ConnectTimeout
from src.utils import *
from src.set_datatypes import set_datatypes
from src.main_functions import *
from src.function_mapping import function_mapping

In [3]:
import numpy as np

In [4]:
# set flag for summarization with an LLM or not (for ingredients and instructions in 'get_recipe_info')
summary_flag = False
# Set day in config.py
print(f"Set day: {DAY}")

Set day: 2024-03-25


In [4]:

# read in prompt.csv
df_prompts = pd.read_csv("prompts_and_responses_02/prompts_02.csv", sep=";")
df_prompts = set_datatypes(df_prompts)
# define function_map which maps functions to their corresponding parameters and the columns of intended use and actual use:
function_map  = function_mapping(df_prompts)


In [5]:
# some statistics about previous answers:
df_path = "prompts_and_responses_02/responses_02.csv"
df = pd.read_csv(df_path, sep=';')

df.head(2)

Unnamed: 0.1,Unnamed: 0,ID,CONTROL_PROMPT_ID,user_prompt,LLM,intended_steps,steps,summary_flag,completion,correct_functions,...,parameter_dish_name,used_dish_name,function_image_classification,used_image_classification,parameter_image_url,used_image_url,function_compute_shopping_list,used_compute_shopping_list,parameter_items_list,used_items_list
0,0,0,0,I would like to make a salad. I prefer Greek c...,gpt-3.5-turbo,1,1,False,True,True,...,,,,,,,,,,
1,1,1,0,I would like to make a salad. I prefer Greek c...,gpt-4-turbo,1,1,False,True,True,...,,,,,,,,,,


In [6]:
# print length of df
print(f"#answers: {len(df)}")
# print number of 'completion' false:
print(f"#not completed: {len(df[df['completion'] == False])}")
print(f"#false functions: {len(df[df['correct_functions'] == False])}")
print(f"#false params: {len(df[df['correct_params'] == False])}")

#answers: 70
#not completed: 15
#false functions: 15
#false params: 16


In [7]:
df_prompts.intended_steps.value_counts()

intended_steps
2    20
3    20
4    20
1     8
5     2
Name: count, dtype: int64

In [8]:
# define counter (counter is updated with each iteration):
counter = -1

# GO!

In [None]:
# set model and prompt as in prompt.csv
counter += 1
model_gpt = df_prompts.iloc[counter]["LLM"]
user_prompt = df_prompts.iloc[counter]["user_prompt"]
print("counter: ", counter) 
print("LLM: ", model_gpt)
print(user_prompt)

In [None]:
# call the main function:
messages, error_log = main(user_prompt, model_gpt, summary_flag)

In [None]:
# Full conversation
messages 

In [None]:
str(messages)

In [None]:
error_log

In [None]:
responses = response_check(user_prompt, messages, model_gpt, summary_flag) # manually set flags and write comments
print_responses(responses)

In [None]:
# load current row of df_prompts:
new_row = df_prompts.iloc[[counter]].copy(deep=True)

# Update new_row with responses, handling types appropriately
for key, value in responses.items():
    if key in new_row.columns:
        # Update the value in new_row for the corresponding key
        new_row.at[new_row.index[0], key] = value
    else:
        new_row[key] = value
     

In [None]:
new_row

In [None]:
   
if len(error_log) > 0:
    new_row['error_log'] = str(error_log)

# copy the rest of the fields from the csv to a new df
updated_row = update_function_usage(new_row[0:1], function_map) 
updated_row_df = pd.DataFrame([updated_row])
# updated_row_df

In [None]:
# load previous responses as df
df_path = "prompts_and_responses_02/responses_02.csv"
df = pd.read_csv(df_path, sep=';')
df = set_datatypes(df)
df = pd.concat([df, updated_row_df], ignore_index=True)
df.tail(3)

In [None]:
# save the updated dataframe as the df
df.to_csv(df_path, index=False, sep=";") # saving the dataframe as a csv file

In [None]:
# Convert the DataFrame into a W&B Table
log_df = df.copy(deep=True)
log_df = safe_replace_na_with_none(log_df)
log_df['messages'] = log_df['messages'].apply(json.dumps)
# change the datatype of CONTROL_PROMPT_ID to int
log_df['CONTROL_PROMPT_ID'] = log_df['CONTROL_PROMPT_ID'].astype(int)
log_df['steps'] = log_df['steps'].astype(int)
log_df.tail(3)

In [None]:
logging_table = wandb.Table(dataframe=log_df) # Convert the DataFrame into a W&B Table
# Add the table to an Artifact to increase the row
logging_table_artifact = wandb.Artifact("responses", type="dataset")
logging_table_artifact.add(logging_table, "responses")

# Log the raw csv file within an artifact to preserve our data
logging_table_artifact.add_file(df_path)

In [None]:
WANDB_NOTEBOOK_NAME="LLM_multi_step_query" # set notebook name
# Initialize W&B
run = wandb.init(
    project="LLM_multi_step_query",
    entity="misssophie"
    )
# Log the table to visualize with a run...
run.log({"responses": logging_table})

# and Log as an Artifact to increase the available row limit!
run.log_artifact(logging_table_artifact)

In [None]:
wandb.finish()

# Query Plan in NLP

### Preparations (NL)

In [5]:
df_path = "prompts_and_responses_02/responses_02.csv"
df = pd.read_csv(df_path, sep=';')

In [6]:
# select those rows where the completion is False and where inteded_steps is greater or equal to 3
df_not_completed = df[(df['completion'] == False) & (df['intended_steps'] >= 3)]

# Restrict to only the columns needed for running the new prompts
df_not_completed = df_not_completed[['ID', 'CONTROL_PROMPT_ID', 'user_prompt', 'LLM', 'intended_steps']]
# if not already present, add the 'needed_columns' to the df_not_completed
needed_columns = ['ID', 'CONTROL_PROMPT_ID', 'user_prompt', 'LLM', 'intended_steps',
       'completion', 'correct_functions',
       'correct_params', 'correct_interpretation', 'comments', 'error','messages']
for col in needed_columns:
    if col not in df_not_completed.columns:
        df_not_completed[col] = None
df_not_completed.head(2)


Unnamed: 0,ID,CONTROL_PROMPT_ID,user_prompt,LLM,intended_steps,completion,correct_functions,correct_params,correct_interpretation,comments,error,messages
32,32,22,Please give me a list of ingredients for a rec...,gpt-3.5-turbo,3,,,,,,,
40,40,26,Which white wine is on offer today? Please pai...,gpt-3.5-turbo,3,,,,,,,


In [53]:
len(df_not_completed)

15

In [11]:
counter = 5

### Running NL prompts

In [94]:
counter += 1
model_gpt = df_not_completed.iloc[counter]["LLM"]
user_prompt = df_not_completed.iloc[counter]["user_prompt"]
print(f"counter: {counter}/{len(df_not_completed)-1}") 
print("LLM: ", model_gpt)
print(user_prompt)

counter: 14/14
LLM:  gpt-4-turbo
Using veggies that are currently on offer, please make a recipe suggestion and pair a wine with it. I prefer European, and Chinese cuisines. I am intolerant to peanuts.


In [95]:
# call the execution plan function:
messages = execution_plan(user_prompt, model_gpt, summary_flag)

In [96]:
messages

[{'role': 'system',
  'content': 'You are asked to complete a task by a user. Please help the user by outlining a detailed query plan in natural language. The query plan should describe the steps you would take and the functions you might use to achieve the user\'s goal. Here is a catalog of available functions: [{"name": "get_current_date", "description": "Returns current the date, time, and weekday.", "parameters": {}}, {"name": "is_public_holiday", "description": "Retrieves all public holidays of a German state. Default is \'Baden-W\\u00fcrttemberg\'.", "parameters": {"type": "object", "properties": {"state": {"type": "string", "description": "Name of state."}}, "required": ["state"]}}, {"name": "fruit_and_veg_offers", "description": "Retrieve supermarket offers for fruit and veg.", "parameters": {"type": "object", "properties": {"date": {"type": "string", "description": "Date for which the offers should be retrieved. Offers always start on a Monday of a week. Format: \'YYYY-MM-DD\'

To complete your request for a recipe suggestion using vegetables currently on offer and pairing it with a suitable wine, while considering your preference for European and Chinese cuisines and your peanut intolerance, I will follow these steps:\n\n1. **Retrieve Current Date**: Use the `get_current_date` function to determine today\'s date. This will help in fetching the relevant offers for vegetables.\n\n2. **Fetch Vegetable Offers**: With the current date obtained, use the `fruit_and_veg_offers` function to retrieve the list of vegetables that are currently on offer. This function requires the date parameter, which should be the start of the current week.\n\n3. **Find Suitable Recipes**: Using the list of vegetables on offer, employ the `find_recipe` function to search for recipes. The parameters will include:\n   - `query`: Use the names of the vegetables retrieved from the offers.\n   - `exclude_ingredients`: Include "peanuts" to ensure the recipes are safe for your intolerance.\n   - `cuisine`: Set to "European" and "Chinese" to match your cuisine preferences.\n\n4. **Select a Recipe**: From the list of recipes obtained, select one that appeals most based on your preferences or any additional criteria like preparation time or complexity.\n\n5. **Get Detailed Recipe Information**: Once a recipe is selected, use the `get_recipe_info` function with the recipe ID to fetch detailed information about the recipe, including ingredients and preparation steps.\n\n6. **Pair Wine with the Recipe**: With the selected recipe, use the `wine_pairing` function to find a suitable wine that complements the dish. The function requires the name of the dish.\n\n7. **Output the Recipe and Wine Pairing**: Present the detailed recipe information along with the wine pairing suggestion.\n\nBy following these steps, you will receive a tailored recipe suggestion based on current vegetable offers and a wine pairing that suits your dietary needs and taste preferences.'

In [97]:
responses = response_check(user_prompt, messages, model_gpt, summary_flag) # manually set flags and write comments
# print_responses(responses)

In [98]:
# add error (domain) to new_row
error_input = input("Error: ").strip()
responses["error"] = " " if error_input == None else error_input
print_responses(responses)

completion: True
correct_functions: True
correct_params: True
correct_interpretation: True
external_error: False
api_output_correct: True
caused_error: False
corrected_error: None
LLM: gpt-4-turbo
comments: perfect! does not take into account translations (like in all other cases)
summary_flag: False
steps: 0
error: 


In [99]:
# load current row of df_prompts:
new_row = df_not_completed.iloc[[counter]].copy(deep=True)

# Update new_row with responses, handling types appropriately
for key, value in responses.items():
    if key in new_row.columns:
        # Update the value in new_row for the corresponding key
        new_row.at[new_row.index[0], key] = value

In [100]:
new_row.head()

Unnamed: 0,ID,CONTROL_PROMPT_ID,user_prompt,LLM,intended_steps,completion,correct_functions,correct_params,correct_interpretation,comments,error,messages
69,97,34,"Using veggies that are currently on offer, ple...",gpt-4-turbo,4,True,True,True,True,perfect! does not take into account translatio...,,"[{'role': 'system', 'content': 'You are asked ..."


In [101]:
# load previous responses as df
df_path_nl = "prompts_and_responses_02/nl_responses_02.csv"
df_nl = pd.read_csv(df_path_nl, sep=';')
df_nl = pd.concat([df_nl, new_row], ignore_index=True)
df_nl.tail(3)

Unnamed: 0,ID,CONTROL_PROMPT_ID,user_prompt,LLM,intended_steps,completion,correct_functions,correct_params,correct_interpretation,comments,error,messages
11,68,37,"Using veggies that are currently on offer, ple...",gpt-3.5-turbo,4,False,False,False,False,"too vague, does not query for date",missing_crucial_step,"[{'role': 'system', 'content': 'You are asked ..."
12,69,34,"Using veggies that are currently on offer, ple...",gpt-3.5-turbo,4,False,False,False,True,"does not query for date, is somewhat unspecifi...","missing_crucial_step, vague","[{'role': 'system', 'content': 'You are asked ..."
13,97,34,"Using veggies that are currently on offer, ple...",gpt-4-turbo,4,True,True,True,True,perfect! does not take into account translatio...,,"[{'role': 'system', 'content': 'You are asked ..."


In [102]:
df_nl.to_csv(df_path_nl, index=False, sep=";")

In [83]:
# TODO: Test out 4-step queries with a pre-pended task to first descripe the execution plan

### Log results to W&B

In [107]:
# Convert the DataFrame into a W&B Table
log_df_nl = df_nl.copy(deep=True)
log_df_nl = safe_replace_na_with_none(log_df_nl)
log_df_nl['messages'] = log_df_nl['messages'].apply(json.dumps)
# change the datatype of CONTROL_PROMPT_ID to int
log_df_nl['CONTROL_PROMPT_ID'] = log_df_nl['CONTROL_PROMPT_ID'].astype(int)
log_df_nl['steps'] = log_df_nl['intended_steps'].astype(int)
log_df_nl.tail(3)

Skipping column messages due to complex data type.


Unnamed: 0,ID,CONTROL_PROMPT_ID,user_prompt,LLM,intended_steps,completion,correct_functions,correct_params,correct_interpretation,comments,error,messages,steps
11,68,37,"Using veggies that are currently on offer, ple...",gpt-3.5-turbo,4,False,False,False,False,"too vague, does not query for date",missing_crucial_step,"""[{'role': 'system', 'content': 'You are asked...",4
12,69,34,"Using veggies that are currently on offer, ple...",gpt-3.5-turbo,4,False,False,False,True,"does not query for date, is somewhat unspecifi...","missing_crucial_step, vague","""[{'role': 'system', 'content': 'You are asked...",4
13,97,34,"Using veggies that are currently on offer, ple...",gpt-4-turbo,4,True,True,True,True,perfect! does not take into account translatio...,,"[{""role"": ""system"", ""content"": ""You are asked ...",4


In [108]:
logging_table_nl = wandb.Table(dataframe=log_df_nl) # Convert the DataFrame into a W&B Table
# Add the table to an Artifact to increase the row
logging_table_artifact = wandb.Artifact("nl_query_plan", type="dataset")
logging_table_artifact.add(logging_table_nl, "nl_query_plan")

# Log the raw csv file within an artifact to preserve our data
logging_table_artifact.add_file(df_path_nl)

ArtifactManifestEntry(path='nl_responses_02.csv', digest='XEJQJ/fLd+mod5iE81NKJA==', size=123933, local_path='/Users/mayte/Library/Application Support/wandb/artifacts/staging/tmpshdi03io')

In [109]:
WANDB_NOTEBOOK_NAME="LLM_multi_step_query" # set notebook name
# Initialize W&B
run = wandb.init(
    project="LLM_multi_step_query",
    entity="misssophie"
    )
# Log the table to visualize with a run...
run.log({"nl_query_plan": logging_table_nl})

# and Log as an Artifact to increase the available row limit!
run.log_artifact(logging_table_artifact)

[34m[1mwandb[0m: Currently logged in as: [33mmisssophie[0m. Use [1m`wandb login --relogin`[0m to force relogin


<Artifact nl_query_plan>

In [110]:
wandb.finish()



# Testing Zone