In [3]:
import json
import os
import time
import logging
from dataclasses import dataclass
from typing import List, Dict, Any, Optional
from copy import deepcopy

from llm_interact import LLMInteractor, LLMConfig
from llm_interact_env import Environment, EnvironmentConfig, Task, run
from logger import logger  # Import the logger

%load_ext autoreload
%autoreload 2


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


### This setup follows tau-bench: we provide a paragraph containing the instruction and the task, then let the user LLM and the agent LLM talk to each other without our intervention.

In [None]:


# TOML files holding the LLM settings for the simulated user and the agent.
USER_LLM_CONFIG_PATH = "./llm_config_user.toml"
AGENT_LLM_CONFIG_PATH = "./llm_config_agent.toml"

# Load one LLMConfig per side of the conversation (user first, then agent).
user_config = LLMConfig.from_toml(USER_LLM_CONFIG_PATH)
assistant_config = LLMConfig.from_toml(AGENT_LLM_CONFIG_PATH)

# Turn on the `run_code` flag for the agent only
# (presumably lets the agent execute generated code — confirm in LLMConfig).
assistant_config.run_code = True

# Prompt skeleton passed to EnvironmentConfig as `user_prompt_template`.
# It carries a `{task_specific_instruction}` placeholder (presumably filled in
# per task by the environment — confirm in llm_interact_env). The leading
# newline and the four-space indentation are part of the string.
USER_PROMPT_TEMPLATE = """
    You are a data analyst for Walmart.
    
    {task_specific_instruction}
    """

# Environment settings: ties the two LLM configs together with the agent type,
# the interpreter settings file, the turn limit and the checkpoint file.
# NOTE(review): "trajactory" in the checkpoint file name looks like a typo for
# "trajectory"; kept unchanged because other tooling may read this exact path.
env_config = EnvironmentConfig(
    user_llm_config=user_config,
    assistant_llm_config=assistant_config,
    assistant_agent_type="base-agent",
    interpreter_config_path="interpreter_config.toml",
    user_prompt_template=USER_PROMPT_TEMPLATE,
    max_turns=30,
    checkpoint_path="trajactory_walmart.json",
)

# Full task statement for the Walmart sales-forecasting task: a background
# paragraph followed by the domain knowledge supplied with the task.
# NOTE(review): the wording is kept verbatim (including "monthly sales" next
# to weekly data and "should be impute") because editing it would change the
# prompt that is sent to the models.
WALMART_TASK_DESCRIPTION = """**background**:
        Walmart is a renowned retail corporation that operates a chain of hypermarkets. Here, Walmart has provided a data combining of 45 stores including store information and monthly sales. The data is provided on weekly basis. You received the data walmart_data/walmart.csv and your Main Objective is to predict sales of store in a week. You just joined Walmart and you are excited to start your first project.
        
        
        **knowledge**:
        
        Missing values should be impute with zero; 
        Negative sales should be dropped; 
        The holidays might have a significant impact on sales;
        The four holidays are Super Bowl, Labor Day, Thanksgiving, and Christmas; 
        The Holt-Winters exponential smoothing model is the best model for this task;
        Make the data more stationary by computing a differenced series and resampling the data to weekly frequency by averaging values;
        Calculate the weighted mean absolute error of the model, the weight on the holiday weeks should be 5, and the weight on the non-holiday weeks should be 1."""

# The single task for this run; no success criteria are specified.
walmart_task = Task(
    id="1",
    description=WALMART_TASK_DESCRIPTION,
    success_criteria="",
)

# Task list handed to the Environment.
tasks = [walmart_task]

# Second argument to `run`; the run log reports it as the "version".
RUN_VERSION = "taubench"

# Build the environment from the configuration and the task list, then start
# the run and keep whatever `run` returns.
environment = Environment(env_config, tasks)
completed_tasks = run(environment, RUN_VERSION)


2025-04-30 23:19:37 - DataSciBench - INFO - Loaded configuration from ./llm_config_user.toml
2025-04-30 23:19:37 - DataSciBench - INFO - Loaded configuration from ./llm_config_agent.toml


2025-04-30 23:19:37 - DataSciBench - INFO - Initializing Environment with 1 tasks
2025-04-30 23:19:37 - DataSciBench - INFO - Initialized LLMInteractor with model: gemini/gemini-2.5-flash-preview-04-17, temperature: 0.4
2025-04-30 23:19:37 - DataSciBench - INFO - Initialized LLMInteractor with model: gemini/gemini-2.5-flash-preview-04-17, temperature: 0.4
2025-04-30 23:19:37 - DataSciBench - INFO - Starting environment run with version: taubench
2025-04-30 23:19:37 - DataSciBench - INFO - Starting interaction between agents
2025-04-30 23:19:37 - DataSciBench - INFO - Starting interaction using version2 strategy
2025-04-30 23:19:37 - DataSciBench - INFO - Starting task loop with max turns: 30
2025-04-30 23:19:37 - DataSciBench - INFO - Calling LLM API with a message (total length: ~59 chars)
2025-04-30 23:19:38 - DataSciBench - INFO - LLM API call successful on attempt 1
2025-04-30 23:19:38 - DataSciBench - INFO - Checkpoint saved successfully
2025-04-30 23:19:38 - DataSciBench - INFO -

IndexError: list index out of range