In [None]:
!pip install tiktoken==0.8.0 wandb==0.19.6 weave==0.51.33 docker==7.1.0 openai==1.61.1

In [None]:
import json
import os
import re
from urllib.parse import quote

import docker
import requests
import tiktoken
import wandb
import weave
from openai import OpenAI

# Shared API clients used by the cells below.
# The OpenAI client reads its API key from the environment.
openai_client = OpenAI()
# The Docker client is likewise configured from the environment.
docker_client = docker.from_env()

In [None]:
# Get (or create) the long-lived sandbox container used to run generated Python code.
container_name = "nbtest_wandb"
network_name = "wandb_network"
try:
    container = docker_client.containers.get(container_name)
except docker.errors.NotFound:
    # Ensure the network exists before attaching a new container to it
    try:
        network = docker_client.networks.get(network_name)
    except docker.errors.NotFound:
        network = docker_client.networks.create(network_name)

    # Create the container; `sleep infinity` keeps it alive so later cells can exec into it
    container = docker_client.containers.run(
        image="code-runner-client",
        name=container_name,
        shm_size="512mb",
        restart_policy={"Name": "unless-stopped"},
        network=network_name,
        detach=True,
        command=["/bin/sh", "-c", "sleep infinity"],
    )
else:
    # A container left over from an earlier session may have been stopped or paused;
    # exec_run only works against a running container, so bring it back up.
    if container.status == "paused":
        container.unpause()
    elif container.status in ("created", "exited"):
        container.start()
    container.reload()

In [None]:
## Start: Question / prompt
# The user question that every later step (title, plan, answer) is built around.
question = (
    "What is the highest rated (according to IMDB) Isabelle Adjani feature film that is less than 2 hours and is available on Vudu (now called Fandango at Home) to buy or rent?"
)

In [None]:
## 1) Generate title
# System prompt for the title step: the model must reply with a single
# <title>...</title> block, which the parsing code below extracts.
system_prompt = """
You are an AI that generates concise, descriptive titles based on the given question.
Your response must be in the format: <title>Generated title here</title>.
Keep the title brief and relevant to the question.

Title examples:
Question: "What are the best practices for securing a FastAPI backend?"
<title>Securing FastAPI best practices</title>

Question: "What are the key considerations when designing an LLM evaluation framework?"
<title>Designing an LLM eval framework</title>

Question: "How do I optimize OpenAI API costs for large-scale applications?"
<title>Optimizing OpenAI API costs</title>
""".strip()

# Ask the model for a title using the title system prompt and the question.
oai_response = openai_client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[
        {
            "role": "system",
            "content": system_prompt,
        },
        {"role": "user", "content": f"Question: {question}"},
    ]
)
completion_response = oai_response.choices[0].message.content

# Extract the text between the <title> tags. DOTALL lets the title span lines,
# and the fallback avoids an IndexError when the model ignores the format.
pattern = r'<title>(.*?)</title>'
title_match = re.search(pattern, completion_response, re.DOTALL)
if title_match:
    title = title_match.group(1).strip()
else:
    title = completion_response.strip()
print(title)

In [None]:
## 2) Generate plan
# System prompt for the planning step. It asks the model to reply with a
# <planning>...</planning> block describing the steps to take (not the answer),
# and includes two worked examples of the expected format.
# NOTE: this rebinds `system_prompt`, replacing the prompt used for the title step.
system_prompt = """
You are an AI that generates a structured plan for answering a given question. Your response must be in the format: <planning>Generated plan here</planning>.
Your plan should outline the logical steps and methodologies required to find the answer but should not provide the answer itself.

Consider the following when planning:
- The agent can execute Python code.
- The agent can make web searches using an external service.
- The agent can go to specific web pages to retrieve information.
- The plan should break down the approach into discrete steps.

Example:

Question: "How can I find the average temperature for a given city over the past month?"
<planning>
1. Identify a reliable weather data API that provides historical temperature data.
2. Structure an API call to fetch daily temperature data for the specified city over the past month.
3. Parse the API response and extract temperature values.
4. Compute the average temperature based on the extracted values.
5. Return a structured response containing the computed average.
</planning>

Question: "What are the most frequent words in a given text?"
<planning>
1. Receive the input text.
2. Preprocess the text by removing punctuation and converting it to lowercase.
3. Tokenize the text into individual words.
4. Count the frequency of each unique word.
5. Sort words by frequency and return the top results.
</planning>

Keep your response clear and structured within the <planning> block.
""".strip()

# Ask the model for a plan using the planning system prompt and the question.
oai_response = openai_client.chat.completions.create(
    model="gpt-4o",
    messages=[
        {
            "role": "system",
            "content": system_prompt,
        },
        {"role": "user", "content": f"Question: {question}"},
    ]
)
completion_response = oai_response.choices[0].message.content

# `plan` is the list of <planning> block bodies found in the reply (DOTALL because
# plans span several lines). If the model ignored the format, fall back to the raw
# reply so `plan[0]` below does not raise IndexError.
pattern = r'<planning>(.*?)</planning>'
plan = re.findall(pattern, completion_response, re.DOTALL)
if not plan:
    plan = [completion_response.strip()]
print(plan[0])

In [None]:
# Load the conversation context accumulated by previous runs, if any.
context_file = './context.json'
context = ""
# Default to "no previous context" so `context_data` is always defined, including
# when the file exists but cannot be parsed.
context_data = []
if os.path.exists(context_file):
    try:
        with open(context_file, 'r') as f:
            loaded_context = json.load(f)
    except json.JSONDecodeError:
        print("Error decoding JSON from context.json")
    else:
        # Later cells iterate over a list of entry dicts; ignore any other JSON shape.
        if isinstance(loaded_context, list):
            context_data = loaded_context
        else:
            print("context.json does not contain a list of entries. Continuing without previous context.")
else:
    print("No context.json file found. Continuing without previous context.")

# System prompt for the answer step. It tells the model to combine the question,
# the plan and any saved context, and to reply with one of four tagged blocks:
#   <answer>          - the final answer
#   <execute_python>  - Python code to run
#   <search>          - a general search query
#   <website_url>     - a specific page to fetch
# The parsing cell further down looks for exactly these tags.
# NOTE: this rebinds `system_prompt`, replacing the prompt used for the planning step.
system_prompt = """
You are an AI that generates a comprehensive answer to the given question using the provided planning and context.
Follow these instructions:
1. Combine the question, planning, and context to generate your response.
2. If the context already contains an answer, simply output it wrapped in an <answer>...</answer> block.
3. If additional validation or demonstration is needed, include any Python code within an <execute_python>...</execute_python> block.
4. If external information is required, use <search>...</search> for a general search
5. If external information from a specific page is required, use <website_url>...</website_url> to fetch data from a specific webpage.

### Available Python libraries:
The Python environment includes the following pre-installed libraries:
`pandas`, `numpy`, `scipy`, `scikit-learn`, `scikit-image`, `matplotlib`, `seaborn`, `beautifulsoup4`, `requests`

If additional libraries are needed, they can be installed using:

<execute_python>
import subprocess
subprocess.run("pip install package_name", shell=True)
</execute_python>

### Example formats:
#### If the answer is found in the context:
<answer>Your answer here</answer>

#### If Python computation is required:
<execute_python>
# Some Python code for computation
</execute_python>

#### If a search lookup is required:
<search>Query for the relevant information</search>

#### If a specific website page URL lookup is required:
<website_url>https://example.com/specific-page</website_url>
""".strip()

# `plan` is the list returned by re.findall in the planning cell; send its text
# rather than the list repr (which would wrap it in "['...']" with escaped newlines).
if isinstance(plan, str):
    plan_text = plan.strip()
else:
    plan_text = plan[0].strip() if plan else ""

# Conversation sent to the model: instructions, the question, then the plan.
messages = [
    {"role": "system", "content": system_prompt},
    {"role": "user", "content": f"Question: {question}"},
    {"role": "user", "content": f"Planning: {plan_text}"},
]
if context_data:
    # Replay the saved context. Entries with a type other than "assistant" are
    # prefixed with that type so the model can tell what each entry is.
    for entry in context_data:
        if "type" in entry and entry["type"] != "assistant":
            content = f'{entry["type"]}: {entry["content"]}'
        else:
            content = entry["content"]
        messages.append({"role": entry["role"], "content": content})

# Generate the final response using the OpenAI client.
oai_response = openai_client.chat.completions.create(
    model="gpt-4o",
    messages=messages
)
final_response = oai_response.choices[0].message.content

# Output the generated response.
print(final_response)

In [None]:
# Define a function to save parsed content to the context file
def save_to_context_file(role, entry_type, content, path=None):
    """Append one entry to the JSON context file.

    Args:
        role: Chat role stored under the entry's "role" key.
        entry_type: Label stored under the entry's "type" key.
        content: Text stored under the entry's "content" key. Nothing is
            written when it is None, empty or only whitespace.
        path: Optional file to write to. Defaults to the module-level
            `context_file`.

    The file holds a JSON list of entries. A missing file, an undecodable
    file, or a file whose JSON is not a list is treated as an empty list.
    """
    if not content or not content.strip():
        return  # Do not save blank content

    target = context_file if path is None else path
    entries = []
    if os.path.exists(target):
        try:
            with open(target, 'r') as f:
                loaded = json.load(f)
        except json.JSONDecodeError:
            loaded = []
        # Only a list can be appended to; anything else is discarded.
        if isinstance(loaded, list):
            entries = loaded

    entries.append({"role": role, "type": entry_type, "content": content})

    with open(target, 'w') as f:
        json.dump(entries, f, indent=4)

# Strip every tagged block matching `pattern` (tags included) out of the response
def remove_matched_content(response, pattern):
    """Return `response` with all matches of `pattern` deleted and outer whitespace trimmed.

    The pattern is applied with DOTALL so a block may span several lines.
    """
    block_regex = re.compile(pattern, re.DOTALL)
    without_blocks = block_regex.sub('', response)
    return without_blocks.strip()

# Patterns for the tagged blocks the answer system prompt asks the model to emit
answer_pattern = r'<answer>(.*?)</answer>'
execute_python_pattern = r'<execute_python>(.*?)</execute_python>'
search_pattern = r'<search>(.*?)</search>'
website_url_pattern = r'<website_url>(.*?)</website_url>'


def _record_tagged_block(response, tag_pattern, entry_type):
    """Find the first block matching `tag_pattern` and save it to the context file.

    When a block is found, two entries are saved: the response with every such
    block removed (type "assistant"), then the stripped block body (type
    `entry_type`). Blank pieces are skipped by save_to_context_file.

    Returns:
        (match, content): the re.Match and the stripped block body, or
        (None, None) when the response contains no such block.
    """
    match = re.search(tag_pattern, response, re.DOTALL)
    if match is None:
        return None, None
    save_to_context_file("assistant", "assistant", remove_matched_content(response, tag_pattern))
    block_content = match.group(1).strip()
    save_to_context_file("assistant", entry_type, block_content)
    return match, block_content


# Extract and save each kind of block; the *_match / *_content names are read by
# the search, website and execute cells below.
answer_match, answer_content = _record_tagged_block(final_response, answer_pattern, "answer")
execute_python_match, execute_python_content = _record_tagged_block(
    final_response, execute_python_pattern, "execute_python"
)
search_match, search_content = _record_tagged_block(final_response, search_pattern, "search")
website_url_match, website_url_content = _record_tagged_block(
    final_response, website_url_pattern, "website_url"
)

# Only when NONE of the blocks was found is the whole response stored as a plain
# message. The answer block is part of this check so that an answer-only response
# is not saved a second time as a "message".
if not (answer_match or execute_python_match or search_match or website_url_match):
    save_to_context_file("assistant", "message", final_response)

In [None]:
# Run the requested web search through Jina's search endpoint and save the result.
if search_match:
    # The query goes into the URL path, so percent-encode it: a raw "?", "#" or "/"
    # would otherwise be read as URL syntax and cut the query short.
    url = f"https://s.jina.ai/{quote(search_content, safe='')}"
    headers = {
        "X-Engine": "direct",
        "X-Retain-Images": "none"
    }
    jina_api_key = os.getenv('JINA_API_KEY')
    if jina_api_key:
        # Only send credentials when a key is configured (avoids "Bearer None").
        headers["Authorization"] = f"Bearer {jina_api_key}"
    # Bound the request so an unresponsive endpoint cannot hang the notebook.
    response = requests.get(url, headers=headers, timeout=60)
    save_to_context_file("assistant", "search_result", response.text)

In [None]:
# Fetch the requested page through Jina's reader endpoint and save the result.
if website_url_match:
    url = f"https://r.jina.ai/{website_url_content}"
    headers = {}
    jina_api_key = os.getenv('JINA_API_KEY')
    if jina_api_key:
        # Only send credentials when a key is configured (avoids "Bearer None").
        headers["Authorization"] = f"Bearer {jina_api_key}"
    # Bound the request so an unresponsive endpoint cannot hang the notebook.
    response = requests.get(url, headers=headers, timeout=60)
    save_to_context_file("assistant", "website_url_result", response.text)

In [None]:
# Run the generated Python code inside the sandbox container and save its output.
if execute_python_match:
    exec_result = container.exec_run(
        cmd=["python", "-c", execute_python_content], stdout=True, stderr=True
    )
    # stdout and stderr arrive as one byte string; replace undecodable bytes rather
    # than raising, since the executed code may print arbitrary data.
    exec_output = (exec_result.output or b"").decode("utf-8", errors="replace")
    if not exec_output.strip():
        # save_to_context_file skips blank content; record that the code ran so the
        # next model turn can see it produced no output.
        exec_output = f"(no output; exit code {exec_result.exit_code})"
    save_to_context_file("assistant", "execute_python_result", exec_output)