In [4]:
import os
import subprocess

# autoreload any .py scripts, so edits to imported project modules are picked
# up without restarting the kernel
%load_ext autoreload
%autoreload 2

# set the project's root directory as the notebooks' working directory
# check=True makes a failing `git` call (e.g. the notebook is run outside a git
# checkout) raise CalledProcessError right here. Without it stdout is empty and
# os.chdir("") fails later with a misleading FileNotFoundError.
git_root = subprocess.run(
    ['git', 'rev-parse', '--show-toplevel'],
    capture_output=True,
    text=True,
    check=True,
).stdout.strip()
os.chdir(git_root)
print(f"current working directory: {os.getcwd()}")

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
current working directory: /Users/nilsgandlau/code/browser-automation


In [6]:
from vertexai.generative_models import GenerativeModel, Part
import google.generativeai as genai

# an image showing available/booked tennis courts
# (relative path: resolved against the current working directory)
image_path = "screenshots/79.jpg"
# upload the screenshot once; the returned handle is passed to the model
# together with each question
image_file = genai.upload_file(path=image_path)

In [7]:
# Gemini model under test: it answers the questions about the screenshot
model = genai.GenerativeModel(model_name="models/gemini-1.5-flash")

In [10]:
from dataclasses import dataclass

@dataclass
class QAPair:
    """A question about the screenshot paired with its reference answer.

    `q` is sent to the model under test; `a` is the solution the model's
    reply is graded against.
    """
    q: str  # question text; also used verbatim as the prompt
    a: str  # reference solution (ground truth)

# Reference question/solution pairs for the court-booking screenshot.
qa1 = QAPair(
    q="how many tennis courts does the club have?",
    a="the tennis club has 17 courts."
)
qa2 = QAPair(
    q="are any courts free for 1h between 17:00 and 19:00?",
    a="available timeslots between 17:00 and 19:00 are: court 14 17:00-18:00, court 14 17:30-18:30, court 16 17:00-18:00.",
)
# NOTE(review): this solution says court 14 is free 6-7pm (18:00-19:00), but
# qa2's solution does not list a court 14 18:00-19:00 slot among the free 1h
# slots between 17:00 and 19:00. One of the two looks wrong; verify against
# the screenshot.
qa3 = QAPair(
    q="are any courts free for 1h in the timeslot 6-7pm?",
    a="yes. court 14 is free from 6-7pm"
)


In [16]:
from openai import OpenAI

# OpenAI client used by the grader below. No credentials are passed here, so it
# presumably reads the API key from the environment (TODO confirm).
client = OpenAI()

def check_answer(qa: QAPair, prompt: str, answer: str) -> str:
    """Ask GPT-4o to grade an AI answer against the reference solution.

    Args:
        qa: The question and its reference solution.
        prompt: The prompt that was sent to the model under test.
        answer: The answer the model under test produced.

    Returns:
        The grader's markdown reply (feedback, grade, old prompt, improved
        prompt). An empty string is returned if the API reply has no text.
    """
    # Plain string: the system prompt has no placeholders, so no f-prefix.
    system_prompt = """\
You are a helpful assistant. \
The user asked a question and received an answer from an AI system. \

Your task is to check whether the answer of the AI is factually correct, given the provided solution. \
Provide feedback and then grade the answer.
When the answer is wrong or bad, make a suggestion how the user can improve his prompt.

Answer in the following format:

## Feedback
Your provided feedback.
## Grade
Your given grade.
## Old Prompt
The old prompt the user used.
## Improved Prompt
An improved version of the prompt
"""
    user_prompt = f"""\
Question: {qa.q}
Prompt: {prompt}
Answer: {answer}
Solution: {qa.a}
"""
    response = client.chat.completions.create(
        model="gpt-4o",
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt}
        ]
    )
    content = response.choices[0].message.content
    # `content` is optional in the API response; honour the `-> str` contract
    # instead of handing None back to the caller.
    return content if content is not None else ""

# Run every question/solution pair through the model under test and have
# check_answer critique the reply against the reference solution.
for qa in (qa1, qa2, qa3):
    print("-" * 80)
    print(f"Question:\n{qa.q}\nSolution:\n{qa.a}\n")

    # The question text doubles as the prompt, sent alongside the uploaded image.
    response = model.generate_content(
        [qa.q, image_file],
        request_options={"timeout": 120},
    )
    answer = response.text
    print(f"AI:\n{answer}")

    criticism = check_answer(qa=qa, prompt=qa.q, answer=answer)
    print(f"Criticism:\n{criticism}")

--------------------------------------------------------------------------------
Question:
how many tennis courts does the club have?
Solution:
the tennis club has 17 courts.

AI:
The club has 17 tennis courts.
Criticism:
## Feedback
The answer provided by the AI is correct according to the solution given. Both the answer and the solution state that the club has 17 tennis courts.

## Grade
A

## Old Prompt
how many tennis courts does the club have?

## Improved Prompt
The original prompt is clear and does not require any improvement. However, if you would like to add some context or ensure clarity, you could specify the name of the club:

"How many tennis courts does [specific club name] have?"
--------------------------------------------------------------------------------
Question:
are any courts free for 1h between 17:00 and 19:00?
Solution:
available timeslots between 17:00 and 19:00 are: court 14 17:00-18:00, court 14 17:30-18:30, court 16 17:00-18:00.

AI:
No, there are no free c

In [8]:
# Per-court reference solution for the 17:00-18:00 slot, one entry per court
# (1-17), each split into its two half-hour slots.
# Requires the cell defining QAPair to have been executed first.
qa4 = QAPair(
    q="which courts are booked in the timeslot 17:00-18:00.",
    a="""
**court 1**
17:00-17:30 (blocked)
17:30-18:00 (blocked)
**court 2**
17:00-17:30 (blocked)
17:30-18:00 (blocked)
**court 3**
17:00-17:30 (blocked)
17:30-18:00 (blocked)
**court 4**
17:00-17:30 (blocked)
17:30-18:00 (blocked)
**court 5**
17:00-17:30 (blocked)
17:30-18:00 (blocked)
**court 6**
17:00-17:30 (blocked)
17:30-18:00 (blocked)
**court 7**
17:00-17:30 (blocked)
17:30-18:00 (blocked)
**court 8**
17:00-17:30 (blocked)
17:30-18:00 (blocked)
**court 9**
17:00-17:30 (blocked)
17:30-18:00 (blocked)
**court 10**
17:00-17:30 (blocked)
17:30-18:00 (blocked)
**court 11**
17:00-17:30 (blocked)
17:30-18:00 (blocked)
**court 12**
17:00-17:30 (blocked)
17:30-18:00 (blocked)
**court 13**
17:00-17:30 (blocked)
17:30-18:00 (blocked)
**court 14**
17:00-17:30 (free)
17:30-18:00 (free)
**court 15**
17:00-17:30 (blocked)
17:30-18:00 (blocked)
**court 16**
17:00-17:30 (free)
17:30-18:00 (free)
**court 17**
17:00-17:30 (blocked)
17:30-18:00 (blocked)

All courts except 14 and 16 are blocked between 17:00 and 18:00.
"""
)

NameError: name 'QAPair' is not defined