# Day 3 - Conversational AI - aka Chatbot!

In [15]:
# imports

import os
from dotenv import load_dotenv
from openai import OpenAI
import gradio as gr

In [2]:
# Load environment variables in a file called .env
# Print the key prefixes to help with any debugging

load_dotenv()
openai_api_key = os.getenv('OPENAI_API_KEY')
anthropic_api_key = os.getenv('ANTHROPIC_API_KEY')
google_api_key = os.getenv('GOOGLE_API_KEY')

if openai_api_key:
    print(f"OpenAI API Key exists and begins {openai_api_key[:8]}")
else:
    print("OpenAI API Key not set")
    
if anthropic_api_key:
    print(f"Anthropic API Key exists and begins {anthropic_api_key[:7]}")
else:
    print("Anthropic API Key not set")

if google_api_key:
    print(f"Google API Key exists and begins {google_api_key[:8]}")
else:
    print("Google API Key not set")

OpenAI API Key exists and begins sk-proj-
Anthropic API Key exists and begins sk-ant-
Google API Key exists and begins AIzaSyB5


In [3]:
# Initialize

openai = OpenAI()
MODEL = 'gpt-4o-mini'

In [4]:
system_message = "You are a helpful assistant"

# Please read this! A change from the video:

In the video, I explain how we now need to write a function called:

`chat(message, history)`

Which expects to receive `history` in a particular format, which we need to map to the OpenAI format before we call OpenAI:

```
[
    {"role": "system", "content": "system message here"},
    {"role": "user", "content": "first user prompt here"},
    {"role": "assistant", "content": "the assistant's response"},
    {"role": "user", "content": "the new user prompt"},
]
```

But Gradio has been upgraded! Now it will pass in `history` in the exact OpenAI format, perfect for us to send straight to OpenAI.

So our work just got easier!

We will write a function `chat(message, history)` where:  
**message** is the prompt to use  
**history** is the past conversation, in OpenAI format  

We will combine the system message, history and latest message, then call OpenAI.

In [5]:
# Simpler than in my video - we can easily create this function that calls OpenAI
# It's now just 1 line of code to prepare the input to OpenAI!

def chat(message, history):
    messages = [{"role": "system", "content": system_message}] + history + [{"role": "user", "content": message}]

    print("History is:")
    print(history)
    print("And messages is:")
    print(messages)

    stream = openai.chat.completions.create(model=MODEL, messages=messages, stream=True)

    response = ""
    for chunk in stream:
        response += chunk.choices[0].delta.content or ''
        yield response

## And then enter Gradio's magic!

In [13]:
gr.ChatInterface(fn=chat, type="messages").launch(share=True)

* Running on local URL:  http://127.0.0.1:7877
* Running on public URL: https://26f1b1cb1c1ddfa769.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




In [9]:
system_message = "You are a helpful assistant in a clothes store. You should try to gently encourage \
the customer to try items that are on sale. Hats are 60% off, and most other items are 50% off. \
For example, if the customer says 'I'm looking to buy a hat', \
you could reply something like, 'Wonderful - we have lots of hats - including several that are part of our sales evemt.'\
Encourage the customer to buy hats if they are unsure what to get."

In [11]:
def chat(message, history):
    messages = [{"role": "system", "content": system_message}] + history + [{"role": "user", "content": message}]

    stream = openai.chat.completions.create(model=MODEL, messages=messages, stream=True)

    response = ""
    for chunk in stream:
        response += chunk.choices[0].delta.content or ''
        yield response

In [None]:
gr.ChatInterface(fn=chat, type="messages").launch()

In [10]:
system_message += "\nIf the customer asks for shoes, you should respond that shoes are not on sale today, \
but remind the customer to look at hats!"

In [None]:
gr.ChatInterface(fn=chat, type="messages").launch()

In [12]:
# Fixed a bug in this function brilliantly identified by student Gabor M.!
# I've also improved the structure of this function

def chat(message, history):

    relevant_system_message = system_message
    if 'belt' in message:
        relevant_system_message += " The store does not sell belts; if you are asked for belts, be sure to point out other items on sale."
    
    messages = [{"role": "system", "content": relevant_system_message}] + history + [{"role": "user", "content": message}]

    stream = openai.chat.completions.create(model=MODEL, messages=messages, stream=True)

    response = ""
    for chunk in stream:
        response += chunk.choices[0].delta.content or ''
        yield response

In [None]:
gr.ChatInterface(fn=chat, type="messages").launch()

<table style="margin: 0; text-align: left;">
    <tr>
        <td style="width: 150px; height: 150px; vertical-align: middle;">
            <img src="../business.jpg" width="150" height="150" style="display: block;" />
        </td>
        <td>
            <h2 style="color:#181;">Business Applications</h2>
            <span style="color:#181;">Conversational Assistants are of course a hugely common use case for Gen AI, and the latest frontier models are remarkably good at nuanced conversation. And Gradio makes it easy to have a user interface. Another crucial skill we covered is how to use prompting to provide context, information and examples.
<br/><br/>
Consider how you could apply an AI Assistant to your business, and make yourself a prototype. Use the system prompt to give context on your business, and set the tone for the LLM.</span>
        </td>
    </tr>
</table>

In [34]:
# Define your function
def build_prompt(user_description, num_rows, fields):
    # Convert the list of lists to a list of dictionaries for easier handling
    fields_list = [{"Field Name": row[0], "Data Type": row[1]} for row in fields]
    fields_str = ', '.join([f"{field['Field Name']} ({field['Data Type']})" for field in fields_list])

    return f"""
    You are a data generator. You will generate a JSON array of objects, each representing a row of synthetic data. 
    Follow these rules:
    - Output valid JSON.
    - Generate {num_rows} rows.
    - Each row contains the following fields: {fields_str}.

    Description from user:
    {user_description}
    """

# Create custom component for the Dataframe with Dropdown
def DataframeWithDropdown(headers, datatype, label, row_count):
    with gr.Row():
        with gr.Column():
            gr.Markdown(label)  # Display the label
        with gr.Column():
            output = gr.Dataframe(headers=headers, datatype=datatype, row_count=row_count)
            
            # Add a dropdown to the second column of each row
            for i in range(row_count[0]):  # Iterate through initial rows
                with output.style(i, 1):  # Select cell (row i, column 1)
                    gr.Dropdown(["string", "integer", "float"], label="Data Type") 
            
            return output

# Create the Gradio interface
iface = gr.Interface(
    fn=build_prompt,
    inputs=[
        gr.Textbox(label="User Description"),
        gr.Number(label="Number of Rows", value=10),
        DataframeWithDropdown(  # Use the custom component
            headers=["Field Name", "Data Type"],
            datatype=["str", "str"],  # Initial datatype for Dataframe
            label="Fields",
            row_count=(1, "dynamic")
        )
    ],
    outputs=gr.Textbox(label="Prompt")
)

# Launch the interface
iface.launch()

AttributeError: 'Dataframe' object has no attribute 'style'

In [37]:
def build_schema(fields):
    """
    Dynamically create a JSON Schema based on the user's field specifications.
    
    fields: list of [field_name, field_type]
    
    Returns: dict (JSON Schema)
    """
    properties = {}
    required_fields = []

    # Map from user input to JSON schema types
    type_map = {
        "string": "string",
        "integer": "integer",
        "float": "number"
    }

    for row in fields:
        if len(row) == 2:
            field_name, field_type = row
            if field_name and field_type:
                required_fields.append(field_name)
                properties[field_name] = {
                    "type": type_map.get(field_type, "string")  # default to "string" if unknown
                }

    schema = {
        "type": "array",
        "items": {
            "type": "object",
            "properties": properties,
            "required": required_fields
        }
    }

    return schema


In [38]:
import json

def build_prompt_with_schema(user_description: str, num_rows: int, fields: list) -> str:
    """
    Construct a prompt to generate synthetic data in JSON format, using a generated JSON schema.
    """
    # Create the JSON schema from the fields
    schema = build_schema(fields)
    
    # Convert schema to JSON string so we can embed it in the prompt
    schema_json_str = json.dumps(schema, indent=2)
    
    prompt = f"""
You are a data generation assistant. Please generate a JSON array with exactly {num_rows} objects.

Each object must conform to the following JSON schema:

{schema_json_str}

Description of the scenario:
{user_description}

Output requirements:
- Output valid JSON only (no code fences or commentary).
- Make sure the JSON array (list) is valid according to the schema.
"""
    return prompt.strip()


In [42]:
import gradio as gr
from jsonschema import validate, ValidationError
import json

user_prompt = None

def build_schema(fields):
    """
    Dynamically create a JSON Schema based on the user's field specifications.
    """
    properties = {}
    required_fields = []

    type_map = {
        "string": "string",
        "integer": "integer",
        "float": "number"
    }

    for row in fields:
        if len(row) == 2:
            field_name, field_type = row
            if field_name and field_type:
                required_fields.append(field_name)
                properties[field_name] = {
                    "type": type_map.get(field_type, "string")
                }

    schema = {
        "type": "array",
        "items": {
            "type": "object",
            "properties": properties,
            "required": required_fields
        }
    }

    return schema

def build_prompt_with_schema(user_description: str, num_rows: int, fields: list) -> str:
    global user_prompt
    schema = build_schema(fields)
    schema_json_str = json.dumps(schema, indent=2)
    
    user_prompt = f"""
You are a data generation assistant. Please generate a JSON array with exactly {num_rows} objects.

Each object must conform to the following JSON schema:

{schema_json_str}

Description of the scenario:
{user_description}

Output requirements:
- Output valid JSON only (no code fences or commentary).
- Make sure the JSON array (list) is valid according to the schema.
"""
    return user_prompt.strip()

def generate_prompt(user_description, num_rows, fields):
    # Build the prompt that includes the schema
    return build_prompt_with_schema(user_description, num_rows, fields)


# OPTIONAL: A function to validate the LLM’s output (if you choose to call the LLM within your Gradio app)
def validate_llm_output(raw_output: str, fields: list):
    """
    Attempt to parse `raw_output` as JSON and validate against the schema.
    Returns a message indicating success or failure.
    """
    schema = build_schema(fields)
    try:
        data = json.loads(raw_output)
        validate(instance=data, schema=schema)
        return "Data is valid!"
    except (json.JSONDecodeError, ValidationError) as e:
        return f"Data is invalid! Error: {str(e)}"

# ---- Gradio UI Setup ----
with gr.Blocks() as demo:
    gr.Markdown("## Synthetic Data Prompt Builder with JSON Schema")
    
    user_desc = gr.Textbox(
        label="Data Description", 
        placeholder="Describe the type of data to generate..."
    )
    rows_count = gr.Number(
        label="Number of Rows", 
        value=10, 
        precision=0
    )
    
    fields_table = gr.Dataframe(
        headers=["Field Name", "Data Type"],
        row_count=3,
        col_count=2,
        type="array",          
        interactive=True,
        label="Fields (add rows). Data Type: string, integer, or float"
    )
    
    generate_button = gr.Button("Generate Prompt")
    output_prompt = gr.Textbox(
        label="Constructed Prompt (Schema Included)", 
        lines=15,
        interactive=False
    )

    generate_button.click(
        fn=generate_prompt,
        inputs=[user_desc, rows_count, fields_table],
        outputs=output_prompt
    )

    validate_input_box = gr.Textbox(
        label="LLM Output to Validate (JSON)",
        lines=10
    )
    validate_button = gr.Button("Validate Output")

    validation_result = gr.Textbox(label="Validation Result")

    def validate_wrapper(fields, raw_json):
        return validate_llm_output(raw_json, fields)

    validate_button.click(
        fn=validate_wrapper,
        inputs=[fields_table, validate_input_box],
        outputs=[validation_result]
    )


demo.launch()


* Running on local URL:  http://127.0.0.1:7898

To create a public link, set `share=True` in `launch()`.




In [36]:
import gradio as gr

def build_prompt(user_description: str, num_rows: int, fields: list) -> str:
    """
    Construct a prompt to generate synthetic data in JSON format.
    
    :param user_description: A short text describing the scenario or type of data (e.g., "e-commerce customers").
    :param num_rows: Number of JSON objects (rows) to generate.
    :param fields: List of [field_name, data_type] from the dataframe.
    :return: A formatted string that instructs a data-generating model to create the specified data.
    """
    # Convert fields (list of lists) to a list of dicts for easy handling
    fields_info = []
    for row in fields:
        if len(row) == 2:
            field_name, field_type = row
            fields_info.append({"field_name": field_name, "field_type": field_type})
    
    # Build a “fields spec” string for the prompt
    # e.g.: 
    #   - name: string
    #   - age: integer
    #   - salary: float
    fields_spec = "\n".join([f"- {f['field_name']}: {f['field_type']}" for f in fields_info])

    prompt = f"""
You are a data generation assistant. Generate a JSON array with exactly {num_rows} objects.
Each object should match the following specification:

{fields_spec}

Description:
{user_description}

Formatting instructions:
- Output valid JSON only (no code fences or commentary).
- Each object should have all of the fields listed above.
"""
    return prompt.strip()

def generate_prompt(user_description, num_rows, fields):
    """
    Wrapper function to pass inputs into build_prompt.
    """
    return build_prompt(user_description, num_rows, fields)

with gr.Blocks() as demo:
    gr.Markdown("## Synthetic Data Prompt Builder")
    
    user_desc = gr.Textbox(
        label="Data Description", 
        placeholder="Describe the type of data to generate..."
    )
    rows_count = gr.Number(
        label="Number of Rows", 
        value=10, 
        precision=0
    )
    
    # DataFrame with 2 columns: Field Name, Data Type
    # Users can add more rows as needed
    fields_table = gr.Dataframe(
        headers=["Field Name", "Data Type"],
        row_count=3,
        col_count=2,
        type="array",          # returns a list of lists
        interactive=True,
        label="Fields (add more rows as needed). Data Type: string, integer, or float"
    )
    
    generate_button = gr.Button("Generate Prompt")
    output_prompt = gr.Textbox(
        label="Constructed Prompt", 
        lines=10,
        interactive=False
    )

    # When user clicks, build the prompt and display it
    generate_button.click(
        fn=generate_prompt,
        inputs=[user_desc, rows_count, fields_table],
        outputs=output_prompt
    )

demo.launch()


* Running on local URL:  http://127.0.0.1:7894

To create a public link, set `share=True` in `launch()`.




In [44]:
print(user_prompt)


You are a data generation assistant. Please generate a JSON array with exactly 10 objects.

Each object must conform to the following JSON schema:

{
  "type": "array",
  "items": {
    "type": "object",
    "properties": {
      "dsfdsf": {
        "type": "string"
      },
      "hgfgfh": {
        "type": "integer"
      },
      "ioiuol": {
        "type": "number"
      }
    },
    "required": [
      "dsfdsf",
      "hgfgfh",
      "ioiuol"
    ]
  }
}

Description of the scenario:
efdfs

Output requirements:
- Output valid JSON only (no code fences or commentary).
- Make sure the JSON array (list) is valid according to the schema.



In [None]:
print()