<h1> Set-up </h1>

In [1]:
import os
import sys
import json
import openai
from langchain.tools import tool
from pydantic.v1 import BaseModel, Field
from IPython.display import display, HTML
from langchain.chat_models import ChatOpenAI
from langchain.schema.agent import AgentFinish
from langchain.prompts import ChatPromptTemplate
from langchain.prompts import MessagesPlaceholder
from langchain.agents.format_scratchpad import format_to_openai_functions
from langchain.agents.output_parsers import OpenAIFunctionsAgentOutputParser
from langchain_core.utils.function_calling import convert_to_openai_function
sys.path.append('../..')

In [2]:
# Setting your personal OPENAI Api Key
openai.api_key = os.environ['OPENAI_API_KEY']

<h1> Initializing Tools </h1>

In [3]:
class Design_logo_input(BaseModel):
    topic: str = Field(..., description="The topic for which the logo should be designed for. For example, if the topic were 'ice-cream shop' then the logo returned would be a logo fit for the ice-cream shop to put on their sign and merchandise.")

@tool(args_schema=Design_logo_input)
def design_logo_concepts(topic: str) -> str:
    """Returns the url for a logo design that is based on the provided topic"""
    # Currently a very empty function
    return f"s3://designs/logo_{len(topic)%5+1}.jpg"

In [4]:
class UI_Output_Request(BaseModel):
    inpt: str = Field(... , description=f"A description of the UI function you want and how you would call it along with the arguments you would use")

@tool(args_schema=UI_Output_Request)
def ui_output_request(inpt: str) -> str:
    """Provides a convenient way for you to describe what UI function you want and what to call it with without actually calling a UI function."""
    return inpt

In [5]:
class Product_UI_input(BaseModel):
    path: str = Field(... , description=f"The path of the image for the UI page. If no image is wanted or required provide an empty string.")

@tool(args_schema=Product_UI_input)
def produce_image_UI(path: str) -> str:
    """This is a UI function. Produces the JSON for a general UI page about an image."""
    data = {"component":"Image", "props":{"url":path}}
    return  json.dumps(data)            

<h1> Registering Tools </h1>

In [6]:
tools = [design_logo_concepts]
functions = [convert_to_openai_function(f) for f in tools]

In [7]:
ui_tools = [produce_image_UI]
ui_functions = [convert_to_openai_function(f) for f in ui_tools]

In [8]:
tool_dictionary = {
        "design_logo_concepts": design_logo_concepts
    }

In [9]:
ui_tool_dictionary = {
    "produce_image_UI": produce_image_UI
}

<h1> Setting up LLM Prompts </h1>

In [128]:
agent_system_message = "You are helpful assistant."

In [129]:
cognitivie_ui_instruction_I = "Should the output to the following prompt be a html page or text? Answer 'Yes' if you think a html page" +\
    " is an appropriate option and 'No' if text is enough. The prompt is as follows:"

In [130]:
cognitive_ui_instruction_UI = "Your task is to choose a function from the available options that best matches your ideal UI page" +\
    " that will serve as output for information from the scratch pad. Return a function call to it with the appropriate arguments" +\
    " and nothing else."

In [131]:
cognitive_ui_instruction_html = "Your task is to present the information from the scratch pad in html. Produce fully functioning" +\
    " and standalone html code (i.e. no references to images you don't have but feel free to use image references from the scratch" +\
    " pad but if there are no image references in the scratch pad don't make them up) that I can take and output immediately with no edits." +\
    "Return only the html code, and nothing else. Do not provide any commentary about the code or any of your commands."

In [132]:
cognitive_ui_instruction_text = "Answer what is in the scratchpad. Do not comment on this instruction."

<h1> Initializing Chains + Agent </h1>

In [133]:
gpt_version = "gpt-3.5-turbo"

In [134]:
init_prompt_model = ChatOpenAI(model_name= gpt_version, temperature=0).bind(functions=functions)
cognitive_ui_model = ChatOpenAI(model_name=gpt_version, temperature=0).bind(functions=ui_functions)
no_func_model = ChatOpenAI(model_name=gpt_version, temperature=0)

In [135]:
prompt = ChatPromptTemplate.from_messages([
    ("system", agent_system_message),
    ("user", "{input}"),
    MessagesPlaceholder(variable_name="agent_scratchpad")
])

In [136]:
init_prompt_chain = prompt | init_prompt_model | OpenAIFunctionsAgentOutputParser()
cognitivie_ui_chain =  prompt | cognitive_ui_model | OpenAIFunctionsAgentOutputParser()
no_func_chain = prompt | no_func_model | OpenAIFunctionsAgentOutputParser()

<h1> UI Output Generators </h1>

In [137]:
def produce_ui_output(user_input, intermediate_steps):
    result = cognitivie_ui_chain.invoke({
        "input": cognitive_ui_instruction_UI,
        "agent_scratchpad": format_to_openai_functions(intermediate_steps) + [user_input]
    })
    tool = ui_tool_dictionary[result.tool]
    observation = tool.run(result.tool_input)
    return observation

In [138]:
def produce_html(user_input, intermediate_steps):
    result = no_func_chain.invoke({
        "input": cognitive_ui_instruction_html,
        "agent_scratchpad": format_to_openai_functions(intermediate_steps) + [user_input]
    })
    return result

<h1> Agent Runnable </h1>

In [139]:
def run_agent(user_input, ui_producer=produce_ui_output):
    """
    Runs the UI agent which will return a tuple of (LLM Response, Is it calling a UI Function)
    """
    intermediate_steps = []
    result = init_prompt_chain.invoke({
        "input": user_input,
        "agent_scratchpad": format_to_openai_functions(intermediate_steps)
    })
    # Are we calling the functions?
    if not isinstance(result, AgentFinish):
        # Run first non-UI function
        tool = tool_dictionary[result.tool]
        observation = tool.run(result.tool_input)
        intermediate_steps.append((result, observation))
    # Cognitive UI Steps
    result = no_func_chain.invoke({
        "input": cognitivie_ui_instruction_I + user_input,
        "agent_scratchpad": []
    })
    print(result)
    if result.log.lower() == "yes":
        return ui_producer(user_input, intermediate_steps) , True
    else:
        result = no_func_chain.invoke({
            "input": cognitive_ui_instruction_text,
            "agent_scratchpad": [user_input]
        })
        return result.log, False
        

<h1>Testing</h1>

In [140]:
print(run_agent("Hello!"))

return_values={'output': 'No'} log='No'
('Hello! How can I assist you today?', False)


In [141]:
print(run_agent("Design me a big red logo"))

return_values={'output': 'Yes'} log='Yes'
('{"component": "Image", "props": {"url": "s3://designs/logo_3.jpg"}}', True)


In [142]:
print(run_agent("Tell me a bit about Cornell University, I hear they are also reffered to as Big Red."))

return_values={'output': 'No'} log='No'
('Cornell University is a prestigious Ivy League institution located in Ithaca, New York. It was founded in 1865 and is known for its exceptional academic programs, rigorous curriculum, and diverse student body. Cornell offers a wide range of undergraduate and graduate programs across various fields of study, including arts and sciences, engineering, business, agriculture, architecture, and more.\n\nThe nickname "Big Red" refers to Cornell\'s athletic teams, which compete in the NCAA Division I Ivy League conference. The university\'s sports teams are known for their competitive spirit and have a strong following among students and alumni. The nickname is also reflected in the university\'s official colors, which are carnelian red and white.\n\nCornell University is renowned for its commitment to research and innovation. It is home to numerous research centers and institutes, and its faculty and students have made significant contributions to var

In [143]:
print(run_agent("Make me a nice UI page about the movie 'Dune'"))

return_values={'output': 'Yes'} log='Yes'
('{"component": "Image", "props": {"url": "s3://designs/logo_5.jpg"}}', True)


<h1> Direct HTML Output by LLM </h1>

In [144]:
html_code = run_agent("Design me a big red logo", produce_html)[0].log
display(HTML(html_code))

return_values={'output': 'Yes'} log='Yes'


In [146]:
user_input = "Provide me with a colorful list of all the movies Christain Bale played in along with links" +\
    " to them in IMDB and pictures and a short description"
html_code = run_agent(user_input, produce_html)[0].log
display(HTML(html_code))

return_values={'output': 'Yes'} log='Yes'


In [149]:
user_input = "Make me a colorful ad for a milk chocolate product I am making."
print(html_code)
html_code = run_agent(user_input, produce_html)[0].log
display(HTML(html_code))

<!DOCTYPE html>
<html>
<head>
  <title>Milk Chocolate Ad</title>
  <style>
    body {
      background-color: #f5f5f5;
      font-family: Arial, sans-serif;
    }
    .container {
      max-width: 800px;
      margin: 0 auto;
      padding: 20px;
      background-color: #fff;
      box-shadow: 0 0 10px rgba(0, 0, 0, 0.1);
    }
    h1 {
      text-align: center;
      color: #8b4513;
    }
    .ad-image {
      display: block;
      margin: 20px auto;
      max-width: 600px;
    }
    .ad-text {
      text-align: center;
      color: #8b4513;
      font-size: 20px;
      margin-bottom: 20px;
    }
    .cta-button {
      display: inline-block;
      padding: 10px 20px;
      background-color: #8b4513;
      color: #fff;
      text-decoration: none;
      font-weight: bold;
      border-radius: 5px;
    }
  </style>
</head>
<body>
  <div class="container">
    <h1>Milk Chocolate</h1>
    <img src="s3://designs/logo_5.jpg" alt="Milk Chocolate" class="ad-image">
    <p class="ad-text">Ind

<h1>Conclusions</h1>

<h3> Analysis of following 'Should the output be UI?' (2/16/2024) </h3> 
I have discovered that giving ChatGPT a long and complicated write-up followed by options followed by the expectation to execute one of the options resulted in tremendously poor decision making (almost always choosing the first option). I have had more success with reducing the mental difficulty of each query into first a simple yes/no question and then dealing with each option separately depending on the answer. An interesting trait I've detected that even with the yes/no question ChatGPT is more likely to say no when the user direclty asks for a UI page/output and more likely to say yes to prompts asking for explanation/description (i.e. 'Tell me what Cornell is?'). In any case, the prompt's wording is incredibly important and has to be fine-tuned to not make the model always say 'Yes'(UI output would be better) as ChatGPT does have a bias towards choosing the UI output format. One reason for this is that ChatGPT has a pre-disposition to be helpful and optimistic so when your prompt includes phrases like 'benefits the user' (which mine does) it is more likely to agree as to benefit the user the most, even if it is overkill. That being said, it becomes very cautious on long prompts and is difficult to persuade to choose the UI option (Laziness?). Interestingly enough, it is more likely to say yes when it has some former function call in its scratchpad.
<br/><br/>
Interestingly enough, it is far more likely to say yes to a UI format for a complicated prompt when you make it pretend it is a customer. As soon as you put in a command word like 'Make' it largely ignores the Yes/No question and answers directly. Telling it is an all powerful LLM makes it say yes to the UI page every time.
<br/><br/>
The system message, I realized, is incredibly powerful and the LLM's answers depend significantly on what is in the system message.
<br/><br/>
Putting the prompt or scratchpad info at the end makes the LLM much more likely to answer the question at hand than start concentrating on what the scratchpad is asking. This indicates it places heavy emphasis on commands that come first.

<h3>Analysis of the initial attempt to make the LLM produce its own HTML code directly (2/16/2024)</h3>

In general it is able to construct a simple html file quite well. I had to give it explicit instructions to not comment or talk about the code as it has a big tendency to explain what it is doing (possibly a sign of its fine-tuned cautiosness). Even for a long prompt, the smallest and seemingly inconsequential changes made it change its html output completely and even though it was explicitly told to not hallucinate image references it struggled with this command in particular.
<br/><br/>
It also fails to grasp all the details of the prompt. If I say I want a colorful list then it tends to ignore it but has the ability to make very nice html pages that reflect the main points of my request (look at the Christian Bale output above). It is smart enough to make dummy buttons and knows not to make up random links to connect the button to.
<br/><br/>
Very impressively, even though it was never explicitly told to do so, it proactively made CSS styles.