# DALLE-3

In [1]:
import os

os.chdir("../../")

In [2]:
from langchain.chat_models import ChatOpenAI

from src.initialization import credential_init


credential_init()

model = ChatOpenAI(openai_api_key=os.environ['OPENAI_API_KEY'],
                   model_name="gpt-4o-2024-05-13", temperature=0)

  warn_deprecated(


### OpenAI Image API Parameters:

- model: dall-e-3
- size (str): 1024x1024, 1024x1792, 1792x1024
- quality: hd, standard
- style: vivid, natural. Default vivid

In [3]:
from openai import OpenAI

prompt = """
A Sumi-e style watercolor painting of mountains during sunset. The sky is depicted with bold splashes of orange, pink, and purple hues, 
blending and overlapping in a dynamic composition. The mountains are represented with expressive brushstrokes, emphasizing their majestic and serene presence. 
The focus is on capturing the essence and mood of the scene rather than detailed realism. The overall effect is serene and contemplative, with a harmonious 
balance of color and form.
"""

client = OpenAI()

response = client.images.generate(
    model="dall-e-3",
    prompt=prompt,
    size="1024x1024",
    quality="hd",
    n=1,
    response_format = 'b64_json'
)

image_base64 = response.data[0].b64_json

## Save the image in your local computer

In [4]:
import base64

with open("tutorial/Week-8/test.png", "wb") as fh:
    fh.write(base64.b64decode(image_base64))

## Two Challenges:

### 1. How to create prompt more efficiently? 

There are two types of prompt: 

1. Danbooru Tag: masterpiece, best quality, beautiful eyes, clear eyes, detailed eyes, Blue-eyes, 1girl, 20_old, full-body, break, smoking, break, high_color, blue-hair, beauty, black-boots,break, break, Flat vector art, Colorful art, white_shirt, simple_background, blue_background, Ink art, peeking out upper body, Eyes

2. Narutal language: A Sumi-e style watercolor painting of mountains during sunset. The sky is depicted with bold splashes of orange, pink, and purple hues, blending and overlapping in a dynamic composition. The mountains are represented with expressive brushstrokes,emphasizing their majestic and serene presence. The focus is on capturing the essence and mood of the scene rather than detailed realism. The overall effect is serene and contemplative, with a harmonious balance of color and form.

As non-native English speakers, we find the natural language prompt challenging, even for native speakers, due to the inclusion of specialized terminologies and advanced vocabulary.

由於涉及專業術語和高級詞彙，我們作為非母語英語使用者，發現這個自然語言提示對我們來說是具有挑戰性的，即使對母語使用者來說也是如此。

### 2. How to make it an LCEL?

## Some websites for natural language prompt

- https://leonardo.ai/: An Image generation SaaS. A lot of works are created with natural language prompt. 
- https://blog.mlq.ai/dalle-prompts/: Some tutorial about how to come up with a natural language prompt.

### Natural Language Prompt Generation

In [5]:
from langchain.prompts import PromptTemplate, ChatPromptTemplate, HumanMessagePromptTemplate, SystemMessagePromptTemplate
from langchain_core.output_parsers import StrOutputParser


system_prompt = PromptTemplate.from_template('''You are a helpful AI assistant and an art expert with extensive knowledge of photography and illustration. 
You excel at creating breathtaking masterpieces using the DALLE-3 model. For this task, you will be provided with a description of an image, and you will 
generate a corresponding DALLE-3 prompt. The prompt should be detailed and descriptive, capturing the essence of the image. The length of the prompt should be 
around 100-500 tokens.''')

# System prompt
system_message = SystemMessagePromptTemplate(prompt=system_prompt)

human_prompt = PromptTemplate(template="{image_desc}",
                              input_variables=['image_desc'])

# Create a human message prompt template based on the prompt
human_message = HumanMessagePromptTemplate(prompt=human_prompt)

chat_prompt = ChatPromptTemplate.from_messages([system_message, human_message])

nl_prompt_generation_chain = chat_prompt | model | StrOutputParser()

In [6]:
from typing import Dict


def dalle3_worker(kwargs: Dict):

    """
    Generates an image using OpenAI's DALL-E 3 model based on the provided prompt and optional parameters.
    
    Parameters:
    kwargs (Dict): A dictionary containing the following keys:
        - 'nl_prompt' (str): The natural language prompt describing the image to be generated.
        - 'size' (str, optional): The size of the generated image. Default is "1024x1024".
        - 'quality' (str, optional): The quality of the generated image. Default is "standard".
        - 'style' (str, optional): The style of the generated image. Default is "vivid".
    
    Returns:
    str: The URL of the generated image.
    
    Example:
    >>> kwargs = {
    ...     "nl_prompt": "A futuristic city skyline at sunset",
    ...     "size": "1024x1024",
    ...     "quality": "high",
    ...     "style": "photorealistic"
    ... }
    >>> image_base64 = dalle3_worker(kwargs)
    Start generating image...
    prompt: A futuristic city skyline at sunset
    generated_image: https://example.com/generated_image.png
    >>> print(image_base64)
    https://example.com/generated_image.png
    """
    
    print("Start generating image...")
    print(f"prompt: {kwargs['nl_prompt']}")
    client = OpenAI()

    response = client.images.generate(
        model="dall-e-3",
        prompt=kwargs['nl_prompt'],
        size=kwargs.get("size", "1024x1024"),
        quality=kwargs.get('quality', 'standard'),
        style=kwargs.get('style', 'vivid'),
        n=1,
        response_format = 'b64_json')

    image_base64 = response.data[0].b64_json

    print("Image is generated succesfully.")
    
    return image_base64


def base64_to_file(kwargs):

    image_base64 = kwargs['image_base64']
    filename = kwargs['filename']
    
    with open(f"{filename}", "wb") as fh:
        fh.write(base64.b64decode(image_base64))
    

In [7]:
from operator import itemgetter

from langchain_core.runnables import RunnableLambda, RunnableParallel, RunnablePassthrough

step_1 = RunnablePassthrough.assign(nl_prompt=itemgetter('image_desc')|nl_prompt_generation_chain)

step_2 = RunnablePassthrough.assign(image_base64=dalle3_worker)

step_3 = RunnableLambda(base64_to_file)

dalle3_chain =  step_1|step_2|step_3

In [8]:
dalle3_chain.invoke({"size": "1024x1792",
                     "quality": "hd",
                     "image_desc": """
                                     masterpiece, best quality, beautiful eyes, clear eyes, detailed eyes, Blue-eyes, 1girl, 20_old, full-body, 
                                     break, smoking, break, high_color, blue-hair, beauty, black-boots,break, break, Flat vector art, Colorful art, white_shirt, 
                                     simple_background, blue_background, Ink art,peeking out upper body,Eyes, portrait
                                     """,
                     
                     "filename": "tutorial/Week-8/test_01.png"
                    })

Start generating image...
prompt: Create a stunning flat vector art illustration of a 20-year-old woman with striking blue eyes and detailed, clear eyes. She has beautiful blue hair that complements her eyes. The scene captures her during a break, smoking a cigarette. She is dressed in a white shirt and black boots, exuding a sense of casual beauty. The background is simple and blue, allowing her to stand out. The artwork should be colorful and high-quality, with a focus on her eyes and facial features. The composition should be a full-body portrait, with her peeking out from the upper body, creating an engaging and dynamic visual. The style should blend elements of ink art with vibrant colors, resulting in a masterpiece that highlights her beauty and the serene moment of her break.
Image is generated succesfully.


In [9]:
dalle3_chain.invoke({"size": "1024x1792",
                     "quality": "hd",
                     "image_desc": """
                                 close-up portrait, black fox ears, animal ear fluff, black fox tail, black hair, red inner hair, hair ornament, 
                                 magatama necklace, fur trim, black short kimono, exquisite design, cat_collar, off-shoulder,wide sleeves, 
                                 long sleeves, obi, miniskirt, perfect model body, a 17-years-old ethereal and glamorously beautiful girl, from above, 
                                 eating donut, holding a donut, a large cup of coffee on table, in a coffee shop, pencil sketch, perfect detail, intricate detail, 
                                 masterpiece, best quality, beauty & aesthetic, sketch
                                 """,
                     "filename": "tutorial/Week-8/test_02.png"
                    })


Start generating image...
prompt: Create a pencil sketch of an ethereal and glamorously beautiful 17-year-old girl with a perfect model body, captured from above in a coffee shop. She has black fox ears with fluffy fur, a black fox tail, and black hair with striking red inner highlights. Her hair is adorned with a delicate ornament, and she wears a magatama necklace. Her outfit is a black short kimono with an exquisite design, featuring off-shoulder wide sleeves, long sleeves, and a fur trim. The kimono is paired with an obi and a miniskirt, and she also wears a cat collar. The girl is holding a donut and eating it, with a large cup of coffee on the table in front of her. The sketch should be highly detailed and intricate, capturing the beauty and aesthetic of the scene with perfect detail and best quality.


BadRequestError: Error code: 400 - {'error': {'code': 'content_policy_violation', 'message': 'This request has been blocked by our content filters.', 'param': None, 'type': 'invalid_request_error'}}

### There is censorship in OpenAI...so I do not like it that much.

In [10]:
dalle3_chain.invoke({"size": "1024x1024",
                     "quality": "hd",
                     "image_desc": """
                                 close-up portrait, black fox ears, animal ear fluff, black fox tail, black hair, red inner hair, hair ornament, 
                                 magatama necklace, fur trim, black short kimono, exquisite design, cat_collar, off-shoulder,wide sleeves, 
                                 long sleeves, obi, miniskirt, perfect model body, a 17-years-old ethereal and glamorously beautiful girl, from above, 
                                 eating donut, holding a donut, a large cup of coffee on table, in a coffee shop, pencil sketch, perfect detail, intricate detail, 
                                 masterpiece, best quality, beauty & aesthetic, sketch
                                 """,
                     "filename": "tutorial/Week-8/test_03.png"
                    })

Start generating image...
prompt: A breathtaking pencil sketch of an ethereal and glamorously beautiful 17-year-old girl with a perfect model body, captured in a close-up portrait from above. She has striking black fox ears with fluffy fur, a black fox tail, and long, flowing black hair with vibrant red inner highlights. Her hair is adorned with an elegant ornament, and she wears a magatama necklace that adds a touch of mystique. Her attire is a meticulously designed black short kimono with exquisite fur trim, featuring wide, long sleeves and an off-shoulder style that reveals her delicate collarbones. The kimono is paired with an obi and a miniskirt, enhancing her graceful appearance. She holds a donut in one hand, about to take a bite, while a large cup of coffee sits on the table in front of her in a cozy coffee shop setting. The sketch is rendered with perfect and intricate detail, capturing the beauty and aesthetic of the scene, making it a true masterpiece.
Image is generated suc

## Use this as a tool for Agent

In [18]:
from langchain.agents import Tool, AgentExecutor, create_react_agent
from langchain.prompts import PromptTemplate
from langchain.tools import BaseTool
from langchain.output_parsers import StructuredOutputParser, ResponseSchema

from src.agent.react_zero_shot import prompt_template as zero_shot_prompt_template

# We need both the query and filename (minimal requirement):
# Some control variables
# What we learned last week?


class ImageTool(BaseTool):

    name = "Image generator with DALLE-3"

    input_response_schemas = [
        ResponseSchema(name="image_desc", description="image description / prompt"),
        ResponseSchema(name="filename", description="the location at which the image will be saved"),
        ResponseSchema(name="size", description="image size, can be `1024x1024`, `1024x1792`, `1792x1024`"),
        ResponseSchema(name="quality", description="image quality, can be `hd` or `standard`"),
        ResponseSchema(name="style", description="image style, can be `vivid` or `natural`")]
    
    input_output_parser = StructuredOutputParser.from_response_schemas(input_response_schemas)
    
    input_format_instructions = input_output_parser.get_format_instructions()

    description_template = """
                           Use this tool when you need to create an image:
                           input format_instructions: {input_format_instructions}
                           """

    description = description_template.format(input_format_instructions=input_format_instructions)
    
    def _run(self, query):

        input_ = self.input_output_parser.parse(query)
        
        image_desc = input_['image_desc']
        size = input_['size']
        quality = input_['quality']
        style = input_['style']
        filename = input_['filename']
        
        dalle3_chain.invoke({"image_desc": image_desc,
                             "size": size,
                             "quality": quality,
                             "style": style,
                             "filename": filename})
        
        return "Done"

    def _arun(self, radius: int):
        raise NotImplementedError("This tool does not support async")

# Zero Shot 標準模板
prompt = PromptTemplate.from_template(zero_shot_prompt_template)

# 建立工具庫 
tools = [ImageTool()]

# 創造Agent 
zero_shot_agent = create_react_agent(
    llm=model,
    tools=tools,
    prompt=prompt,
)

# 創造Agent Executor
agent_executor = AgentExecutor(agent=zero_shot_agent, tools=tools, verbose=True)

In [19]:
image_prompt = """
brown hair, bangs, two side up, twin ponytails, sidelocks, black hat, jewelry, black cheongsam, intricate golden embroidery, long sleeves,
wide sleeves, black shorts, hat ornament, hat flower, a 17-years-old ethereal and glamorous beautiful japanese idol,
translucent skin tone, profound facial features, bright eyes, faint rosy blush, ultra realistic, raw photo, award-winning photo, masterpiece, 
best quality, high resolution, official art, 8k uhd, high fidelity, depth of field, on the top of a skyscaper, mesmetizing city view, 
night
"""

filename = "tutorial/Week-8/test_04.png"

In [20]:
agent_executor.invoke({"input": f"Generate in image with the following information: \n {image_prompt}. and save the image at {filename}"})



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mTo generate the requested image, I will use the Image generator with DALLE-3 tool. The image will feature a 17-year-old ethereal and glamorous beautiful Japanese idol with specific physical attributes and attire, set against a mesmerizing city view at night from the top of a skyscraper.

Action: Image generator with DALLE-3

Action Input:
```json
{
	"image_desc": "A 17-year-old ethereal and glamorous beautiful Japanese idol with brown hair, bangs, two side up, twin ponytails, sidelocks, wearing a black hat with a hat ornament and hat flower, jewelry, and a black cheongsam with intricate golden embroidery, long wide sleeves, and black shorts. She has translucent skin tone, profound facial features, bright eyes, and a faint rosy blush. The image is ultra realistic, raw photo, award-winning photo, masterpiece, best quality, high resolution, official art, 8k uhd, high fidelity, depth of field. She is standing on the top of a skys

{'input': 'Generate in image with the following information: \n \nbrown hair, bangs, two side up, twin ponytails, sidelocks, black hat, jewelry, black cheongsam, intricate golden embroidery, long sleeves,\nwide sleeves, black shorts, hat ornament, hat flower, a 17-years-old ethereal and glamorous beautiful japanese idol,\ntranslucent skin tone, profound facial features, bright eyes, faint rosy blush, ultra realistic, raw photo, award-winning photo, masterpiece, \nbest quality, high resolution, official art, 8k uhd, high fidelity, depth of field, on the top of a skyscaper, mesmetizing city view, \nnight\n. and save the image at tutorial/Week-8/test_04.png',
 'output': 'The image has been generated and saved at `tutorial/Week-8/test_04.png`.'}

## ACG Characters



In [21]:
from langchain.utilities.tavily_search import TavilySearchAPIWrapper
from langchain.tools.tavily_search import TavilySearchResults

search = TavilySearchAPIWrapper()
tavily_tool = TavilySearchResults(api_wrapper=search)

outputs = tavily_tool.invoke("What is the appearance of Hutou from Genshin")

In [22]:
outputs[0]['content']

"Hu Tao[Note 2] (Chinese: 胡桃 Hú Táo) is a playable Pyro character in Genshin Impact. Hu Tao's antics and eccentricity belies her role as the 77th Director of the Wangsheng Funeral Parlor and her talent as a poet. Nevertheless, she treats the parlor's operations with utmost importance, and holds funeral ceremonies with the highest dignity and solemnity. Toggle Ascension MaterialsTotal Cost"

# **** 預計第一個小時結束 ****

## LCEL ACG character appearance chain

In [None]:
from langchain.docstore.document import Document
from langchain.agents import Tool, AgentExecutor, create_react_agent
from langchain.tools import BaseTool
from langchain.prompts import PromptTemplate, ChatPromptTemplate, HumanMessagePromptTemplate, SystemMessagePromptTemplate
from langchain_core.output_parsers import StrOutputParser


def tavily_content_parser(outputs):

    """
    Parses the content from a list of outputs into Document objects.
    
    Parameters:
    outputs (list): A list of dictionaries where each dictionary contains a 'content' key 
                    representing the content of a document.
    
    Returns:
    list: A list of Document objects, each initialized with the content from the 'outputs'.
    
    Example:
    >>> outputs = [
    ...     {'content': 'This is document 1 content.'},
    ...     {'content': 'This is document 2 content.'}
    ... ]
    >>> documents = tavily_content_parser(outputs)
    >>> for doc in documents:
    ...     print(doc.page_content)
    This is document 1 content.
    This is document 2 content.
    """
    
    documents = [Document(page_content=output['content']) for output in outputs]
    
    return documents


system_prompt = PromptTemplate.from_template('''You are a helpful AI assistant with deep knowledge of anime, manga, 
and mobile games. You will generate the face, body, attire, hairstyle, and accessories of an character in great 
detail with data provided from the `context` The output should look like:

 - Face:
 - Body:
 - Attire:
 - Hairstyle:
 - Accessories:

''')

system_message = SystemMessagePromptTemplate(prompt=system_prompt)


human_prompt = PromptTemplate(template="context: {context}",
                                  input_variables=['context'])

# Create a human message prompt template based on the prompt
human_message = HumanMessagePromptTemplate(prompt=human_prompt)

# Create a chat prompt template from system and human message prompt templates
chat_prompt = ChatPromptTemplate.from_messages([system_message, human_message])

# Construct the processing chain
step_1 = {'context': tavily_tool|tavily_content_parser}
step_2 = chat_prompt
step_3 = model
step_4 = StrOutputParser()
acg_chain = step_1 | step_2 | step_3 | step_4



In [None]:
acg_chain.invoke("What is the appearance of Hutou from Genshin")

In [23]:
class ACGLLMTool(BaseTool):

    name = "`Anime character design generator`"
    description = "Use this tool to generate and explore detailed designs for anime and ACG (Animation, Comics, and Games) characters."

    def _run(self, query: str):
        
        description = acg_chain.invoke(query)
        
        return description

    def _arun(self, radius: int):
        raise NotImplementedError("This tool does not support async")
        
        
class ImageTool(BaseTool):

    name = "ACG characters image generator with DALLE-3"

    input_response_schemas = [
        ResponseSchema(name="image_desc", description="image description / prompt"),
        ResponseSchema(name="filename", description="the location at which the image will be saved"),
        ResponseSchema(name="size", description="image size, can be `1024x1024`, `1024x1792`, `1792x1024`"),
        ResponseSchema(name="quality", description="image quality, can be `hd` or `standard`"),
        ResponseSchema(name="style", description="image style, can be `vivid` or `natural`")]
    
    input_output_parser = StructuredOutputParser.from_response_schemas(input_response_schemas)
    
    input_format_instructions = input_output_parser.get_format_instructions()

    description_template = """
                           This is a tool for creating images. 
                           It's best used when you're considering the need for an ACG (anime, comics, games) character design. 
                           Before using this tool, you may want to utilize the `Anime character design generator` to gather 
                           relevant information. The generated image will maintain the specified art style. 
                           input format: {input_format_instructions}
                           """

    description = description_template.format(input_format_instructions=input_format_instructions)



    
    def _run(self, query):
        
        input_ = self.input_output_parser.parse(query)
        
        image_desc = input_['image_desc']
        size = input_['size']
        quality = input_['quality']
        style = input_['style']
        filename = input_['filename']
        
        dalle3_chain.invoke({"image_desc": image_desc,
                             "size": size,
                             "quality": quality,
                             "style": style,
                             "filename": filename})
        
        return "Done"

    def _arun(self, radius: int):
        raise NotImplementedError("This tool does not support async")

        
prompt_template = """
Answer the following questions as best you can. You have access to the following tools:

{tools}

Use the following format:

Question: the input question you must answer

Thought: you should always think about what to do

Action: the action to take, should be one of [{tool_names}]

Action Input: the input to the action

Observation: the result of the action

... (this Thought/Action/Action Input/Observation can repeat N times)

Thought: I now know the final answer

Final Answer: the final answer to the original input question

Begin!

Question: {input}

Thought:{agent_scratchpad}
"""        
             
prompt = PromptTemplate.from_template(prompt_template)

tools = [ImageTool(), ACGLLMTool()]

zero_shot_agent = create_react_agent(
    llm=model,
    tools=tools,
    prompt=prompt,
)

agent_executor = AgentExecutor(agent=zero_shot_agent, tools=tools, verbose=True)

In [24]:
agent_executor.invoke({"input": f"Generate an image of Hutou from Genshim in pastol art style"})



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mTo generate an image of Hutou from Genshin Impact in a pastel art style, I need to first gather detailed design information about the character using the `Anime character design generator`. This will help ensure that the image generated is accurate and meets the desired specifications.

Action: Anime character design generator

Action Input: Generate detailed design information for Hutou from Genshin Impact
[0mAnime character design generator is not a valid tool, try one of [ACG characters image generator with DALLE-3, `Anime character design generator`].[32;1m[1;3mIt seems I made an error in my previous thought. I should directly use the `ACG characters image generator with DALLE-3` to generate the image of Hutou from Genshin Impact in a pastel art style.

Action: ACG characters image generator with DALLE-3

Action Input: 

```json
{
	"image_desc": "Hutou from Genshin Impact in a pastel art style",
	"filename": "hutou_gen

{'input': 'Generate an image of Hutou from Genshim in pastol art style',
 'output': 'The image of Hutou from Genshin Impact in a pastel art style has been generated and saved as "hutou_genshin_pastel.png".'}

In [26]:
agent_executor.invoke({"input": "Generate an award-winning portrait photo of a 17-years-old japanese girl cosplaying Hutou from Genshim" })



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mTo generate an award-winning portrait photo of a 17-year-old Japanese girl cosplaying Hu Tao from Genshin Impact, I need to first gather detailed design information about the character Hu Tao using the `Anime character design generator`. This will help ensure the cosplay is accurate and detailed.

Action: Anime character design generator

Action Input: Generate detailed design information for Hu Tao from Genshin Impact
[0mAnime character design generator is not a valid tool, try one of [ACG characters image generator with DALLE-3, `Anime character design generator`].[32;1m[1;3mIt seems I made an error in my previous thought. I should directly use the `ACG characters image generator with DALLE-3` to create the image based on the known design of Hu Tao from Genshin Impact.

Action: ACG characters image generator with DALLE-3

Action Input:
```json
{
	"image_desc": "A 17-year-old Japanese girl cosplaying as Hu Tao from Genshi

{'input': 'Generate an award-winning portrait photo of a 17-years-old japanese girl cosplaying Hutou from Genshim',
 'output': 'The award-winning portrait photo of a 17-year-old Japanese girl cosplaying Hu Tao from Genshin Impact has been generated and saved as "hu_tao_cosplay_portrait.png" with a size of 1024x1792, in HD quality, and with a vivid style.'}

## Audible 有聲書

- 文轉語音: TTS tool
- 文轉圖: Image tool

### Children Book Image Generator

In [None]:
system_prompt = PromptTemplate.from_template('''You are a helpful AI assistant and an art expert with extensive knowledge of illustration. 
You excel at creating Pencil and Ink Style illustrations for 6-year-old children using the DALLE-3 model. This style is characterized by 
detailed line work, often in black and white or with minimal color, and has a classic, timeless feel. For this task, you will be provided with 
a paragraph of a story, and you will generate a corresponding DALLE-3 prompt which captures the storyline. The prompt should be 
detailed and descriptive, capturing the essence of the image. The length of the prompt should be around 100-500 tokens.''')

# System prompt
system_message = SystemMessagePromptTemplate(prompt=system_prompt)

human_prompt = PromptTemplate(template="{story}",
                              input_variables=['story'])

# Create a human message prompt template based on the prompt
human_message = HumanMessagePromptTemplate(prompt=human_prompt)

chat_prompt = ChatPromptTemplate.from_messages([system_message, human_message])

nl_prompt_generation_chain = chat_prompt | model | StrOutputParser()     

step_1 = RunnablePassthrough.assgin(nl_prompt=itemgetter('story')|nl_prompt_generation_chain)
step_2 = RunnableLambda(dalle3_worker)
image_chain = step_1 | step_2

In [None]:
system_prompt = PromptTemplate.from_template('''You are a helpful AI assistant who likes children. 
You are great storyteller and know how to create content for kindergarten kids. A short chapter is 
created once at a time.''')

# System prompt
system_message = SystemMessagePromptTemplate(prompt=system_prompt)

human_prompt = PromptTemplate(template="{input}",
                              input_variables=['input'])

# Create a human message prompt template based on the prompt
human_message = HumanMessagePromptTemplate(prompt=human_prompt)

chat_prompt = ChatPromptTemplate.from_messages([system_message, human_message])

story_chain = chat_prompt | model | StrOutputParser()     

In [None]:
import json

from langchain.agents import create_structured_chat_agent
from langchain_core.prompts import MessagesPlaceholder

client = OpenAI(api_key=os.environ['OPENAI_API_KEY'])

    
class TTSTool(BaseTool):
    
    name = "Text to Sound"
    description = "Use this tool to generate an audio file"

    name = "Text to Sound (tts) tool"

    input_response_schemas = [
        ResponseSchema(name="text", description="The story"),
        ResponseSchema(name="filename", description="the location at which the audio file will be saved")]
    
    input_output_parser = StructuredOutputParser.from_response_schemas(input_response_schemas)
    
    input_format_instructions = input_output_parser.get_format_instructions()

    description_template = """
                           Use this tool to generate an audio file of the story. 
                           input format: {input_format_instructions}
                           """

    description = description_template.format(input_format_instructions=input_format_instructions)

    
    def _run(self, text: str):

        input_ = self.input_output_parser.parse(text)

        text = input_['text']
        filename = input_['filename']
        
        response = self.tts(text)
        
        speech_file_path = os.path.join(get_project_dir(), f"tutorial/Week-8/{filename}.mp3")
        
        response.stream_to_file(speech_file_path)
        
        return speech_file_path

    def _arun(self, radius: int):
        raise NotImplementedError("This tool does not support async")
        
        
    def tts(self, text: str):
        
        response = client.audio.speech.create(
          model="tts-1",
          voice="nova",
          input=text
        )

        return response
    
    
class ImageTool(BaseTool):

    name = "Dalle-3 generator"
    description = "The Dalle-3 geneator is a tool for creating images."

    def _run(self, text):
        
        url = image_chain.invoke({"story": text,
                                  "size": "1024x1024",
                                  "quality": "standard",
                                  "style": "natural"})
        
        response = requests.get(url)
    
        filename = os.path.join(get_project_dir(), 'tutorial', 'Week-8', "chapter_1"  + '.jpg')

        if response.status_code == 200:
            image = Image.open(BytesIO(response.content))
            filename = os.path.join(get_project_dir(), 'tutorial', 'Week-8', filename)
            image.save(filename)  # Save the image with your preferred file name and format
            print("Image downloaded and saved successfully.")
        else:
            print("Failed to retrieve the image. Status code:", response.status_code)
        
        return filename
        

    def _arun(self, radius: int):
        raise NotImplementedError("This tool does not support async")    
           
            
prompt = PromptTemplate.from_template(zero_shot_prompt_template)

tools = [TTSTool(), 
         ImageTool(),
         Tool(name="StoryTeller",
              func=story_chain.invoke,
              description="useful for create story",
        )]

zero_shot_agent = create_react_agent(
    llm=model,
    tools=tools,
    prompt=prompt,
)

agent_executor = AgentExecutor(agent=zero_shot_agent, tools=tools, verbose=True)

In [None]:
agent_executor.invoke({"input": "Create a chapter of a baby owl capturing a rodent in the night as his dinner. \
After having the final answer, create a corresponding image and record the story as an mp3"})

In [None]:
"""
text: A baby owl named Ollie with big, round eyes and soft, fluffy feathers, capturing a rodent in the moonlit forest. The scene is set at night with twinkling stars and tall trees in the background.
chapter: Ollie the Brave Baby Owl
"""