# DALLE-3

In [None]:
import os

# Move the working directory two levels up so the `src` package imported in the
# following cells can be resolved. NOTE(review): presumably this lands on the
# project root — confirm against where the notebook is stored.
os.chdir("../../")

In [None]:
from langchain.chat_models import ChatOpenAI

from src.initialization import credential_init


# Run the project's credential setup before the API key is read from the environment.
# NOTE(review): presumably this populates OPENAI_API_KEY — confirm in src.initialization.
credential_init()

# Chat model shared by the chains and agents defined later in the notebook.
model = ChatOpenAI(openai_api_key=os.environ['OPENAI_API_KEY'],
                   model_name="gpt-4o-2024-05-13", temperature=0)

### OpenAI Image API Parameters:

- model: dall-e-3
- size (str): 1024x1024, 1024x1792, 1792x1024
- quality: hd, standard
- style: vivid, natural. Default vivid

In [None]:
from openai import OpenAI

# Hand-written natural language prompt used for the first image request.
prompt = """
A Sumi-e style watercolor painting of mountains during sunset. The sky is depicted with bold splashes of orange, pink, and purple hues, 
blending and overlapping in a dynamic composition. The mountains are represented with expressive brushstrokes, emphasizing their majestic and serene presence. 
The focus is on capturing the essence and mood of the scene rather than detailed realism. The overall effect is serene and contemplative, with a harmonious 
balance of color and form.
"""

# Client created without arguments. NOTE(review): presumably it picks up the API
# key from the environment prepared by credential_init() — confirm.
client = OpenAI()

# Request a single (n=1) 1024x1024 image at "hd" quality from the dall-e-3 model.
response = client.images.generate(
    model="dall-e-3",
    prompt=prompt,
    size="1024x1024",
    quality="hd",
    n=1,
)

# URL of the first (and only) generated image in the response.
image_url = response.data[0].url

In [None]:
# Show the URL extracted from the image-generation response.
image_url

## Save the image to your local computer

In [None]:
import requests
from PIL import Image
from io import BytesIO

# Download the generated image from the URL produced by the previous cell.
# This rebinds `response`, which previously held the image-generation result.
response = requests.get(image_url)

if response.status_code == 200:
    # Decode the downloaded bytes in memory before writing them to disk.
    image = Image.open(BytesIO(response.content))
    # NOTE(review): get_project_dir is not imported in any visible cell of this
    # notebook — confirm where it is defined before running.
    filename = os.path.join(get_project_dir(), 'tutorial', 'Week-8', 'test.jpg')
    image.save(filename)  # Save the image with your preferred file name and format
    print("Image downloaded and saved successfully.")
else:
    print("Failed to retrieve the image. Status code:", response.status_code)

## Two Challenges:

### 1. How to create prompts more efficiently?

There are two types of prompts:

1. Danbooru Tag: masterpiece, best quality, beautiful eyes, clear eyes, detailed eyes, Blue-eyes, 1girl, 20_old, full-body, break, smoking, break, high_color, blue-hair, beauty, black-boots,break, break, Flat vector art, Colorful art, white_shirt, simple_background, blue_background, Ink art, peeking out upper body, Eyes

2. Natural language: A Sumi-e style watercolor painting of mountains during sunset. The sky is depicted with bold splashes of orange, pink, and purple hues, blending and overlapping in a dynamic composition. The mountains are represented with expressive brushstrokes, emphasizing their majestic and serene presence. The focus is on capturing the essence and mood of the scene rather than detailed realism. The overall effect is serene and contemplative, with a harmonious balance of color and form.

For non-native English speakers like us, writing a natural language prompt is challenging, and it can be hard even for native speakers, because such prompts rely on specialized terminology and advanced vocabulary.

由於涉及專業術語和高級詞彙，我們作為非母語英語使用者，發現這個自然語言提示對我們來說是具有挑戰性的，即使對母語使用者來說也是如此。

### 2. How to make it an LCEL?

## Some websites for natural language prompts

- https://leonardo.ai/: An image-generation SaaS. Many of the works there are created with natural language prompts.
- https://blog.mlq.ai/dalle-prompts/: A tutorial on how to come up with natural language prompts.

### Natural Language Prompt Generation

In [None]:
from langchain.prompts import PromptTemplate, ChatPromptTemplate, HumanMessagePromptTemplate, SystemMessagePromptTemplate
from langchain_core.output_parsers import StrOutputParser


# System instructions: the model acts as an art expert that rewrites an image
# description into a detailed DALLE-3 prompt.
system_prompt = PromptTemplate.from_template('''You are a helpful AI assistant and an art expert with extensive knowledge of photography and illustration. 
You excel at creating breathtaking masterpieces using the DALLE-3 model. For this task, you will be provided with a description of an image, and you will 
generate a corresponding DALLE-3 prompt. The prompt should be detailed and descriptive, capturing the essence of the image. The length of the prompt should be 
around 100-500 tokens.''')

# System prompt
system_message = SystemMessagePromptTemplate(prompt=system_prompt)


# The human turn is just the raw description supplied under the `image_desc` key.
human_prompt = PromptTemplate(template="{image_desc}",
                              input_variables=['image_desc'])

# Create a human message prompt template based on the prompt
human_message = HumanMessagePromptTemplate(prompt=human_prompt)

# Combine the system and human messages into one chat prompt.
chat_prompt = ChatPromptTemplate.from_messages([system_message, human_message])

# Chain: chat prompt -> chat model -> string output parser.
nl_prompt_generation_chain = chat_prompt | model | StrOutputParser()

In [None]:
from typing import Dict


def dalle3_worker(kwargs: Dict):
    """
    Request one image from OpenAI's DALL-E 3 model and return its URL.

    Progress is printed to stdout: a start message, the prompt that is sent,
    and the URL of the generated image.

    Parameters:
    kwargs (Dict): Generation settings, read by key:
        - 'nl_prompt' (str): Natural language prompt describing the image. Required;
          a missing key raises KeyError.
        - 'size' (str, optional): Forwarded as `size`. Default is "1024x1024".
        - 'quality' (str, optional): Forwarded as `quality`. Default is "standard".
        - 'style' (str, optional): Forwarded as `style`. Default is "vivid".

    Returns:
    str: The URL of the first image in the API response.

    Example:
    >>> kwargs = {
    ...     "nl_prompt": "A futuristic city skyline at sunset",
    ...     "size": "1024x1024",
    ...     "quality": "hd",
    ...     "style": "natural"
    ... }
    >>> image_url = dalle3_worker(kwargs)
    Start generating image...
    prompt: A futuristic city skyline at sunset
    generated_image: https://example.com/generated_image.png
    >>> print(image_url)
    https://example.com/generated_image.png
    """

    print("Start generating image...")
    nl_prompt = kwargs['nl_prompt']
    print(f"prompt: {nl_prompt}")

    # Collect the request arguments first; optional settings fall back to defaults.
    request_args = {
        "model": "dall-e-3",
        "prompt": nl_prompt,
        "size": kwargs.get("size", "1024x1024"),
        "quality": kwargs.get('quality', 'standard'),
        "style": kwargs.get('style', 'vivid'),
        "n": 1,
    }

    api_response = OpenAI().images.generate(**request_args)
    generated_url = api_response.data[0].url

    print(f"generated_image: {generated_url}")

    return generated_url


def url_to_file(image_url: str):
    """
    Download the image at `image_url` and save it as a JPEG under tutorial/Week-8.

    The file name is the last path segment of the URL with its extension
    replaced by ".jpg". The query string and fragment are ignored, so signed
    URLs (e.g. ".../img-abc.png?st=...&sig=...") yield "img-abc.jpg" instead of
    a name polluted with "?", "&" and "%" characters.

    Parameters:
    image_url (str): The URL of the image to be downloaded.

    Returns:
    None

    Example:
    >>> image_url = "https://example.com/image.jpg"
    >>> url_to_file(image_url)
    Image downloaded and saved successfully.
    """
    from urllib.parse import unquote, urlsplit

    # A timeout keeps a stalled download from hanging the notebook/agent forever.
    response = requests.get(image_url, timeout=60)

    if response.status_code != 200:
        print("Failed to retrieve the image. Status code:", response.status_code)
        return

    # Use only the URL path (no query/fragment) to build the file name.
    url_path = unquote(urlsplit(image_url).path).rstrip("/")
    stem = os.path.splitext(os.path.basename(url_path))[0] or "image"

    # NOTE(review): get_project_dir is not imported in any visible cell of this
    # notebook — confirm where it is defined.
    filename = os.path.join(get_project_dir(), 'tutorial', 'Week-8', stem + '.jpg')

    image = Image.open(BytesIO(response.content))
    # JPEG cannot store an alpha channel or a palette; convert those modes first.
    if image.mode in ("RGBA", "LA", "P"):
        image = image.convert("RGB")
    image.save(filename)  # Format is inferred from the ".jpg" extension
    print("Image downloaded and saved successfully.")

In [None]:
from operator import itemgetter

from langchain_core.runnables import RunnableLambda, RunnableParallel


# End-to-end pipeline: expand `image_desc` into a DALL-E 3 prompt, generate the
# image, then download it. `size` and `quality` are required input keys.
# `style` is optional: it is forwarded when the caller supplies it and otherwise
# falls back to "vivid", the same default dalle3_worker applies.
dalle3_chain = (
    RunnableParallel(
        nl_prompt=itemgetter('image_desc') | nl_prompt_generation_chain,
        size=itemgetter("size"),
        quality=itemgetter("quality"),
        style=RunnableLambda(lambda inputs: inputs.get("style", "vivid")),
    )
    | RunnableLambda(dalle3_worker)
    | RunnableLambda(url_to_file)
)

In [None]:
# Run the pipeline on a tag-style description, requesting a portrait-sized "hd" image.
dalle3_chain.invoke({"size": "1024x1792",
                     "quality": "hd",
                     "image_desc": """
                                 masterpiece, best quality, beautiful eyes, clear eyes, detailed eyes, Blue-eyes, 1girl, 20_old, full-body, 
                                 break, smoking, break, high_color, blue-hair, beauty, black-boots,break, break, Flat vector art, Colorful art, white_shirt, 
                                 simple_background, blue_background, Ink art,peeking out upper body,Eyes, portrait
                                 """
                    })

In [None]:
# Run the pipeline on a second tag-style description at 1024x1792.
dalle3_chain.invoke({"size": "1024x1792",
                     "quality": "hd",
                     "image_desc": """
                                 close-up portrait, black fox ears, animal ear fluff, black fox tail, black hair, red inner hair, hair ornament, 
                                 magatama necklace, fur trim, black short kimono, exquisite design, cat_collar, off-shoulder,wide sleeves, 
                                 long sleeves, obi, miniskirt, perfect model body, a 17-years-old ethereal and glamorously beautiful girl, from above, 
                                 eating donut, holding a donut, a large cup of coffee on table, in a coffee shop, pencil sketch, perfect detail, intricate detail, 
                                 masterpiece, best quality, beauty & aesthetic, sketch
                                 """
                    })


In [None]:
# Same description as the previous cell, this time at the square 1024x1024 size.
dalle3_chain.invoke({"size": "1024x1024",
                     "quality": "hd",
                     "image_desc": """
                                 close-up portrait, black fox ears, animal ear fluff, black fox tail, black hair, red inner hair, hair ornament, 
                                 magatama necklace, fur trim, black short kimono, exquisite design, cat_collar, off-shoulder,wide sleeves, 
                                 long sleeves, obi, miniskirt, perfect model body, a 17-years-old ethereal and glamorously beautiful girl, from above, 
                                 eating donut, holding a donut, a large cup of coffee on table, in a coffee shop, pencil sketch, perfect detail, intricate detail, 
                                 masterpiece, best quality, beauty & aesthetic, sketch
                                 """
                    })

## Use this as a tool for an Agent

In [None]:
from langchain.agents import Tool, AgentExecutor, create_react_agent
from langchain.prompts import PromptTemplate
from langchain.tools import BaseTool

from src.agent.react_zero_shot import prompt_template as zero_shot_prompt_template


class ImageTool(BaseTool):
    """Agent tool that sends an image description through `dalle3_chain`."""

    # Fields are annotated so the overrides are accepted by pydantic-v2-based
    # LangChain releases as well as older ones.
    name: str = "Dalle-3 generator"
    description: str = "Use this tool when you need to create an image"

    def _run(self, query: str) -> str:
        """
        Generate and save an image for the description in `query`.

        Parameters:
        query (str): Free-form description of the image, as provided by the agent.

        Returns:
        str: The literal "Done" once `dalle3_chain` has finished.
        """

        print("\n")
        print(f"image_desc: {query}")
        print("\n")

        # Size, quality and style are fixed for agent-driven generations.
        # NOTE(review): confirm that dalle3_chain forwards "style" to the worker.
        dalle3_chain.invoke({"image_desc": query,
                             "size": "1024x1024",
                             "quality": "hd",
                             "style": "vivid"})

        return "Done"

    def _arun(self, query: str):
        """Async execution is not supported; always raises NotImplementedError."""
        raise NotImplementedError("This tool does not support async")

# Build the agent prompt from the project's zero-shot ReAct template (imported above).
prompt = PromptTemplate.from_template(zero_shot_prompt_template)

# The image generator is the only tool offered to this agent.
tools = [ImageTool()]

zero_shot_agent = create_react_agent(
    llm=model,
    tools=tools,
    prompt=prompt,
)

# Executor that runs the agent with the tools listed above.
agent_executor = AgentExecutor(agent=zero_shot_agent, tools=tools, verbose=True)

In [None]:
# Tag-style description that is embedded in the agent request in the next cell.
image_prompt = """
brown hair, bangs, two side up, twin ponytails, sidelocks, black hat, jewelry, black cheongsam, intricate golden embroidery, long sleeves,
wide sleeves, black shorts, hat ornament, hat flower, a 17-years-old ethereal and glamorous beautiful japanese idol,
translucent skin tone, profound facial features, bright eyes, faint rosy blush, ultra realistic, raw photo, award-winning photo, masterpiece, 
best quality, high resolution, official art, 8k uhd, high fidelity, depth of field, on the top of a skyscaper, mesmetizing city view, 
night
"""

In [None]:
# Ask the agent to generate an image from the tag list defined in `image_prompt`.
agent_executor.invoke({"input": f"Generate in image with the following information: \n {image_prompt}"})

## ACG Characters



In [None]:
from langchain.utilities.tavily_search import TavilySearchAPIWrapper
from langchain.tools.tavily_search import TavilySearchResults

# Tavily web-search tool, built on an API wrapper created with default arguments.
search = TavilySearchAPIWrapper()
tavily_tool = TavilySearchResults(api_wrapper=search)

# Sample query; the next cell reads the 'content' field of the first result.
outputs = tavily_tool.invoke("What is the appearance of Hutou from Genshin")

In [None]:
# Inspect the text content of the first search result.
outputs[0]['content']

# **** 預計第一個小時結束 ****

## LCEL ACG character appearance chain

In [None]:
from langchain.docstore.document import Document
from langchain.agents import Tool, AgentExecutor, create_react_agent
from langchain.tools import BaseTool
from langchain.prompts import PromptTemplate, ChatPromptTemplate, HumanMessagePromptTemplate, SystemMessagePromptTemplate
from langchain_core.output_parsers import StrOutputParser


def tavily_content_parser(outputs):

    """
    Wrap the 'content' field of each search result in a Document.

    Parameters:
    outputs (list): Result dictionaries; each one must provide a 'content' key
                    holding the text of that result.

    Returns:
    list: One Document per entry of `outputs`, in the same order, with
          `page_content` set to that entry's 'content' value.

    Example:
    >>> outputs = [
    ...     {'content': 'This is document 1 content.'},
    ...     {'content': 'This is document 2 content.'}
    ... ]
    >>> documents = tavily_content_parser(outputs)
    >>> for doc in documents:
    ...     print(doc.page_content)
    This is document 1 content.
    This is document 2 content.
    """

    parsed = []
    for result in outputs:
        text = result['content']
        parsed.append(Document(page_content=text))

    return parsed


# System instructions: describe a character's appearance from the supplied
# context, using a fixed five-heading layout.
system_prompt = PromptTemplate.from_template('''You are a helpful AI assistant with deep knowledge of anime, manga, 
and mobile games. You will generate the face, body, attire, hairstyle, and accessories of an character in great 
detail with data provided from the `context` The output should look like:

 - Face:
 - Body:
 - Attire:
 - Hairstyle:
 - Accessories:

''')

system_message = SystemMessagePromptTemplate(prompt=system_prompt)


# The human turn carries the retrieved documents under the `context` variable.
human_prompt = PromptTemplate(template="context: {context}",
                                  input_variables=['context'])

# Create a human message prompt template based on the prompt
human_message = HumanMessagePromptTemplate(prompt=human_prompt)

# Create a chat prompt template from system and human message prompt templates
chat_prompt = ChatPromptTemplate.from_messages([system_message, human_message])

# Construct the processing chain: the input query is searched with Tavily, the
# results are converted to Documents and injected as `context`, then the chat
# model's reply is parsed to a string.
acg_chain = {'context': tavily_tool|tavily_content_parser} | chat_prompt | model | StrOutputParser()



In [None]:
# Try the appearance chain with a plain search query as input.
acg_chain.invoke("What is the appearance of Hutou from Genshin")

In [None]:
class ACGLLMTool(BaseTool):
    """Agent tool that returns a character appearance description from `acg_chain`."""

    # Fields are annotated so the overrides are accepted by pydantic-v2-based
    # LangChain releases as well as older ones.
    # NOTE(review): the backticks are part of the tool name itself; confirm the
    # agent reproduces them when it selects this tool.
    name: str = "`Anime character design generator`"
    description: str = "Use this tool to generate and explore detailed designs for anime and ACG (Animation, Comics, and Games) characters."

    def _run(self, query: str) -> str:
        """
        Describe the character referred to by `query`.

        Parameters:
        query (str): Search query passed unchanged to `acg_chain`.

        Returns:
        str: The string produced by `acg_chain` for that query.
        """
        return acg_chain.invoke(query)

    def _arun(self, query: str):
        """Async execution is not supported; always raises NotImplementedError."""
        raise NotImplementedError("This tool does not support async")
        
        
class ImageTool(BaseTool):
    """Agent tool that sends an image description through `dalle3_chain`."""

    # Fields are annotated so the overrides are accepted by pydantic-v2-based
    # LangChain releases as well as older ones.
    name: str = "Dalle-3 generator"
    description: str = """The Dalle-3 generator is a tool for creating images. It's best used when you're considering the need for an ACG (anime, comics, games) character design. 
    Before using this tool, you may want to utilize the `Anime character design generator` to gather relevant information. The generated image will maintain the specified art style."""

    def _run(self, query: str) -> str:
        """
        Generate and save an image for the description in `query`.

        Parameters:
        query (str): Free-form description of the image, as provided by the agent.

        Returns:
        str: The literal "Done" once `dalle3_chain` has finished.
        """

        print("\n")
        print(f"image_desc: {query}")
        print("\n")

        # Size, quality and style are fixed for agent-driven generations.
        # NOTE(review): confirm that dalle3_chain forwards "style" to the worker.
        dalle3_chain.invoke({"image_desc": query,
                             "size": "1024x1024",
                             "quality": "hd",
                             "style": "vivid"})

        return "Done"

    def _arun(self, query: str):
        """Async execution is not supported; always raises NotImplementedError."""
        raise NotImplementedError("This tool does not support async")

        
# ReAct-style prompt defined inline for this agent. The template exposes the
# placeholders {tools}, {tool_names}, {input} and {agent_scratchpad}.
prompt_template = """
Answer the following questions as best you can. You have access to the following tools:

{tools}

Use the following format:

Question: the input question you must answer

Thought: you should always think about what to do

Action: the action to take, should be one of [{tool_names}]

Action Input: the input to the action

Observation: the result of the action

... (this Thought/Action/Action Input/Observation can repeat N times)

Thought: I now know the final answer

Final Answer: the final answer to the original input question

Begin!

Question: {input}

Thought:{agent_scratchpad}
"""

prompt = PromptTemplate.from_template(prompt_template)

# This agent can both look up a character's appearance and generate images.
tools = [ImageTool(), ACGLLMTool()]

zero_shot_agent = create_react_agent(
    llm=model,
    tools=tools,
    prompt=prompt,
)

# Rebinds `agent_executor` to an executor that uses the two tools above.
agent_executor = AgentExecutor(agent=zero_shot_agent, tools=tools, verbose=True)

In [None]:
# Ask the agent for a character image in a specific art style.
# (Plain string: the original f-prefix had no placeholders to interpolate.)
agent_executor.invoke({"input": "Generate an image of Hutou from Genshim in pastol art style"})

In [None]:
# Same character request, with extra photo-realism keywords appended to the input.
agent_executor.invoke({"input": """Generate an image of Hutou from Genshim. Extra image quality prompt  
- ultra realistic
- realistic photo
- raw photo
- high resolution
- depth of field
- bokeh
"""})

In [None]:
# Variant of the previous request with "Cosplay" added to the keyword list.
agent_executor.invoke({"input": """Generate an image of Hutou from Genshim. Extra image quality prompt  
- ultra realistic
- realistic photo
- raw photo
- Cosplay
- high resolution
- depth of field
- bokeh
"""})

In [None]:
# Cosplay portrait request phrased as a single natural language sentence.
agent_executor.invoke({"input": "Generate an award-winning portrait photo of a 17-years-old japanese girl cosplaying Hutou from Genshim" })

## Audible 有聲書

- 文轉語音: TTS tool
- 文轉圖: Image tool

### Children Book Image Generator

In [None]:
# System instructions: turn a story paragraph into a DALLE-3 prompt for a
# pencil-and-ink style children's illustration.
system_prompt = PromptTemplate.from_template('''You are a helpful AI assistant and an art expert with extensive knowledge of illustration. 
You excel at creating Pencil and Ink Style illustrations for 6-year-old children using the DALLE-3 model. This style is characterized by 
detailed line work, often in black and white or with minimal color, and has a classic, timeless feel. For this task, you will be provided with 
a paragraph of a story, and you will generate a corresponding DALLE-3 prompt which captures the storyline. The prompt should be 
detailed and descriptive, capturing the essence of the image. The length of the prompt should be around 100-500 tokens.''')

# System prompt
system_message = SystemMessagePromptTemplate(prompt=system_prompt)

# The human turn is the story paragraph supplied under the `story` key.
human_prompt = PromptTemplate(template="{story}",
                              input_variables=['story'])

# Create a human message prompt template based on the prompt
human_message = HumanMessagePromptTemplate(prompt=human_prompt)

chat_prompt = ChatPromptTemplate.from_messages([system_message, human_message])

# Rebinds the name `nl_prompt_generation_chain` to the story-based prompt chain.
nl_prompt_generation_chain = chat_prompt | model | StrOutputParser()

# Story paragraph -> DALL-E 3 prompt -> generated image URL (no download step).
# `size` and `quality` are required input keys. `style` is optional: it is
# forwarded when the caller supplies it and otherwise falls back to "vivid",
# the same default dalle3_worker applies.
image_chain = (
    RunnableParallel(
        nl_prompt=itemgetter('story') | nl_prompt_generation_chain,
        size=itemgetter("size"),
        quality=itemgetter("quality"),
        style=RunnableLambda(lambda inputs: inputs.get("style", "vivid")),
    )
    | RunnableLambda(dalle3_worker)
)

In [None]:
# System instructions: act as a storyteller for kindergarten kids, producing one
# short chapter at a time.
system_prompt = PromptTemplate.from_template('''You are a helpful AI assistant who likes children. 
You are great storyteller and know how to create content for kindergarten kids. A short chapter is 
created once at a time.''')

# System prompt
system_message = SystemMessagePromptTemplate(prompt=system_prompt)

# The human turn is the request text supplied under the `input` key.
human_prompt = PromptTemplate(template="{input}",
                              input_variables=['input'])

# Create a human message prompt template based on the prompt
human_message = HumanMessagePromptTemplate(prompt=human_prompt)

chat_prompt = ChatPromptTemplate.from_messages([system_message, human_message])

# Chain: chat prompt -> chat model -> string output parser.
story_chain = chat_prompt | model | StrOutputParser()

In [None]:
import json

from langchain.agents import create_structured_chat_agent
from langchain_core.prompts import MessagesPlaceholder

# Module-level OpenAI client used by TTSTool.tts.
client = OpenAI(api_key=os.environ['OPENAI_API_KEY'])


class TTSTool(BaseTool):
    """Agent tool that converts text to speech and saves it as an mp3 file."""

    # Fields are annotated so the overrides are accepted by pydantic-v2-based
    # LangChain releases as well as older ones.
    name: str = "Text to Sound"
    description: str = "Use this tool to generate an audio file"

    def _run(self, text: str) -> str:
        """
        Synthesize `text` and write the audio to tutorial/Week-8/chapter_1.mp3.

        Parameters:
        text (str): The text to read aloud.

        Returns:
        str: Path of the written mp3 file. The file name is fixed, so each call
             overwrites the previous recording.
        """

        response = self.tts(text)

        # NOTE(review): get_project_dir is not imported in any visible cell of
        # this notebook — confirm where it is defined.
        speech_file_path = os.path.join(get_project_dir(), "tutorial/Week-8/chapter_1.mp3")

        # write_to_file replaces the deprecated stream_to_file helper; both write
        # the full response body to the given path.
        response.write_to_file(speech_file_path)

        return speech_file_path

    def _arun(self, text: str):
        """Async execution is not supported; always raises NotImplementedError."""
        raise NotImplementedError("This tool does not support async")

    def tts(self, text: str):
        """
        Call the OpenAI speech endpoint for `text`.

        Uses the "tts-1" model with the "nova" voice and returns the API
        response object unchanged.
        """

        response = client.audio.speech.create(
            model="tts-1",
            voice="nova",
            input=text,
        )

        return response
    
    
class ImageTool(BaseTool):
    """Agent tool that illustrates a story paragraph and saves the image locally."""

    # Fields are annotated so the overrides are accepted by pydantic-v2-based
    # LangChain releases as well as older ones.
    name: str = "Dalle-3 generator"
    description: str = "The Dalle-3 geneator is a tool for creating images."

    def _run(self, text: str) -> str:
        """
        Generate an illustration for `text` and save it as chapter_1.jpg.

        Parameters:
        text (str): Story paragraph passed to `image_chain` under the 'story' key.

        Returns:
        str: Path of the saved image on success; otherwise the failure message,
             so the agent does not mistake a failed download for a saved file.
        """

        # NOTE(review): confirm that image_chain forwards "style" to the worker.
        url = image_chain.invoke({"story": text,
                                  "size": "1024x1024",
                                  "quality": "standard",
                                  "style": "natural"})

        # A timeout keeps a stalled download from hanging the agent forever.
        response = requests.get(url, timeout=60)

        if response.status_code != 200:
            message = f"Failed to retrieve the image. Status code: {response.status_code}"
            print(message)
            return message

        # NOTE(review): get_project_dir is not imported in any visible cell of
        # this notebook — confirm where it is defined.
        filename = os.path.join(get_project_dir(), 'tutorial', 'Week-8', "chapter_1" + '.jpg')

        image = Image.open(BytesIO(response.content))
        image.save(filename)  # Save the image with your preferred file name and format
        print("Image downloaded and saved successfully.")

        return filename

    def _arun(self, text: str):
        """Async execution is not supported; always raises NotImplementedError."""
        raise NotImplementedError("This tool does not support async")
           
            
# Build the agent prompt from the project's zero-shot ReAct template (imported earlier).
prompt = PromptTemplate.from_template(zero_shot_prompt_template)

# Three tools: text-to-speech, image generation, and a story writer that wraps
# story_chain.invoke in a plain Tool.
tools = [TTSTool(), 
         ImageTool(),
         Tool(name="StoryTeller",
              func=story_chain.invoke,
              description="useful for create story",
        )]

zero_shot_agent = create_react_agent(
    llm=model,
    tools=tools,
    prompt=prompt,
)

# Rebinds `agent_executor` to an executor that uses the three tools above.
agent_executor = AgentExecutor(agent=zero_shot_agent, tools=tools, verbose=True)

In [None]:
# One request that asks for a story chapter, a matching image and an mp3 recording.
# The trailing backslash continues the string literal onto the next line.
agent_executor.invoke({"input": "Create a chapter of a baby owl capturing a rodent in the night as his dinner. \
After having the final answer, create a corresponding image and record the story as an mp3"})

In [None]:
# Notes kept as a bare string literal; the text is not assigned to a name or
# used by any other visible cell.
"""
text: A baby owl named Ollie with big, round eyes and soft, fluffy feathers, capturing a rodent in the moonlit forest. The scene is set at night with twinkling stars and tall trees in the background.
chapter: Ollie the Brave Baby Owl
"""