Install the required packages

In [1]:
# PyTorch stack, installed from the CUDA 12.4 wheel index.
%pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu124
# Widget UI, Hugging Face client, and the diffusion/transformer libraries imported below.
%pip install ipywidgets huggingface_hub diffusers transformers matplotlib accelerate

Looking in indexes: https://download.pytorch.org/whl/cu124
Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.


Import the required packages

In [2]:
import os

import ipywidgets as widgets
from IPython.display import display, clear_output
from diffusers import StableDiffusionXLPipeline, EulerDiscreteScheduler
from huggingface_hub import InferenceClient
import torch
import matplotlib.pyplot as plt

Select the compute device (GPU if available, otherwise CPU)

In [3]:
# Use the GPU when CUDA is usable; otherwise run on the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

Accessing the API using the token

In [4]:
# Hosted endpoint for the chat model. The repo id is the same one passed as
# `model=` when requesting completions (Meta-Llama-3-8B-Instruct).
API_URL = "https://api-inference.huggingface.co/models/meta-llama/Meta-Llama-3-8B-Instruct"

# Never embed an access token in the notebook: anyone who can read the file can
# use it. The token is taken from the HF_TOKEN environment variable instead;
# when it is unset, None is passed and huggingface_hub falls back to its own
# credential lookup (e.g. a token stored by `huggingface-cli login`).
# NOTE(review): the token that used to be hard-coded here should be revoked.
client = InferenceClient(api_key=os.environ.get("HF_TOKEN"))

Generating text using the LLM

In [5]:
system_prompt = ""


def generate_text(prompt):
    """Ask the chat model to answer ``prompt`` and return the reply text.

    The prompt is sent as a single ``user`` message through the module-level
    ``client``; the content of the first returned choice is handed back.
    """
    user_message = {"role": "user", "content": prompt}
    sampling_options = {
        "max_tokens": 500,
        "temperature": 1.3,
        "top_p": 0.95,
    }
    response = client.chat.completions.create(
        model="meta-llama/Meta-Llama-3-8B-Instruct",
        messages=[user_message],
        **sampling_options,
    )
    first_choice = response.choices[0]
    return first_choice.message.content

Generating images using the image model

In [6]:
# Half precision is only a safe choice on the GPU: on the CPU many float16
# kernels are missing or very slow, so fall back to float32 when `device`
# is "cpu".
pipe_dtype = torch.float16 if device == "cuda" else torch.float32

image_pipe = StableDiffusionXLPipeline.from_pretrained(
    "etri-vilab/koala-lightning-700m",
    torch_dtype=pipe_dtype,
).to(device)

# Swap in an Euler scheduler built from the pipeline's own scheduler config,
# overriding the timestep spacing to "trailing".
image_pipe.scheduler = EulerDiscreteScheduler.from_config(
    image_pipe.scheduler.config,
    timestep_spacing="trailing",
)

def generate_art(prompt_text):
    """Render two 1024x1024 images for ``prompt_text`` with the image pipeline.

    Returns the ``images`` attribute of the pipeline result.
    """
    # Traits the image model is steered away from, joined into one
    # comma-separated negative prompt.
    unwanted_traits = (
        "ugly",
        "tiling",
        "poorly drawn hands",
        "poorly drawn feet",
        "poorly drawn face",
        "out of frame",
        "extra limbs",
        "disfigured",
        "deformed",
        "body out of frame",
        "bad anatomy",
        "watermark",
        "signature",
        "cut off",
        "low contrast",
        "underexposed",
        "overexposed",
        "bad art",
        "beginner",
        "amateur",
        "distorted face",
    )
    result = image_pipe(
        prompt_text,
        negative_prompt=", ".join(unwanted_traits),
        guidance_scale=3.5,
        num_inference_steps=10,
        height=1024,
        width=1024,
        num_images_per_prompt=2,
    )
    return result.images

Loading pipeline components...:   0%|          | 0/7 [00:00<?, ?it/s]

Define all the widgets

In [7]:
# Width shared by the text box and both sliders.
CONTROL_WIDTH = '600px'


def _make_preference_slider(description):
    """Build an integer slider over 1..3 (step 1) that starts at 2.

    ``description`` is the label text; ``description_width`` is set to
    'initial' in the slider style.
    """
    return widgets.IntSlider(
        value=2,
        min=1,
        max=3,
        step=1,
        description=description,
        style={'description_width': 'initial'},
        layout=widgets.Layout(width=CONTROL_WIDTH),
    )


# Free-text description of the artwork the user wants.
initial_prompt = widgets.Text(
    value="",
    placeholder="Describe the art you want to create",
    layout=widgets.Layout(width=CONTROL_WIDTH),
)

color_slider = _make_preference_slider(
    "Colorfulness (1: dark colors, 2: neutral colors, 3: vibrant colors):"
)

realism_slider = _make_preference_slider(
    'Realism (1: realistic, 2: blend of realism and futuristic, 3: futuristic):'
)

generate_button = widgets.Button(
    description='Generate Art',
    button_style='success',
    layout=widgets.Layout(width='200px'),
)

# Output widget used as the rendering target for status messages and images.
output_area = widgets.Output()

Define the functionality of pressing the button

In [8]:
def _build_refinement_prompt(user_text, colorfulness, realism):
    """Return the instruction text asking the LLM to refine ``user_text`` into an art prompt."""
    return f"""You are an assistant that creates concise text prompts for AI art generation models. 
                    Your task is to respond ONLY with the refined prompt in one sentence. ENSURE that the refined prompt does not exceed 75 tokens. 
                    Do not include explanations, user input, or any additional text—just the prompt.

                    Generate an AI art prompt for this description using ONLY one sentence and incorporate the user's preferences. 
                    Here are the preferences and instructions you MUST follow: {user_text}

                    - Colorfulness: {colorfulness}, where 1 represents very dark colors like black or gray, 2 represents neutral colors such as beige, taupe, or ivory, and 3 represents very colorful, vibrant hues that evoke happiness, like yellow, red, or bright blue.
                    - Realism: {realism}, where 1 represents realistic art that reflects the current times we live in, 2 is a blend of realism and futuristic elements, so a mix that is not entirely realistic but also not completely futuristic, and 3 is more towards futuristic and sci-fi concepts, showing things that do not exist and make you think of the future.

                    Here are some examples to help you understand what kind of prompts you should generate, based on the user's preferences:
                    ## Example 1
                    User text: A painting of a forest with a river running through it.
                    - Colorfulness: 2
                    - Realism: 1
                    Refined prompt: "A tranquil forest with a gently flowing river, painted in soft neutral tones with realistic details."

                    ## Example 2
                    User text: A city with cool buildings.
                    - Colorfulness: 3
                    - Realism: 3
                    Refined prompt: "A vibrant futuristic cityscape filled with glowing neon skyscrapers and hovering vehicles under a starry sky."

                    ## Example 3
                    User text: A portrait of a person.
                    - Colorfulness: 1
                    - Realism: 2
                    Refined prompt: "A semi-realistic portrait of a thoughtful individual with muted dark colors and subtle metallic futuristic elements."

                    If the user's preferences are not clear, you can use your best judgment to interpret the user's text and slider values to generate a refined prompt that matches their preferences.
                    ENSURE that the colors should be dark, neutral, or colorful based on the user's preference, and the art should be realistic, a blend of realism and futuristic elements, or futuristic based on the user's preference.
                    
                    Respond ONLY with the refined prompt and ensure the response reflects the user's slider values."""


def _wrap_prompt_for_display(text):
    """Split ``text`` onto two lines at the last space before its midpoint.

    ``text`` is returned unchanged when there is no such space: ``str.rfind``
    reports -1 in that case, and slicing with -1 would break the line just
    before the final character.
    """
    split_index = text.rfind(" ", 0, len(text) // 2)
    if split_index <= 0:
        return text
    return text[:split_index] + "\n" + text[split_index:].strip()


def on_generate_click(b):
    """Button callback: refine the user's description with the LLM, then render the art.

    Everything is emitted inside the ``output_area`` context. The steps are:
    validate the text box, build the refinement prompt from the text and the
    two slider values, print the refined prompt, generate the images, and
    show them side by side.

    Args:
        b: The object passed in by the click event; not used.
    """
    with output_area:
        clear_output()
        user_text = initial_prompt.value.strip()
        if not user_text:
            display(widgets.HTML('<b>Please enter a description of the art you want to create.</b>'))
            return

        prompt = _build_refinement_prompt(user_text, color_slider.value, realism_slider.value)

        display(widgets.HTML('<hr>'))
        display(widgets.HTML('<b>Working on refining the prompt based on the given preferences...</b>'))
        refined_prompt = (generate_text(prompt) or "").strip()
        if not refined_prompt:
            # Nothing usable came back; do not feed an empty prompt to the image model.
            display(widgets.HTML('<b>The language model returned an empty prompt. Please try again.</b>'))
            return

        display(widgets.HTML('<b>Refinement completed, here is the refined prompt:</b>'))

        display(widgets.HTML('<hr>'))
        # The line break is cosmetic only; the image model receives the unwrapped text below.
        print(_wrap_prompt_for_display(refined_prompt))
        display(widgets.HTML('<hr>'))

        display(widgets.HTML('The art is being generated. It may take up to 60 seconds to load the images after reaching 100%. Please be patient...'))
        images = generate_art(refined_prompt)
        resized_images = [image.resize((450, 450)) for image in images]
        # squeeze=False always yields a 2-D grid of axes, so a single image
        # does not collapse `axes` into one non-iterable Axes object.
        _, axes = plt.subplots(1, len(resized_images), figsize=(10, 5), squeeze=False)
        for ax, img in zip(axes[0], resized_images):
            ax.imshow(img)
            ax.axis('off')
        plt.show()

        generate_button.description = "Generate New Art"

# Empty the button's callback list before registering the handler, presumably so
# that re-running this cell does not stack duplicate handlers on the same button.
# NOTE(review): `_click_handlers` is a private ipywidgets attribute — confirm it
# still exists when upgrading ipywidgets.
generate_button._click_handlers.callbacks = []
generate_button.on_click(on_generate_click)

Run the display command to show all the widgets

In [9]:
# Stack the heading, the three input controls, the output area and the button
# vertically, in that order, and render the panel.
display(widgets.VBox(children=[
    widgets.HTML("<h2>Customize your art preferences</h2>"),
    initial_prompt,
    color_slider,
    realism_slider,
    output_area,
    generate_button,
]))

VBox(children=(HTML(value='<h2>Customize your art preferences</h2>'), Text(value='', layout=Layout(width='600p…