In [1]:
pip install gradio diffusers transformers accelerate

Collecting gradio
  Downloading gradio-5.12.0-py3-none-any.whl.metadata (16 kB)
Collecting aiofiles<24.0,>=22.0 (from gradio)
  Downloading aiofiles-23.2.1-py3-none-any.whl.metadata (9.7 kB)
Collecting fastapi<1.0,>=0.115.2 (from gradio)
  Downloading fastapi-0.115.6-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.5.0-py3-none-any.whl.metadata (3.0 kB)
Collecting gradio-client==1.5.4 (from gradio)
  Downloading gradio_client-1.5.4-py3-none-any.whl.metadata (7.1 kB)
Collecting markupsafe~=2.0 (from gradio)
  Downloading MarkupSafe-2.1.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.0 kB)
Collecting pydub (from gradio)
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting python-multipart>=0.0.18 (from gradio)
  Downloading python_multipart-0.0.20-py3-none-any.whl.metadata (1.8 kB)
Collecting ruff>=0.2.2 (from gradio)
  Downloading ruff-0.9.1-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.meta

In [2]:
import random
import gradio as gr
from diffusers import StableDiffusionPipeline
import torch
import json
from textblob import TextBlob
# Save and load user preferences
def save_preferences(preferences, filename="preferences.json"):
    with open(filename, "w") as f:
        json.dump(preferences, f)
def load_preferences(filename="preferences.json"):
    try:
      with open(filename, "r") as f:
          return json.load(f)
    except FileNotFoundError:
      return {"landscape": 1, "portrait": 1, "abstract": 1, "still life": 1, "fantasy": 1}
# Sentiment analysis
def analyze_sentiment(text):
    analysis = TextBlob(text)
    if analysis.sentiment.polarity > 0.5:
        return "joyful"
    elif analysis.sentiment.polarity < -0.5:
        return "somber"
    else:
        return "neutral"
# Initialize population
def initialize_population(size, themes, max_length):
    return [random.sample(themes, random.randint(1, max_length)) for _ in range(size)]
# Fitness evaluation
def evaluate_fitness(individual, user_preference):
    primary_preference = max(user_preference, key=user_preference.get)
    primary_weight = user_preference[primary_preference]
    fitness = sum([user_preference.get(theme, 0) for theme in individual])
    if primary_preference in individual:
      fitness += primary_weight * 2
    return fitness
# Selection operation
def select(population, fitness_scores):
    total_fitness = sum(fitness_scores)
    if total_fitness == 0:
      return population
    probabilities = [score / total_fitness for score in fitness_scores]
    return random.choices(population, probabilities, k=len(population))
# Crossover operation
def crossover(parent1, parent2):
    child = list(set(parent1 + parent2))
    random.shuffle(child)
    return child[:len(parent1)]
# Mutation operation
def mutate(individual, themes, mutation_rate):
    if random.random() < mutation_rate:
        operation = random.choice(["add", "remove", "swap"])
        if operation == "add" and len(individual) < len(themes):
            new_theme = random.choice([t for t in themes if t not in individual])
            individual.append(new_theme)
        elif operation == "remove" and len(individual) > 1:
            individual.pop(random.randint(0, len(individual) - 1))
        elif operation == "swap" and len(individual) > 1:
            idx1, idx2 = random.sample(range(len(individual)), 2)
            individual[idx1], individual[idx2] = individual[idx2], individual[idx1]
    return individual
# Genetic algorithm main function
def genetic_algorithm(themes, user_preference, generations=10, population_size=10, mutation_rate=0.1):
    max_length = len(themes)
    population = initialize_population(population_size, themes, max_length)
    for generation in range(generations):
        fitness_scores = [evaluate_fitness(ind, user_preference) for ind in population]
        if not fitness_scores:
            break
        best_individual = population[fitness_scores.index(max(fitness_scores))]
        print(f"Generation {generation + 1} Best: {best_individual} Fitness: {max(fitness_scores)})")
        selected_population = select(population, fitness_scores)
        next_generation = []
        for i in range(0, len(selected_population) - 1, 2):
            parent1, parent2 = selected_population[i], selected_population[i + 1]
            child = crossover(parent1, parent2)
            next_generation.append(mutate(child, themes, mutation_rate))
        if len(selected_population) % 2 == 1:
            next_generation.append(selected_population[-1])
        if not next_generation:
            next_generation = population
        population = next_generation
    if population:
        fitness_scores = [evaluate_fitness(ind, user_preference) for ind in population]
        if fitness_scores:
            best_individual = population[fitness_scores.index(max(fitness_scores))]
            return best_individual
    return None

# Load Stable Diffusion model
pipe = StableDiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5")
pipe = pipe.to("cuda" if torch.cuda.is_available() else "cpu")
# Generate an image
def create_image_with_stable_diffusion(prompt, save_path="generated_image.png"):
    try:
        print(f"Debug Prompt: {prompt}")
        image = pipe(prompt).images[0]
        image.save(save_path)  # Save the image locally
        return image, save_path
    except Exception as e:
        print(f"Error generating image: {e}")
        return None, None
# Generate a prompt based on themes, sentiment, and description
def generate_prompt_from_idea(idea, sentiment, description):
    # Sort and select the top theme based on user preference
    sorted_ideas = sorted(idea.items(), key=lambda x: x[1], reverse=True)[:2]
    selected_themes = [theme for theme, _ in sorted_ideas]

    # Initialize the prompt with the description
    prompt = f"A creative artwork based on the concept of {description}. "

    # Add descriptions for each selected theme based on the user's idea
    if "portrait" in selected_themes:
        prompt += f"A highly detailed and {random.choice(['expressive', 'captivating'])} portrayal of the human form, full of emotion and depth. "
    if "landscape" in selected_themes:
        prompt += f"A sweeping and majestic scene, filled with vast expanses and dynamic natural elements, illuminated by the changing light. "
    if "abstract" in selected_themes:
        prompt += f"An abstract expression, exploring the fluidity of color and shape, with an emphasis on {random.choice(['balance', 'chaos', 'flow'])}. "
    if "fantasy" in selected_themes:
        prompt += f"A mystical scene that evokes a sense of wonder, filled with {random.choice(['dreamlike', 'otherworldly', 'ethereal'])} elements and energy. "
    if "still life" in selected_themes:
        prompt += f"A subtle and peaceful composition, focusing on texture, light, and shadow in a {random.choice(['delicate', 'soft', 'serene'])} arrangement. "

    # Adjust the prompt based on sentiment
    if sentiment == "joyful":
        prompt += "The artwork features vibrant, bright colors and an uplifting mood."
    elif sentiment == "somber":
        prompt += "The artwork is depicted with muted tones and a reflective atmosphere."
    else:
        prompt += "The artwork is balanced with natural tones and a harmonious atmosphere."

    return prompt

# Integrate style into prompt generation
def generate_prompt_with_style(idea, sentiment, style, description):
    base_prompt = generate_prompt_from_idea(idea, sentiment, description)
    style_options = {
        "impressionism": "in the style of impressionism",
        "cubism": "in the style of cubism",
        "surrealism": "in the style of surrealism",
    }
    if style in style_options:
        base_prompt += f" {style_options[style]}."
    return base_prompt
# Gradio interface function
def gradio_interface_with_style(landscape, portrait, abstract, still_life, fantasy, description, style="impressionism"):
    themes = ["landscape", "portrait", "abstract", "still life", "fantasy"]
    user_preference = load_preferences()
    user_preference.update({
        "landscape": landscape,
        "portrait": portrait,
        "abstract": abstract,
        "still life": still_life,
        "fantasy": fantasy
    })
    sentiment = analyze_sentiment(description)
    best_idea = genetic_algorithm(themes, user_preference, generations=10, population_size=10, mutation_rate=0.2)
    if not best_idea:
        return "No valid themes found.", None, None
    prompt = generate_prompt_with_style(user_preference, sentiment, style, description)
    image, image_path = create_image_with_stable_diffusion(prompt, save_path="output_image.png")
    if image is None:
        return "Failed to generate image.", None, None
    save_preferences(user_preference)
    return prompt, image, image_path
# Set up Gradio interface
interface = gr.Interface(
    fn=gradio_interface_with_style,
    inputs=[
        gr.Slider(1, 5, step=1, label="Landscape Preference"),
        gr.Slider(1, 5, step=1, label="Portrait Preference"),
        gr.Slider(1, 5, step=1, label="Abstract Preference"),
        gr.Slider(1, 5, step=1, label="Still Life Preference"),
        gr.Slider(1, 5, step=1, label="Fantasy Preference"),
        gr.Textbox(label="Describe your idea or theme"),
        gr.Radio(["impressionism", "cubism", "surrealism"], label="Select Style"),
    ],
    outputs=[
        gr.Textbox(label="Generated Prompt"),
        gr.Image(label="Generated Image"),
        gr.Textbox(label="Image Path"),
    ],
    title="Creative Drawing Assistant with Style Options",
    description="Generate creative images based on your preferences and chosen style!"
)
# Launch the interface
interface.launch()

The cache for model files in Transformers v4.22.0 has been updated. Migrating your old cache. This is a one-time only operation. You can interrupt this and resume the migration later on by calling `transformers.utils.move_cache()`.


0it [00:00, ?it/s]

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


model_index.json:   0%|          | 0.00/541 [00:00<?, ?B/s]

Fetching 15 files:   0%|          | 0/15 [00:00<?, ?it/s]

model.safetensors:   0%|          | 0.00/492M [00:00<?, ?B/s]

text_encoder/config.json:   0%|          | 0.00/617 [00:00<?, ?B/s]

safety_checker/config.json:   0%|          | 0.00/4.72k [00:00<?, ?B/s]

scheduler/scheduler_config.json:   0%|          | 0.00/308 [00:00<?, ?B/s]

tokenizer/merges.txt:   0%|          | 0.00/525k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.22G [00:00<?, ?B/s]

tokenizer/special_tokens_map.json:   0%|          | 0.00/472 [00:00<?, ?B/s]

tokenizer/tokenizer_config.json:   0%|          | 0.00/806 [00:00<?, ?B/s]

(…)ature_extractor/preprocessor_config.json:   0%|          | 0.00/342 [00:00<?, ?B/s]

tokenizer/vocab.json:   0%|          | 0.00/1.06M [00:00<?, ?B/s]

unet/config.json:   0%|          | 0.00/743 [00:00<?, ?B/s]

diffusion_pytorch_model.safetensors:   0%|          | 0.00/3.44G [00:00<?, ?B/s]

vae/config.json:   0%|          | 0.00/547 [00:00<?, ?B/s]

diffusion_pytorch_model.safetensors:   0%|          | 0.00/335M [00:00<?, ?B/s]

Loading pipeline components...:   0%|          | 0/7 [00:00<?, ?it/s]

Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://5bb1769863fa5da96d.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




#Assess the quality of image
 The assessment of quality is to evaluate how well for the theme, description and emotions. The aim is to determine if the images align with the users description and preferences. As a robust way, we used CLIP model, a text description and an image path are specified. This model output the similarity score between the image and the text. Evaluating 50 samples from the tasks, we found the similarity score is between 30\% to 60\% when compared with prompt and generated image.

In [3]:
from transformers import ViltProcessor, ViltForImageAndTextRetrieval
from PIL import Image
processor = ViltProcessor.from_pretrained("dandelin/vilt-b32-mlm")
model = ViltForImageAndTextRetrieval.from_pretrained("dandelin/vilt-b32-mlm")
def preprocess_image(image_path):
    image = Image.open(image_path).convert("RGB")
    return image
def truncate_prompt(prompt, max_length=40):
    tokens = prompt.split()
    truncated_tokens = tokens[:max_length]
    return " ".join(truncated_tokens)

def evaluate_vilt_similarity(prompt, image_path):
    image = preprocess_image(image_path)
    truncated_prompt = truncate_prompt(prompt)
    inputs = processor(
        text=truncated_prompt,
        images=image,
        return_tensors="pt",
        padding=True,
        truncation=True,
        max_length=40,
    )
    print(f"Input IDs shape: {inputs['input_ids'].shape}")
    print(f"Pixel values shape: {inputs['pixel_values'].shape}")
    outputs = model(**inputs)
    similarity = outputs.logits.sigmoid().item()
    return similarity

image_path = 'output_image.png'
prompt = "A creative artwork based on the concept of rivers. A sweeping and majestic scene, filled with vast expanses and dynamic natural elements, illuminated by the changing light. A mystical scene that evokes a sense of wonder, filled with dreamlike elements and energy. The artwork is balanced with natural tones and a harmonious atmosphere. in the style of impressionism."
similarity = evaluate_vilt_similarity(prompt, image_path)
print(f"Text and image similarity: {similarity}")


preprocessor_config.json:   0%|          | 0.00/251 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/320 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/653 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/543M [00:00<?, ?B/s]

Some weights of ViltForImageAndTextRetrieval were not initialized from the model checkpoint at dandelin/vilt-b32-mlm and are newly initialized: ['rank_output.bias', 'rank_output.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Input IDs shape: torch.Size([1, 40])
Pixel values shape: torch.Size([1, 3, 384, 384])
Text and image similarity: 0.505266547203064
