In [2]:
!pip install fal-client requests python-dotenv pillow torch
!pip install aesthetic-predictor-v2-5
!wget https://openaipublic.blob.core.windows.net/clip/bpe_simple_vocab_16e6.txt.gz -P /usr/local/lib/python3.12/dist-packages/hpsv2/src/open_clip/
!pip install hpsv2

Collecting aesthetic-predictor-v2-5
  Downloading aesthetic_predictor_v2_5-2024.12.18.1-py2.py3-none-any.whl.metadata (42 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/43.0 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m43.0/43.0 kB[0m [31m3.0 MB/s[0m eta [36m0:00:00[0m
Downloading aesthetic_predictor_v2_5-2024.12.18.1-py2.py3-none-any.whl (28 kB)
Installing collected packages: aesthetic-predictor-v2-5
Successfully installed aesthetic-predictor-v2-5-2024.12.18.1


In [9]:
import html
import io
import os
import tempfile
import time
from datetime import datetime
from pathlib import Path
from typing import Optional, Dict, Tuple

import fal_client
import hpsv2
import requests
import torch
from aesthetic_predictor_v2_5 import convert_v2_5_from_siglip
from dotenv import load_dotenv
from IPython.display import display, HTML
from PIL import Image

# Load environment variables via python-dotenv so that FAL_KEY can be
# picked up from the environment when ImageGenerator is constructed.
load_dotenv()

# Confirmation that the import/configuration cell ran to completion.
print("✅ All imports loaded successfully!")

✅ All imports loaded successfully!


In [10]:
class ImageGenerator:
    """Generate an image with FAL AI and score it with two learned metrics.

    The aesthetic predictor (v2.5) is loaded once in ``__init__``; the HPSv2
    score is computed per image through the module-level ``hpsv2`` API.
    """

    def __init__(self, api_key: Optional[str] = None):
        """Configure FAL credentials and load the aesthetic model.

        Args:
            api_key: FAL API key. When given, it is written to the ``FAL_KEY``
                environment variable; otherwise ``FAL_KEY`` must already be set.

        Raises:
            ValueError: If no key is passed and ``FAL_KEY`` is not set.
        """
        # Setup FAL AI
        if api_key:
            os.environ['FAL_KEY'] = api_key
        elif not os.getenv('FAL_KEY'):
            raise ValueError("API key required. Set FAL_KEY environment variable or provide api_key parameter.")

        # Run on the GPU in bfloat16 when one is available (the original
        # configuration); otherwise fall back to float32 on CPU so the class
        # can still be constructed on machines without CUDA.
        use_cuda = torch.cuda.is_available()
        self.device = torch.device("cuda" if use_cuda else "cpu")
        self.dtype = torch.bfloat16 if use_cuda else torch.float32

        # Initialize aesthetic model
        print("Loading aesthetic evaluation model...")
        self.model, self.preprocessor = convert_v2_5_from_siglip(
            low_cpu_mem_usage=True,
            trust_remote_code=True,
        )
        self.model = self.model.to(device=self.device, dtype=self.dtype)
        print("✅ Aesthetic model loaded successfully!")

    def generate_image(self, prompt: str, model: str = "fal-ai/imagen4/preview") -> Tuple[str, Dict]:
        """Generate one image with FAL AI.

        Args:
            prompt: Text prompt sent to the model.
            model: FAL model identifier passed to ``fal_client.run``.

        Returns:
            ``(image_url, metadata)`` where ``metadata`` holds the model name,
            prompt, ISO timestamp, wall-clock generation time in seconds and
            the raw response.

        Raises:
            RuntimeError: If the request fails or the response has no image.
                The underlying error is chained as ``__cause__``.
        """
        start_time = time.time()

        try:
            response = fal_client.run(
                model,
                arguments={
                    "prompt": prompt,
                    "image_size": "square_hd",
                    "num_inference_steps": 50,
                    "guidance_scale": 7.5
                }
            )
            generation_time = time.time() - start_time

            images = response["images"] if response and "images" in response else None
            if not images:
                raise RuntimeError("No image generated in response")

            metadata = {
                "model": model,
                "prompt": prompt,
                "timestamp": datetime.now().isoformat(),
                "generation_time": generation_time,
                "response": response
            }
            return images[0]["url"], metadata

        except Exception as e:
            raise RuntimeError(f"Failed to generate image: {str(e)}") from e

    def download_image(self, url: str) -> Image.Image:
        """Download an image and return it as an RGB PIL image.

        The payload is decoded from memory, so no temporary file is created
        (and none can be left behind when decoding fails).

        Raises:
            RuntimeError: On any network, HTTP-status or decoding failure.
        """
        try:
            response = requests.get(url, timeout=30)
            response.raise_for_status()
            # convert() forces a full decode, so the buffer is no longer needed afterwards.
            return Image.open(io.BytesIO(response.content)).convert("RGB")

        except Exception as e:
            raise RuntimeError(f"Error downloading image: {str(e)}") from e

    def calculate_aesthetic_score(self, image: Image.Image) -> float:
        """Score ``image`` with the v2.5 aesthetic predictor.

        Returns:
            The predicted score, or ``0.0`` if scoring fails (the error is
            printed rather than raised so the pipeline keeps running).
        """
        try:
            # preprocess image and move it to the model's device/dtype
            pixel_values = self.preprocessor(images=image, return_tensors="pt").pixel_values
            pixel_values = pixel_values.to(device=self.device, dtype=self.dtype)

            # predict aesthetic score
            with torch.inference_mode():
                logits = self.model(pixel_values).logits

            # Upcast before extracting the single scalar.
            return float(logits.squeeze().float().item())

        except Exception as e:
            print(f"Error calculating aesthetic score: {str(e)}")
            return 0.0

    def calculate_hpvs2(self, image: Image.Image, prompt: str) -> float:
        """Score ``image`` against ``prompt`` with HPSv2 (version ``v2.1``).

        Returns:
            The first score returned by ``hpsv2.score``, or ``0.0`` if scoring
            fails. Returning a float on failure matches
            ``calculate_aesthetic_score`` and keeps ``display_results`` from
            crashing on ``None``.
        """
        try:
            hpscore = hpsv2.score(image, prompt, hps_version='v2.1')
            return float(hpscore[0])

        except Exception as e:
            print(f'error: {e}')
            return 0.0

    def generate_and_evaluate(self, prompt: str) -> Optional[Dict]:
        """Generate an image for ``prompt``, score it and display the results.

        Returns:
            A dict with the prompt, image URL, both scores, timestamp,
            generation time and full metadata, or ``None`` if generation or
            download failed (the error is printed).
        """
        print(f"\n🎯 Generating image for prompt: '{prompt}'")
        print("=" * 60)

        try:
            # Step 1: Generate image
            print("\n1️⃣ Generating image with FAL AI Imagen4...")
            image_url, metadata = self.generate_image(prompt)
            print(f"✅ Image generated in {metadata['generation_time']:.2f}s")

            # Step 2: Download image for aesthetic evaluation
            print("\n2️⃣ Downloading image for evaluation...")
            image = self.download_image(image_url)
            print("✅ Image downloaded successfully")

            # Step 3: Calculate aesthetic score
            print("\n3️⃣ Calculating aesthetic score...")
            aesthetic_score = self.calculate_aesthetic_score(image)
            print("✅ Aesthetic evaluation complete")

            # Step 3 (continued): Calculate HPSv2 prompt-alignment score
            print("\n3️⃣ Calculating HPSv2 score...")
            hpscore = self.calculate_hpvs2(image, prompt)
            print("✅ HPSv2 evaluation complete")

            # Step 4: Display results
            print("\n4️⃣ Displaying results...")
            self.display_results(prompt, image_url, aesthetic_score, hpscore)

            # Compile results
            return {
                "prompt": prompt,
                "image_url": image_url,
                "aesthetic_score": aesthetic_score,
                "hpscore": hpscore,
                "timestamp": metadata["timestamp"],
                "generation_time": metadata["generation_time"],
                "metadata": metadata
            }

        except Exception as e:
            print(f"❌ Error in generation: {str(e)}")
            return None

    def display_results(self, prompt: str, image_url: str, aesthetic_score: float, hpscore: float):
        """Render the prompt, image and both scores, then print a text summary.

        The prompt and URL are HTML-escaped before being embedded in markup so
        that characters such as ``<``, ``&`` or ``"`` cannot break the rendered
        output. The plain-text summary prints them unmodified.
        """
        safe_prompt = html.escape(prompt)
        safe_url = html.escape(image_url, quote=True)

        # Display prompt
        display(HTML(f"<h3>📝 Prompt: {safe_prompt}</h3>"))

        # Display image
        display(HTML(f'<img src="{safe_url}" style="max-width: 512px; max-height: 512px; border: 2px solid #ddd; border-radius: 8px;"/>'))

        # Display aesthetic score
        display(HTML(f"<h3>🎨 Aesthetic Score: {aesthetic_score:.2f}</h3>"))

        # Display HPSv2 score
        display(HTML(f"<h3>🎨 HPSv2 Score: {hpscore:.2f}</h3>"))

        print("\n📊 RESULTS SUMMARY:")
        print(f"   Prompt: {prompt}")
        print(f"   Aesthetic Score: {aesthetic_score:.2f}")
        print(f"   HPSv2 Score: {hpscore:.2f}")
        print(f"   Image URL: {image_url}")

print("✅ ImageGenerator class defined!")

✅ ImageGenerator class defined!


In [11]:
# Demo prompt: subject first, then the style/composition descriptors.
# Adjacent literals are concatenated by Python into one single-line string.
example = (
    "a 5090 nvidia gpu. "
    "A product-centered illustration highlighting design functionality and aesthetic appeal "
    "showcasing an item or commercial product through detailed artistic visualization, "
    "rendered in the style of clay stop-motion animation, "
    "smooth gradient seamless color transitions blending, "
    "insert shot detailed close-up specific object element within scene, "
    "eye level frontal view"
)

# Build the generator (reads FAL_KEY from the environment and loads the
# aesthetic model), then run the generate -> download -> score -> display pipeline.
generator = ImageGenerator()
result = generator.generate_and_evaluate(example)

Loading aesthetic evaluation model...


Fetching 1 files:   0%|          | 0/1 [00:00<?, ?it/s]

✅ Aesthetic model loaded successfully!

🎯 Generating image for prompt: 'a 5090 nvidia gpu. A product-centered illustration highlighting design functionality and aesthetic appeal showcasing an item or commercial product through detailed artistic visualization, rendered in the style of clay stop-motion animation, smooth gradient seamless color transitions blending, insert shot detailed close-up specific object element within scene, eye level frontal view'

1️⃣ Generating image with FAL AI Imagen4...
✅ Image generated in 9.31s

2️⃣ Downloading image for evaluation...
✅ Image downloaded successfully

3️⃣ Calculating aesthetic score...
✅ Aesthetic evaluation complete

3️⃣ Calculating HPSv2 score...
✅ HPSv2 evaluation complete

4️⃣ Displaying results...


  with torch.cuda.amp.autocast():



📊 RESULTS SUMMARY:
   Prompt: a 5090 nvidia gpu. A product-centered illustration highlighting design functionality and aesthetic appeal showcasing an item or commercial product through detailed artistic visualization, rendered in the style of clay stop-motion animation, smooth gradient seamless color transitions blending, insert shot detailed close-up specific object element within scene, eye level frontal view
   Aesthetic Score: 4.50
   HPSv2 Score: 0.20
   Image URL: https://v3.fal.media/files/zebra/cD-xZPCbL3KaQGOHVc4zR_output.png
