LOAD MODEL

In [1]:
# 1. Import necessary libraries
import torch
from diffusers import StableDiffusionPipeline
from PIL import Image
import os # For saving the image

# 2. Load the pretrained Stable Diffusion pipeline
# Any other compatible checkpoint from the Hugging Face Hub can be used here.
# A smaller model or an fp16 revision reduces memory use if that is a concern.
# Accept the model's license on Hugging Face first if the model requires it.
model_id = "runwayml/stable-diffusion-v1-5"

if not torch.cuda.is_available():
    # No CUDA device: load without a dtype override and stay on the CPU
    # (works, but generation is much slower).
    pipe = StableDiffusionPipeline.from_pretrained(model_id)
    print("Model loaded on CPU. Generation will be slow.")
else:
    # CUDA device present (recommended): request float16 weights and move the
    # whole pipeline onto the GPU.
    pipe = StableDiffusionPipeline.from_pretrained(
        model_id,
        torch_dtype=torch.float16,
    ).to("cuda")
    print("Model loaded on GPU.")

# (Optional) If you encounter safety checker issues or want to disable it:
# from diffusers.utils import load_image, make_image_grid
# def dummy_checker(images, **kwargs):
#     return images, [False] * len(images)
# pipe.safety_checker = dummy_checker
# print("Safety checker disabled (optional).")


# 3. Define a function to generate and display/save an avatar
def generate_gandhara_avatar(prompt_text, file_name="gandhara_avatar.png", num_inference_steps=50, guidance_scale=7.5):
    """Generate an avatar from a text prompt, save it to disk and return it.

    The module-level ``pipe`` is called with the prompt and the first image of
    its result is written to ``generated_avatars/<file_name>``. The directory
    is created on demand.

    Args:
        prompt_text: Text prompt handed to the pipeline.
        file_name: Name of the file to write inside ``generated_avatars``.
        num_inference_steps: Forwarded unchanged to the pipeline call.
        guidance_scale: Forwarded unchanged to the pipeline call.

    Returns:
        The generated image object, or ``None`` when generation or saving
        raised. The error is printed rather than propagated, so callers must
        check for ``None``.
    """
    print(f"\nGenerating avatar for prompt: '{prompt_text}'...")
    print(f"Using inference steps: {num_inference_steps}, guidance scale: {guidance_scale}")

    output_dir = "generated_avatars"

    try:
        # Generate the image
        # For newer diffusers versions, you might not need to wrap prompt in a list
        result = pipe(
            prompt_text,
            num_inference_steps=num_inference_steps,
            guidance_scale=guidance_scale,
        )
        image = result.images[0]

        # exist_ok=True avoids the check-then-create race of an explicit
        # os.path.exists() test and matches how ColabGUI creates its directory.
        os.makedirs(output_dir, exist_ok=True)

        save_path = os.path.join(output_dir, file_name)
        image.save(save_path)
        print(f"Avatar saved to {save_path}")

        return image
    except Exception as e:
        # Deliberate best-effort boundary: this runs interactively in notebook
        # cells and widget callbacks, so report the failure and signal it with
        # None instead of raising.
        print(f"An error occurred during image generation: {e}")
        return None

# 4. Example usage: three sample prompts inspired by the project description.
# Each result is kept in its own module-level variable so it can be shown
# afterwards with `display(...)` (from IPython.display import display) or by
# leaving the variable as the last expression of a notebook cell.

# Prompt Example 1: Monk
prompt1 = (
    "A Gandharan-style Buddhist monk with curly hair and flowing robes, "
    "sculpted in Greco-Buddhist style, serene expression, intricate drapery, "
    "stone texture, detailed face, Taxila art influence."
)
generated_image1 = generate_gandhara_avatar(
    prompt1,
    file_name="gandhara_monk_avatar.png",
    num_inference_steps=50,
)

if generated_image1:
    print("Displaying Monk Avatar (in a Jupyter/Colab environment, this would show the image):")
    # display(generated_image1)

# Prompt Example 2: Warrior (more steps and a higher guidance scale, to show
# how the generation parameters can be tweaked)
prompt2 = (
    "A Taxila warrior in ancient Indo-Greek lamellar armour, holding a spear, "
    "inspired by stone relief sculptures from Gandhara, strong stance, "
    "detailed helmet."
)
generated_image2 = generate_gandhara_avatar(
    prompt2,
    file_name="taxila_warrior_avatar.png",
    num_inference_steps=60,
    guidance_scale=8,
)

if generated_image2:
    print("Displaying Warrior Avatar:")
    # display(generated_image2)

# Prompt Example 3: Philosopher
prompt3 = (
    "A wise Taxila philosopher, long beard, contemplative gaze, "
    "draped in simple Hellenistic-influenced robes, holding a scroll, "
    "high-relief stucco figure style from a Gandharan monastery."
)
generated_image3 = generate_gandhara_avatar(
    prompt3,
    file_name="taxila_philosopher_avatar.png",
    num_inference_steps=50,
)

if generated_image3:
    print("Displaying Philosopher Avatar:")
    # display(generated_image3)

# Closing notes printed for whoever runs the cell. Note 6 previously carried a
# stray "[cite: 4]" citation marker inside the printed text; it is removed here
# so the message reads cleanly.
print("\n--- Implementation Notes ---")
print("1. Model Loading: The first time you run this, the model (around 2-5GB) will be downloaded. This can take time.")
print("2. GPU Usage: Using a GPU (e.g., in Google Colab by selecting 'Runtime' > 'Change runtime type' > 'T4 GPU') is highly recommended for faster generation.")
print("3. Prompt Engineering is Key: The quality and relevance of the avatar will heavily depend on how well you craft your prompts. Experiment with adding details about artistic medium (sculpture, relief, fresco), materials (stone, stucco, bronze), specific Gandharan features (ushnisha, urna, monastic robes), and emotional expression.")
print("4. Customization: You can adjust `num_inference_steps` (quality vs. speed) and `guidance_scale` (how much the model adheres to the prompt).")
print("5. Output: Images are saved in the 'generated_avatars' directory.")
print("6. Optional Prompt Expansion: For the optional lightweight language model to expand prompts, you would insert that logic before calling `generate_gandhara_avatar`. The expanded prompt would then be passed to this function.")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


model_index.json:   0%|          | 0.00/541 [00:00<?, ?B/s]

Fetching 15 files:   0%|          | 0/15 [00:00<?, ?it/s]

scheduler_config.json:   0%|          | 0.00/308 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/617 [00:00<?, ?B/s]

preprocessor_config.json:   0%|          | 0.00/342 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.22G [00:00<?, ?B/s]

config.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/472 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/492M [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

config.json:   0%|          | 0.00/743 [00:00<?, ?B/s]

diffusion_pytorch_model.safetensors:   0%|          | 0.00/3.44G [00:00<?, ?B/s]

diffusion_pytorch_model.safetensors:   0%|          | 0.00/335M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/806 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/547 [00:00<?, ?B/s]

Loading pipeline components...:   0%|          | 0/7 [00:00<?, ?it/s]

Model loaded on GPU.

Generating avatar for prompt: 'A Gandharan-style Buddhist monk with curly hair and flowing robes, sculpted in Greco-Buddhist style, serene expression, intricate drapery, stone texture, detailed face, Taxila art influence.'...
Using inference steps: 50, guidance scale: 7.5


  0%|          | 0/50 [00:00<?, ?it/s]

Avatar saved to generated_avatars/gandhara_monk_avatar.png
Displaying Monk Avatar (in a Jupyter/Colab environment, this would show the image):

Generating avatar for prompt: 'A Taxila warrior in ancient Indo-Greek lamellar armour, holding a spear, inspired by stone relief sculptures from Gandhara, strong stance, detailed helmet.'...
Using inference steps: 60, guidance scale: 8


  0%|          | 0/60 [00:00<?, ?it/s]

Avatar saved to generated_avatars/taxila_warrior_avatar.png
Displaying Warrior Avatar:

Generating avatar for prompt: 'A wise Taxila philosopher, long beard, contemplative gaze, draped in simple Hellenistic-influenced robes, holding a scroll, high-relief stucco figure style from a Gandharan monastery.'...
Using inference steps: 50, guidance scale: 7.5


  0%|          | 0/50 [00:00<?, ?it/s]

Avatar saved to generated_avatars/taxila_philosopher_avatar.png
Displaying Philosopher Avatar:

--- Implementation Notes ---
1. Model Loading: The first time you run this, the model (around 2-5GB) will be downloaded. This can take time.
2. GPU Usage: Using a GPU (e.g., in Google Colab by selecting 'Runtime' > 'Change runtime type' > 'T4 GPU') is highly recommended for faster generation.
3. Prompt Engineering is Key: The quality and relevance of the avatar will heavily depend on how well you craft your prompts. Experiment with adding details about artistic medium (sculpture, relief, fresco), materials (stone, stucco, bronze), specific Gandharan features (ushnisha, urna, monastic robes), and emotional expression.
4. Customization: You can adjust `num_inference_steps` (quality vs. speed) and `guidance_scale` (how much the model adheres to the prompt).
5. Output: Images are saved in the 'generated_avatars' directory.
6. Optional Prompt Expansion: For the optional lightweight language model

INTERACTIVE INTERFACE

In [2]:
# ENHANCED GANDHARA ART GENERATOR FOR COLAB
from IPython.display import display, clear_output
import ipywidgets as widgets
import os
from PIL import Image
import time
import json

class ColabGUI:
    """ipywidgets front end for generating Gandhara-style images in Colab.

    Constructing an instance creates the widgets, wires their callbacks and
    displays the interface immediately. Images are produced by the
    module-level ``generate_gandhara_avatar`` function and a copy of each one
    is stored in ``self.temp_dir`` under a sequential file name, next to a
    ``prompt_history.json`` file holding the prompts used so far.
    """

    # File-name prefix shared by the counter scan and the save path so the two
    # can never drift apart.
    _IMAGE_PREFIX = "generated_img_"

    def __init__(self):
        # Setup directories
        self.temp_dir = "/content/temp_generated_images"
        os.makedirs(self.temp_dir, exist_ok=True)
        self.image_counter = self.get_next_counter()
        self.prompt_history = []

        # Style presets: dropdown label -> text added to the prompt
        self.style_presets = {
            "Classic Gandhara": "Greco-Buddhist style, schist stone texture, Hellenistic facial features, museum lighting",
            "Ancient Relief": "Stone relief carving, weathered surface, 2nd century CE, archaeological find",
            "Golden Gandhara": "Gilded statue, intricate jewelry, Kushan period, warm lighting",
            "Cyber Gandhara": "Neon-lit ancient statue, holographic halo, cyberpunk fusion"
        }

        # Widgets
        self.prompt_input = widgets.Textarea(
            placeholder='Describe your Gandhara art (e.g. "A meditating Buddha with Greek drapery")',
            layout=widgets.Layout(width='80%', height='100px')
        )

        self.style_dropdown = widgets.Dropdown(
            options=list(self.style_presets.keys()),
            description='Style:',
            value="Classic Gandhara"
        )

        # Starts empty and disabled; enabled once the first prompt is recorded.
        self.history_dropdown = widgets.Dropdown(
            options=[],
            description='History:',
            disabled=True
        )

        self.generate_btn = widgets.Button(
            description="Generate",
            button_style='success'
        )

        self.reset_btn = widgets.Button(
            description="Reset",
            button_style='warning'
        )

        self.display_toggle = widgets.Checkbox(
            value=True,
            description='Auto-display images',
            indent=False
        )

        self.output = widgets.Output()
        self.status = widgets.Label()
        self.image_display = widgets.Image(format='png', width=600)

        # Event wiring
        self.generate_btn.on_click(self.on_generate)
        self.reset_btn.on_click(self.on_reset)
        self.history_dropdown.observe(self.on_history_select, names='value')
        self.style_dropdown.observe(self.apply_style, names='value')

        # Layout
        display(widgets.VBox([
            widgets.HTML("<h1>🖼️ Gandhara Art Generator</h1>"),
            widgets.HBox([
                widgets.VBox([
                    self.prompt_input,
                    widgets.HBox([self.style_dropdown, self.history_dropdown]),
                    widgets.HBox([self.generate_btn, self.reset_btn, self.display_toggle])
                ])
            ]),
            self.status,
            self.image_display,
            self.output
        ]))

    def get_next_counter(self):
        """Return the next unused image index for ``self.temp_dir``.

        The result is one more than the highest numeric index found among the
        ``generated_img_<n>.*`` files, or 1 when the directory is missing or
        holds no such file. Using the highest index rather than the file count
        keeps existing images from being overwritten when the numbering has
        gaps (for example after some images were deleted).
        """
        if not os.path.isdir(self.temp_dir):
            return 1

        highest = 0
        for name in os.listdir(self.temp_dir):
            if not name.startswith(self._IMAGE_PREFIX):
                continue
            index_text = os.path.splitext(name)[0][len(self._IMAGE_PREFIX):]
            # Files with a non-numeric suffix cannot clash with a generated
            # name, so they are ignored.
            if index_text.isdecimal():
                highest = max(highest, int(index_text))
        return highest + 1

    def apply_style(self, change):
        """Add the newly selected style preset to the prompt.

        If the prompt still ends with the previously selected preset, that
        text is replaced instead of stacking one preset after another. An
        empty prompt receives the preset on its own, without a leading comma.

        Args:
            change: Change notification from the style dropdown; the ``'new'``
                and (optionally) ``'old'`` entries are preset names.
        """
        new_style = self.style_presets.get(change['new'])
        if not new_style:
            return

        prompt = self.prompt_input.value.rstrip()
        old_style = self.style_presets.get(change.get('old'))
        if old_style and prompt.endswith(old_style):
            # Drop the old preset together with the ", " that joined it.
            prompt = prompt[:-len(old_style)].rstrip(", ")

        self.prompt_input.value = f"{prompt}, {new_style}" if prompt else new_style

    def on_history_select(self, change):
        """Load the prompt chosen in the history dropdown into the text box."""
        if change['new']:
            self.prompt_input.value = change['new']

    def on_reset(self, btn):
        """Clear the prompt and the displayed image and report the reset."""
        self.prompt_input.value = ""
        self.image_display.value = b""
        self.status.value = "🔄 Interface reset"

    def on_generate(self, btn):
        """Generate an image for the current prompt and update the interface.

        Records the prompt in the history, calls ``generate_gandhara_avatar``,
        stores a copy of the result in ``self.temp_dir``, optionally shows it,
        and writes the prompt history to ``prompt_history.json``. Any failure
        is reported through the status label instead of being raised.

        Args:
            btn: The clicked button (unused).
        """
        prompt = self.prompt_input.value.strip()
        if not prompt:
            self.status.value = "⚠️ Please enter a prompt"
            return

        try:
            # Update history
            if prompt not in self.prompt_history:
                self.prompt_history.append(prompt)
                # Hand the widget a snapshot rather than the live list that
                # keeps being appended to.
                self.history_dropdown.options = tuple(self.prompt_history)
                self.history_dropdown.disabled = False

            self.status.value = "⏳ Generating... (30-60 seconds)"

            # Generate image with sequential naming
            filename = f"{self._IMAGE_PREFIX}{self.image_counter}.png"
            save_path = os.path.join(self.temp_dir, filename)

            start_time = time.time()
            image = generate_gandhara_avatar(
                prompt_text=prompt,
                file_name=filename,
                num_inference_steps=50,
                guidance_scale=7.5
            )

            if not image:
                # The generator reports its own errors by printing them and
                # returning None; without this branch the status would stay
                # on "Generating..." forever.
                self.status.value = "💥 Error: image generation failed (see the printed log for details)"
                return

            # The generator already wrote its own copy; this one lives in
            # temp_dir under the sequential name used by the interface.
            image.save(save_path)

            gen_time = time.time() - start_time
            self.status.value = f"✅ Generated in {gen_time:.1f}s | Saved as {filename}"

            if self.display_toggle.value:
                with open(save_path, "rb") as f:
                    self.image_display.value = f.read()

            self.image_counter += 1

            # Save prompt history
            history_path = os.path.join(self.temp_dir, "prompt_history.json")
            with open(history_path, "w") as f:
                json.dump(self.prompt_history, f)

        except Exception as e:
            # Widget callbacks have no caller to propagate to, so surface the
            # problem in the status label.
            self.status.value = f"💥 Error: {str(e)}"

# Launch interface: constructing ColabGUI builds the widgets and displays them
# as this cell's output. The instance is kept in `gui` so its state (prompt
# history, image counter) remains reachable from later cells.
gui = ColabGUI()

VBox(children=(HTML(value='<h1>🖼️ Gandhara Art Generator</h1>'), HBox(children=(VBox(children=(Textarea(value=…


Generating avatar for prompt: 'Ancient relief carving flickering in torchlight, warm orange glow against deep shadows, cinematic atmosphere'...
Using inference steps: 50, guidance scale: 7.5


  0%|          | 0/50 [00:00<?, ?it/s]

Avatar saved to generated_avatars/generated_img_1.png
