<a href="https://colab.research.google.com/github/rohitr01/DiffuGen24/blob/main/DiffuGen.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import os

# Dependency bootstrap for a hosted (Colab-style) runtime: remove the listed
# packages, reinstall them, then terminate the kernel process so the freshly
# installed versions are the ones that get imported afterwards.
# The order matters: uninstall -> install -> restart.
# NOTE(review): none of the installs below are version-pinned, so every run may
# resolve to different releases — consider pinning for reproducibility.

# Uninstall conflicting packages
!pip uninstall -y torch torchvision torchaudio diffusers gradio basicsr realesrgan Pillow transformers accelerate safetensors websockets markupsafe google-genai

# Install the latest versions of all required packages
# torch / torchvision / torchaudio are pulled from the PyTorch CUDA 11.8 wheel index.
!pip install --upgrade torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
# diffusers is installed straight from the GitHub repository rather than a PyPI release.
!pip install --upgrade git+https://github.com/huggingface/diffusers.git
!pip install --upgrade gradio basicsr realesrgan Pillow transformers accelerate safetensors

# Fix Google GenAI's websockets conflict
# (reinstalls websockets and google-genai, both of which were removed by the uninstall step)
!pip install --upgrade websockets google-genai

# Restart runtime automatically to apply changes
print("✅ Installation complete! Restarting runtime...")
# os._exit(0) ends the Python process immediately, so nothing after this line
# in the cell is executed. Presumably the hosting environment then starts a
# fresh kernel; the remaining cells have to be run again after that restart.
os._exit(0)  # Force restart to apply updates

Found existing installation: torch 2.6.0+cu118
Uninstalling torch-2.6.0+cu118:
  Successfully uninstalled torch-2.6.0+cu118
Found existing installation: torchvision 0.21.0+cu118
Uninstalling torchvision-0.21.0+cu118:
  Successfully uninstalled torchvision-0.21.0+cu118
Found existing installation: torchaudio 2.6.0+cu124
Uninstalling torchaudio-2.6.0+cu124:
  Successfully uninstalled torchaudio-2.6.0+cu124
Found existing installation: diffusers 0.33.0.dev0
Uninstalling diffusers-0.33.0.dev0:
  Successfully uninstalled diffusers-0.33.0.dev0
Found existing installation: gradio 4.18.0
Uninstalling gradio-4.18.0:
  Successfully uninstalled gradio-4.18.0
Found existing installation: basicsr 1.4.2
Uninstalling basicsr-1.4.2:
  Successfully uninstalled basicsr-1.4.2
Found existing installation: realesrgan 0.3.0
Uninstalling realesrgan-0.3.0:
  Successfully uninstalled realesrgan-0.3.0
Found existing installation: pillow 10.4.0
Uninstalling pillow-10.4.0:
  Successfully uninstalled pillow-10.4.0

Collecting git+https://github.com/huggingface/diffusers.git
  Cloning https://github.com/huggingface/diffusers.git to /tmp/pip-req-build-uwg0j82v
  Running command git clone --filter=blob:none --quiet https://github.com/huggingface/diffusers.git /tmp/pip-req-build-uwg0j82v
  Resolved https://github.com/huggingface/diffusers.git to commit a7d53a59394d5d8367826663601b69828e9f74fc
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting safetensors>=0.3.1 (from diffusers==0.33.0.dev0)
  Downloading safetensors-0.5.3-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.8 kB)
Downloading safetensors-0.5.3-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (471 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m471.6/471.6 kB[0m [31m12.5 MB/s[0m eta [36m0:00:00[0m
[?25hBuilding wheels for collected packages: diffusers
  Buildi

Collecting gradio
  Using cached gradio-5.22.0-py3-none-any.whl.metadata (16 kB)
Collecting basicsr
  Using cached basicsr-1.4.2-py3-none-any.whl
Collecting realesrgan
  Using cached realesrgan-0.3.0-py3-none-any.whl.metadata (17 kB)
Collecting Pillow
  Downloading pillow-11.1.0-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (9.1 kB)
Collecting transformers
  Downloading transformers-4.50.0-py3-none-any.whl.metadata (39 kB)
Collecting accelerate
  Downloading accelerate-1.5.2-py3-none-any.whl.metadata (19 kB)
Collecting gradio-client==1.8.0 (from gradio)
  Using cached gradio_client-1.8.0-py3-none-any.whl.metadata (7.1 kB)
Collecting websockets<16.0,>=10.0 (from gradio-client==1.8.0->gradio)
  Downloading websockets-15.0.1-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.8 kB)
Collecting markupsafe<4.0,>=2.0 (from gradio)
  Downloading MarkupSafe-3.0.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.0 k

Collecting google-genai
  Downloading google_genai-1.7.0-py3-none-any.whl.metadata (32 kB)
Downloading google_genai-1.7.0-py3-none-any.whl (144 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m144.7/144.7 kB[0m [31m5.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: google-genai
Successfully installed google-genai-1.7.0


In [1]:
# Import necessary libraries
import torch
import gradio as gr
import webbrowser
from diffusers import StableDiffusionPipeline, StableDiffusionImg2ImgPipeline
from PIL import Image

# Check if GPU is available
device = "cuda" if torch.cuda.is_available() else "cpu"
if device == "cpu":
    print("⚠️ Warning: Running on CPU, performance may be slow.")

# Load Text-to-Image model
# Half precision on GPU, full precision on CPU.
print("🔄 Loading Stable Diffusion txt2img model...")
pipe_txt2img = StableDiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5",
    torch_dtype=torch.float16 if device == "cuda" else torch.float32
).to(device)
print("✅ Text-to-Image model loaded!")

# Load Image-to-Image model
# Both pipelines use the same checkpoint, so build the img2img pipeline from
# the components that are already loaded (and already on `device`) instead of
# calling from_pretrained a second time, which would hold a second copy of the
# weights in memory.
print("🔄 Loading Stable Diffusion img2img model...")
_img2img_components = dict(pipe_txt2img.components)
# The scheduler is the one stateful component (its timesteps are reset on every
# call), so give the img2img pipeline its own instance built from the same config.
_img2img_components["scheduler"] = type(pipe_txt2img.scheduler).from_config(
    pipe_txt2img.scheduler.config
)
pipe_img2img = StableDiffusionImg2ImgPipeline(**_img2img_components)
print("✅ Image-to-Image model loaded!")

# Function to generate images from text
def generate_txt2img(prompt, steps=50, guidance=7.5, width=512, height=512, seed=-1, save_format="png"):
    """Generate one image from a text prompt and save it to disk.

    Args:
        prompt: Text prompt forwarded to the txt2img pipeline.
        steps: Number of inference steps; coerced to int because UI widgets
            may deliver floats.
        guidance: Value forwarded as ``guidance_scale``.
        width: Output width in pixels; coerced to int.
        height: Output height in pixels; coerced to int.
        seed: RNG seed. ``-1`` or ``None`` (empty field) means "do not seed".
        save_format: File extension to save with, e.g. ``"png"`` or ``"jpg"``.

    Returns:
        The path of the written file, ``generated_image.<save_format>``.
    """
    # An empty seed field arrives as None; treat it like -1 (unseeded).
    if seed is None or seed == -1:
        generator = None
    else:
        generator = torch.manual_seed(int(seed))

    image = pipe_txt2img(
        prompt,
        num_inference_steps=int(steps),
        guidance_scale=guidance,
        width=int(width),
        height=int(height),
        generator=generator,
    ).images[0]

    # Pillow registers the JPEG writer under the name "JPEG"; "JPG" is only a
    # file extension, so upper-casing the extension alone fails for "jpg".
    extension = save_format.lower()
    pil_format = "JPEG" if extension in ("jpg", "jpeg") else extension.upper()

    output_path = f"generated_image.{save_format}"  # Save image in the requested format
    image.save(output_path, format=pil_format)  # Save in the selected format
    print(f"Image saved to {output_path}")
    return output_path

# Function to transform images using img2img
def generate_img2img(prompt, image, strength=0.5, steps=50, guidance=7.5, width=512, height=512, seed=-1, save_format="png"):
    """Transform an input image according to a text prompt and save the result.

    Args:
        prompt: Text prompt forwarded to the img2img pipeline.
        image: Input PIL image (the UI's upload component uses ``type="pil"``).
        strength: Value forwarded as ``strength`` to the pipeline.
        steps: Number of inference steps; coerced to int because UI widgets
            may deliver floats.
        guidance: Value forwarded as ``guidance_scale``.
        width: Target width in pixels; the input image is resized to it.
        height: Target height in pixels; the input image is resized to it.
        seed: RNG seed. ``-1`` or ``None`` (empty field) means "do not seed".
        save_format: File extension to save with, e.g. ``"png"`` or ``"jpg"``.

    Returns:
        The path of the written file, ``modified_image.<save_format>``.

    Raises:
        gr.Error: If no input image was provided.
    """
    if image is None:
        # Surface a readable message in the UI instead of a failure deep
        # inside the pipeline.
        raise gr.Error("Please upload an input image first.")

    # The img2img pipeline call has no width/height parameters (extra keyword
    # arguments are silently absorbed), so the requested size is applied by
    # resizing the input image. Converting to RGB also drops any alpha channel
    # from uploaded PNGs.
    init_image = image.convert("RGB").resize((int(width), int(height)))

    # An empty seed field arrives as None; treat it like -1 (unseeded).
    if seed is None or seed == -1:
        generator = None
    else:
        generator = torch.manual_seed(int(seed))

    result = pipe_img2img(
        prompt,
        image=init_image,
        strength=strength,
        num_inference_steps=int(steps),
        guidance_scale=guidance,
        generator=generator,
    ).images[0]

    # Pillow registers the JPEG writer under the name "JPEG"; "JPG" is only a
    # file extension, so upper-casing the extension alone fails for "jpg".
    extension = save_format.lower()
    pil_format = "JPEG" if extension in ("jpg", "jpeg") else extension.upper()

    output_path = f"modified_image.{save_format}"  # Save image in the requested format
    result.save(output_path, format=pil_format)  # Save in the selected format
    print(f"Image saved to {output_path}")
    return output_path

# Define Gradio UI
def create_ui():
    """Build the two-tab Gradio interface (text-to-image and image-to-image).

    Each tab collects its parameters, and its button calls the matching
    generator function (``generate_txt2img`` / ``generate_img2img``) with the
    inputs in the same order as that function's positional parameters.

    Returns:
        The constructed ``gr.Blocks`` app (not yet launched).
    """
    with gr.Blocks(title="DiffuGen: AI Image Generation") as demo:
        gr.Markdown("# 🌟 DiffuGen - AI Image Generator")

        # Text-to-Image Tab
        with gr.Tab("📷 Text to Image"):
            with gr.Row():
                prompt = gr.Textbox(label="Enter a text prompt")

            with gr.Row():
                steps = gr.Slider(10, 100, value=50, step=10, label="Steps")
                guidance = gr.Slider(1, 15, value=7.5, label="Guidance Scale")

            with gr.Row():
                width = gr.Slider(256, 1024, value=512, step=64, label="Width")
                height = gr.Slider(256, 1024, value=512, step=64, label="Height")
                # precision=0 makes the component deliver an integer seed.
                seed = gr.Number(value=-1, precision=0, label="Seed (-1 for random)")

            with gr.Row():
                save_format = gr.Dropdown(
                    choices=["png", "jpg"], value="png", label="Select Image Format"
                )

            generate_btn = gr.Button("🚀 Generate Image")
            output_image = gr.Image(label="Generated Image", type="pil")

            generate_btn.click(
                generate_txt2img,
                inputs=[prompt, steps, guidance, width, height, seed, save_format],
                outputs=output_image
            )

        # Image-to-Image Tab
        with gr.Tab("🖼️ Image to Image"):
            with gr.Row():
                prompt_img2img = gr.Textbox(label="Enter a prompt")

            with gr.Row():
                input_img = gr.Image(label="Upload Image", type="pil")

            with gr.Row():
                strength = gr.Slider(0.1, 1.0, value=0.5, label="Denoising Strength")
                # An explicit integer step keeps the step count whole; without
                # it the slider falls back to a fractional default increment.
                steps_img2img = gr.Slider(10, 100, value=50, step=1, label="Steps")
                guidance_img2img = gr.Slider(1, 15, value=7.5, label="Guidance Scale")

            with gr.Row():
                width_img2img = gr.Slider(256, 1024, value=512, step=64, label="Width")
                height_img2img = gr.Slider(256, 1024, value=512, step=64, label="Height")
                # precision=0 makes the component deliver an integer seed.
                seed_img2img = gr.Number(value=-1, precision=0, label="Seed (-1 for random)")

            with gr.Row():
                save_format_img2img = gr.Dropdown(
                    choices=["png", "jpg"], value="png", label="Select Image Format"
                )

            generate_img_btn = gr.Button("🔄 Transform Image")
            output_img2img = gr.Image(label="Modified Image", type="pil")

            generate_img_btn.click(
                generate_img2img,
                inputs=[prompt_img2img, input_img, strength, steps_img2img, guidance_img2img, width_img2img, height_img2img, seed_img2img, save_format_img2img],
                outputs=output_img2img
            )

    return demo

# Launch Gradio WebUI
web_ui = create_ui()
# launch() returns a tuple (app, local_url, share_url), not a single URL string,
# so unpack it and prefer the public share link when one was created.
_app, _local_url, _share_url = web_ui.launch(share=True)
url = _share_url or _local_url

# Automatically open the WebUI in a new browser tab
# webbrowser.open returns False when no browser could be launched, which is
# expected on a remote/hosted runtime; use the printed public URL in that case.
webbrowser.open(url)


🔄 Loading Stable Diffusion txt2img model...


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Loading pipeline components...:   0%|          | 0/7 [00:00<?, ?it/s]

✅ Text-to-Image model loaded!
🔄 Loading Stable Diffusion img2img model...


Loading pipeline components...:   0%|          | 0/7 [00:00<?, ?it/s]

✅ Image-to-Image model loaded!
Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://fcfcb39dafa9c1f479.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


False