<a href="https://colab.research.google.com/github/ritwikraha/CV-Practice/blob/master/DiffusionLight.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install diffusers -qq
!pip install accelerate -qq

In [None]:
import torch
from diffusers.utils import load_image
from diffusers import StableDiffusionXLControlNetInpaintPipeline, ControlNetModel
from transformers import pipeline
from PIL import Image
import numpy as np

In [None]:
# load pipeline
# ControlNet checkpoint for SDXL, fetched with float16 weights.
# It is handed to the inpainting pipeline when that pipeline is constructed.
controlnet_repo = "diffusers/controlnet-depth-sdxl-1.0"
controlnet = ControlNetModel.from_pretrained(controlnet_repo, torch_dtype=torch.float16)

In [None]:
# Build the SDXL ControlNet inpainting pipeline around the `controlnet` model,
# requesting float16 weights.
sdxl_base_repo = "stabilityai/stable-diffusion-xl-base-1.0"
pipe = StableDiffusionXLControlNetInpaintPipeline.from_pretrained(
    sdxl_base_repo,
    controlnet=controlnet,
    torch_dtype=torch.float16,
)

# The pipeline is not moved to the GPU with `.to(...)` here;
# model CPU offload is enabled on it instead.
pipe.enable_model_cpu_offload()

In [None]:
# Configuration
# Exposure flag: True selects the "black dark" chrome-ball prompt (underexposed output),
# False selects the regular chrome-ball prompt.
IS_UNDER_EXPOSURE = False

# Text prompt for the inpainted ball, chosen by the flag above.
PROMPT = (
    "a perfect black dark mirrored reflective chrome ball sphere"
    if IS_UNDER_EXPOSURE
    else "a perfect mirrored reflective chrome ball sphere"
)

Photo by <a href="https://unsplash.com/@loewe?utm_content=creditCopyText&utm_medium=referral&utm_source=unsplash">Loewe Technology</a> on <a href="https://unsplash.com/photos/a-living-room-with-a-large-book-shelf-and-a-television-u9ar6U_o5oU?utm_content=creditCopyText&utm_medium=referral&utm_source=unsplash">Unsplash</a>
  

In [None]:
# URL of the photo that is loaded as the inpainting input.
IMAGE_URL = "https://i.imgur.com/0FwdO10.jpg"
# Surface qualities to steer away from; passed to the pipeline as the negative prompt.
NEGATIVE_PROMPT = "matte, diffuse, flat, dull"

In [None]:
# Attach the DiffusionLight LoRA weights to the pipeline, then fuse them
# into the model at a scale of 0.75.
lora_repo = "DiffusionLight/DiffusionLight"
pipe.load_lora_weights(lora_repo)
pipe.fuse_lora(lora_scale=0.75)

# Depth-estimation pipeline from transformers; its output is used later
# to build the ControlNet conditioning image.
depth_model_id = "Intel/dpt-large"
depth_estimator = pipeline(task="depth-estimation", model=depth_model_id)

# prepare input image
# Fetch the photo at IMAGE_URL; it serves as the inpainting input
# and as the input to the depth estimator.
init_image = load_image(IMAGE_URL)

In [None]:
# `Image.resize` returns a new image rather than resizing in place, so the result
# must be assigned back. The later cells edit the fixed 384:640 pixel window,
# which only makes sense once the image really is 1024x1024.
init_image = init_image.resize((1024, 1024))

# Display the resized input image as the cell output.
init_image

In [None]:
# Run the depth estimator on the input image and keep the 'depth' entry
# of its result as the depth image.
depth_prediction = depth_estimator(images=init_image)
depth_image = depth_prediction['depth']

In [None]:
# Function definition for creating a circular mask.
def get_circle_mask(size=256):
    """Return a boolean mask of the unit disc sampled on a ``size`` x ``size`` grid.

    The grid spans [-1, 1] along both axes (x increasing left to right,
    y decreasing top to bottom). An entry is True where ``x**2 + y**2 <= 1``.

    Args:
        size: Number of samples along each axis.

    Returns:
        A ``torch.bool`` tensor of shape ``(size, size)``.
    """
    # Sample coordinates along each axis.
    x = torch.linspace(-1, 1, size)
    y = torch.linspace(1, -1, size)

    # Broadcast a column of y values against a row of x values. This gives the same
    # grid as torch.meshgrid(y, x) without the warning that meshgrid emits when
    # called without an explicit `indexing` argument.
    rows = y.unsqueeze(1)
    cols = x.unsqueeze(0)

    # z >= 0 exactly for the points on or inside the unit circle.
    z = 1 - cols**2 - rows**2

    mask = z >= 0
    return mask

# Disc mask at the default size, converted from a torch tensor to a numpy array.
mask = get_circle_mask().numpy()

# Independent, writable numpy copy of the depth image's pixel data,
# so it can be edited in place afterwards.
depth = np.array(depth_image)



In [None]:
# Paint the ball into the depth map and build the matching inpainting mask.
# Both edits target the same window: rows and columns 384 up to (not including) 640.
#### NOTE: You can choose your own region or take it as user input ####
ball_region = (slice(384, 640), slice(384, 640))

# Inside the window, pixels under the disc become 255; all others keep their depth value.
depth[ball_region] = np.where(mask, 255, depth[ball_region])

# Turn the edited depth array back into an image (used as the ControlNet input).
depth_mask = Image.fromarray(depth)

# Inpainting mask: an all-zero (black) array shaped like the depth map,
# with 255 (white) under the disc inside the same window.
mask_array = np.zeros_like(depth)
mask_array[ball_region] = np.where(mask, 255, 0)

# Convert the mask array to an image for the pipeline.
mask_image = Image.fromarray(mask_array)

In [None]:
# run the pipeline
# Seed an explicit generator so the sampling noise, and therefore the generated
# image, can be reproduced across runs. Change SEED to get a different sample.
SEED = 0
generator = torch.Generator(device="cpu").manual_seed(SEED)

# Inpaint the masked region of the input image, conditioned on the edited depth map.
output = pipe(
    prompt=PROMPT,
    negative_prompt=NEGATIVE_PROMPT,
    num_inference_steps=30,
    image=init_image,
    mask_image=mask_image,
    control_image=depth_mask,
    controlnet_conditioning_scale=0.5,
    generator=generator,
)

# save output
# Write the first generated image to disk.
output["images"][0].save("output.png")