In [None]:
!pip install -q regex tqdm
!pip install -q diffusers transformers accelerate scipy
!pip install -q -U xformers==0.0.25
!pip install -q opencv-python

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.0/2.0 MB[0m [31m8.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m302.4/302.4 kB[0m [31m11.9 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
!pip install -q 'git+https://github.com/facebookresearch/segment-anything.git'

In [None]:
!pip install -q pycocotools matplotlib onnxruntime onnx

In [None]:
!pip install -q gradio diffusers

In [None]:
### Import libraries
import torch
from torchvision import transforms
from torchvision.transforms.functional import to_pil_image, to_tensor

import PIL, cv2
from PIL import Image

from io import BytesIO
from IPython.display import display
import base64, json, requests
from matplotlib import pyplot as plt

import numpy as np
import copy

from numpy import asarray

import sys

from segment_anything import sam_model_registry, SamAutomaticMaskGenerator, SamPredictor


In [None]:
import os
from diffusers import StableDiffusionInpaintPipeline, EulerDiscreteScheduler

In [None]:
!wget -q https://dl.fbaipublicfiles.com/segment_anything/sam_vit_h_4b8939.pth

In [None]:
# --- Segment Anything set-up -------------------------------------------------
# Which SAM backbone to build and the checkpoint file that holds its weights.
model_type = 'vit_h'
sam_checkpoint = 'sam_vit_h_4b8939.pth'
device = 'cuda'

# Look up the model builder in the registry, load the weights, move to the GPU.
sam = sam_model_registry[model_type](checkpoint=sam_checkpoint)
sam.to(device)

# Automatic mask generator used by the UI callbacks.
# `pred_iou_thresh` is a filtering threshold in [0, 1] on the model's own predicted
# mask quality (IoU = intersection over union). Lower values keep more masks;
# higher values keep fewer masks, but only the good ones.
mask_generator = SamAutomaticMaskGenerator(
    model=sam,
    points_per_side=32,
    crop_n_layers=1,
    crop_n_points_downscale_factor=2,
    pred_iou_thresh=0.97,
    stability_score_thresh=0.92,
    # Post-processing of small regions requires OpenCV to be installed.
    min_mask_region_area=100,
)

In [None]:
from diffusers import StableDiffusionInpaintPipeline, EulerDiscreteScheduler  # Stable Diffusion inpainting pipeline
# The Euler discrete scheduler is one specific scheduler. At inference time a diffusion
# model starts from pure noise and gradually denoises it into a full image. That gradual
# process can be carried out in several ways; the Euler discrete scheduler is one of them.

model_dir = "stabilityai/stable-diffusion-2-inpainting"  # which Stable Diffusion checkpoint to load

# The scheduler determines the algorithm used to produce new samples during denoising.
scheduler = EulerDiscreteScheduler.from_pretrained(model_dir, subfolder="scheduler")

# Inpainting pipeline in half precision.
# `variant="fp16"` selects the fp16 weight files; it replaces the deprecated
# `revision="fp16"` way of requesting them.
pipe = StableDiffusionInpaintPipeline.from_pretrained(
    model_dir,
    scheduler=scheduler,
    variant="fp16",
    torch_dtype=torch.float16,  # run the model in 16-bit floating point precision
)
pipe = pipe.to("cuda")
pipe.enable_xformers_memory_efficient_attention()  # xFormers makes transformer attention more memory-efficient


In [None]:
import gradio as gr
import numpy as np
import matplotlib.pyplot as plt
import cv2

def show_anns(anns):
    """Overlay segmentation masks on the current matplotlib axes and return their centroids.

    Parameters
    ----------
    anns : list of dict
        Mask annotations. Each entry must provide ``'segmentation'`` (a 2-D mask
        array) and ``'area'`` (used only to order the drawing).

    Returns
    -------
    dict
        Maps the index of each annotation in ``anns`` to the ``(cx, cy)`` integer
        centroid of its largest outer contour. Masks whose contour has zero area
        are omitted. An empty dict is returned when ``anns`` is empty, so callers
        can always iterate over the result.
    """
    if not anns:
        return {}

    centroids = {}
    # Draw the largest masks first so that smaller masks end up painted on top of them.
    sorted_anns = sorted(enumerate(anns), key=lambda item: item[1]['area'], reverse=True)
    ax = plt.gca()

    # Disable autoscale so the overlays and markers do not change the axes limits.
    ax.set_autoscale_on(False)

    for original_idx, ann in sorted_anns:
        m = ann['segmentation']

        # RGBA overlay: one random colour everywhere, alpha 0.35 inside the mask
        # and 0 (fully transparent) outside of it.
        overlay = np.empty((m.shape[0], m.shape[1], 4))
        overlay[..., :3] = np.random.random(3)
        overlay[..., 3] = m * 0.35
        ax.imshow(overlay)

        # Locate the mask's outer contours to compute a representative centroid.
        contours, _ = cv2.findContours(m.astype(np.uint8), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        if not contours:
            continue

        # A mask may consist of several disconnected pieces; use the largest one
        # rather than whichever contour happens to be listed first.
        cnt = max(contours, key=cv2.contourArea)
        M = cv2.moments(cnt)

        # m00 is the contour area; a zero value would make the centroid undefined.
        if M["m00"] != 0:
            cx = int(M["m10"] / M["m00"])
            cy = int(M["m01"] / M["m00"])
            centroids[original_idx] = (cx, cy)
            # Mark the centroid with a white dot.
            ax.plot(cx, cy, marker='.', color='white', markersize=10)

    plt.show()

    return centroids

In [None]:
def find_bounding_box(image_array):
    """Return the bounding box of all non-white pixels in an image array.

    Parameters
    ----------
    image_array : numpy.ndarray
        Array of shape (height, width, channels). Only the first three channels
        are inspected, so an alpha channel is ignored.

    Returns
    -------
    tuple
        ``(min_x, min_y, max_x, max_y)`` with *inclusive* maxima. Add 1 to the
        maxima when using them as slice ends. If every pixel is pure white
        (255 in each inspected channel) the bounds of the whole image are
        returned instead of raising on an empty selection.
    """
    # True wherever at least one of the first three channels differs from 255.
    non_white = np.any(image_array[..., :3] != 255, axis=-1)
    rows, cols = np.nonzero(non_white)

    if rows.size == 0:
        # Nothing to crop to: fall back to the full image extent.
        height, width = non_white.shape[:2]
        return 0, 0, width - 1, height - 1

    return cols.min(), rows.min(), cols.max(), rows.max()

def sam_model(img):
    """Segment ``img`` and return a preview image with the masks drawn on top.

    Parameters
    ----------
    img : PIL.Image.Image or None
        Image coming from the Gradio input component. ``None`` (the component
        was cleared) is passed straight through.

    Returns
    -------
    PIL.Image.Image or None
        The source image overlaid with the coloured masks and centroid dots,
        at exactly ``img.size``; ``None`` when ``img`` is ``None``.
    """
    if img is None:
        return None

    masks = mask_generator.generate(asarray(img))

    # Render on a dedicated, margin-free figure whose pixel size matches the image.
    # This keeps preview pixels aligned with source pixels, so no cropping of
    # white margins is needed (such a crop breaks on images that contain white
    # borders). It also avoids drawing on, and leaking, the implicit global figure.
    width, height = img.size
    dpi = 100
    fig = plt.figure(figsize=(width / dpi, height / dpi), dpi=dpi)
    try:
        ax = fig.add_axes([0, 0, 1, 1])  # axes fill the whole canvas
        ax.imshow(img)
        show_anns(masks)  # draws on the current axes, i.e. `ax`
        ax.axis('off')

        fig.canvas.draw()
        # Public accessor for the rendered RGBA buffer; drop the alpha channel.
        rendered = np.asarray(fig.canvas.buffer_rgba())[..., :3].copy()
    finally:
        plt.close(fig)

    # The canvas size can differ from the image by a pixel because of rounding.
    return Image.fromarray(rendered).resize(img.size)

In [None]:
import gradio as gr
def on_select(masked_image, source_image, evt: gr.SelectData):
    """Return the mask whose centroid is closest to the point the user clicked.

    Parameters
    ----------
    masked_image : PIL.Image.Image
        The preview that was clicked. Unused here, but Gradio passes it because
        it is listed as an input of the event.
    source_image : PIL.Image.Image or None
        The original image; it is segmented again to obtain the masks.
    evt : gr.SelectData
        Select event; ``evt.index`` is taken as the clicked position and compared
        against the ``(cx, cy)`` centroids (assumes ``[x, y]`` order — TODO confirm
        against the installed Gradio version).

    Returns
    -------
    PIL.Image.Image or None
        The selected segmentation mask, or ``None`` when there is no source image
        or no mask centroid to choose from.
    """
    if source_image is None:
        return None

    click_xy = np.asarray(evt.index, dtype=float)

    # NOTE: this re-runs the mask generator on every click.
    masks = mask_generator.generate(asarray(source_image))

    # show_anns draws on the current figure as a side effect; give it a scratch
    # figure and close it so figures do not accumulate across clicks.
    scratch_fig = plt.figure()
    try:
        centroids = show_anns(masks) or {}
    finally:
        plt.close(scratch_fig)

    if not centroids:
        return None

    # Nearest centroid by Euclidean distance (first one wins on ties).
    nearest_index = min(
        centroids,
        key=lambda idx: np.linalg.norm(np.asarray(centroids[idx]) - click_xy),
    )

    segmentation_mask = masks[nearest_index]['segmentation']
    stable_diffusion_mask = PIL.Image.fromarray(segmentation_mask)
    return stable_diffusion_mask

In [None]:
def mask_final(source_image, stable_diffusion_mask, inpainting_prompt,
               seed=155, guidance_scale=30, num_inference_steps=150):
  """Inpaint the masked region of ``source_image`` according to a text prompt.

  Parameters
  ----------
  source_image : PIL.Image.Image
      Image to edit.
  stable_diffusion_mask : PIL.Image.Image
      Mask image passed to the pipeline as ``mask_image``.
  inpainting_prompt : str
      Text prompt passed to the pipeline.
  seed : int, optional
      Seed for the CUDA random generator, so results are reproducible.
  guidance_scale : float, optional
      Passed through to the pipeline.
  num_inference_steps : int, optional
      Passed through to the pipeline.

  Returns
  -------
  The first image in the pipeline's ``images`` output.

  Raises
  ------
  ValueError
      If the source image or the mask is missing (e.g. the button was pressed
      before an image was uploaded or a mask was selected).
  """
  if source_image is None or stable_diffusion_mask is None:
    raise ValueError("Both a source image and a selected mask are required before inpainting.")

  generator = torch.Generator(device="cuda").manual_seed(seed)
  result = pipe(
      prompt=inpainting_prompt,
      guidance_scale=guidance_scale,
      num_inference_steps=num_inference_steps,
      generator=generator,
      image=source_image,
      mask_image=stable_diffusion_mask,
  )
  return result.images[0]

In [None]:
# Final code -- just an index improvement
import gradio as gr

with gr.Blocks() as demo:
    gr.Markdown("# Stable Diffusion with Segment Anything!")

    # Row 1: uploaded source image (left) and the clickable segmentation preview (right).
    with gr.Row():
        with gr.Column():
            image = gr.Image(type='pil')
        with gr.Column():
            masked_image = gr.Image(type='pil')

    # Row 2: the mask picked by the click (left) and the inpainting prompt (right).
    with gr.Row():
        with gr.Column():
            mask = gr.Image(type='pil')
        with gr.Column():
            prompt = gr.Textbox(placeholder="Processing Prompt", label='Prompt')

    # Row 3: button that triggers inpainting.
    with gr.Row():
        button_final = gr.Button("Process Image")

    # Row 4: the inpainted result.
    with gr.Row():
        with gr.Column():
            output = gr.Image(type='pil')

    # Event wiring: upload -> segment, click on preview -> choose mask, button -> inpaint.
    image.change(sam_model, inputs=image, outputs=masked_image)
    masked_image.select(on_select, inputs=[masked_image, image], outputs=mask)
    button_final.click(mask_final, inputs=[image, mask, prompt], outputs=output)

demo.launch(debug=True)

Setting queue=True in a Colab notebook requires sharing enabled. Setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
Running on public URL: https://f02a2aa09eb56ea4fb.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)


  0%|          | 0/150 [00:00<?, ?it/s]

Keyboard interruption in main thread... closing server.
Killing tunnel 127.0.0.1:7860 <> https://f02a2aa09eb56ea4fb.gradio.live


