In [1]:
import cv2
from ultralytics import YOLO
import numpy as np

In [2]:
# Function to get class colors
def getColours(cls_num):
    base_colors = [(255, 0, 0), (0, 255, 0), (0, 0, 255)]
    color_index = cls_num % len(base_colors)
    increments = [(1, -2, 1), (-2, 1, -1), (1, -1, 2)]
    color = [base_colors[color_index][i] + increments[color_index][i] * 
    (cls_num // len(base_colors)) % 256 for i in range(3)]
    return tuple(color)

In [3]:
yolo = YOLO('yolov8l.pt')

In [4]:
def crop_image(img, y1, y2, x1, x2):
    result = np.zeros(img.shape, dtype=np.uint8)
    img[y1:y2, x1:x2] = result[y1:y2, x1:x2]
    return result

In [5]:
def mask_image(img, y1, y2, x1, x2):
    result = np.full(img.shape, 255, dtype=np.uint8)
    img[y1:y2, x1:x2] = result[y1:y2, x1:x2]
    return result

In [25]:
img = cv2.imread("./testdata/wall.webp")

# cv2.imshow('original image', img)

results = yolo.track(img, stream=False)

mask = np.zeros(img.shape, dtype=np.uint8)

while True:
    for result in results:
        # get the classes names
        classes_names = result.names

        print(result)

        # iterate over each box
        for box in result.boxes:
            # check if confidence is there
            if box.conf[0] > 0.6:
                # get coordinates
                [x1, y1, x2, y2] = box.xyxy[0]
                # convert to int
                x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2)

                #crop image
                crop_image(img, y1, y2, x1, x2)

                # mask image
                mask_image(mask, y1, y2, x1, x2)
                
    # show the cropped image
    # cv2.imshow('cropped image', img)
    cv2.imwrite('./testdata/crop.jpg', img)

    # show the mask image
    # cv2.imshow('mask image', mask)
    cv2.imwrite('./testdata/mask.jpg', mask)

    img = cv2.imread('./testdata/crop.jpg')
    mask = cv2.imread('./testdata/mask.jpg', cv2.IMREAD_GRAYSCALE)
    # mask = cv2.cvtColor(mask, cv2.COLOR_BGR2GRAY)

    print(mask.shape)

    # Inpaint
    result = cv2.inpaint(img, mask, inpaintRadius=5, flags=cv2.INPAINT_NS)

    # Show final image
    cv2.imshow("ai filled image", result)

    # break the loop if 'q' is pressed
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# destroy all windows
cv2.destroyAllWindows()


0: 288x640 1 cup, 1 couch, 2 vases, 320.7ms
Speed: 0.0ms preprocess, 320.7ms inference, 0.0ms postprocess per image at shape (1, 3, 288, 640)
ultralytics.engine.results.Results object with attributes:

boxes: ultralytics.engine.results.Boxes object
keypoints: None
masks: None
names: {0: 'person', 1: 'bicycle', 2: 'car', 3: 'motorcycle', 4: 'airplane', 5: 'bus', 6: 'train', 7: 'truck', 8: 'boat', 9: 'traffic light', 10: 'fire hydrant', 11: 'stop sign', 12: 'parking meter', 13: 'bench', 14: 'bird', 15: 'cat', 16: 'dog', 17: 'horse', 18: 'sheep', 19: 'cow', 20: 'elephant', 21: 'bear', 22: 'zebra', 23: 'giraffe', 24: 'backpack', 25: 'umbrella', 26: 'handbag', 27: 'tie', 28: 'suitcase', 29: 'frisbee', 30: 'skis', 31: 'snowboard', 32: 'sports ball', 33: 'kite', 34: 'baseball bat', 35: 'baseball glove', 36: 'skateboard', 37: 'surfboard', 38: 'tennis racket', 39: 'bottle', 40: 'wine glass', 41: 'cup', 42: 'fork', 43: 'knife', 44: 'spoon', 45: 'bowl', 46: 'banana', 47: 'apple', 48: 'sandwich',

In [None]:
from diffusers import StableDiffusionInpaintPipeline
import torch
from PIL import Image

model_id = "runwayml/stable-diffusion-inpainting"
pipe = StableDiffusionInpaintPipeline.from_pretrained(model_id, torch_dtype=torch.float16)
pipe.to("cuda")  # Use GPU for better performance

image = Image.open("wall.webp").convert("RGB")
mask = Image.open("mask.jpg").convert("L")  # White areas are to be filled

result = pipe(prompt="A realistic completion", image=image, mask_image=mask).images[0]
result.save("inpainted_image.png")