In [1]:
# Print the NVIDIA driver / GPU status table so the attached accelerator and
# its free memory are visible before anything heavy is loaded.
!nvidia-smi

Fri Oct  4 19:27:06 2024       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.104.05             Driver Version: 535.104.05   CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  NVIDIA A100-SXM4-40GB          Off | 00000000:00:04.0 Off |                    0 |
| N/A   31C    P0              46W / 400W |      2MiB / 40960MiB |      0%      Default |
|                                         |                      |             Disabled |
+-----------------------------------------+----------------------+----------------------+
                                                                    

In [8]:
!pip install --upgrade diffusers[torch] transformers accelerate

Collecting transformers
  Downloading transformers-4.45.1-py3-none-any.whl.metadata (44 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.4/44.4 kB[0m [31m627.5 kB/s[0m eta [36m0:00:00[0m
Collecting diffusers[torch]
  Downloading diffusers-0.30.3-py3-none-any.whl.metadata (18 kB)
Collecting tokenizers<0.21,>=0.20 (from transformers)
  Downloading tokenizers-0.20.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.7 kB)
Downloading transformers-4.45.1-py3-none-any.whl (9.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.9/9.9 MB[0m [31m27.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading tokenizers-0.20.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.9/2.9 MB[0m [31m88.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading diffusers-0.30.3-py3-none-any.whl (2.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.7/2.7 MB

In [2]:
!pip install 'git+https://github.com/facebookresearch/segment-anything.git'
# Change to the home directory (optional in Colab, as it starts in /content)
import os
os.chdir('/content')

# Create a weights directory
!mkdir weights

# Change to the weights directory
os.chdir('weights')

# Download the weights file
!wget -q https://dl.fbaipublicfiles.com/segment_anything/sam_vit_h_4b8939.pth
# Define the checkpoint path
CHECKPOINT_PATH = os.path.join('/content', 'weights', 'sam_vit_h_4b8939.pth')

# Check if the checkpoint file exists
print(CHECKPOINT_PATH, "; exist:", os.path.isfile(CHECKPOINT_PATH))
import torch

DEVICE = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
MODEL_TYPE = "vit_h"

Collecting git+https://github.com/facebookresearch/segment-anything.git
  Cloning https://github.com/facebookresearch/segment-anything.git to /tmp/pip-req-build-ch2dnl_i
  Running command git clone --filter=blob:none --quiet https://github.com/facebookresearch/segment-anything.git /tmp/pip-req-build-ch2dnl_i
  Resolved https://github.com/facebookresearch/segment-anything.git to commit dca509fe793f601edb92606367a655c15ac00fdf
  Preparing metadata (setup.py) ... [?25l[?25hdone
/content/weights/sam_vit_h_4b8939.pth ; exist: True


In [6]:
import os
import cv2
import numpy as np
import torch
from segment_anything import sam_model_registry, SamAutomaticMaskGenerator

# Configuration
# These module-level names are read by detect_and_segment() below when it
# builds the SAM model.
CHECKPOINT_PATH = '/content/weights/sam_vit_h_4b8939.pth'  # Path to your SAM model
MODEL_TYPE = "vit_h"  # Model type (key used to index sam_model_registry)
# Use the first CUDA device when one is available, otherwise fall back to CPU.
DEVICE = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

# Load YOLOv5 model
# Fetches the 'yolov5s' entry point of the 'ultralytics/yolov5' repo through
# torch.hub; the resulting global `model` is what detect_and_segment() calls.
model = torch.hub.load('ultralytics/yolov5', 'yolov5s', pretrained=True)

# Function to detect an object, crop its ROI, and segment the object from the ROI
def detect_and_segment(image_path, target_object):
    """Detect `target_object` with YOLOv5 and isolate it inside its box with SAM.

    The first detection whose class name equals `target_object` is cropped out
    of the image; SAM's automatic mask generator is run on that crop and the
    mask covering the most pixels is applied to it.

    Relies on the module-level `model` (YOLOv5), `MODEL_TYPE`,
    `CHECKPOINT_PATH` and `DEVICE`.

    Args:
        image_path (str): Path of the image to read with OpenCV.
        target_object (str): Class name to look for, compared against
            `model.names` (e.g. 'chair').

    Returns:
        numpy.ndarray | None: The BGR crop of the detected box with every pixel
        outside the largest SAM mask set to zero, or None when the image cannot
        be read, nothing matching is detected, or SAM yields no usable mask.
    """
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread reports a missing/unreadable file by returning None.
        print(f"Could not read image '{image_path}'.")
        return None

    # OpenCV decodes to BGR, while YOLOv5's hub wrapper expects RGB for NumPy
    # input, so convert before inference.
    results = model(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))

    img_h, img_w = img.shape[:2]

    # Each row of xyxyn[0] is (x1, y1, x2, y2, confidence, class) with the box
    # expressed as fractions of the image width/height.
    for x1n, y1n, x2n, y2n, conf, cls in results.xyxyn[0].tolist():
        if model.names[int(cls)] != target_object:
            continue

        print(f"{target_object.capitalize()} detected with confidence {conf:.2f} at location: ({x1n:.2f}, {y1n:.2f}, {x2n:.2f}, {y2n:.2f})")

        # Convert to pixel coordinates, clamped to the image so the slice below
        # can never wrap around through a negative index.
        x1 = min(max(int(x1n * img_w), 0), img_w)
        x2 = min(max(int(x2n * img_w), 0), img_w)
        y1 = min(max(int(y1n * img_h), 0), img_h)
        y2 = min(max(int(y2n * img_h), 0), img_h)

        roi = img[y1:y2, x1:x2]
        if roi.size == 0:
            # A degenerate box cannot be segmented; try the next detection.
            print("Detected box is empty after conversion to pixels; skipping it.")
            continue
        print("Region of Interest (ROI) extracted.")

        return _isolate_largest_segment(roi)

    # Reported once, after every detection has been inspected.
    print(f"No {target_object} detected.")
    return None


def _isolate_largest_segment(roi):
    """Run SAM on a BGR crop and keep only the pixels of its largest mask."""
    # Load SAM model
    sam = sam_model_registry[MODEL_TYPE](checkpoint=CHECKPOINT_PATH).to(device=DEVICE)
    mask_generator = SamAutomaticMaskGenerator(sam)

    # SAM is given RGB input, so convert the OpenCV crop first.
    sam_result = mask_generator.generate(cv2.cvtColor(roi, cv2.COLOR_BGR2RGB))

    if not sam_result:
        print("No segments found in the ROI image.")
        return None

    print(f"Found {len(sam_result)} segments.")

    # Pick the mask with the most foreground pixels (first one wins on ties).
    best = max(sam_result, key=lambda m: np.count_nonzero(m['segmentation']))
    max_area = int(np.count_nonzero(best['segmentation']))
    if max_area == 0:
        print("No valid mask found.")
        return None

    print(f"Applying the largest mask with area {max_area}.")

    # The mask already has the crop's height/width, so it can be applied
    # directly; no resize or second masking pass is needed.
    largest_mask = best['segmentation'].astype(np.uint8) * 255
    return cv2.bitwise_and(roi, roi, mask=largest_mask)

# Function to remove background from an image
def remove_background(image_path, output_path, target_object='chair'):
    """Segment `target_object` out of an image and write the result to disk.

    Delegates detection and masking to detect_and_segment(); the returned crop
    (background pixels zeroed) is written with cv2.imwrite.

    Args:
        image_path (str): Path of the input image.
        output_path (str): Where to write the isolated object.
        target_object (str): Class name to isolate. Defaults to 'chair', which
            is what this function always used before the parameter existed.

    Returns:
        None. Progress and failures are reported with print().
    """
    # detect_and_segment reads the image itself, so it is not loaded here.
    segmented = detect_and_segment(image_path, target_object)

    if segmented is None:
        print(f"No {target_object} detected in the image.")
        return

    # cv2.imwrite returns False when the file could not be written; do not
    # claim success in that case.
    if not cv2.imwrite(output_path, segmented):
        print(f"Failed to write '{output_path}'.")
        return

    print(f"Background removed and saved as '{output_path}'.")

# Example usage
# Isolate the chair found in chair.jpg and write it to isolated_chair.png;
# a later cell opens that output file as the conditioning image for the
# view-synthesis pipeline.
input_image_path = '/content/chair.jpg'  # Path to your image
output_image_path = '/content/isolated_chair.png'   # Output path for the isolated image
remove_background(input_image_path, output_image_path)

Using cache found in /root/.cache/torch/hub/ultralytics_yolov5_master
YOLOv5 🚀 2024-10-4 Python-3.10.12 torch-2.4.1+cu121 CUDA:0 (NVIDIA A100-SXM4-40GB, 40514MiB)

Fusing layers... 
YOLOv5s summary: 213 layers, 7225885 parameters, 0 gradients, 16.4 GFLOPs
Adding AutoShape... 
  with amp.autocast(autocast):


Chair detected with confidence 0.83 at location: (0.30, 0.37, 0.67, 0.90)
Region of Interest (ROI) extracted.
Found 75 segments.
Applying the largest mask with area 9804.
Background removed and saved as '/content/isolated_chair.png'.


In [11]:
import torch
from PIL import Image
from diffusers import DiffusionPipeline, EulerAncestralDiscreteScheduler

from IPython.display import display

# Decide the device first so the dtype can match it: half precision is only
# requested when a GPU is present, full precision on the CPU fallback.
pipeline_device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
pipeline_dtype = torch.float16 if pipeline_device.startswith('cuda') else torch.float32

# Load the pipeline
pipeline = DiffusionPipeline.from_pretrained(
    "sudo-ai/zero123plus-v1.1",
    custom_pipeline="sudo-ai/zero123plus-pipeline",
    torch_dtype=pipeline_dtype
)

# Update the scheduler
pipeline.scheduler = EulerAncestralDiscreteScheduler.from_config(
    pipeline.scheduler.config, timestep_spacing='trailing'
)

# Move the pipeline to the selected device
pipeline.to(pipeline_device)

# Load the local image
cond = Image.open("/content/isolated_chair.png")  # Path to your isolated chair image

# Define rotation angles
azimuth_angle = 72  # Rotation around the Y-axis
polar_angle = 0     # No rotation around the X-axis

# Add rotation to the prompt
# NOTE(review): the prompt is forwarded to the custom pipeline unchanged;
# whether it actually honours textual rotation instructions cannot be told
# from this notebook — confirm against the pipeline's documentation.
prompt = f"Change pose of the object by azimuth {azimuth_angle} degrees; polar {polar_angle} degrees."

# Run the pipeline with the prompt
result = pipeline(cond, prompt=prompt, num_inference_steps=28).images[0]

# Display and save the result
# display() renders inline in the notebook; PIL's .show() hands the image to an
# external viewer, which a hosted notebook does not have.
display(result)
result.save("/content/rotated_chair.png")  # Save the rotated image


text_encoder/model.safetensors not found


Loading pipeline components...:   0%|          | 0/8 [00:00<?, ?it/s]

An error occurred while trying to fetch /root/.cache/huggingface/hub/models--sudo-ai--zero123plus-v1.1/snapshots/36df7de980afd15f80b2e1a4e9a920d7020e2654/unet: Error no file named diffusion_pytorch_model.safetensors found in directory /root/.cache/huggingface/hub/models--sudo-ai--zero123plus-v1.1/snapshots/36df7de980afd15f80b2e1a4e9a920d7020e2654/unet.
Defaulting to unsafe serialization. Pass `allow_pickle=False` to raise an error instead.
An error occurred while trying to fetch /root/.cache/huggingface/hub/models--sudo-ai--zero123plus-v1.1/snapshots/36df7de980afd15f80b2e1a4e9a920d7020e2654/vae: Error no file named diffusion_pytorch_model.safetensors found in directory /root/.cache/huggingface/hub/models--sudo-ai--zero123plus-v1.1/snapshots/36df7de980afd15f80b2e1a4e9a920d7020e2654/vae.
Defaulting to unsafe serialization. Pass `allow_pickle=False` to raise an error instead.


  0%|          | 0/28 [00:00<?, ?it/s]

In [28]:
import torch
from PIL import Image
from diffusers import DiffusionPipeline, EulerAncestralDiscreteScheduler

from IPython.display import display

# Decide the device first so the dtype can match it: half precision is only
# requested when a GPU is present, full precision on the CPU fallback.
pipeline_device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
pipeline_dtype = torch.float16 if pipeline_device.startswith('cuda') else torch.float32

# Load the pipeline
pipeline = DiffusionPipeline.from_pretrained(
    "sudo-ai/zero123plus-v1.1",
    custom_pipeline="sudo-ai/zero123plus-pipeline",
    torch_dtype=pipeline_dtype
)

# Update the scheduler
pipeline.scheduler = EulerAncestralDiscreteScheduler.from_config(
    pipeline.scheduler.config, timestep_spacing='trailing'
)

# Move the pipeline to the selected device
pipeline.to(pipeline_device)

# Load the local image
cond = Image.open("/content/chair.jpg")  # Path to the original (un-segmented) chair photo

# Define rotation angles
azimuth_angle = 72  # Rotation around the Y-axis
polar_angle = 0     # No rotation around the X-axis

# Add rotation to the prompt
# NOTE(review): the prompt is forwarded to the custom pipeline unchanged;
# whether it actually honours textual rotation instructions cannot be told
# from this notebook — confirm against the pipeline's documentation.
prompt = f"Change pose of the chair and rotate it by azimuth {azimuth_angle} degrees; polar {polar_angle} degrees and return 1 image."

# Run the pipeline with the prompt
results = pipeline(cond, prompt=prompt, num_inference_steps=28)

# Print the length of the images list to confirm (should be 1)
print(len(results.images))

# Get the first image from the results
# A later cell splits this single image into a 3x2 grid of tiles.
result_image = results.images[0]  # Access the first (and only) generated image

# Display and save the result
# display() renders inline in the notebook; PIL's .show() hands the image to an
# external viewer, which a hosted notebook does not have.
display(result_image)
result_image.save("/content/rotated_chair.png")  # Save the rotated image

vision_encoder/model.safetensors not found


Loading pipeline components...:   0%|          | 0/8 [00:00<?, ?it/s]

An error occurred while trying to fetch /root/.cache/huggingface/hub/models--sudo-ai--zero123plus-v1.1/snapshots/36df7de980afd15f80b2e1a4e9a920d7020e2654/vae: Error no file named diffusion_pytorch_model.safetensors found in directory /root/.cache/huggingface/hub/models--sudo-ai--zero123plus-v1.1/snapshots/36df7de980afd15f80b2e1a4e9a920d7020e2654/vae.
Defaulting to unsafe serialization. Pass `allow_pickle=False` to raise an error instead.
An error occurred while trying to fetch /root/.cache/huggingface/hub/models--sudo-ai--zero123plus-v1.1/snapshots/36df7de980afd15f80b2e1a4e9a920d7020e2654/unet: Error no file named diffusion_pytorch_model.safetensors found in directory /root/.cache/huggingface/hub/models--sudo-ai--zero123plus-v1.1/snapshots/36df7de980afd15f80b2e1a4e9a920d7020e2654/unet.
Defaulting to unsafe serialization. Pass `allow_pickle=False` to raise an error instead.


  0%|          | 0/28 [00:00<?, ?it/s]

1


In [24]:
!git clone https://github.com/ultralytics/yolov5  # Clone the YOLOv5 repository
%cd yolov5
!pip install -r requirements.txt  # Install the dependencies

Cloning into 'yolov5'...
remote: Enumerating objects: 16965, done.[K
remote: Counting objects: 100% (160/160), done.[K
remote: Compressing objects: 100% (110/110), done.[K
remote: Total 16965 (delta 81), reused 102 (delta 50), pack-reused 16805 (from 1)[K
Receiving objects: 100% (16965/16965), 15.71 MiB | 28.21 MiB/s, done.
Resolving deltas: 100% (11619/11619), done.
/content/weights/yolov5
Collecting thop>=0.1.1 (from -r requirements.txt (line 14))
  Downloading thop-0.1.1.post2209072238-py3-none-any.whl.metadata (2.7 kB)
Downloading thop-0.1.1.post2209072238-py3-none-any.whl (15 kB)
Installing collected packages: thop
Successfully installed thop-0.1.1.post2209072238


In [38]:
import os
import cv2
import numpy as np
import torch
from segment_anything import sam_model_registry, SamAutomaticMaskGenerator

# Configuration
# NOTE: these three values repeat the ones defined in the segmentation cell;
# detect_and_save_roi() in this cell does not reference them.
CHECKPOINT_PATH = '/content/weights/sam_vit_h_4b8939.pth'  # Path to your SAM model
MODEL_TYPE = "vit_h"  # Model type
# Use the first CUDA device when one is available, otherwise fall back to CPU.
DEVICE = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

# Load YOLOv5 model
# Fetches the 'yolov5s' entry point of the 'ultralytics/yolov5' repo through
# torch.hub; detect_and_save_roi() below calls this global `model`.
model = torch.hub.load('ultralytics/yolov5', 'yolov5s', pretrained=True)

# Function to detect an object and save the ROI
def detect_and_save_roi(image_path, target_object, roi_output_path='/content/roi_chair.png'):
    """Detect `target_object` with YOLOv5 and save its bounding-box crop.

    The first detection whose class name equals `target_object` is cropped from
    the image and written to `roi_output_path`. Relies on the module-level
    `model` (YOLOv5).

    Args:
        image_path (str): Path of the image to read with OpenCV.
        target_object (str): Class name to look for, compared against
            `model.names` (e.g. 'chair').
        roi_output_path (str): Destination of the cropped ROI. Defaults to the
            path this function previously hard-coded.

    Returns:
        tuple: `(roi_output_path, position)` where `position` is
        `(x1, y1, x2, y2, h, w)` — the box in pixels followed by the full image
        height and width. `(None, None)` when the image cannot be read or no
        matching object is detected (previously the latter raised
        UnboundLocalError).
    """
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread reports a missing/unreadable file by returning None.
        print(f"Could not read image '{image_path}'.")
        return None, None

    # OpenCV decodes to BGR, while YOLOv5's hub wrapper expects RGB for NumPy
    # input, so convert before inference.
    results = model(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))

    h, w = img.shape[:2]

    # Each row of xyxyn[0] is (x1, y1, x2, y2, confidence, class) with the box
    # expressed as fractions of the image width/height.
    for x1n, y1n, x2n, y2n, conf, cls in results.xyxyn[0].tolist():
        if model.names[int(cls)] != target_object:
            continue

        print(f"{target_object.capitalize()} detected with confidence {conf:.2f} at location: ({x1n:.2f}, {y1n:.2f}, {x2n:.2f}, {y2n:.2f})")

        # Convert to pixel coordinates, clamped to the image so the slice below
        # can never wrap around through a negative index.
        x1 = min(max(int(x1n * w), 0), w)
        x2 = min(max(int(x2n * w), 0), w)
        y1 = min(max(int(y1n * h), 0), h)
        y2 = min(max(int(y2n * h), 0), h)

        roi = img[y1:y2, x1:x2]
        if roi.size == 0:
            # An empty crop cannot be written; try the next detection.
            print("Detected box is empty after conversion to pixels; skipping it.")
            continue
        print("Region of Interest (ROI) extracted.")

        # Save the ROI as a separate image
        cv2.imwrite(roi_output_path, roi)
        print(f"Region of Interest saved as '{os.path.basename(roi_output_path)}'.")

        # Stop at the first match, so the saved file and the returned position
        # always describe the same detection instead of being overwritten by
        # every further match.
        return roi_output_path, (x1, y1, x2, y2, h, w)

    print(f"No {target_object} detected.")
    return None, None

# Example usage
image_path = '/content/chair.jpg'  # Replace with your image path
target_object = 'chair'  # The object you want to save
# `coordinates` is unpacked as (x1, y1, x2, y2, height, width) by the overlay
# cell further down, so this cell has to run before that one.
roi_path, coordinates = detect_and_save_roi(image_path, target_object)

print(f"ROI saved at: {roi_path}")
print(f"Coordinates of the detected object: {coordinates}")


Using cache found in /root/.cache/torch/hub/ultralytics_yolov5_master
YOLOv5 🚀 2024-10-4 Python-3.10.12 torch-2.4.1+cu121 CUDA:0 (NVIDIA A100-SXM4-40GB, 40514MiB)

Fusing layers... 
YOLOv5s summary: 213 layers, 7225885 parameters, 0 gradients, 16.4 GFLOPs
Adding AutoShape... 


Chair detected with confidence 0.83 at location: (0.30, 0.37, 0.67, 0.90)
Region of Interest (ROI) extracted.
Region of Interest saved as 'roi_chair.png'.
ROI saved at: /content/roi_chair.png
Coordinates of the detected object: (95, 118, 213, 289, 320, 320)


  with amp.autocast(autocast):


In [29]:
from PIL import Image

# Step 1: Crop the rotated chair image into a grid of tiles (3 rows x 2 columns by default)
def crop_rotated_image(image, rows=3, cols=2,
                       output_pattern="/content/rotated_chair_angle_{index}.png"):
    """Split an image into a `rows` x `cols` grid, saving and returning each tile.

    Tiles are produced row by row, left to right. Tile sizes use integer
    division, so any remainder pixels on the right/bottom edge are dropped.

    Args:
        image: Image-like object exposing `.size` as `(width, height)` and
            `.crop(box)`; the returned tiles must expose `.save(path)`.
        rows (int): Number of grid rows.
        cols (int): Number of grid columns.
        output_pattern (str): `str.format` template for each tile's file path;
            `{index}` is the tile's row-major position starting at 0. The
            default reproduces the paths that used to be hard-coded.

    Returns:
        list: The cropped tiles in row-major order.
    """
    width, height = image.size
    part_width = width // cols    # Width of each cropped part
    part_height = height // rows  # Height of each cropped part
    cropped_images = []

    for row in range(rows):
        top = row * part_height
        for col in range(cols):
            left = col * part_width

            # Crop the tile and keep it for the caller.
            tile = image.crop((left, top, left + part_width, top + part_height))
            cropped_images.append(tile)

            # Persist the tile under its row-major index.
            tile.save(output_pattern.format(index=row * cols + col))

    return cropped_images

# Load the rotated chair image
# This file is written by the view-synthesis cell (result.save / result_image.save).
rotated_image_path = "/content/rotated_chair.png"  # Path to the rotated chair image
rotated_image = Image.open(rotated_image_path).convert("RGBA")

# Step 2: Crop the rotated image into 3 rows and 2 columns and save them
cropped_rotated_images = crop_rotated_image(rotated_image, rows=3, cols=2)

# Step 3: Load each saved rotated chair image for future use
# range(6) matches the 3 x 2 = 6 tiles written by crop_rotated_image above.
rotated_chair_images = [
    Image.open(f"/content/rotated_chair_angle_{i}.png").convert("RGBA")  # Load all six cropped images
    for i in range(6)
]

# Now you can use the `rotated_chair_images` for blending or other operations


In [44]:
import torch
from PIL import Image
import matplotlib.pyplot as plt
import cv2

from IPython.display import display

# Load the YOLOv5 model (pre-trained on COCO dataset)
model = torch.hub.load('ultralytics/yolov5', 'yolov5s', pretrained=True)

# Load the image
image_path = "/content/rotated_chair_angle_0.png"  # Path to your image
image = Image.open(image_path)  # Not used by the code below.

# Perform inference (the model is given the path and loads the file itself)
results = model(image_path)

# Extract results
results_df = results.pandas().xyxy[0]  # Get predictions as a Pandas DataFrame
print(results_df)  # Print results

# Set a threshold for detection
threshold = 0.5  # Adjust this as necessary
crops = []  # List to store cropped images

# Load the original image using OpenCV
original_image = cv2.imread(image_path)

# Keep only confident "chair" detections, then crop each box from the image.
chair_rows = results_df[(results_df['confidence'] >= threshold) & (results_df['name'] == "chair")]
for det in chair_rows.itertuples(index=False):
    x1, y1, x2, y2 = int(det.xmin), int(det.ymin), int(det.xmax), int(det.ymax)
    crops.append(original_image[y1:y2, x1:x2])

# Convert crops to PIL Images, display them inline and save them
for i, crop in enumerate(crops):
    cropped_image_pil = Image.fromarray(cv2.cvtColor(crop, cv2.COLOR_BGR2RGB))
    # display() renders inline; PIL's .show() needs an external viewer.
    display(cropped_image_pil)
    # Save under /content explicitly: other cells read
    # "/content/cropped_chair_0.png", and a relative path would land wherever
    # the kernel's working directory happens to be.
    cropped_image_pil.save(f"/content/cropped_chair_{i}.png")

Using cache found in /root/.cache/torch/hub/ultralytics_yolov5_master
YOLOv5 🚀 2024-10-4 Python-3.10.12 torch-2.4.1+cu121 CUDA:0 (NVIDIA A100-SXM4-40GB, 40514MiB)

Fusing layers... 
YOLOv5s summary: 213 layers, 7225885 parameters, 0 gradients, 16.4 GFLOPs
Adding AutoShape... 
  with amp.autocast(autocast):


        xmin        ymin       xmax        ymax  confidence  class   name
0  98.952385  110.911385  212.19046  267.330475    0.840804     56  chair


In [46]:
import torch
import numpy as np
import cv2
from PIL import Image

def remove_background(image_path, model, threshold=0.5, target_name="chair"):
    """
    Black out everything outside the bounding boxes of detected objects.

    This is a box-level mask, not a segmentation: every pixel inside a
    qualifying detection's rectangle is kept, every pixel outside is set to
    zero.

    NOTE: this definition shares its name with the earlier two-argument
    remove_background(image_path, output_path) and replaces it once this cell
    has run.

    Parameters:
        image_path (str): The path to the image file.
        model: The YOLOv5 model used for object detection.
        threshold (float): Minimum confidence for a detection to be kept.
        target_name (str): Class name to keep. Defaults to "chair", the value
            that used to be hard-coded.

    Returns:
        Image: RGB PIL image of the same size as the input, with only the
        kept rectangles left visible.

    Raises:
        FileNotFoundError: If OpenCV cannot read `image_path`.
    """
    # Load the image using OpenCV
    original_image = cv2.imread(image_path)
    if original_image is None:
        # cv2.imread returns None instead of raising; fail with a clear message
        # rather than on the first attribute access further down.
        raise FileNotFoundError(f"Could not read image '{image_path}'.")

    # Perform inference
    results = model(image_path)

    # Extract results
    results_df = results.pandas().xyxy[0]  # Get predictions as a Pandas DataFrame

    # Single-channel mask matching the image's height and width
    mask = np.zeros(original_image.shape[:2], dtype=np.uint8)

    kept = results_df[(results_df['confidence'] >= threshold) & (results_df['name'] == target_name)]
    for det in kept.itertuples(index=False):
        top_left = (int(det.xmin), int(det.ymin))
        bottom_right = (int(det.xmax), int(det.ymax))

        # Mark the detection's rectangle as "keep" (255) in the mask.
        cv2.rectangle(mask, top_left, bottom_right, 255, thickness=cv2.FILLED)

    # Zero every pixel whose mask value is 0.
    result_image = cv2.bitwise_and(original_image, original_image, mask=mask)

    # Convert the result image to PIL format (OpenCV is BGR, PIL expects RGB)
    return Image.fromarray(cv2.cvtColor(result_image, cv2.COLOR_BGR2RGB))

# Example usage
if __name__ == "__main__":
    from IPython.display import display

    # Load the YOLOv5 model (pre-trained on COCO dataset)
    model = torch.hub.load('ultralytics/yolov5', 'yolov5s', pretrained=True)

    # Path to the image
    image_path = "/content/cropped_chair_0.png"  # Change this to your image path

    # Remove background and get the result
    result_image = remove_background(image_path, model)

    # Save and show the result
    result_image.save("result_image_with_chair.png")
    # display() renders inline in the notebook; PIL's .show() needs an
    # external viewer.
    display(result_image)


Using cache found in /root/.cache/torch/hub/ultralytics_yolov5_master
YOLOv5 🚀 2024-10-4 Python-3.10.12 torch-2.4.1+cu121 CUDA:0 (NVIDIA A100-SXM4-40GB, 40514MiB)

Fusing layers... 
YOLOv5s summary: 213 layers, 7225885 parameters, 0 gradients, 16.4 GFLOPs
Adding AutoShape... 
  with amp.autocast(autocast):


In [42]:
import torch
from PIL import Image, ImageDraw
from diffusers import DiffusionPipeline

# Step 1: Overlay Function to place rotated chair on original background
def overlay_chair(original_image, rotated_chair, position, target_size):
    """Paste a resized chair image onto a copy of the background.

    Args:
        original_image: PIL image used as the background; it is not modified.
        rotated_chair: PIL image of the chair to overlay.
        position: `(x, y)` of the overlay's top-left corner in background
            pixels.
        target_size: `(width, height)` the chair is resized to before pasting.

    Returns:
        A new RGB PIL image with the same size as `original_image`.
    """
    # Resize rotated chair to fill the target size
    resized_chair = rotated_chair.resize(target_size, Image.LANCZOS)  # LANCZOS for high-quality scaling

    # Build the output as a fresh RGB canvas holding the background.
    blended_image = Image.new("RGB", original_image.size)
    blended_image.paste(original_image, (0, 0))

    # When the chair carries an alpha channel, use it as the paste mask so its
    # transparent pixels leave the background visible instead of overwriting
    # it. Fully opaque images paste exactly as before.
    alpha_mask = resized_chair if resized_chair.mode in ("RGBA", "LA") else None
    blended_image.paste(resized_chair, position, alpha_mask)

    return blended_image

from IPython.display import display

# Step 2: Load original image (background)
original_image_path = "/content/chair.jpg"
original_image = Image.open(original_image_path).convert("RGB")

# Step 3: Load the cropped chair image
# NOTE: this file is produced by the detect-and-crop cell; run that cell first.
cropped_chair_path = "/content/cropped_chair_0.png"
cropped_chair = Image.open(cropped_chair_path).convert("RGBA")

# Step 4: Coordinates and dimensions for the overlay
# `coordinates` comes from detect_and_save_roi: the box in pixels followed by
# the FULL image height and width.
x1, y1, x2, y2, height, width = coordinates
# The chair has to fill the detected box, not the whole image: sizing it to
# (width, height) would stretch it to the full frame and then paste it at
# (x1, y1), overflowing the picture.
target_size = (x2 - x1, y2 - y1)

# Step 5: Create a mask for the inpainting area
mask = Image.new("L", original_image.size, 0)  # A black mask (0 means no inpainting)
mask_draw = ImageDraw.Draw(mask)
mask_draw.rectangle([x1, y1, x2, y2], fill=255)  # White mask for inpainting area

# Step 6: Use Stable Diffusion inpainting to blend the background
# Half precision is only requested when a GPU is present.
pipeline_device = 'cuda' if torch.cuda.is_available() else 'cpu'
pipeline_dtype = torch.float16 if pipeline_device == 'cuda' else torch.float32
pipeline = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-2-inpainting", torch_dtype=pipeline_dtype)
pipeline.to(pipeline_device)

# Define the prompt for blending
prompt = "Blend the elements naturally into the background."

# Run the inpainting process
result = pipeline(
    prompt=prompt,
    image=original_image,
    mask_image=mask,
    num_inference_steps=50,
    guidance_scale=7.5
).images[0]

# The pipeline may return an image at its own working resolution rather than
# the input's (presumably 512x512 by default — confirm against the diffusers
# docs). Bring it back to the original size so the pixel coordinates above
# still address the same spot. This is a no-op when the sizes already agree.
if result.size != original_image.size:
    result = result.resize(original_image.size, Image.LANCZOS)

# Step 7: Overlay the resized cropped chair onto the inpainted background
position = (x1, y1)
final_result_image = overlay_chair(result, cropped_chair, position, target_size)

# Step 8: Save and display the final result
final_result_image.save("final_result_image.png")
# display() renders inline in the notebook; PIL's .show() needs an external viewer.
display(final_result_image)


Loading pipeline components...:   0%|          | 0/6 [00:00<?, ?it/s]



  0%|          | 0/50 [00:00<?, ?it/s]