In [1]:
from simple_lama_inpainting import SimpleLama
from PIL import Image, ImageChops, ImageDraw
import numpy as np
import cv2
import os
from torchmetrics.image.fid import FrechetInceptionDistance
import torch
from diffusers import AutoPipelineForInpainting
from diffusers.utils import load_image, make_image_grid
from PIL import Image
import numpy as np


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# 1. Lama

In [3]:
# Iterationen für Maskenmanipulation
ITERATIONS = 10

# Funktion, um die Maske zu erweitern (Dilation)
def expand_mask(mask, iterations=ITERATIONS):
    mask_array = np.array(mask)
    kernel = np.ones((3, 3), np.uint8)
    expanded_mask = mask_array
    for _ in range(iterations):
        expanded_mask = cv2.dilate(expanded_mask, kernel, iterations=1)
    return Image.fromarray(expanded_mask)

In [4]:

# Directories
images_dir = './Dataset_new/images'
masks_dir = './Dataset_new/masks'

# Initialize output
image_files = {}

# List all files in images and masks directories
image_files = [f for f in os.listdir(images_dir) if f.endswith('.jpg') and f in os.listdir(masks_dir)]

print(f'Found {len(image_files)} images')


Found 230 images


In [5]:
# Initialisiere SimpleLama
simple_lama = SimpleLama()

In [6]:
def image_mask_generator(images_dir, masks_dir, image_files):
    """
    Generator to read images and their corresponding masks.

    Args:
    - images_dir (str): Directory containing the images.
    - masks_dir (str): Directory containing the masks.
    - image_files (list): List of filenames (same for both images and masks).

    Yields:
    - tuple: A tuple containing a PIL image and a corresponding mask (PIL image).
    """
    for fname in image_files:
        image_path = os.path.join(images_dir, fname)  # Full path to the image
        mask_path = os.path.join(masks_dir, fname)  # Full path to the mask
        
        # Load image and mask
        try:
            image = Image.open(image_path).convert('RGB')  # Convert image to RGB
            mask = Image.open(mask_path).convert('L')  # Convert mask to grayscale (L)

            mask = expand_mask(mask) # Expand mask 
            
            # Yield image and mask
            yield fname, image, mask
        except Exception as e:
            print(f"Error loading {fname}: {e}")

In [7]:
import os

def save_result(image, result, fname, results_folder):
    """
    Save the result to a directory.
    
    Args:
    - image (PIL.Image): The input image.
    - result (PIL.Image): The result of the inpainting or processing.
    - fname (str): The filename of the current image.
    - results_folder (str): Folder where results will be saved.
    
    Returns:
    - None
    """

    results_dir = f'results/{results_folder}'
    
    # Ensure the result directory exists
    os.makedirs(results_dir, exist_ok=True)
    
    
    # Create the result path for saving the image
    result_path = os.path.join(results_dir, fname)
    
    # Save the result
    result.save(result_path)
    
    # Print the success message
    print(f"Ergebnis für Bild {fname} gespeichert: {result_path}")





In [8]:
import torch
from torchvision.transforms import ToTensor, Resize, Compose
from torchmetrics.image.fid import FrechetInceptionDistance
import lpips

# Define a fixed image size
target_size = (256, 256)  # Resize all images to this size

# Preprocessing function to resize and convert images to tensors
def preprocess_images(images, target_size=(256, 256)):
    """
    Preprocess images by resizing them to the target size and converting to tensors.

    Args:
    - images (list of PIL Images): The list of images to preprocess.
    - target_size (tuple): The target size to resize images to (default is 256x256).

    Returns:
    - torch.Tensor: A tensor containing all preprocessed images, stacked in a batch.
    """
    transform = Compose([
        Resize(target_size),  # Resize the images to the target size
        ToTensor(),  # Convert the images to tensor
    ])

    # Apply the transformation to each image in the list
    image_tensors = [transform(image) for image in images]

    # Stack the tensors into a single batch
    stacked_tensor = torch.stack(image_tensors)
    
    return stacked_tensor


# Error calculation function to compute the FID score
def calculate_fid(real_images_tensor, infilled_images_tensor):
    """
    Calculate the Fréchet Inception Distance (FID) between real and Lama generated images.

    Args:
    - real_images_tensor (tensor): The tensor of real images.
    - infilled_images_tensor (list of PIL Images): The tensor of infilled images.

    Returns:
    - float: The FID score.
    """
    # Initialize the FID metric
    fid = FrechetInceptionDistance(normalize=True)

    # Update the FID with the preprocessed images
    fid.update(real_images_tensor, real=True)
    fid.update(infilled_images_tensor, real=False)

    # Calculate and return the FID score
    return float(fid.compute())


# LPIPS calculation function to compute the LPIPS score
def calculate_lpips(real_images_tensor, infilled_images_tensor):
    """
    Calculate the Learned Perceptual Image Patch Similarity (LPIPS) between real and Lama generated images.

    Args:
    - real_images_tensor (tensor): The tensor of real images.
    - infilled_images_tensor (list of PIL Images): The tensor of infilled images.

    Returns:
    - float: The LPIPS score.
    """

    # Load the LPIPS model
    loss_fn = lpips.LPIPS(net='alex')  # Using AlexNet architecture for LPIPS


    # Compute the LPIPS score
    lpips_score = loss_fn(real_images_tensor, infilled_images_tensor)

    # Return the average LPIPS score
    return lpips_score.mean().item()



In [9]:
real_images = []
lama_images = []

# Schleife durch alle Bilder und Masken

image_gen = image_mask_generator(images_dir, masks_dir, image_files)
for fname, image, mask in image_gen:

    result = simple_lama(image, mask) # Infill mit Lama und der neuen Maske

    real_images.append(image)
    lama_images.append(result)

    # Append and save result
    save_result(image, result, fname, 'lama')
    

Ergebnis für Bild ADE_train_00001504.jpg gespeichert: results/lama/ADE_train_00001504.jpg
Ergebnis für Bild ADE_train_00003093.jpg gespeichert: results/lama/ADE_train_00003093.jpg
Ergebnis für Bild ADE_train_00001850.jpg gespeichert: results/lama/ADE_train_00001850.jpg
Ergebnis für Bild ADE_train_00008819.jpg gespeichert: results/lama/ADE_train_00008819.jpg
Ergebnis für Bild ADE_train_00008798.jpg gespeichert: results/lama/ADE_train_00008798.jpg
Ergebnis für Bild ADE_train_00025047.jpg gespeichert: results/lama/ADE_train_00025047.jpg
Ergebnis für Bild ADE_train_00005350.jpg gespeichert: results/lama/ADE_train_00005350.jpg
Ergebnis für Bild ADE_train_00022999.jpg gespeichert: results/lama/ADE_train_00022999.jpg
Ergebnis für Bild ADE_train_00002279.jpg gespeichert: results/lama/ADE_train_00002279.jpg
Ergebnis für Bild ADE_train_00019283.jpg gespeichert: results/lama/ADE_train_00019283.jpg
Ergebnis für Bild ADE_train_00008815.jpg gespeichert: results/lama/ADE_train_00008815.jpg
Ergebnis f

In [10]:
# Preprocess the images (resize and convert to tensors)
real_images_tensor = preprocess_images(real_images, target_size)
lama_images_tensor = preprocess_images(lama_images, target_size)

fid_score = calculate_fid(real_images_tensor, lama_images_tensor)

lpips_score = calculate_lpips(real_images_tensor, lama_images_tensor)

print(f"FID: {fid_score}")
print(f"LPIPS: {lpips_score}")

Setting up [LPIPS] perceptual loss: trunk [alex], v[0.1], spatial [off]




Loading model from: /home/ludo/.conda/envs/lama/lib/python3.10/site-packages/lpips/weights/v0.1/alex.pth


  self.load_state_dict(torch.load(model_path, map_location='cpu'), strict=False)


FID: 66.4185562133789
LPIPS: 0.19224275648593903


In [11]:
# Funktion, um OpenCV-Inpainting durchzuführen
def inpaint_with_opencv(image, mask, inpaint_radius=3, method=cv2.INPAINT_TELEA):
    # Konvertiere das PIL-Image zu einem OpenCV-kompatiblen NumPy-Array
    image_array = np.array(image)
    mask_array = np.array(mask)

    # Sicherstellen, dass Maske binär ist (0 und 255)
    _, mask_array = cv2.threshold(mask_array, 1, 255, cv2.THRESH_BINARY)

    # OpenCV-Inpainting
    inpainted_image = cv2.inpaint(image_array, mask_array, inpaint_radius, method)

    return Image.fromarray(cv2.cvtColor(inpainted_image, cv2.COLOR_BGR2RGB))

In [12]:
# Baseline - Opencv

In [13]:


real_images = []
opencv_images = []

# Schleife durch alle Bilder und Masken

image_gen = image_mask_generator(images_dir, masks_dir, image_files)
for fname, image, mask in image_gen:

    result = inpaint_with_opencv(image, mask) # Infill mit Lama und der neuen Maske

    real_images.append(image)
    opencv_images.append(result)

    # Append and save result
    save_result(image, result, fname, 'opencv')

Ergebnis für Bild ADE_train_00001504.jpg gespeichert: results/opencv/ADE_train_00001504.jpg
Ergebnis für Bild ADE_train_00003093.jpg gespeichert: results/opencv/ADE_train_00003093.jpg
Ergebnis für Bild ADE_train_00001850.jpg gespeichert: results/opencv/ADE_train_00001850.jpg
Ergebnis für Bild ADE_train_00008819.jpg gespeichert: results/opencv/ADE_train_00008819.jpg
Ergebnis für Bild ADE_train_00008798.jpg gespeichert: results/opencv/ADE_train_00008798.jpg
Ergebnis für Bild ADE_train_00025047.jpg gespeichert: results/opencv/ADE_train_00025047.jpg
Ergebnis für Bild ADE_train_00005350.jpg gespeichert: results/opencv/ADE_train_00005350.jpg
Ergebnis für Bild ADE_train_00022999.jpg gespeichert: results/opencv/ADE_train_00022999.jpg
Ergebnis für Bild ADE_train_00002279.jpg gespeichert: results/opencv/ADE_train_00002279.jpg
Ergebnis für Bild ADE_train_00019283.jpg gespeichert: results/opencv/ADE_train_00019283.jpg
Ergebnis für Bild ADE_train_00008815.jpg gespeichert: results/opencv/ADE_train_0

In [14]:
# Preprocess the images (resize and convert to tensors)
real_images_tensor = preprocess_images(real_images, target_size)
opencv_images_tensor = preprocess_images(opencv_images, target_size)

fid_score = calculate_fid(real_images_tensor, opencv_images_tensor)

lpips_score = calculate_lpips(real_images_tensor, opencv_images_tensor)

print(f"FID: {fid_score}")
print(f"LPIPS: {lpips_score}")

Setting up [LPIPS] perceptual loss: trunk [alex], v[0.1], spatial [off]
Loading model from: /home/ludo/.conda/envs/lama/lib/python3.10/site-packages/lpips/weights/v0.1/alex.pth
FID: 142.9036865234375
LPIPS: 0.33470818400382996


In [15]:
## Stable Diffusion

In [23]:
import torch
from diffusers import AutoPipelineForInpainting
from diffusers.utils import load_image, make_image_grid
from PIL import Image
import numpy as np

# Pipeline initialisieren
pipeline = AutoPipelineForInpainting.from_pretrained(
    "kandinsky-community/kandinsky-2-2-decoder-inpaint", torch_dtype=torch.float16
)
pipeline.enable_model_cpu_offload()

pipeline.to("cuda")




Loading pipeline components...: 100%|██████████| 3/3 [00:00<00:00,  7.03it/s]
Loading pipeline components...: 100%|██████████| 6/6 [00:00<00:00,  6.47it/s]
It seems like you have activated model offloading by calling `enable_model_cpu_offload`, but are now manually moving the pipeline to GPU. It is strongly recommended against doing so as memory gains from offloading are likely to be lost. Offloading automatically takes care of moving the individual components unet, scheduler, movq, prior_prior, prior_image_encoder, prior_text_encoder, prior_tokenizer, prior_scheduler, prior_image_processor to GPU when needed. To make sure offloading works as expected, you should consider moving the pipeline back to CPU: `pipeline.to('cpu')` or removing the move altogether if you use offloading.


KandinskyV22InpaintCombinedPipeline {
  "_class_name": "KandinskyV22InpaintCombinedPipeline",
  "_diffusers_version": "0.32.2",
  "_name_or_path": "kandinsky-community/kandinsky-2-2-decoder-inpaint",
  "movq": [
    "diffusers",
    "VQModel"
  ],
  "prior_image_encoder": [
    "transformers",
    "CLIPVisionModelWithProjection"
  ],
  "prior_image_processor": [
    "transformers",
    "CLIPImageProcessor"
  ],
  "prior_prior": [
    "diffusers",
    "PriorTransformer"
  ],
  "prior_scheduler": [
    "diffusers",
    "UnCLIPScheduler"
  ],
  "prior_text_encoder": [
    "transformers",
    "CLIPTextModelWithProjection"
  ],
  "prior_tokenizer": [
    "transformers",
    "CLIPTokenizer"
  ],
  "scheduler": [
    "diffusers",
    "DDPMScheduler"
  ],
  "unet": [
    "diffusers",
    "UNet2DConditionModel"
  ]
}

In [26]:
from torchvision import transforms


def sd_inpaint(image, mask):

    # Generator mit festem Seed für reproduzierbare Ergebnisse
    generator = torch.Generator("cuda").manual_seed(92)

    # Define a transformation to convert PIL.Image to a PyTorch tensor
    transform = transforms.ToTensor()
    
    # Convert image and mask to tensors
    image_tensor = transform(image).unsqueeze(0)  # Add batch dimension
    mask_tensor = transform(mask).unsqueeze(0)
    
    # Move tensors to the GPU
    image_tensor = image_tensor.to("cuda")
    mask_tensor = mask_tensor.to("cuda")

    
    # Inpainting mit Stable Diffusion
    result = pipeline(
        prompt='background',
        image=image,
        mask_image=mask,
        generator=generator
    ).images[0]

    return result

    

In [None]:
real_images = []
sd_images = []

# Schleife durch alle Bilder und Masken

image_gen = image_mask_generator(images_dir, masks_dir, image_files)
for fname, image, mask in image_gen:

    result = sd_inpaint(image, mask) # Infill mit sd und neuer maske

    real_images.append(image)
    sd_images.append(result)

    # Append and save result
    save_result(image, result, fname, 'sd')

100%|██████████| 25/25 [00:00<00:00, 139.57it/s]
100%|██████████| 100/100 [00:05<00:00, 17.02it/s]


Ergebnis für Bild ADE_train_00001504.jpg gespeichert: results/sd/ADE_train_00001504.jpg


100%|██████████| 25/25 [00:00<00:00, 146.14it/s]
100%|██████████| 100/100 [00:04<00:00, 21.11it/s]


Ergebnis für Bild ADE_train_00003093.jpg gespeichert: results/sd/ADE_train_00003093.jpg


100%|██████████| 25/25 [00:00<00:00, 141.12it/s]
100%|██████████| 100/100 [00:04<00:00, 21.13it/s]


Ergebnis für Bild ADE_train_00001850.jpg gespeichert: results/sd/ADE_train_00001850.jpg


100%|██████████| 25/25 [00:00<00:00, 147.09it/s]
100%|██████████| 100/100 [00:04<00:00, 21.00it/s]


Ergebnis für Bild ADE_train_00008819.jpg gespeichert: results/sd/ADE_train_00008819.jpg


100%|██████████| 25/25 [00:00<00:00, 141.68it/s]
100%|██████████| 100/100 [00:04<00:00, 20.96it/s]


Ergebnis für Bild ADE_train_00008798.jpg gespeichert: results/sd/ADE_train_00008798.jpg


100%|██████████| 25/25 [00:00<00:00, 140.72it/s]
100%|██████████| 100/100 [00:04<00:00, 21.09it/s]


Ergebnis für Bild ADE_train_00025047.jpg gespeichert: results/sd/ADE_train_00025047.jpg


100%|██████████| 25/25 [00:00<00:00, 140.68it/s]
100%|██████████| 100/100 [00:04<00:00, 21.12it/s]


Ergebnis für Bild ADE_train_00005350.jpg gespeichert: results/sd/ADE_train_00005350.jpg


100%|██████████| 25/25 [00:00<00:00, 140.63it/s]
100%|██████████| 100/100 [00:04<00:00, 21.08it/s]


Ergebnis für Bild ADE_train_00022999.jpg gespeichert: results/sd/ADE_train_00022999.jpg


100%|██████████| 25/25 [00:00<00:00, 140.89it/s]
100%|██████████| 100/100 [00:04<00:00, 21.15it/s]


Ergebnis für Bild ADE_train_00002279.jpg gespeichert: results/sd/ADE_train_00002279.jpg


100%|██████████| 25/25 [00:00<00:00, 141.66it/s]
100%|██████████| 100/100 [00:04<00:00, 21.14it/s]


Ergebnis für Bild ADE_train_00019283.jpg gespeichert: results/sd/ADE_train_00019283.jpg


100%|██████████| 25/25 [00:00<00:00, 140.70it/s]
100%|██████████| 100/100 [00:04<00:00, 21.24it/s]


Ergebnis für Bild ADE_train_00008815.jpg gespeichert: results/sd/ADE_train_00008815.jpg


100%|██████████| 25/25 [00:00<00:00, 140.18it/s]
100%|██████████| 100/100 [00:04<00:00, 21.24it/s]


Ergebnis für Bild ADE_train_00003088.jpg gespeichert: results/sd/ADE_train_00003088.jpg


100%|██████████| 25/25 [00:00<00:00, 140.29it/s]
100%|██████████| 100/100 [00:04<00:00, 21.00it/s]


Ergebnis für Bild ADE_train_00016147.jpg gespeichert: results/sd/ADE_train_00016147.jpg


100%|██████████| 25/25 [00:00<00:00, 141.30it/s]
100%|██████████| 100/100 [00:04<00:00, 21.14it/s]


Ergebnis für Bild ADE_train_00002461.jpg gespeichert: results/sd/ADE_train_00002461.jpg


100%|██████████| 25/25 [00:00<00:00, 140.94it/s]
100%|██████████| 100/100 [00:04<00:00, 20.86it/s]


Ergebnis für Bild ADE_train_00003089.jpg gespeichert: results/sd/ADE_train_00003089.jpg


100%|██████████| 25/25 [00:00<00:00, 141.01it/s]
100%|██████████| 100/100 [00:04<00:00, 20.72it/s]


Ergebnis für Bild ADE_train_00001502.jpg gespeichert: results/sd/ADE_train_00001502.jpg


100%|██████████| 25/25 [00:00<00:00, 142.41it/s]
100%|██████████| 100/100 [00:04<00:00, 20.98it/s]


Ergebnis für Bild ADE_train_00002296.jpg gespeichert: results/sd/ADE_train_00002296.jpg


100%|██████████| 25/25 [00:00<00:00, 140.67it/s]
100%|██████████| 100/100 [00:04<00:00, 21.18it/s]


Ergebnis für Bild ADE_train_00002287.jpg gespeichert: results/sd/ADE_train_00002287.jpg


100%|██████████| 25/25 [00:00<00:00, 141.13it/s]
100%|██████████| 100/100 [00:04<00:00, 21.05it/s]


Ergebnis für Bild ADE_train_00001490.jpg gespeichert: results/sd/ADE_train_00001490.jpg


100%|██████████| 25/25 [00:00<00:00, 141.32it/s]
100%|██████████| 100/100 [00:04<00:00, 20.97it/s]


Ergebnis für Bild ADE_train_00019290.jpg gespeichert: results/sd/ADE_train_00019290.jpg


100%|██████████| 25/25 [00:00<00:00, 141.49it/s]
100%|██████████| 100/100 [00:04<00:00, 21.21it/s]


Ergebnis für Bild ADE_train_00000651.jpg gespeichert: results/sd/ADE_train_00000651.jpg


100%|██████████| 25/25 [00:00<00:00, 140.94it/s]
100%|██████████| 100/100 [00:04<00:00, 21.23it/s]


Ergebnis für Bild ADE_train_00002293.jpg gespeichert: results/sd/ADE_train_00002293.jpg


100%|██████████| 25/25 [00:00<00:00, 139.25it/s]
100%|██████████| 100/100 [00:04<00:00, 21.29it/s]


Ergebnis für Bild ADE_train_00008801.jpg gespeichert: results/sd/ADE_train_00008801.jpg


100%|██████████| 25/25 [00:00<00:00, 139.27it/s]
100%|██████████| 100/100 [00:04<00:00, 21.39it/s]


Ergebnis für Bild ADE_train_00002281.jpg gespeichert: results/sd/ADE_train_00002281.jpg


100%|██████████| 25/25 [00:00<00:00, 141.14it/s]
100%|██████████| 100/100 [00:04<00:00, 21.02it/s]


Ergebnis für Bild ADE_train_00001499.jpg gespeichert: results/sd/ADE_train_00001499.jpg


100%|██████████| 25/25 [00:00<00:00, 145.46it/s]
100%|██████████| 100/100 [00:04<00:00, 21.39it/s]


Ergebnis für Bild ADE_train_00002140.jpg gespeichert: results/sd/ADE_train_00002140.jpg


100%|██████████| 25/25 [00:00<00:00, 140.51it/s]
100%|██████████| 100/100 [00:04<00:00, 20.97it/s]


Ergebnis für Bild ADE_train_00003094.jpg gespeichert: results/sd/ADE_train_00003094.jpg


100%|██████████| 25/25 [00:00<00:00, 140.84it/s]
100%|██████████| 100/100 [00:04<00:00, 21.16it/s]


Ergebnis für Bild ADE_train_00000663.jpg gespeichert: results/sd/ADE_train_00000663.jpg


100%|██████████| 25/25 [00:00<00:00, 139.79it/s]
100%|██████████| 100/100 [00:04<00:00, 21.37it/s]


Ergebnis für Bild ADE_train_00002291.jpg gespeichert: results/sd/ADE_train_00002291.jpg


100%|██████████| 25/25 [00:00<00:00, 140.03it/s]
100%|██████████| 100/100 [00:04<00:00, 21.35it/s]


Ergebnis für Bild ADE_train_00019294.jpg gespeichert: results/sd/ADE_train_00019294.jpg


100%|██████████| 25/25 [00:00<00:00, 139.53it/s]
100%|██████████| 100/100 [00:04<00:00, 20.97it/s]


Ergebnis für Bild ADE_train_00011671.jpg gespeichert: results/sd/ADE_train_00011671.jpg


100%|██████████| 25/25 [00:00<00:00, 145.15it/s]
100%|██████████| 100/100 [00:04<00:00, 21.01it/s]


Ergebnis für Bild ADE_train_00001500.jpg gespeichert: results/sd/ADE_train_00001500.jpg


100%|██████████| 25/25 [00:00<00:00, 140.16it/s]
100%|██████████| 100/100 [00:04<00:00, 20.94it/s]


Ergebnis für Bild ADE_train_00011663.jpg gespeichert: results/sd/ADE_train_00011663.jpg


100%|██████████| 25/25 [00:00<00:00, 140.60it/s]
 55%|█████▌    | 55/100 [00:02<00:01, 23.32it/s]

In [None]:
# Preprocess the images (resize and convert to tensors)
real_images_tensor = preprocess_images(real_images, target_size)
sd_images_tensor = preprocess_images(sd_images, target_size)

fid_score = calculate_fid(real_images_tensor, sd_images_tensor)

lpips_score = calculate_lpips(real_images_tensor, sd_images_tensor)

print(f"FID: {fid_score}")
print(f"LPIPS: {lpips_score}")