# Mask Generation #

In [None]:
# First, install PyTorch with CUDA support
!pip install torch torchvision --extra-index-url https://download.pytorch.org/whl/cu118

# Install required packages
!pip install transformers
!pip install opencv-python
!pip install scipy
!pip install setuptools==59.5.0
!pip install pillow
!pip install timm

# Install OneFormer specific dependencies
!pip install -q git+https://github.com/cocodataset/panopticapi.git
!pip install -q git+https://github.com/mcordts/cityscapesScripts.git
!pip install -q pytorchvideo

# Optional: for better visualization
!pip install matplotlib seaborn

# Verify CUDA is available
import torch
print(f"CUDA available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"CUDA device: {torch.cuda.get_device_name(0)}")

# Import and verify transformers installation
from transformers import OneFormerProcessor, OneFormerForUniversalSegmentation

# Test model loading
processor = OneFormerProcessor.from_pretrained("shi-labs/oneformer_ade20k_swin_large")
model = OneFormerForUniversalSegmentation.from_pretrained("shi-labs/oneformer_ade20k_swin_large")

print("Setup completed successfully!")

Looking in indexes: https://pypi.org/simple, https://download.pytorch.org/whl/cu118
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting

  Preparing metadata (setup.py) ... [?25l[?25hdone
  Building wheel for panopticapi (setup.py) ... [?25l[?25hdone
  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m78.6/78.6 kB[0m [31m7.8 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m46.0/46.0 kB[0m [31m3.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m86.8/86.8 kB[0m [31m8.6 MB/s[0m eta [36m0:00:00[0m
[?25h  Building wheel for cityscapesScripts (setup.py) ... [?25l[?25hdone
  Building wheel for typing (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m132.7/132.7 kB[0m [31m10.4 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m50.2/50.2 kB[0m [31m4.9 MB/s

KeyboardInterrupt: 

In [None]:
import os
import numpy as np
from PIL import Image
import torch
import matplotlib.pyplot as plt
from transformers import OneFormerProcessor, OneFormerForUniversalSegmentation

# Checkpoint used for semantic segmentation, plus a cache so the weights are loaded
# once per kernel instead of once per image. Call _ONEFORMER_CACHE.clear() to drop
# the cached model.
_ONEFORMER_CHECKPOINT = "shi-labs/oneformer_ade20k_swin_large"
_ONEFORMER_CACHE = {}


def _get_oneformer():
    """Return (processor, model, device), loading the checkpoint on first use only."""
    if "loaded" not in _ONEFORMER_CACHE:
        processor = OneFormerProcessor.from_pretrained(_ONEFORMER_CHECKPOINT)
        model = OneFormerForUniversalSegmentation.from_pretrained(_ONEFORMER_CHECKPOINT)
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        model = model.to(device)
        model.eval()
        _ONEFORMER_CACHE["loaded"] = (processor, model, device)
    return _ONEFORMER_CACHE["loaded"]


def _save_labeled_overlay(image, masked_segmentation, id2label, overlay_path):
    """Blend a per-class colour map over the photo, label each region and save it."""
    from scipy import ndimage  # local import, as in the original cell

    # One HSV colour per class id, then look up a colour for every pixel.
    num_classes = len(id2label)
    colors = np.array([plt.cm.hsv(i / num_classes) for i in range(num_classes)])
    colored_segmentation = colors[masked_segmentation]

    original_image = np.array(image).astype(float) / 255
    overlay = 0.6 * original_image + 0.4 * colored_segmentation[:, :, :3]

    plt.figure(figsize=(20, 20))
    try:
        plt.imshow(overlay)

        for class_id in np.unique(masked_segmentation):
            if class_id <= 0:  # Skip 0 class
                continue
            # Label every connected region of this class at its centre of mass.
            labeled_mask, num_features = ndimage.label(masked_segmentation == class_id)
            for label_idx in range(1, num_features + 1):
                cy, cx = ndimage.center_of_mass(labeled_mask == label_idx)
                plt.text(cx, cy, id2label[int(class_id)],
                         color='black',
                         bbox=dict(facecolor='white', alpha=0.7),
                         ha='center',
                         va='center',
                         fontsize=12)

        plt.axis('off')
        plt.savefig(overlay_path, bbox_inches='tight', pad_inches=0)
    finally:
        # Always release the figure, even if drawing or saving fails.
        plt.close()


def process_image(image_path, core_mask_dir=None, overlay_output_dir=None):
    """Segment one image with OneFormer and optionally write a mask and an overlay.

    Args:
        image_path: Path of the image to segment; it is opened and converted to RGB.
        core_mask_dir: If given, the binary foreground mask is saved there as
            "<name>_foreground_mask.png" (255 = kept, 0 = background class).
            When omitted, nothing is written and only the segmentation is returned.
        overlay_output_dir: If given (together with core_mask_dir), a labeled
            colour overlay is saved there as "<name>_overlay_labeled.png".
            When omitted, the overlay is skipped instead of failing.

    Returns:
        The semantic segmentation as a NumPy array of class ids, post-processed to
        the (height, width) of the input image.
    """
    processor, model, device = _get_oneformer()

    image = Image.open(image_path).convert('RGB')
    print(f"Processing image: {image_path}")
    print(f"Image size: {image.size}")

    inputs = processor(images=image, task_inputs=["semantic"], return_tensors="pt")
    inputs = {k: v.to(device) if hasattr(v, "to") else v for k, v in inputs.items()}

    with torch.no_grad():
        outputs = model(**inputs)

    # PIL reports (width, height); target_sizes is given as (height, width).
    segmentation_maps = processor.post_process_semantic_segmentation(
        outputs,
        target_sizes=[(image.size[1], image.size[0])]
    )[0]
    segmentation = segmentation_maps.cpu().numpy()

    # Class ids excluded from the foreground. According to the labels printed by
    # this notebook, 0 = wall, 3 = floor, 5 = ceiling, 8 = window, 14 = door; look
    # up the remaining ids in model.config.id2label.
    background_classes = [0, 1, 3, 5, 4, 6, 8, 9, 14, 38]

    if overlay_output_dir:
        os.makedirs(overlay_output_dir, exist_ok=True)

    if not core_mask_dir:
        return segmentation

    os.makedirs(core_mask_dir, exist_ok=True)
    base_filename = os.path.splitext(os.path.basename(image_path))[0]
    id2label = model.config.id2label

    # Debug listing of every class found in the image.
    print("\nAll detected classes:")
    for class_id in np.unique(segmentation):
        print(f"- Class {class_id}: {id2label[int(class_id)]}")

    # 255 where the pixel belongs to a non-background class, 0 elsewhere.
    is_background = np.isin(segmentation, background_classes)
    foreground_mask = np.where(is_background, 0, 255).astype(np.uint8)

    # Segmentation with every background pixel collapsed to id 0.
    masked_segmentation = np.where(is_background, 0, segmentation)

    print("\nClasses in final mask (after excluding background elements):")
    for class_id in np.unique(masked_segmentation):
        if class_id > 0:  # Skip 0 class
            print(f"- Class {class_id}: {id2label[int(class_id)]}")

    # The overlay is optional: without an output directory there is nowhere to save it.
    if overlay_output_dir:
        overlay_path = os.path.join(overlay_output_dir, f"{base_filename}_overlay_labeled.png")
        _save_labeled_overlay(image, masked_segmentation, id2label, overlay_path)
        print(f"Saved labeled overlay to: {overlay_path}")

    mask_path = os.path.join(core_mask_dir, f"{base_filename}_foreground_mask.png")
    Image.fromarray(foreground_mask).save(mask_path)
    print(f"Saved mask to: {mask_path}")

    return segmentation

def main():
    """Run process_image over every image file found in /content/objects/.

    Output folders for masks and overlays are created up front. A failure on one
    image is reported with its traceback and does not stop the remaining images.
    """
    import traceback

    input_dir = "/content/objects/"
    core_mask_dir = "/content/core_masks"
    overlay_output_dir = "/content/overlay_images"

    for out_dir in (core_mask_dir, overlay_output_dir):
        os.makedirs(out_dir, exist_ok=True)

    # Keep regular files whose (case-insensitive) name ends in an image extension.
    valid_extensions = ('.png', '.jpg', '.jpeg', '.tiff', '.bmp')
    image_files = []
    for entry in os.listdir(input_dir):
        if not os.path.isfile(os.path.join(input_dir, entry)):
            continue
        if entry.lower().endswith(valid_extensions):
            image_files.append(entry)

    total = len(image_files)
    if total == 0:
        print(f"No image files found in {input_dir}")
        return

    print(f"Found {total} images to process")

    for idx, image_file in enumerate(image_files, 1):
        print(f"\nProcessing image {idx}/{total}")
        image_path = os.path.join(input_dir, image_file)

        try:
            process_image(image_path=image_path, core_mask_dir=core_mask_dir, overlay_output_dir=overlay_output_dir)
            print(f"Successfully processed {image_file}")
        except Exception as e:
            # Best effort: log the failure and carry on with the next image.
            print(f"Error processing {image_file}: {str(e)}")
            traceback.print_exc()

if __name__ == "__main__":
    main()

Found 2 images to process

Processing image 1/2


  return func(*args, **kwargs)


Processing image: /content/objects/595043039.png
Image size: (1500, 1000)

All detected classes:
- Class 0: wall
- Class 3: floor
- Class 5: ceiling
- Class 7: bed
- Class 8: window 
- Class 15: table
- Class 19: chair
- Class 36: lamp
- Class 39: cushion
- Class 57: pillow
- Class 64: coffee table
- Class 148: clock

Classes in final mask (after excluding background elements):
- Class 7: bed
- Class 15: table
- Class 19: chair
- Class 36: lamp
- Class 39: cushion
- Class 57: pillow
- Class 64: coffee table
- Class 148: clock
Saved labeled overlay to: /content/overlay_images/595043039_overlay_labeled.png
Saved mask to: /content/core_masks/595043039_foreground_mask.png
Successfully processed 595043039.png

Processing image 2/2
Processing image: /content/objects/903254896.png
Image size: (1500, 1000)

All detected classes:
- Class 0: wall
- Class 3: floor
- Class 5: ceiling
- Class 7: bed
- Class 8: window 
- Class 10: cabinet
- Class 14: door
- Class 17: plant
- Class 22: painting, pict

In [None]:
import os
import numpy as np
import cv2
from PIL import Image
import torch
import matplotlib.pyplot as plt
from transformers import OneFormerProcessor, OneFormerForUniversalSegmentation
from google.colab import drive
drive.mount('/content/drive')

# Checkpoint used for semantic segmentation, plus a cache so the weights are loaded
# once per kernel instead of once per image. Call _ONEFORMER_CACHE.clear() to drop
# the cached model.
_ONEFORMER_CHECKPOINT = "shi-labs/oneformer_ade20k_swin_large"
_ONEFORMER_CACHE = {}


def _get_oneformer():
    """Return (processor, model, device), loading the checkpoint on first use only."""
    if "loaded" not in _ONEFORMER_CACHE:
        processor = OneFormerProcessor.from_pretrained(_ONEFORMER_CHECKPOINT)
        model = OneFormerForUniversalSegmentation.from_pretrained(_ONEFORMER_CHECKPOINT)
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        model = model.to(device)
        model.eval()
        _ONEFORMER_CACHE["loaded"] = (processor, model, device)
    return _ONEFORMER_CACHE["loaded"]


def process_image(image_path, core_mask_dir=None, overlay_output_dir=None, margin_mask_dir=None, margin_size=40):
    """Segment one image with OneFormer and write its core and margin masks.

    Args:
        image_path: Path of the image to segment; it is opened and converted to RGB.
        core_mask_dir: If given, the binary foreground mask is saved there as
            "<name>_foreground_mask.png" (255 = kept, 0 = background class).
        overlay_output_dir: If given, the directory is created; this version of the
            function does not write anything into it.
        margin_mask_dir: If given, an RGBA image is saved there as
            "<name>_margin_mask.png": RGB is copied from the photo and alpha is 128
            on the margin ring around the foreground and 0 elsewhere.
        margin_size: Side length, in pixels, of the square kernel used to dilate the
            foreground mask when building the margin ring.

    Returns:
        The semantic segmentation as a NumPy array of class ids, post-processed to
        the (height, width) of the input image.
    """
    processor, model, device = _get_oneformer()

    image = Image.open(image_path).convert('RGB')
    image_np = np.array(image)  # Convert to NumPy array for processing

    print(f"Processing image: {image_path}")
    print(f"Image size: {image.size}")

    inputs = processor(images=image, task_inputs=["semantic"], return_tensors="pt")
    inputs = {k: v.to(device) if hasattr(v, "to") else v for k, v in inputs.items()}

    with torch.no_grad():
        outputs = model(**inputs)

    # PIL reports (width, height); target_sizes is given as (height, width).
    segmentation_maps = processor.post_process_semantic_segmentation(
        outputs,
        target_sizes=[(image.size[1], image.size[0])]
    )[0]
    segmentation = segmentation_maps.cpu().numpy()

    # Class ids excluded from the foreground (architectural elements); look the ids
    # up in model.config.id2label.
    background_classes = [0, 1, 3, 5, 4, 6, 8, 9, 14, 38]

    for out_dir in (core_mask_dir, overlay_output_dir, margin_mask_dir):
        if out_dir:
            os.makedirs(out_dir, exist_ok=True)

    base_filename = os.path.splitext(os.path.basename(image_path))[0]

    # 255 where the pixel belongs to a non-background class, 0 elsewhere.
    foreground_mask = np.where(
        np.isin(segmentation, background_classes), 0, 255
    ).astype(np.uint8)

    # Each output is optional: the directory parameters default to None, so only
    # write the files whose destination was actually provided.
    if core_mask_dir:
        mask_path = os.path.join(core_mask_dir, f"{base_filename}_foreground_mask.png")
        Image.fromarray(foreground_mask).save(mask_path)
        print(f"Saved mask to: {mask_path}")

    if margin_mask_dir:
        # Margin = pixels added by dilating the foreground, i.e. the ring around it.
        kernel = np.ones((margin_size, margin_size), np.uint8)
        dilated_mask = cv2.dilate(foreground_mask, kernel, iterations=1)
        margin_mask = dilated_mask - foreground_mask

        # RGBA output: real pixels everywhere, semi-transparent alpha on the ring only.
        margin_rgba = np.zeros((*image_np.shape[:2], 4), dtype=np.uint8)
        margin_rgba[:, :, :3] = image_np  # Copy RGB channels
        margin_rgba[:, :, 3] = (margin_mask > 0) * 128  # Set alpha to 128 for the margin

        margin_mask_path = os.path.join(margin_mask_dir, f"{base_filename}_margin_mask.png")
        Image.fromarray(margin_rgba).save(margin_mask_path)
        print(f"Saved margin mask to: {margin_mask_path}")

    return segmentation

def main():
    """Generate core and margin masks for every image in the Drive input folder.

    Output folders are created up front. A failure on one image is reported with
    its traceback and does not stop the remaining images.
    """
    import traceback

    # Google Drive locations. (An earlier variant of this cell used local folders
    # under /content: objects/, core_masks, overlay_images and margin_masks.)
    input_dir = "/content/drive/My Drive/Colab Notebooks/DATASETS/PhotosWithObjects/Uncompressed"
    core_mask_dir = "/content/drive/My Drive/Colab Notebooks/DATASETS/AutomaticallyGeneratedCoreMasks"
    margin_mask_dir = "/content/drive/My Drive/Colab Notebooks/DATASETS/AutomaticallyGeneratedMarginMasks"
    overlay_output_dir = "/content/drive/My Drive/Colab Notebooks/DATASETS/AutomaticallyGeneratedMaskOverlays"

    for out_dir in (core_mask_dir, overlay_output_dir, margin_mask_dir):
        os.makedirs(out_dir, exist_ok=True)

    # Keep regular files whose (case-insensitive) name ends in an image extension.
    valid_extensions = ('.png', '.jpg', '.jpeg', '.tiff', '.bmp')
    image_files = []
    for entry in os.listdir(input_dir):
        if not os.path.isfile(os.path.join(input_dir, entry)):
            continue
        if entry.lower().endswith(valid_extensions):
            image_files.append(entry)

    total = len(image_files)
    if total == 0:
        print(f"No image files found in {input_dir}")
        return

    print(f"Found {total} images to process")

    for idx, image_file in enumerate(image_files, 1):
        print(f"\nProcessing image {idx}/{total}")
        image_path = os.path.join(input_dir, image_file)

        try:
            process_image(image_path=image_path,
                          core_mask_dir=core_mask_dir,
                          overlay_output_dir=overlay_output_dir,
                          margin_mask_dir=margin_mask_dir)
            print(f"Successfully processed {image_file}")
        except Exception as e:
            # Best effort: log the failure and carry on with the next image.
            print(f"Error processing {image_file}: {str(e)}")
            traceback.print_exc()

if __name__ == "__main__":
    main()


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Found 323 images to process

Processing image 1/323


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.
  return func(*args, **kwargs)


Processing image: /content/drive/My Drive/Colab Notebooks/DATASETS/PhotosWithObjects/Uncompressed/1011640917.png
Image size: (1500, 2000)


  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


Saved mask to: /content/drive/My Drive/Colab Notebooks/DATASETS/AutomaticallyGeneratedCoreMasks/1011640917_foreground_mask.png
Saved margin mask to: /content/drive/My Drive/Colab Notebooks/DATASETS/AutomaticallyGeneratedMarginMasks/1011640917_margin_mask.png
Successfully processed 1011640917.png

Processing image 2/323
Processing image: /content/drive/My Drive/Colab Notebooks/DATASETS/PhotosWithObjects/Uncompressed/1011640934.png
Image size: (1500, 1125)
Saved mask to: /content/drive/My Drive/Colab Notebooks/DATASETS/AutomaticallyGeneratedCoreMasks/1011640934_foreground_mask.png
Saved margin mask to: /content/drive/My Drive/Colab Notebooks/DATASETS/AutomaticallyGeneratedMarginMasks/1011640934_margin_mask.png
Successfully processed 1011640934.png

Processing image 3/323
Processing image: /content/drive/My Drive/Colab Notebooks/DATASETS/PhotosWithObjects/Uncompressed/1033248940.png
Image size: (1500, 2670)
Saved mask to: /content/drive/My Drive/Colab Notebooks/DATASETS/AutomaticallyGener

# Finetuning #

In [None]:
!git clone https://github.com/lsunol/casalimpia.git

Cloning into 'casalimpia'...
remote: Enumerating objects: 13781, done.[K
remote: Counting objects: 100% (162/162), done.[K
remote: Compressing objects: 100% (122/122), done.[K
remote: Total 13781 (delta 73), reused 106 (delta 33), pack-reused 13619 (from 2)[K
Receiving objects: 100% (13781/13781), 2.39 GiB | 27.57 MiB/s, done.
Resolving deltas: 100% (298/298), done.


In [None]:
!pip install -r /content/casalimpia/back/requirements.txt

Collecting torchmetrics (from -r /content/casalimpia/back/requirements.txt (line 10))
  Downloading torchmetrics-1.6.3-py3-none-any.whl.metadata (20 kB)
Collecting lpips (from -r /content/casalimpia/back/requirements.txt (line 12))
  Downloading lpips-0.1.4-py3-none-any.whl.metadata (10 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch->-r /content/casalimpia/back/requirements.txt (line 1))
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch->-r /content/casalimpia/back/requirements.txt (line 1))
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch->-r /content/casalimpia/back/requirements.txt (line 1))
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch->-r /content/casalimpia

In [None]:
##upload code with changes here
##and change single mask dataset and mask if necessary
!pip install accelerate
!pip install prodigyopt

Collecting prodigyopt
  Downloading prodigyopt-1.1.2-py3-none-any.whl.metadata (4.8 kB)
Downloading prodigyopt-1.1.2-py3-none-any.whl (10 kB)
Installing collected packages: prodigyopt
Successfully installed prodigyopt-1.1.2


In [None]:
!sed -i 's|inputs_dir="./back/data/singleImageDataset/emptyRoom"|inputs_dir="./data/singleImageDataset/emptyRoom"|' /content/casalimpia/back/empty_rooms_dataset.py
!sed -i 's|masks_dir="./back/data/singleImageDataset/emptyMask"|masks_dir="./data/singleImageDataset/emptyMask"|' /content/casalimpia/back/empty_rooms_dataset.py

# Create a new cell and run this command to fix the paths
!sed -i 's|"./back/data/singleImageDataset/emptyRoom"|"./data/singleImageDataset/emptyRoom"|' /content/casalimpia/back/empty_rooms_dataset.py
!sed -i 's|"./back/data/singleImageDataset/emptyMask"|"./data/singleImageDataset/emptyMask"|' /content/casalimpia/back/empty_rooms_dataset.py

In [None]:
##without margin mask
from google.colab import drive
drive.mount('/content/drive')
%cd /content/casalimpia/back

!python /content/casalimpia/back/main.py \
--empty-rooms-dir '/content/drive/My Drive/DATASETS/EmptyRooms/unique_unwatermarked' \
--masks-dir '/content/drive/My Drive/DATASETS/AutomaticallyGeneratedCoreMasks' \
--output-dir '/content/casalimpia/back/data' \
--model 'stability-ai' \
--epochs 15 \
--batch-size 10 \
--initial-learning-rate 1e-3 \
--img-size 512 \
--lora-rank 16 \
--lora-alpha 8 \
--lora-dropout 0.1 \
--lora-target-modules to_k to_q to_v to_out.0 \
--img-size 512 \
--lr-scheduler 'cosine' \
--dtype 'float32' \
--save-latent-representations

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
/content/casalimpia/back
2025-03-03 20:24:11.527872: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-03-03 20:24:11.546568: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1741033451.569177   20261 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1741033451.575959   20261 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been 

In [None]:
####with margin mask
from google.colab import drive
drive.mount('/content/drive')
%cd /content/casalimpia/back

!python /content/casalimpia/back/main.py \
--empty-rooms-dir '/content/drive/My Drive/DATASETS/EmptyRooms/unique_unwatermarked' \
--masks-dir '/content/drive/My Drive/DATASETS/AutomaticallyGeneratedCoreMasks' \
--margin-masks-dir='/content/drive/My Drive/DATASETS/AutomaticallyGeneratedMarginMasks' \
--output-dir '/content/casalimpia/back/data' \
--model 'stability-ai' \
--epochs 500 \
--batch-size 20 \
--initial-learning-rate 4e-4 \
--img-size 512 \
--lora-rank 16 \
--lora-alpha 8 \
--lora-dropout 0.1 \
--lora-target-modules to_q to_v to_out.0 \
--img-size 512 \
--dtype 'float32' \
--save-latent-representations

Mounted at /content/drive
/content/casalimpia/back
The cache for model files in Transformers v4.22.0 has been updated. Migrating your old cache. This is a one-time only operation. You can interrupt this and resume the migration later on by calling `transformers.utils.move_cache()`.
0it [00:00, ?it/s]
2025-02-28 12:04:29.118666: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-02-28 12:04:29.139763: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1740744269.162736    2652 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1740744

In [None]:
# Training run with margin masks and gradient accumulation (5 steps).
# The original note also mentions "accelerate"; no flag below selects it, so it is
# presumably handled inside main.py — TODO confirm.
from google.colab import drive
drive.mount('/content/drive')
%cd /content/casalimpia/back

!python /content/casalimpia/back/main.py \
--empty-rooms-dir '/content/drive/My Drive/DATASETS/EmptyRooms/unique_unwatermarked' \
--masks-dir '/content/drive/My Drive/DATASETS/AutomaticallyGeneratedCoreMasks' \
--margin-masks-dir='/content/drive/My Drive/DATASETS/AutomaticallyGeneratedMarginMasks' \
--output-dir '/content/casalimpia/back/data' \
--model 'stability-ai' \
--epochs 10 \
--batch-size 10 \
--initial-learning-rate 4e-4 \
--gradient-accumulation-steps 5 \
--img-size 512 \
--lora-rank 16 \
--lora-alpha 8 \
--lora-target-modules to_k to_q to_v to_out.0 \
--lora-dropout 0.1 \
--dtype 'float32' \
--save-latent-representations


[1;30;43mSe han truncado las últimas 5000 líneas del flujo de salida.[0m
 40% 4/10 [00:04<00:06,  1.13s/it][A
 50% 5/10 [00:05<00:05,  1.08s/it][A
 60% 6/10 [00:06<00:04,  1.05s/it][A
 70% 7/10 [00:07<00:03,  1.03s/it][A
 80% 8/10 [00:08<00:02,  1.02s/it][A
 90% 9/10 [00:09<00:01,  1.01s/it][A
100% 10/10 [00:10<00:00,  1.09s/it]
2025-03-02 01:03:46 - INFO - Batch 160 PSNR: 16.23782730102539
2025-03-02 01:03:47 - INFO - Batch loss: 0.06493721157312393
Epoch 7:  18% 162/902 [04:54<1:20:53,  6.56s/it]2025-03-02 01:03:49 - INFO - Batch loss: 0.05156615003943443
Epoch 7:  18% 163/902 [04:55<1:01:34,  5.00s/it]2025-03-02 01:03:50 - INFO - Batch loss: 0.0577072873711586
Epoch 7:  18% 164/902 [04:56<48:03,  3.91s/it]  2025-03-02 01:03:51 - INFO - Batch loss: 0.033477868884801865
Epoch 7:  18% 165/902 [04:58<38:36,  3.14s/it]2025-03-02 01:03:53 - INFO - Batch loss: 0.0750812217593193
Epoch 7:  18% 166/902 [04:59<31:59,  2.61s/it]2025-03-02 01:03:54 - INFO - Batch loss: 0.059016712009906

In [None]:
####with margin + gradient accumulation + accelerate + prodigy
from google.colab import drive
drive.mount('/content/drive')
%cd /content/casalimpia/back

!python /content/casalimpia/back/main.py \
--empty-rooms-dir '/content/drive/My Drive/DATASETS/EmptyRooms/unique_unwatermarked' \
--masks-dir '/content/drive/My Drive/DATASETS/AutomaticallyGeneratedCoreMasks' \
--margin-masks-dir='/content/drive/My Drive/DATASETS/AutomaticallyGeneratedMarginMasks' \
--output-dir '/content/casalimpia/back/data' \
--model 'stability-ai' \
--epochs 15 \
--batch-size 15 \
--initial-learning-rate 1.0 \
--gradient-accumulation-steps 5 \
--img-size 512 \
--lora-rank 32\
--lora-alpha 64 \
--lora-target-modules to_k to_q to_v to_out.0 \
--lora-dropout 0.1 \
--dtype 'float32' \
--save-latent-representations

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
/content/casalimpia/back
2025-03-18 09:08:17.459482: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1742288897.486328    5695 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1742288897.494975    5695 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-03-18 09:08:17.539464: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F FMA, in other operations, rebuild TensorFlow 

In [None]:
from google.colab import files
import shutil

# Folder whose contents are backed up.
folder_path = '/content/casalimpia/back/data/lora_trains'

# Archive base name: last path component plus "_backup" -> 'lora_trains_backup'.
folder_name = folder_path.rsplit('/', 1)[-1]
zip_name = folder_name + '_backup'

# Zip the folder (base name is relative, so the .zip lands in the working
# directory), then hand the resulting file to the browser for download.
shutil.make_archive(zip_name, 'zip', folder_path)
files.download(zip_name + '.zip')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
import os
from google.colab import drive
import cv2
import numpy as np
from tqdm import tqdm
import re

def mount_drive():
    """Mount Google Drive at /content/drive using the Colab drive helper."""
    mount_point = '/content/drive'
    drive.mount(mount_point)

def extract_number_from_filename(filename):
    """Return the digits at the start of `filename` that precede an underscore.

    The name must begin with one or more digits immediately followed by "_";
    the digits are returned as a string. Any other name yields None.
    """
    leading_digits = re.compile(r'(\d+)_').match(filename)
    return leading_digits.group(1) if leading_digits else None

def load_images_from_folder(folder_path):
    """Read every PNG/JPEG in `folder_path`, in sorted filename order.

    PNG files are read with cv2.IMREAD_UNCHANGED (so an alpha channel, if present,
    is kept); .jpg/.jpeg files are read with cv2.imread's default flags. Files
    with other extensions, and files OpenCV cannot decode, are skipped.

    Returns:
        (images, filenames): two parallel lists.
    """
    loaded = []  # (image, filename) pairs, in processing order
    for name in sorted(os.listdir(folder_path)):
        lowered = name.lower()
        if lowered.endswith('.png'):
            read_args = (cv2.IMREAD_UNCHANGED,)
        elif lowered.endswith(('.jpg', '.jpeg')):
            read_args = ()
        else:
            continue

        picture = cv2.imread(os.path.join(folder_path, name), *read_args)
        if picture is not None:
            loaded.append((picture, name))

    images = [picture for picture, _ in loaded]
    filenames = [name for _, name in loaded]
    return images, filenames

def merge_images(room, contour):
    """Overlay the visible pixels of `contour` onto a copy of `room`.

    Args:
        room: Background image as a NumPy array (H x W or H x W x C). It is not
            modified. A single-channel room is expanded to three channels so that
            colour pixels can be written into it.
        contour: Image to overlay. It is resized to the room's size when the sizes
            differ. Visibility is decided per pixel:
              * 4 channels: alpha > 10;
              * 3 channels: BGR-to-gray intensity > 10;
              * single channel: the value itself > 10 (written to every colour
                channel of the result).

    Returns:
        A new array with the room's height and width, where visible contour pixels
        replace the room's first three channels. Any extra room channel (such as
        alpha) is kept unchanged.
    """
    result = room.copy()
    if result.ndim == 2:
        result = np.repeat(result[:, :, np.newaxis], 3, axis=2)

    # Only resample when needed; cv2.resize takes (width, height).
    target_h, target_w = result.shape[:2]
    if contour.shape[:2] != (target_h, target_w):
        contour_resized = cv2.resize(contour, (target_w, target_h))
    else:
        contour_resized = contour

    # Give single-channel masks an explicit channel axis so indexing is uniform.
    if contour_resized.ndim == 2:
        contour_resized = contour_resized[:, :, np.newaxis]

    channels = contour_resized.shape[2]
    if channels == 4:
        # Only consider pixels with significant alpha values (threshold can be adjusted).
        visible_mask = contour_resized[:, :, 3] > 10
        colour = contour_resized[:, :, :3]
    elif channels == 3:
        # No alpha channel: use intensity to determine contour pixels.
        gray = cv2.cvtColor(contour_resized, cv2.COLOR_BGR2GRAY)
        visible_mask = gray > 10
        colour = contour_resized
    else:
        # Any other layout: treat the first channel as intensity; the single value
        # per pixel is broadcast over the colour channels on assignment.
        visible_mask = contour_resized[:, :, 0] > 10
        colour = contour_resized[:, :, :1]

    result[visible_mask, :3] = colour[visible_mask]
    return result

def process_images(rooms_folder, masks_folder, output_folder):
    """Merge each room image with the mask image at the same sorted position.

    Rooms and masks are paired by index after both folders are loaded in sorted
    filename order; filenames are not matched against each other. For every pair
    two JPEGs are written to `output_folder`:
      * "<number>_merged.jpg" — the room with the mask's visible pixels overlaid;
      * "<number>_mask_used.jpg" — the mask that was applied, for verification.
    `<number>` is the numeric prefix of the mask filename; when there is none,
    "merged_<index>" is used as the stem for both files.

    Args:
        rooms_folder: Folder with the room images.
        masks_folder: Folder with the mask/contour images.
        output_folder: Destination folder; created if missing.
    """
    os.makedirs(output_folder, exist_ok=True)

    rooms, _room_filenames = load_images_from_folder(rooms_folder)
    masks, mask_filenames = load_images_from_folder(masks_folder)

    print(f"Found {len(rooms)} room images and {len(masks)} mask images")

    pair_count = min(len(rooms), len(masks))
    if len(rooms) != len(masks):
        print(f"Warning: image counts differ; only the first {pair_count} pairs will be merged")

    # tqdm.write keeps messages from breaking up the progress bar.
    for i in tqdm(range(pair_count)):
        room = rooms[i]
        contour = masks[i]
        mask_filename = mask_filenames[i]

        # Derive both output names from the mask's numeric prefix, with an
        # index-based fallback so no file is ever named "None_...".
        number = extract_number_from_filename(mask_filename)
        if number is None:
            tqdm.write(f"Warning: Could not extract number from filename {mask_filename}")
            output_filename = f"merged_{i}.jpg"
            mask_used_filename = f"merged_{i}_mask_used.jpg"
        else:
            output_filename = f"{number}_merged.jpg"
            mask_used_filename = f"{number}_mask_used.jpg"

        merged = merge_images(room, contour)

        # cv2.imwrite reports failure through its return value, not an exception.
        output_path = os.path.join(output_folder, output_filename)
        if cv2.imwrite(output_path, merged):
            tqdm.write(f"Saved: {output_filename}")
        else:
            tqdm.write(f"Warning: Failed to write {output_path}")

        # Also save the mask we're actually using (for verification).
        if contour.ndim == 2:
            mask_vis = contour  # already single-channel
        elif contour.shape[2] == 4:
            mask_vis = (contour[:, :, 3] > 10).astype(np.uint8) * 255
        else:
            mask_vis = cv2.cvtColor(contour, cv2.COLOR_BGR2GRAY)

        mask_used_path = os.path.join(output_folder, mask_used_filename)
        if not cv2.imwrite(mask_used_path, mask_vis):
            tqdm.write(f"Warning: Failed to write {mask_used_path}")

def main():
    """Mount Drive and merge the sample empty rooms with the sample margin masks."""
    mount_drive()

    # All three folders live under the same DATASETS directory on Drive.
    base_path = '/content/drive/MyDrive/DATASETS'
    rooms_folder, masks_folder, output_folder = (
        os.path.join(base_path, leaf)
        for leaf in ('sampleEmptyRooms', 'sampleMarginMasks', 'sampleMerged')
    )

    process_images(rooms_folder, masks_folder, output_folder)
    print("Processing complete! Check the sampleMerged folder for results.")

if __name__ == "__main__":
    main()

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Found 50 room images and 50 mask images


  4%|▍         | 2/50 [00:00<00:02, 17.96it/s]

Saved: 1299846779_merged.jpg
Saved: 1299846829_merged.jpg
Saved: 1299856041_merged.jpg
Saved: 1299873811_merged.jpg


 10%|█         | 5/50 [00:00<00:01, 23.52it/s]

Saved: 1299873850_merged.jpg


 16%|█▌        | 8/50 [00:00<00:01, 22.30it/s]

Saved: 1299873931_merged.jpg
Saved: 1299873954_merged.jpg
Saved: 1299874320_merged.jpg
Saved: 1299879829_merged.jpg
Saved: 1299913325_merged.jpg


 22%|██▏       | 11/50 [00:00<00:01, 21.21it/s]

Saved: 1299913368_merged.jpg
Saved: 1299913449_merged.jpg
Saved: 1299913469_merged.jpg


 28%|██▊       | 14/50 [00:00<00:01, 20.63it/s]

Saved: 1299913504_merged.jpg


 34%|███▍      | 17/50 [00:00<00:01, 20.27it/s]

Saved: 1299918059_merged.jpg
Saved: 1299939175_merged.jpg
Saved: 1299939179_merged.jpg
Saved: 1299945688_merged.jpg


 40%|████      | 20/50 [00:00<00:01, 20.30it/s]

Saved: 1299945690_merged.jpg
Saved: 1299949295_merged.jpg
Saved: 1299952656_merged.jpg
Saved: 1299953937_merged.jpg


 50%|█████     | 25/50 [00:01<00:01, 18.68it/s]

Saved: 1299953939_merged.jpg
Saved: 1299953974_merged.jpg
Saved: 1299953995_merged.jpg
Saved: 1299960017_merged.jpg
Saved: 1299961850_merged.jpg


 56%|█████▌    | 28/50 [00:01<00:01, 19.93it/s]

Saved: 1299963326_merged.jpg
Saved: 1299963327_merged.jpg
Saved: 1299963348_merged.jpg


 62%|██████▏   | 31/50 [00:01<00:00, 20.29it/s]

Saved: 1299963911_merged.jpg
Saved: 1299975329_merged.jpg


 68%|██████▊   | 34/50 [00:01<00:00, 20.46it/s]

Saved: 1299975353_merged.jpg
Saved: 1299980577_merged.jpg
Saved: 1299998591_merged.jpg


 74%|███████▍  | 37/50 [00:01<00:00, 21.39it/s]

Saved: 1299999270_merged.jpg
Saved: 1299999327_merged.jpg


 80%|████████  | 40/50 [00:01<00:00, 21.04it/s]

Saved: 1299999328_merged.jpg
Saved: 1299999428_merged.jpg
Saved: 1299999434_merged.jpg
Saved: 595043039_merged.jpg


 86%|████████▌ | 43/50 [00:02<00:00, 20.43it/s]

Saved: 700125260_merged.jpg
Saved: 773740891_merged.jpg
Saved: 773740909_merged.jpg
Saved: 773740910_merged.jpg


 96%|█████████▌| 48/50 [00:02<00:00, 17.65it/s]

Saved: 773810658_merged.jpg
Saved: 903254896_merged.jpg
Saved: 903254897_merged.jpg
Saved: 905212034_merged.jpg


100%|██████████| 50/50 [00:02<00:00, 19.77it/s]

Saved: 905212054_merged.jpg
Processing complete! Check the sampleMerged folder for results.





In [None]:
import shutil
from google.colab import files

# Source folder and the archive that will be offered for download.
folder_path = "/content/casalimpia/back/blending_visualizations"
zip_path = "/content/blending_visualizations.zip"

# make_archive appends the ".zip" suffix itself, so pass the path without it.
archive_base = zip_path.replace(".zip", "")
shutil.make_archive(archive_base, 'zip', folder_path)

# Send the archive to the browser.
files.download(zip_path)


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>