# Image Segmentation
## Removing the rulers, measuring tapes and other background noise from the images using LangSAM

**This notebook contains code running locally or on Google Colab, as indicated in subsections**

Required for this notebook:
* Folder containing raw images

The images were preprocessed as follows:

### First segmentation round:
* Preclustered using image features extracted with same model, PCA with 256 principal components and KMeans using 40 clusters
* Selected 13 out of 40 clusters with noise (rulers, measuring tapes etc.)
* Ran LangSAM using "parts" prompt to leave only the part, applied the mask with the highest probability (logit) and put on white background
* Joined the segmented images with the remaining original images

### Second segmentation round:
* Clustered the segmented images, selected badly segmented clusters
* Segmented them again using the "part" prompt, this time using the area of the returned boxes and choosing the box with the smallest area
* Joined the segmented images with the remaining original images

In [None]:
# helper functions
import sys
sys.path.append("../../src")
from image_feature_extraction import dump_to_pickle_file, load_from_pickle_file

In [None]:
# Paths
# Raw input images and the folder that receives the segmented images
image_folder = "/content/SyrusImage"
save_path = "/content/SyrusImageProcessed"

# Pickle files: logits collected per image by segment(), and the cluster -> files mapping
logs_pkl_file = "/content/logs.pkl"
logs_subset_pkl_file = "/content/logs15.pkl"
clustered_files_pkl_file = "/content/vgg16_original_imagesize_pca256_kmeans40_cluster_to_files.pkl"

# Archives / folders handed to files.download() at the end of each Colab section
processed_images_zip = "/content/SyrusImageProcessed.zip"
processed_images_subset = "/content/SyrusImageProcessed15"
processed_images_subset_zip = f"{processed_images_subset}.zip"

### 1. The following segmentation code runs on the local system but requires a GPU with 7+ GB of memory, preferably 12 GB. (The langsam conda environment has the necessary packages and libraries.) It selects the mask with the highest logit value.

In [1]:
import numpy as np
from PIL import Image
from lang_sam import LangSAM
import os
from tqdm.auto import tqdm
import torch
# Environment sanity check before loading the model
for diagnostic in (
    torch.cuda.is_available(),  # Should return True
    torch.__version__,          # Note the PyTorch version
    torch.version.cuda,         # Should match 12.0 or show compatible CUDA version
):
    print(diagnostic)

In [3]:
# Instantiate LangSAM once; segment() below reads this module-level `model` for its predictions
model = LangSAM()

  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


final text_encoder_type: bert-base-uncased
Model loaded from /home/constructor_munich/.cache/huggingface/hub/models--ShilongLiu--GroundingDINO/snapshots/a94c9b567a2a374598f05c584e96798a170c56fb/groundingdino_swinb_cogcoor.pth 
 => _IncompatibleKeys(missing_keys=[], unexpected_keys=['label_enc.weight', 'bert.embeddings.position_ids'])


In [7]:
def segment(folder_path, save_path, text_prompt="parts", threshold=0.5, max_files=100):
    '''
    Segments images in a folder using the LangSAM() segmentation model. It selects the mask with the highest logit value.

    For every JPEG file the selected mask is applied to the image and everything outside the
    mask is painted white. When no mask is returned, or the best logit does not exceed
    `threshold`, the original image is saved unchanged. The logits of every processed image
    are collected in a dict keyed by filename and written to the notebook-level
    `logs_pkl_file` with `dump_to_pickle_file`. Uses the notebook-level `model`.

    Parameters:
    - folder_path (str): Path to the folder containing images.
    - save_path (str): Path to save the segmented images. Created if it does not exist.
    - text_prompt (str): Text prompt for segmentation model (default: "parts").
    - threshold (float): Threshold for selecting segmentation masks (default: 0.5).
    - max_files (int or None): Maximum number of images, in sorted order, to process
      (default: 100). Pass None to process every image in the folder.

    Returns:
    - None
    '''
    def _numeric_key(name):
        # Order by the number formed from all digits in the name; names without any
        # digit sort first instead of raising ValueError on int('')
        digits = ''.join(filter(str.isdigit, name))
        return (int(digits) if digits else -1, name)

    # Without the output folder every save below would fail and only be reported as an error line
    os.makedirs(save_path, exist_ok=True)

    logs = {}
    # List the folder that was passed in (not a notebook-level global) and keep JPEG files only
    image_files = [name for name in os.listdir(folder_path) if name.endswith(('.jpg', '.jpeg'))]
    files = sorted(image_files, key=_numeric_key)[:max_files]
    for filename in tqdm(files):
        tqdm.write(f'Segmenting {filename}')
        image_path = os.path.join(folder_path, filename)
        try:
            # Open image and convert it to RGB if it's in other format
            image_pil = Image.open(image_path).convert("RGB")
            # Run prediction
            masks, _, _, logits = model.predict(image_pil, text_prompt)
            # Put the logits as numpy array to the logs dictionary
            logits_np = np.array(logits)
            logs[filename] = logits_np
            # Index of the highest logit, or None when the model returned nothing
            best_index = int(np.argmax(logits_np)) if len(logits_np) > 0 else None
            if best_index is not None and logits_np[best_index] > threshold:
                # Only the selected mask needs to be moved to the CPU and converted
                selected_mask = masks[best_index].squeeze().cpu().numpy()
                image_np = np.asarray(image_pil)
                # Plain white background of the same size and dtype as the original image
                white_background = np.full_like(image_np, 255)
                # Keep the original pixels inside the mask, white everywhere else
                result_image_np = np.where(selected_mask[:, :, None], image_np, white_background)
                segmented_image_pil = Image.fromarray(result_image_np)
                segmented_image_pil.save(os.path.join(save_path, filename), format='JPEG', quality=90)
            else:
                print(f"{filename}: no masks or too low logits, saving the original image")
                image_pil.save(os.path.join(save_path, filename), format='JPEG', quality=90)
        except Exception as e:
            # Best effort: report the failure and continue with the next image
            print(f"Error reading '{filename}': {str(e)}")
    # Save the logs
    dump_to_pickle_file(logs, logs_pkl_file)

In [9]:
# Segment the raw image folder and write the results to save_path (both set in the Paths cell)
segment(image_folder, save_path)

Segmenting Images..:   0%|          | 0/7578 [00:00<?, ?it/s]

Error reading 'A1000.jpeg': CUDA out of memory. Tried to allocate 1024.00 MiB (GPU 0; 6.00 GiB total capacity; 4.57 GiB already allocated; 0 bytes free; 4.77 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF
Error reading 'A10000.jpeg': CUDA out of memory. Tried to allocate 1024.00 MiB (GPU 0; 6.00 GiB total capacity; 4.57 GiB already allocated; 0 bytes free; 4.74 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF
Error reading 'A10001.jpeg': CUDA out of memory. Tried to allocate 1024.00 MiB (GPU 0; 6.00 GiB total capacity; 4.57 GiB already allocated; 0 bytes free; 4.75 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentati

KeyboardInterrupt: 

### 2. The following segmentation code runs on the Google Colab. Selecting the mask with the highest logit value.

In [None]:
!pip install torch torchvision
!pip install -U git+https://github.com/luca-medeiros/lang-segment-anything.git

In [None]:
import os

import numpy as np
from google.colab import files
from lang_sam import LangSAM
from PIL import Image
from tqdm.auto import tqdm

In [None]:
# Extract the uploaded image archive into the current working directory
!unzip /content/SyrusImage.zip

In [None]:
# Instantiate LangSAM once; segment() below reads this module-level `model` for its predictions
model = LangSAM()

In [None]:
def segment(image_folder="/content/SyrusImage", save_path="/content/SyrusImageProcessed",
            dump_path="/content/logs.pkl", text_prompt="parts", threshold=0.5):
    '''
    Segments images in a folder using the LangSAM() segmentation model. It selects the mask with the highest logit value.

    For every JPEG file the selected mask is applied to the image and everything outside the
    mask is painted white. When no mask is returned, or the best logit does not exceed
    `threshold`, the original image is saved unchanged. The logits of every processed image
    are collected in a dict keyed by filename and written to `dump_path` with
    `dump_to_pickle_file`. Uses the notebook-level `model`.

    Parameters:
    - image_folder (str): Path to the folder containing images (default: "/content/SyrusImage").
    - save_path (str): Path to save the segmented images (default: "/content/SyrusImageProcessed").
      Created if it does not exist.
    - dump_path (str): Path to save segmentation logs (default: "/content/logs.pkl").
    - text_prompt (str): Text prompt for segmentation model (default: "parts").
    - threshold (float): Threshold for selecting segmentation masks (default: 0.5).

    Returns:
    - None
    '''
    # Without the output folder every save below would fail and only be reported as an error line
    os.makedirs(save_path, exist_ok=True)

    logs = {}
    # Keep JPEG files only and sort them so the processing order is deterministic
    image_files = sorted(name for name in os.listdir(image_folder)
                         if name.endswith(('.jpg', '.jpeg')))
    # Iterating over the bar advances it per file and closes it when the loop finishes
    pbar = tqdm(image_files, desc='Segmenting')
    for filename in pbar:
        pbar.set_description(f'Segmenting {filename}')
        image_path = os.path.join(image_folder, filename)
        try:
            # Open image and convert it to RGB if it's in other format
            image_pil = Image.open(image_path).convert("RGB")
            # Run prediction
            masks, _, _, logits = model.predict(image_pil, text_prompt)
            # Put the logits as numpy array to the logs dictionary
            logits_np = np.array(logits)
            logs[filename] = logits_np
            # Index of the highest logit, or None when the model returned nothing
            best_index = int(np.argmax(logits_np)) if len(logits_np) > 0 else None
            if best_index is not None and logits_np[best_index] > threshold:
                # Only the selected mask needs to be moved to the CPU and converted
                selected_mask = masks[best_index].squeeze().cpu().numpy()
                image_np = np.asarray(image_pil)
                # Plain white background of the same size and dtype as the original image
                white_background = np.full_like(image_np, 255)
                # Keep the original pixels inside the mask, white everywhere else
                result_image_np = np.where(selected_mask[:, :, None], image_np, white_background)
                segmented_image_pil = Image.fromarray(result_image_np)
                segmented_image_pil.save(os.path.join(save_path, filename), format='JPEG', quality=90)
            else:
                print(f"{filename}: no masks or too low logits, saving the original image")
                image_pil.save(os.path.join(save_path, filename), format='JPEG', quality=90)
        except Exception as e:
            # Best effort: report the failure and continue with the next image
            print(f"Error reading '{filename}': {str(e)}")
    # Save the logs
    dump_to_pickle_file(logs, dump_path)

In [None]:
# Gather the files of the clusters that were selected from the clustering step
# (the clusters flagged as containing rulers / measuring tapes)
clustered_files = load_from_pickle_file(clustered_files_pkl_file)
cluster_num_with_rulers_tapes = [0, 3, 4, 6, 9, 13, 16, 19, 24, 26, 29, 32, 39]
print(len(cluster_num_with_rulers_tapes))
# Flatten the per-cluster file lists into one list, keeping the cluster order above
selected_files = [
    file_name
    for cluster_num in cluster_num_with_rulers_tapes
    for file_name in clustered_files[cluster_num]
]

selected_files

In [None]:
# Run the segmentation, then zip the processed images and download the archive and the logs
# NOTE(review): segment() in this section walks the whole image folder; the `selected_files`
# list built in the previous cell is not passed to it — confirm this is intended
segment()
!zip -r /content/SyrusImageProcessed.zip /content/SyrusImageProcessed
files.download(processed_images_zip)
files.download(logs_pkl_file)

### 3. Yet another implementation of the 2. code above but with slight modifications

In [None]:
!pip install torch torchvision
!pip install -U git+https://github.com/luca-medeiros/lang-segment-anything.git

In [None]:
import numpy as np
from PIL import Image
import os
from tqdm.auto import tqdm
from google.colab import files
from  PIL  import  Image
from lang_sam import LangSAM

In [None]:
# Extract the uploaded image archive into the current working directory
!unzip /content/SyrusImage.zip

In [None]:
# Instantiate LangSAM once; segment() below reads this module-level `model` for its predictions
model = LangSAM()

In [None]:
def check_image_format(selected_files):
    '''
    Checks if all selected files have valid image file extensions.

    A file name is valid when it ends with '.jpeg' or '.jpg' (case-sensitive).
    An empty list is considered valid.

    Parameters:
    - selected_files (list): List of file names to be checked.

    Returns:
    - bool: True if all files have valid image extensions, False otherwise.
    '''
    # all() returns an explicit True when no file fails the check, so callers
    # can use the result directly in an `if`
    return all(selected_file.endswith(('.jpeg', '.jpg')) for selected_file in selected_files)

In [None]:
def segment(image_file_list, image_folder="/content/SyrusImage", save_path="/content/SyrusImageProcessed",
            dump_path="/content/logs.pkl", text_prompt="parts", threshold=0.5):
    '''
    Segments a list of images using a segmentation model. It selects the mask with the highest logit value.

    For every listed file the selected mask is applied to the image and everything outside
    the mask is painted white. When no mask is returned, or the best logit does not exceed
    `threshold`, the original image is saved unchanged. The logits of every processed image
    are collected in a dict keyed by filename and written to `dump_path` with
    `dump_to_pickle_file`. Uses the notebook-level `model`.

    Parameters:
    - image_file_list (list): List of image filenames to be segmented.
    - image_folder (str): Path to the folder containing images (default: "/content/SyrusImage").
    - save_path (str): Path to save the segmented images (default: "/content/SyrusImageProcessed").
      Created if it does not exist.
    - dump_path (str): Path to save segmentation logs (default: "/content/logs.pkl").
    - text_prompt (str): Text prompt for segmentation model (default: "parts").
    - threshold (float): Threshold for selecting segmentation masks (default: 0.5).

    Returns:
    - None

    Raises:
    - ValueError: If check_image_format() reports that a listed file is not a .jpg/.jpeg file.
    '''
    # Only an explicit False is a failed check; any other result lets the run proceed
    if check_image_format(image_file_list) is False:
        raise ValueError("image_file_list must contain only .jpg/.jpeg files")

    # Without the output folder every save below would fail and only be reported as an error line
    os.makedirs(save_path, exist_ok=True)

    # Defined before the loop so the dump at the end always has a dict to write
    logs = {}
    # Iterating over the bar advances it per file and closes it when the loop finishes
    pbar = tqdm(image_file_list, desc='Segmenting')
    for image_file in pbar:
        pbar.set_description(f'Segmenting {image_file}')
        image_path = os.path.join(image_folder, image_file)
        try:
            # Open image and convert it to RGB if it's in other format
            image_pil = Image.open(image_path).convert("RGB")
            # Run prediction
            masks, _, _, logits = model.predict(image_pil, text_prompt)
            # Put the logits as numpy array to the logs dictionary
            logits_np = np.array(logits)
            logs[image_file] = logits_np
            # Index of the highest logit, or None when the model returned nothing
            best_index = int(np.argmax(logits_np)) if len(logits_np) > 0 else None
            if best_index is not None and logits_np[best_index] > threshold:
                # Only the selected mask needs to be moved to the CPU and converted
                selected_mask = masks[best_index].squeeze().cpu().numpy()
                image_np = np.asarray(image_pil)
                # Plain white background of the same size and dtype as the original image
                white_background = np.full_like(image_np, 255)
                # Keep the original pixels inside the mask, white everywhere else
                result_image_np = np.where(selected_mask[:, :, None], image_np, white_background)
                segmented_image_pil = Image.fromarray(result_image_np)
                segmented_image_pil.save(os.path.join(save_path, image_file), format='JPEG', quality=90)
            else:
                print(f"{image_file}: no masks or too low logits, saving the original image")
                image_pil.save(os.path.join(save_path, image_file), format='JPEG', quality=90)
        except Exception as e:
            # Best effort: report the failure and continue with the next image
            print(f"Error reading '{image_file}': {str(e)}")
    # Save the logs
    dump_to_pickle_file(logs, dump_path)

In [None]:
# Gather the files of the clusters flagged as containing rulers / measuring tapes
clustered_files = load_from_pickle_file(clustered_files_pkl_file)
cluster_num_with_rulers_tapes = [0, 3, 4, 6, 9, 13, 16, 19, 24, 26, 29, 32, 39]
# Flatten the per-cluster file lists into one list, keeping the cluster order above
selected_files = [
    file_name
    for cluster_num in cluster_num_with_rulers_tapes
    for file_name in clustered_files[cluster_num]
]

In [None]:
segment()
!zip -r /content/SyrusImageProcessed.zip /content/SyrusImageProcessed
files.download(processed_images_zip)
files.download(logs_pkl_file)

### 4. The following segmentation code runs on the Google Colab. Selecting the mask whose box is of smaller area.

In [None]:
!pip install torch torchvision
!pip install -U git+https://github.com/luca-medeiros/lang-segment-anything.git

In [None]:
import numpy as np
from PIL import Image
import os
from tqdm.auto import tqdm
from google.colab import files
import shutil
from  PIL  import  Image
from lang_sam import LangSAM

In [None]:
# Extract the uploaded archive of selected images into the current working directory
!unzip /content/SyrusImageSelected15.zip

In [None]:
# Instantiate LangSAM once; segment() below reads this module-level `model` for its predictions
model = LangSAM()

In [None]:
def segment(image_folder="/content/SyrusImageSelected15", save_path="/content/SyrusImageProcessed15",
            dump_path="/content/logs15.pkl", text_prompt="parts"):
    '''
    Segments images in a folder using the LangSAM() segmentation model. It uses the boxes parameter returned by the LangSAM() model,
    calculates the area of the boxes if two boxes are present and selects the mask whose box is of smaller area.

    When the model returns any other non-zero number of results, the mask with the highest
    logit is used instead. The selected mask is applied to the image and everything outside
    the mask is painted white. When the model returns nothing, the original image is saved
    unchanged. The logits of every processed image are collected in a dict keyed by filename
    and written to `dump_path` with `dump_to_pickle_file`. Uses the notebook-level `model`.

    Parameters:
    - image_folder (str): Path to the folder containing images (default: "/content/SyrusImageSelected15").
    - save_path (str): Path to save the segmented images (default: "/content/SyrusImageProcessed15").
      Created if it does not exist.
    - dump_path (str): Path to save segmentation logs (default: "/content/logs15.pkl").
    - text_prompt (str): Text prompt for segmentation model (default: "parts").

    Returns:
    - None
    '''
    # Without the output folder every save below would fail and only be reported as an error line
    os.makedirs(save_path, exist_ok=True)

    logs = {}
    # Named `image_files` rather than `files` so the google.colab `files` module is not shadowed
    image_files = [name for name in os.listdir(image_folder) if name.endswith(('.jpg', '.jpeg'))]
    # Iterating over the bar advances it per file and closes it when the loop finishes
    pbar = tqdm(image_files, desc='Segmenting')
    for filename in pbar:
        pbar.set_description(f'Segmenting {filename}')
        image_path = os.path.join(image_folder, filename)
        try:
            # Open image and convert it to RGB if it's in other format
            image_pil = Image.open(image_path).convert("RGB")
            # Run prediction
            masks, boxes, _, logits = model.predict(image_pil, text_prompt)
            # Put the logits as numpy array to the logs dictionary
            logits_np = np.array(logits)
            logs[filename] = logits_np
            if len(logits_np) > 0:
                # select the right mask
                if len(logits_np) == 2:
                    # Exactly two candidates: take the one with the smaller box.
                    # assumes each box is (x_min, y_min, x_max, y_max) — TODO confirm against LangSAM
                    boxes_np = np.array(boxes)
                    areas = (boxes_np[:, 2] - boxes_np[:, 0]) * (boxes_np[:, 3] - boxes_np[:, 1])
                    selected_index = int(np.argmin(areas))
                else:
                    # Any other count: fall back to the highest logit
                    selected_index = int(np.argmax(logits_np))
                # Only the selected mask needs to be moved to the CPU and converted
                selected_mask = masks[selected_index].squeeze().cpu().numpy()
                image_np = np.asarray(image_pil)
                # Plain white background of the same size and dtype as the original image
                white_background = np.full_like(image_np, 255)
                # Keep the original pixels inside the mask, white everywhere else
                result_image_np = np.where(selected_mask[:, :, None], image_np, white_background)
                segmented_image_pil = Image.fromarray(result_image_np)
                segmented_image_pil.save(os.path.join(save_path, filename), format='JPEG', quality=90)
            else:
                print(f"{filename}: no masks or too low logits, saving the original image")
                image_pil.save(os.path.join(save_path, filename), format='JPEG', quality=90)
        except Exception as e:
            # Best effort: report the failure and continue with the next image
            print(f"Error reading '{filename}': {str(e)}")
    # Save the logs
    dump_to_pickle_file(logs, dump_path)

In [None]:
segment()
# Create a zip archive from the processed folder; make_archive appends ".zip" to the base name
shutil.make_archive(processed_images_subset, 'zip', processed_images_subset)
# Download the archive that was just created (not the folder path) and the logs
files.download(processed_images_subset_zip)
files.download(logs_subset_pkl_file)

In [None]:
!gdown https://drive.usercontent.google.com/download?id=1ZDhKM30ovFmK3kIz6WOTtEVT7gRM0g3z&authuser=0