# Brainstation Capstone: Image Segmentation

##### This notebook utilizes Meta's Segment Anything Model (SAM) to segment concrete structure to pass onto Crack_Detection model


### Before Starting

In [2]:
# Check if GPU is available
!nvidia-smi

Tue May  9 13:33:47 2023       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 531.14                 Driver Version: 531.14       CUDA Version: 12.1     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                      TCC/WDDM | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf            Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  NVIDIA GeForce RTX 3060 L...  WDDM | 00000000:01:00.0  On |                  N/A |
| N/A   55C    P8               19W /  N/A|    882MiB /  6144MiB |     90%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
                                                                    

To make it easier to manage datasets, images, and models, we create a <span style="background-color: black; color: white">home</span> directory



In [3]:
import os
HOME = os.getcwd()
print("Home:", HOME)

Home: \\sepehrnas\Thick Volume\CAREER\SEPEHR\EDUCATION\Brainstation\Data Science\Deliverables\Capstone\segment_anything


## Install Meta's Segment Anything Model (SAM) and other dependencies

In [4]:
%cd (HOME)

import sys
import pkg_resources

# Check if package exists
if 'segment-anything' not in {pkg.key for pkg in pkg_resources.working_set}:
    !{sys.executable} -m pip install git+https://github.com/facebookresearch/segment-anything.git
else:
    print('Package already installed')

[WinError 2] The system cannot find the file specified: '(HOME)'
\\sepehrnas\Thick Volume\CAREER\SEPEHR\EDUCATION\Brainstation\Data Science\Deliverables\Capstone\segment_anything
Package already installed


In [5]:
import importlib

# jupyter bbox widget - allows users to interactively draw bounding boxes around objects in an image
if importlib.util.find_spec('jupyter_bbox_widget') is None:
    # Install package
    !pip install -q jupyter_bbox_widget
else:
   print('jupyter_bbox_widget is already installed')

# roboflow - upload and preprocess own datasets, visualize and label images, and generate ML models
if importlib.util.find_spec('roboflow') is None:
    # Install package
    !pip install -q roboflow
else:
   print('roboflow is already installed')    

if importlib.util.find_spec('supervision') is None:
    # Install package
    !pip install -q supervision
else:
   print('supervision is already installed')    
   
# dataclass-json - allows user to convert between JSON and Python objects
# try:
#     import dataclass_json # if exists, will not download. Download takes a VERY long time
# except ImportError:
#     !pip install git+https://github.com/lidatong/dataclass-json.git

jupyter_bbox_widget is already installed
roboflow is already installed
supervision is already installed


## Download SAM weights

In [6]:
import os

# Check if directory exists, create if necessary
if not os.path.exists(f"{HOME}/models"):
    os.mkdir(f"{HOME}/models")

# Change directory to models folder
%cd {HOME}/models

# Check if file already exists
if not os.path.exists("sam_vit_h_4b8939.pth"):
    # Download file
    !wget -q https://dl.fbaipublicfiles.com/segment_anything/sam_vit_h_4b8939.pth
else:
    print("File already exists, no need to download.")

\\sepehrnas\Thick Volume\CAREER\SEPEHR\EDUCATION\Brainstation\Data Science\Deliverables\Capstone\segment_anything\models
File already exists, no need to download.


In [7]:
# Sanity check: confirming file exists

CHECKPOINT_PATH = os.path.join(HOME, 'models', 'sam_vit_h_4b8939.pth')
print(CHECKPOINT_PATH, "; exist: \n", os.path.isfile(CHECKPOINT_PATH))

\\sepehrnas\Thick Volume\CAREER\SEPEHR\EDUCATION\Brainstation\Data Science\Deliverables\Capstone\segment_anything\models\sam_vit_h_4b8939.pth ; exist: 
 True


## Load Data

In [8]:
# os.chdir() if .py file
%cd {HOME}

# !mkdir = os.mkdir() if.py file
!mkdir {HOME}/data

%cd {HOME}/data

\\sepehrnas\Thick Volume\CAREER\SEPEHR\EDUCATION\Brainstation\Data Science\Deliverables\Capstone\segment_anything


The syntax of the command is incorrect.


\\sepehrnas\Thick Volume\CAREER\SEPEHR\EDUCATION\Brainstation\Data Science\Deliverables\Capstone\segment_anything\data


In [9]:
# ensure file type is correct, for example .jpg vs .jpeg

IMAGE_NAME = 'image001.jpg'
IMAGE_PATH = os.path.join(HOME, "Data", 'Drone Shots', IMAGE_NAME)
print(f'File path is: \n{IMAGE_PATH}')

File path is: 
\\sepehrnas\Thick Volume\CAREER\SEPEHR\EDUCATION\Brainstation\Data Science\Deliverables\Capstone\segment_anything\Data\Drone Shots\image001.jpg


### Load Model & Image(s)

In [10]:
import matplotlib.pyplot as plt
import cv2
import supervision as sv
import numpy as np
import torch

In [11]:
# DEVICE = torch.device('cuda:0' if torch.cuda.is_available() else "cpu")
DEVICE = "cpu"
MODEL_TYPE = 'vit_h'

cuda_status = torch.cuda.is_available() # check if CUDA is available
print(f'Cuda Status: {cuda_status}')

Cuda Status: True


In [12]:
# from Meta's segment_anything:

# model SamPredictor
from segment_anything import SamPredictor

# for selected mask generation
from segment_anything import sam_model_registry

# for automatic mask generation
from segment_anything import SamAutomaticMaskGenerator

# Model type parameter, location, and cpu/gpu selection
sam = sam_model_registry[MODEL_TYPE](checkpoint = CHECKPOINT_PATH).to(device=DEVICE)

In [13]:
# Read image
image_bgr = cv2.imread(IMAGE_PATH)

# Resize image to 1920x1080
image_bgr = cv2.resize(image_bgr, (1920,1080))

#convert from BGR to RGB
image_rgb = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB) 

# copy for non destructive
img = image_bgr.copy()

# Generate Segmenation with Mouse Clicks

SamPredictor class provides an easy interface for user to set an image using the set_image method. Input is both point (and if needed can be tweaked to have box) prompts

#### Instructions: 
 1) Once image is loaded, select multiple points with left click for foreground to be included in mask
 2) And select background points with right click to be excluded from mask
 3) Once desired points are selected, press any keyboard key to continue
 4) Wait until mask is generated
 5) Once mask is generated, press any keyboard key to finish. If mask is acceptable, continue to next cell to save masked image, otherwise redo this cell

In [14]:

# Function to record user mouse click coordinates
def onMouse(event, x, y, flags, param):
   
   #make input_label and input_coordinates global to access after function callback
   global input_label
   global input_coordinates
        
   # foreground selection with left click     
   if event == cv2.EVENT_LBUTTONDOWN:
   
      # this is a foreground selection, save new coordinates
      new_label = np.array([1])
      
      # new coordinates saved   
      new_coordinates = np.array([[x, y]]) 
      
      # if no coordinates have been selected, save coordinates
      if(len(input_coordinates)==0):
         input_label = new_label
         input_coordinates = new_coordinates
         print(f'Foreground Coordinates Saved: {x,y}')
      
      # if coordinates have already been selected, append new coordinates to a new row
      else:
         input_label = np.append(input_label, new_label, axis = 0)
         input_coordinates = np.append(input_coordinates,new_coordinates, axis = 0)
         print(f'Foreground Coordinates Appended: {x,y}')
         
   # background selection with right click
   elif event == cv2.EVENT_RBUTTONDOWN:
      
      # this is a background selection
      new_label = np.array([0])
      
      # new coordinates saved   
      new_coordinates = np.array([[x, y]]) 
      
      # if no coordinates have been selected, save coordinates
      if(len(input_coordinates)==0):
         input_label = new_label
         input_coordinates = new_coordinates
         print(f'Background Coordinates Saved: {x,y}')
      
      # if coordinates have already been selected, append new coordinates to a new row
      else:
         input_label = np.append(input_label, new_label, axis = 0)
         input_coordinates = np.append(input_coordinates,new_coordinates, axis = 0 )
         print(f'Background Coordinates Appended: {x,y}')
         
# create a named window, always on top, and display the image
cv2.namedWindow('Image', cv2.WINDOW_NORMAL)
cv2.setWindowProperty('Image', cv2.WND_PROP_TOPMOST, 1)
cv2.imshow('Image', image_rgb)

# Initialize coordinate array for mouse callback, and input label, then use onMouse
input_coordinates = []
input_label = []
cv2.setMouseCallback('Image', onMouse) 

# press a button to exit
cv2.waitKey(0)
cv2.destroyWindow('Image')
print('\nPlease wait... Generating Masks\n')

# Save input coordinates and labels into a numpy array
input_coordinates = np.array(input_coordinates)
input_label = np.array(input_label)

# Select mask predictor model
mask_predictor = SamPredictor(sam)

# set image for mask predictor
mask_predictor.set_image(image_rgb)

# Use the mask predictor to generate mask
masks, scores, logits = mask_predictor.predict(
   point_coords=input_coordinates,
   point_labels=input_label,
   multimask_output=False,
)

# loop in case multimask output is set to True. However, output is only last mask, needs modification
for mask in masks:
   # Convert mask to binary
   mask = np.where(masks == True, 0, 255).astype('uint8')

   # resize mask to original image size
   mask = np.reshape(mask,(img.shape[0], img.shape[1]))
      
   # Mask applied to R, G, and B Channels
   mask = np.dstack([np.array(mask, dtype=np.uint8)]*3)

   # Mask weighted at 100%, and image at 0% to get black and white
   mask = cv2.addWeighted(mask,1,img,0,0)

   # Window Name
   window_name = 'Masked Image'

   # create a named window for the image display
   cv2.namedWindow(window_name, cv2.WINDOW_NORMAL)

   # Set the windows to be always on top
   cv2.setWindowProperty(window_name, cv2.WND_PROP_TOPMOST, 1)

   # Display the image
   cv2.imshow(window_name, mask)

   # Press button to exit
   cv2.waitKey(0)
   cv2.destroyWindow(window_name)

Foreground Coordinates Saved: (128, 410)
Foreground Coordinates Appended: (136, 474)
Foreground Coordinates Appended: (153, 435)
Foreground Coordinates Appended: (98, 584)
Foreground Coordinates Appended: (61, 652)
Foreground Coordinates Appended: (23, 513)
Foreground Coordinates Appended: (18, 407)
Foreground Coordinates Appended: (755, 174)
Foreground Coordinates Appended: (469, 108)
Foreground Coordinates Appended: (264, 164)
Foreground Coordinates Appended: (204, 485)
Foreground Coordinates Appended: (66, 776)
Foreground Coordinates Appended: (293, 880)
Foreground Coordinates Appended: (688, 887)
Foreground Coordinates Appended: (835, 824)
Foreground Coordinates Appended: (743, 502)
Foreground Coordinates Appended: (470, 500)
Foreground Coordinates Appended: (306, 942)
Foreground Coordinates Appended: (97, 956)
Foreground Coordinates Appended: (775, 985)
Foreground Coordinates Appended: (989, 905)
Foreground Coordinates Appended: (939, 751)
Foreground Coordinates Appended: (919, 57

In [15]:
# Create folder if it doesn't exist
if not os.path.exists('Drone Shots/Supervised Masks'):
    os.makedirs('Drone Shots/Supervised Masks')

# Save the mask with the same name as the original image
IMAGE_NAME_WITHOUT_EXTENSION = os.path.splitext(IMAGE_NAME)[0]
mask_name = IMAGE_NAME_WITHOUT_EXTENSION + "_mask.png"
mask_path = os.path.join("Drone Shots/Supervised Masks", mask_name)
cv2.imwrite(mask_path, mask)

# To be used in next python script
mask_np_name = IMAGE_NAME_WITHOUT_EXTENSION + "_mask.npy"
mask_np_path = os.path.join("Drone Shots/Supervised Masks", mask_np_name)
np.save(mask_np_path, mask)

print(f'Success!\nMask saved to: {os.path.join(HOME, mask_path)}')

Success!
Mask saved to: \\sepehrnas\Thick Volume\CAREER\SEPEHR\EDUCATION\Brainstation\Data Science\Deliverables\Capstone\segment_anything\Drone Shots/Supervised Masks\image001_mask.png


# Automated Mask Generation

Uses a SAM model to the SamAutomaticGenerator class. Path is based on 'CHECKPOINT_PATH'. Running on CUDA is recommended.

In [None]:
mask_generator = SamAutomaticMaskGenerator(sam)

## Generate masks with SAM

In [None]:
sam_result = mask_generator.generate(image_rgb)

## Output Format

SamAutomaticGenerator returns a list of masks, where each mask is a dict containing various information about the mask:

- Segmentation - [np.ndarray] - the mask with (W,H) shape, and bool type

- area - [int] - the area of the mask in pixels

- bbox - [List[int]] - the boundary box of the mask in xywh format

- predicted_iou - [float] - the models' own prediction for the quality of the mask

- point_coords - [List[List[float]]] - the sampled input point that generated this mask

- stability_score - [float] - an additional measure of mask quality

- crop_box - List[int] - the crop of the image used to generate this mask in xywh format

In [None]:
print(sam_result[0].keys()) # first mask on the list

## Results Visualization with Supervision

In [None]:
mask_annotator = sv.MaskAnnotator()

detections = sv.Detections.from_sam(sam_result=sam_result)

annotated_image = mask_annotator.annotate(scene =image_bgr.copy(), detections = detections)

sv.plot_images_grid(
   images = [image_bgr, annotated_image],
   grid_size = (1,2),
   titles = ['Source Image', 'Segmented Image']
)

In [None]:
masks = [
   mask['segmentation']
   for mask
   in sorted(sam_result, key = lambda x: x['area'], reverse = True)
]

num_masks = len(masks)
rows = int(num_masks / 8) + (1 if num_masks % 8 > 0 else 0)  # calculate number of rows needed
grid_size = (8, rows)
size = (16, 16)

sv.plot_images_grid(images=masks, grid_size=grid_size, size=size)

In [None]:
# which mask chosen
chosen_mask = 0

In [None]:
mask = masks[chosen_mask]

# Invert the mask and convert it to uint8
mask = np.where(mask, 0, 255).astype('uint8')

# Create folder if it doesn't exist
if not os.path.exists('Drone Shots/Automatic Masks'):
    os.makedirs('Drone Shots/Automatic Masks')

# Save the mask with the same name as the original image
IMAGE_NAME_WITHOUT_EXTENSION = os.path.splitext(IMAGE_NAME)[0]
mask_name = IMAGE_NAME_WITHOUT_EXTENSION + "_mask.png"
mask_path = os.path.join("Drone Shots/Automatic Masks", mask_name)
cv2.imwrite(mask_path, mask)

# Display the segmented image
mask_annotator = sv.MaskAnnotator()
detections = sv.Detections.from_sam(sam_result=sam_result)
annotated_image = mask_annotator.annotate(scene=image_bgr.copy(), detections=detections)
sv.plot_images_grid(images=[image_bgr, annotated_image], grid_size=(1,2), titles=['Source Image', 'Segmented Image'])