In [1]:
import os

import matplotlib.pyplot as plt
import numpy as np
from scipy import signal
from skimage import color
from skimage import io
from skimage import measure, morphology
from skimage import util

In [None]:
def display_image(image, cmap='gray'):
    """Show ``image`` on the current pyplot axes, titled with its shape.

    Parameters
    ----------
    image : array-like
        Image data handed to ``plt.imshow``; it must expose ``.shape``.
    cmap : str, optional
        Colormap name forwarded to ``plt.imshow`` (defaults to ``'gray'``).
    """
    shape_title = "Shape: {}".format(image.shape)
    plt.imshow(image, cmap=cmap)
    plt.title(shape_title)
    # hide ticks and frame so only the picture is visible
    plt.axis('off')
    plt.show()
    
# Root folder of the project data. Set the SIGNATURE_DATA_DIR environment
# variable to point somewhere else; the original local path stays the default
# so existing setups keep working.
data_path = os.environ.get(
    "SIGNATURE_DATA_DIR",
    "C:/Users/Sadek/Documents/GitHub/Project/offline-signature/data",
)

# Pass the path components separately so os.path.join inserts the separators
# itself (it was previously given a single, already concatenated string).
image_path = os.path.join(data_path, "input", "doc1.jpg")
# read the input image
image = io.imread(image_path)
display_image(image)

In [None]:
# Convert the image to grayscale: the median filter and the thresholding
# below both work on this single-channel version of the input
image_gray = color.rgb2gray(image)
display_image(image_gray)

This code performs background subtraction on a 2D grayscale image, which is commonly used in image processing to enhance the contrast and improve the visibility of objects of interest.

In [None]:
# Background estimate: an 11x11 median filter replaces each pixel by the median
# of its neighbourhood, which smooths the image and reduces noise while
# preserving the edges and contours of objects.
bg = signal.medfilt2d(image_gray, (11, 11))

# Show the input and the background estimate next to each other
fig, (ax_original, ax_background) = plt.subplots(1, 2)

ax_original.imshow(image, cmap='gray')
ax_original.set_title('Original Image')

ax_background.imshow(bg, cmap='gray')
ax_background.set_title('Estimated Background')

plt.show()

Pixels in `image_gray` that are below the threshold are assigned a value of 1 in the binary mask, while pixels that are equal to or above the threshold are assigned a value of 0. This has the effect of highlighting the areas of the image that are significantly darker than their surrounding background, which often correspond to the objects of interest in the image.

The reason for subtracting **0.01** from the **bg** variable in the code below is to adjust the threshold for generating the binary mask. The **bg** variable represents the smoothed image, which can be thought of as an estimate of the background intensity levels in the image. By subtracting **0.01** from **bg**, we effectively **lower the threshold** for detecting foreground objects in the image, so a pixel must be clearly darker than its local background before it is counted as foreground.

In other words, pixels in the original image that are more than **0.01** units darker than the estimated background in the smoothed image will be considered part of the foreground in the resulting binary mask. This small adjustment to the threshold can help to ensure that the binary mask accurately captures the regions of the image that correspond to the objects of interest, while minimizing the inclusion of noise or other artifacts in the mask.

To sum up, the `mask` variable is generated by a simple thresholding operation that compares the grayscale input image with its median-filtered version.

In [None]:
# Foreground mask: True where a pixel is more than 0.01 darker than the
# median-filtered background estimate at the same position
mask = image_gray < bg - 0.01
display_image(mask)

In [None]:
# Threshold the boolean mask at its own mean value. This is a plain threshold,
# not connected-component analysis; the labelling is done further down with
# scikit-image's measure.label.
# NOTE(review): `mask` only holds True/False, so this reproduces `mask` unless
# every pixel is True - confirm whether this step is still needed.
blobs = mask > mask.mean()
display_image(blobs)

The `mask` and `blobs` are both binary masks, but they represent different information in the image.

The `mask` is created by comparing the grayscale image with its median-filtered version (the background estimate): a pixel is `True` when it is more than 0.01 darker than the estimated background at the same position. The resulting `mask` binary image therefore represents the regions of the original image that are darker than their surroundings, which often correspond to the objects of interest.

The `blobs` binary mask is created by thresholding the `mask` image again, but this time using the mean value of the `mask` image as the threshold. Because `mask` contains only `True`/`False` values, its mean is the fraction of foreground pixels (a number between 0 and 1), so every `True` pixel is above the mean and every `False` pixel is not. In other words, `blobs` selects the same pixels as `mask`, except in the degenerate case where every pixel of `mask` is `True` (then `blobs` is entirely `False`).

Therefore, both `mask` and `blobs` are binary images that highlight the regions of the original image that are darker than the estimated background. `blobs` is the array passed on to the connected-component labelling below, and it is that labelling step which separates specific features of interest in the image, such as text or signatures, into individual regions.

Both `mask` and `blobs` mark foreground pixels as `True` and background pixels as `False`. `blobs` does not yet single out the most prominent foreground objects; that filtering by component size happens further down, after the components have been labelled.

In [None]:
# Give every connected group of foreground pixels in `blobs` its own integer
# label; input pixels equal to 0 (False) are treated as background
blobs_labels = measure.label(blobs, background=0)
# Colour each label on top of the original image for visual inspection
image_label_overlay = color.label2rgb(blobs_labels, image=image)

This line uses the Scikit-image library to label the connected components in the binary mask `blobs`. Specifically, the `measure.label()` function assigns a unique integer label to each connected component in `blobs` and returns a new 2D array `blobs_labels` with the same shape as `blobs`, where each pixel is assigned the integer label of its corresponding connected component.

The `background` argument in this function call tells `measure.label()` which pixel value of the input is to be treated as background, i.e. not part of any connected component. In this case a value of 0 is used, so the `False` pixels of `blobs` are the background, and they receive the label 0 in `blobs_labels`.

In [None]:
# Open a 10x6 inch figure first; display_image then draws on the current
# pyplot axes, i.e. on this figure
fig, ax = plt.subplots(figsize=(10, 6))

# plot the connected components (for debugging)
display_image(image_label_overlay)

In [None]:
# Number of distinct values in the label image
# (the background label, when present, is counted as well)
np.unique(blobs_labels).size

In [None]:
# Area statistics of the labelled components:
#   * `average` is the mean area of the components larger than 10 pixels
#   * `the_biggest_component` is the largest area among the components of at
#     least 250 pixels (0 when there is none)
component_areas = [region.area for region in measure.regionprops(blobs_labels)]

# components of 10 pixels or fewer are left out of the average
counted_areas = [area for area in component_areas if area > 10]
total_area = sum(counted_areas)
counter = len(counted_areas)

# take regions with large enough areas
the_biggest_component = max(
    (area for area in component_areas if area >= 250),
    default=0,
)

# Without any component above 10 pixels the division used to raise
# ZeroDivisionError; fall back to 0.0 so the notebook keeps running.
average = total_area / counter if counter else 0.0

print("the_biggest_component: " + str(the_biggest_component))
print("average: " + str(average))

In [None]:
# Measure every labelled component (area, bounding box, ...)
regions = measure.regionprops(blobs_labels)

# Keep the component that covers the most pixels
largest_region = max(regions, key=lambda region: region.area)

# bbox is ordered (min_row, min_col, max_row, max_col)
minr, minc, maxr, maxc = largest_region.bbox
box_width = maxc - minc
box_height = maxr - minr

# Draw the original image and frame the largest component in red;
# matplotlib expects the rectangle anchor as (x, y) = (column, row)
fig, ax = plt.subplots()
ax.imshow(image, cmap='gray')
outline = plt.Rectangle(
    (minc, minr), box_width, box_height,
    fill=False, edgecolor='red', linewidth=2,
)
ax.add_patch(outline)
plt.show()

In [None]:
# Outline of the largest component: build a mask that is True only where the
# label image equals its label, then trace that mask at the 0.5 level
largest_mask = blobs_labels == largest_region.label
contours = measure.find_contours(largest_mask, 0.5)

# Overlay every traced contour in red on the original image
fig, ax = plt.subplots()
ax.imshow(image, cmap='gray')
for contour in contours:
    # contour points are (row, col); matplotlib wants x = col, y = row
    rows, cols = contour[:, 0], contour[:, 1]
    ax.plot(cols, rows, linewidth=2, c='red')
plt.show()

In [None]:
# Parameters used to derive the area threshold below which small connected
# components (outliers) are removed
constant_parameter_1 = 10
constant_parameter_2 = 100
constant_parameter_3 = 100

# Multiplier used to derive the area threshold for big connected components (outliers)
constant_parameter_4 = 18

# a4_small_size_outliar_constant is the threshold used to remove connected
# components smaller than it; per the original author it is meant for A4-size
# scanned documents. With the parameters above it is (average / 10) * 100 + 100.
a4_small_size_outliar_constant = ((average/constant_parameter_1)*constant_parameter_2)+constant_parameter_3
print("a4_small_size_outliar_constant: " + str(a4_small_size_outliar_constant))

In [None]:
# a4_big_size_outliar_constant is the threshold meant for removing connected
# components bigger than it (for A4-size scanned documents, per the original
# author): the small-size threshold multiplied by constant_parameter_4.
# NOTE(review): no visible cell applies this threshold yet - confirm whether
# the big-component removal step is missing.
a4_big_size_outliar_constant = a4_small_size_outliar_constant*constant_parameter_4
print("a4_big_size_outliar_constant: " + str(a4_big_size_outliar_constant))

In [None]:
# Remove the connected components that are smaller than
# a4_small_size_outliar_constant pixels from the label image
pre_version = morphology.remove_small_objects(blobs_labels, a4_small_size_outliar_constant)
display_image(pre_version)

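Choosing a threshold of 0.5 means that any pixel value of 0.5 or less is considered part of the background and any pixel value greater than 0.5 is considered part of the foreground. Because `pre_version` holds integer component labels (0 for the background), this keeps every pixel that still belongs to a component after the size filtering.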

In [None]:
# Every pixel that still carries a component label exceeds 0.5, so the
# comparison turns the filtered label image into a boolean foreground mask
threshold = 0.5
binary_image = util.img_as_bool(pre_version > threshold)
display_image(binary_image)

In [None]:
# Label the cleaned mask again and measure each remaining component;
# the original image is passed so intensity-based properties are available
label_img = measure.label(binary_image)
regions = measure.regionprops(label_img, intensity_image=image)

In [None]:
import matplotlib.patches as mpatches

# Plot the original image
fig, ax = plt.subplots(figsize=(10, 10))
ax.imshow(image, cmap='gray')

# Fixed seed so the component colours are identical on every run
rng = np.random.default_rng(0)
handles = []

# Plot the pixels of each component on the image, one random colour per component.
# The colour variable is deliberately NOT named `color`: that would rebind the
# skimage `color` module imported at the top of the notebook and break the
# later `color.label2rgb` call.
for index, region in enumerate(regions, start=1):
    coords = region.coords  # one (row, col) pair per pixel of the component
    x = coords[:, 1]
    y = coords[:, 0]
    component_color = rng.random(3)
    ax.plot(x, y, '.', color=component_color, markersize=1)
    ax.plot(x, y, color=component_color, linewidth=0.5)
    # Build the legend entry from the same colour so legend and drawing agree
    handles.append(mpatches.Patch(color=component_color, label=f'Component {index}'))

# Add legend
plt.legend(handles=handles, bbox_to_anchor=(1, 1), loc='upper left')

plt.axis('off')
plt.show()

In [None]:
# Find the bounding box of the signature.
# NOTE(review): this takes the first measured region; confirm that the first
# region is always the signature (the largest region may be the safer choice).
if not regions:
    raise ValueError("no connected component is left to crop; check the size thresholds")
bb = regions[0].bbox

# regionprops returns bbox as (min_row, min_col, max_row, max_col)
min_row, min_col, max_row, max_col = bb
# Legacy names read by the following cells; despite the x/y naming,
# x1/x2 are row indices and y1/y2 are column indices.
x1, y1, x2, y2 = min_row, min_col, max_row, max_col

# Crop the image; slicing only the two spatial axes works for both
# colour (rows, cols, channels) and grayscale (rows, cols) inputs
target = image[min_row:max_row, min_col:max_col]

# Remove single-dimensional entries from the shape of the array
target = target.squeeze()

display_image(target)

In [None]:
# Cut the same bounding box out of the boolean foreground mask
# (x1/x2 index rows, y1/y2 index columns)
target_mask = binary_image[x1:x2, y1:y2]
display_image(target_mask)

In [None]:
# Start from a canvas filled with 255 that has the crop's shape and dtype,
# then copy the original pixels back only where the mask marks foreground
foreground = target_mask == 1
selection = np.full_like(target, 255)
selection[foreground] = target[foreground]
plt.imshow(selection)

In [None]:
import plotly
import plotly.express as px
import plotly.graph_objects as go

# Rebuild the cleaned mask and measure its components. `measure.label` and
# `measure.regionprops` are used because the bare `regionprops` name is only
# imported by a later cell and is therefore undefined here on a fresh
# top-to-bottom run.
binary_image = util.img_as_bool(pre_version > 0.5)
label_img = measure.label(binary_image)
props = measure.regionprops(label_img, intensity_image=image)

fig = px.imshow(image, binary_string=True)
fig.update_traces(hoverinfo='skip') # hover is only for label info

properties = ['area', 'eccentricity', 'perimeter', 'intensity_mean']

# For each label, add a filled scatter trace for its contour,
# and display the properties of the label in the hover of this trace.
for region in props:
    label_i = region.label
    contours = measure.find_contours(label_img == label_i, 0.5)
    if not contours:
        # no outline could be traced for this component; skip it instead of
        # failing on the [0] lookup
        continue
    contour = contours[0]
    y, x = contour.T
    hoverinfo = ''
    for prop_name in properties:
        prop_value = getattr(region, prop_name)
        # array-valued properties (e.g. one value per image channel) are
        # reduced to their first entry so the float format below applies
        if isinstance(prop_value, np.ndarray):
            prop_value = prop_value[0]
        hoverinfo += f'<b>{prop_name}: {prop_value:.2f}</b><br>'
    fig.add_trace(go.Scatter(
        x=x, y=y, name=label_i,
        mode='lines', fill='toself', showlegend=False,
        hovertemplate=hoverinfo, hoveron='points+fills'))

plotly.io.show(fig)

In [None]:
import plotly
import plotly.express as px
import plotly.graph_objects as go

# Label every blob of the unfiltered mask and measure it against the original
# image. This cell used to rely on `labels` / `props` left in the kernel:
# `labels` is first assigned in a later cell (NameError on a fresh
# top-to-bottom run) and the `props` available at this point describe a
# different label image. Computing both here keeps them consistent and gives
# `props` the intensity image that 'intensity_mean' needs.
labels = measure.label(blobs)
props = measure.regionprops(labels, intensity_image=image)

fig = px.imshow(image, binary_string=True)
fig.update_traces(hoverinfo='skip') # hover is only for label info

properties = ['area', 'eccentricity', 'perimeter', 'intensity_mean']

# For each label, add a filled scatter trace for its contour,
# and display the properties of the label in the hover of this trace.
# NOTE(review): this adds one trace per unfiltered blob, which may be slow on
# noisy scans - confirm this view is still wanted next to the filtered one.
for region in props:
    label_i = region.label
    contours = measure.find_contours(labels == label_i, 0.5)
    if not contours:
        # no outline could be traced for this component; skip it instead of
        # failing on the [0] lookup
        continue
    contour = contours[0]
    y, x = contour.T
    hoverinfo = ''
    for prop_name in properties:
        prop_value = getattr(region, prop_name)
        # array-valued properties (e.g. one value per image channel) are
        # reduced to their first entry so the float format below applies
        if isinstance(prop_value, np.ndarray):
            prop_value = prop_value[0]
        hoverinfo += f'<b>{prop_name}: {prop_value:.2f}</b><br>'
    fig.add_trace(go.Scatter(
        x=x, y=y, name=label_i,
        mode='lines', fill='toself', showlegend=False,
        hovertemplate=hoverinfo, hoveron='points+fills'))

plotly.io.show(fig)

In [None]:
import plotly
import plotly.express as px
import plotly.graph_objects as go

# Rebuild the cleaned mask and its region measurements. The module-qualified
# `measure.label` / `measure.regionprops` are used because the bare `label`
# and `regionprops` names are not reliably defined at this point of a fresh
# top-to-bottom run.
threshold = 0.5
binary_image = util.img_as_bool(pre_version > threshold)
label_img = measure.label(binary_image)
regions = measure.regionprops(label_img, intensity_image=image)

fig = px.imshow(image, binary_string=True)
fig.update_traces(hoverinfo='skip')

properties = ['area', 'eccentricity', 'perimeter', 'mean_intensity']

# One filled contour trace per region, with its properties in the hover text
for region in regions:
    label_i = region.label
    contours = measure.find_contours(label_img == label_i, 0.5)
    if not contours:
        # no outline could be traced for this component; skip it instead of
        # failing on the [0] lookup
        continue
    contour = contours[0]
    y, x = contour.T
    hoverinfo = ''
    for prop_name in properties:
        prop_value = getattr(region, prop_name)
        # array-valued properties (e.g. one value per image channel) are
        # reduced to their first entry so the float format below applies
        if isinstance(prop_value, np.ndarray):
            prop_value = prop_value[0]
        hoverinfo += f'<b>{prop_name}: {prop_value:.2f}</b><br>'
    fig.add_trace(go.Scatter(
        x=x, y=y, name=label_i,
        mode='lines', fill='toself', showlegend=False,
        hovertemplate=hoverinfo, hoveron='points+fills'))

plotly.io.show(fig)


In [None]:
# Echo the list of region-property objects currently bound to `regions`
regions

In [None]:
# Label with background=1: the True pixels of `blobs` now count as background,
# so it is the connected False areas that receive labels
blobs_labels = measure.label(blobs, background=1)
image_label_overlay = color.label2rgb(blobs_labels, image=blobs)

# plot the connected components (for debugging)
fig, ax = plt.subplots(figsize=(10, 6))
ax.imshow(image_label_overlay)
ax.axis('off')
fig.tight_layout()
plt.show()

In [None]:
import matplotlib.pyplot as plt
from skimage.measure import label, regionprops
from skimage.segmentation import mark_boundaries

# Number the connected components of the unfiltered blob mask
labels = label(blobs)

# Measure each component (only the bounding box is used below)
props = regionprops(labels)

# Show the grayscale image and frame every component in red
fig, ax = plt.subplots()
ax.imshow(image_gray, cmap='gray')
for prop in props:
    # bbox is ordered (min_row, min_col, max_row, max_col)
    minr, minc, maxr, maxc = prop.bbox
    box_height, box_width = maxr - minr, maxc - minc
    ax.add_patch(
        plt.Rectangle(
            (minc, minr), box_width, box_height,
            fill=False, edgecolor='red', linewidth=2,
        )
    )
plt.show()