# Robotics, Vision & Control 3e: for Python
## Chapter 12: Image Feature Extraction

Copyright (c) 2021- Peter Corke

In [None]:
try:
    import google.colab
    print('Running on CoLab')
    !pip install matplotlib
    !pip install machinevision-toolbox-python
    COLAB = True
except:
    COLAB = False

# Echo the value of a cell's final statement even when it is an assignment
# (IPython's "last_expr_or_assign" mode), so `x = f()` cells show their result.
# A trailing semicolon on that statement still suppresses the echo.
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "last_expr_or_assign"

import numpy as np
import matplotlib.pyplot as plt
import math
from math import pi
# Print arrays compactly: lines up to 120 characters wide and floats with 4
# significant figures. Values whose magnitude is below 1e-10 are rendered by
# formatting the literal 0 instead of x, so floating-point noise (and "-0")
# is shown as a plain 0.
np.set_printoptions(
    linewidth=120, formatter={
        'float': lambda x: f"{0:8.4g}" if abs(x) < 1e-10 else f"{x:8.4g}"})
np.random.seed(0)
from machinevisiontoolbox.base import *
from machinevisiontoolbox import *
from spatialmath.base import *
from spatialmath import *

# 12.1 Region Features
## 12.1.1 Pixel Classification
### 12.1.1.1 Monochrome Image Classification


In [None]:
castle = Image.Read("castle.png", dtype="float");

In [None]:
(castle >= 0.7).disp();

In [None]:
# castle.ithresh()


In [None]:
castle.hist().plot();

In [None]:
t = castle.otsu()

In [None]:
castle2 = Image.Read("castle2.png", dtype="float");

In [None]:
t = castle2.otsu()

In [None]:
castle2.threshold_adaptive(h=15).disp();

### 12.1.1.2 Color Image Classification


In [None]:
targets = Image.Read("yellowtargets.png", dtype="float", gamma="sRGB");
targets.disp();

In [None]:
garden = Image.Read("tomato_124.png", dtype="float", gamma="sRGB");
garden.disp();

In [None]:
ab = targets.colorspace("L*a*b*").plane("a*:b*")

In [None]:
ab.plane("b*:").disp();

In [None]:
targets_labels, targets_centroids, resid = ab.kmeans_color(k=2, seed=0)

In [None]:
targets_labels.disp(colormap="jet", colorbar=True);

In [None]:
targets_centroids

In [None]:
with plt.ioff():
    plot_chromaticity_diagram(colorspace="a*b*");
    plot_point(targets_centroids, marker="*", text="{}");

In [None]:
[color2name(c, "a*b*") for c in targets_centroids.T]

In [None]:
resid / ab.npixels

In [None]:
labels = ab.kmeans_color(centroids=targets_centroids)

In [None]:
objects = (labels == 0)

In [None]:
objects.disp();

In [None]:
ab = garden.colorspace("L*a*b*").plane("a*:b*")
garden_labels, garden_centroids, resid = ab.kmeans_color(k=3, seed=0);
garden_centroids

In [None]:
[color2name(c, "a*b*") for c in garden_centroids.T]

In [None]:
tomatoes = (garden_labels == 2);

In [None]:
data = np.random.rand(500, 2);  # 500 x 2D data points

In [None]:
from scipy.cluster.vq import kmeans2
centroids, labels = kmeans2(data, k=3)

In [None]:
for i in range(3):
  plot_point(data[labels==i, :].T, color="rgb"[i], marker=".", markersize=10)

In [None]:
tomatoes_binary = tomatoes.close(Kernel.Circle(radius=15));
tomatoes_binary.disp();

### 12.1.1.3 Semantic Classification


In [None]:
scene = Image.Read("image3.jpg")
scene.disp();

In [None]:
# PyTorch is only needed by the deep-learning cells; if it is missing, print
# an install hint instead of raising here.
try:
    import torch
    import torchvision as tv
except ModuleNotFoundError:
    print("please install PyTorch:  pip install torch torchvision")

In [None]:
transform = tv.transforms.Compose([
   tv.transforms.ToTensor(),
   tv.transforms.Normalize(mean=[0.485, 0.456, 0.406], 
                           std=[0.229, 0.224, 0.225])]);
in_tensor = transform(scene.image);

In [None]:
# Load the pretrained FCN-ResNet50 segmentation network in evaluation mode.
# NOTE(review): newer torchvision releases deprecate `pretrained=` in favour of
# `weights=`; confirm against the installed version before changing it.
model = tv.models.segmentation.fcn_resnet50(pretrained=True).eval();
# Inference only: disable autograd so no gradient graph is built for the
# forward pass. The input is stacked into a batch of one image.
with torch.no_grad():
    outputs = model(torch.stack([in_tensor]));

In [None]:
labels = Image(torch.argmax(outputs["out"].squeeze(), dim=0).detach().cpu().numpy());
labels.disp(colormap="viridis", ncolors=20, colorbar=True);

In [None]:
(labels == 15).disp();
scene.choose([255, 255, 255], labels != 15).disp();

## 12.1.2 Object Instance Representation
### 12.1.2.1 Creating Binary Blobs


In [None]:
sharks = Image.Read("sharks.png");
sharks.disp();

In [None]:
labels, m = sharks.labels_binary()
m

In [None]:
labels.disp(colorbar=True);

In [None]:
right_shark = (labels == 3);
right_shark.disp();

### 12.1.2.2 Maximally Stable Extremal Regions (MSER)


In [None]:
labels, m = castle2.labels_MSER()

In [None]:
m

In [None]:
labels.disp(colormap="viridis_r", ncolors=m);

### 12.1.2.3 Graph-Based Segmentation


In [None]:
grain = Image.Read("58060.png")
grain.disp();

In [None]:
labels, m = grain.labels_graphseg()
m

In [None]:
labels.disp(colormap="viridis_r", ncolors=m);

## 12.1.3 Object Instance Description
### 12.1.3.1 Area


In [None]:
right_shark.sum()

### 12.1.3.2 Bounding Boxes


In [None]:
u, v = right_shark.nonzero()

In [None]:
u.shape

In [None]:
umin = u.min()
umax = u.max()
vmin = v.min()
vmax = v.max()

In [None]:
right_shark.disp(block=None);  # display it again, it was a few cells back
plot_box(lrbt=[umin, umax, vmin, vmax], color="g");

### 12.1.3.3 Moments


In [None]:
m00 = right_shark.mpq(0, 0)

In [None]:
uc = right_shark.mpq(1, 0) / m00
vc = right_shark.mpq(0, 1) / m00

In [None]:
right_shark.disp(block=None);  # display it again, it was a few cells back
plot_point((uc, vc), ["bo", "bx"]);

In [None]:
# Second-order moments from `upq` (presumably the central moments -- confirm
# against the toolbox documentation), arranged as the symmetric 2x2 matrix
# J = [[u20, u11], [u11, u02]].
u20 = right_shark.upq(2, 0)
u02 = right_shark.upq(0, 2)
u11 = right_shark.upq(1, 1)
J = np.array([[u20, u11], [u11, u02]])

In [None]:
right_shark.disp(block=None);  # display it again, it was a few cells back
plot_ellipse(4 * J  / m00, centre=(uc, vc), inverted=True, color="blue");

In [None]:
lmbda, x = np.linalg.eig(J)
lmbda

In [None]:
a = 2 * np.sqrt(lmbda.max() / m00)
b = 2 * np.sqrt(lmbda.min() / m00)

In [None]:
b / a

In [None]:
x

In [None]:
i = np.argmax(lmbda)  # get index of largest eigenvalue
v = x[:, i]

In [None]:
np.rad2deg(np.arctan2(v[1], v[0]))

### 12.1.3.4 Blob Descriptors


In [None]:
blobs = sharks.blobs();

In [None]:
blobs

In [None]:
len(blobs)

In [None]:
blobs[3]

In [None]:
# Only a cell's final expression is echoed, so print each property explicitly
# to make all four values visible.
print(blobs[3].area)
print(blobs[3].umin)
print(blobs[3].aspect)
print(blobs[3].centroid)

In [None]:
# Only a cell's final expression is echoed, so print each moment explicitly.
print(blobs[3].moments.m00)   # moment p=q=0
print(blobs[3].moments.mu11)  # central moment p=q=1
print(blobs[3].moments.nu03)  # normalized central moment p=0, q=3

In [None]:
blobs.area

In [None]:
sharks.disp(block=None)
blobs[3].plot_box(color="red")
blobs[:2].plot_box(color="red")
blobs.plot_centroid(marker="+", color="blue")
blobs.plot_box(color="red")

In [None]:
sharks.roi(blobs[1].bbox).rotate(blobs[1].orientation).disp();

In [None]:
blobs[blobs.area > 10_000]

In [None]:
tomato_blobs = tomatoes_binary.blobs()

In [None]:
tomato_blobs.filter(area=(1_000, 5_000))

In [None]:
tomato_blobs.filter(touch=False)

In [None]:
tomato_blobs.filter(area=[1000, 5000], touch=False, color=1)

### 12.1.3.5 Blob Hierarchy


In [None]:
multiblobs = Image.Read("multiblobs.png");
multiblobs.disp();

In [None]:
labels, m = multiblobs.labels_binary()
m

In [None]:
blobs = multiblobs.blobs()

In [None]:
blobs[1].children


In [None]:
blobs[1].parent

In [None]:
blobs.label_image().disp();

In [None]:
blobs.dotfile(show=True);

### 12.1.3.6 Shape from Moments


In [None]:
blobs = sharks.blobs()

In [None]:
blobs.aspect

In [None]:
blobs.humoments()

### 12.1.3.7 Shape from Perimeter


In [None]:
blobs[1].perimeter[:, :5]

In [None]:
blobs[1].perimeter.shape

In [None]:
sharks.disp(block=None)
blobs[1].plot_perimeter(color="orange")

In [None]:
sharks.disp(block=None);
blobs.plot_perimeter(color="orange")
blobs.plot_centroid()

In [None]:
p = blobs[1].perimeter_length

In [None]:
blobs.circularity

In [None]:
p = Polygon2(blobs[1].perimeter).moment(0, 0)

In [None]:
r, th = blobs[1].polar();
plt.plot(r, "r", th, "b");

In [None]:
for blob in blobs:
  r, theta = blob.polar()
  plt.plot(r / r.sum());

In [None]:
similarity, _ = blobs.polarmatch(1)
similarity

## 12.1.4 Object Detection using Deep Learning


In [None]:
scene = Image.Read("image3.jpg")
scene.disp();

In [None]:
import torch
import torchvision as tv
transform = tv.transforms.ToTensor();
in_tensor = transform(scene.image);

In [None]:
# Load the pretrained Faster R-CNN (ResNet50-FPN) detector in evaluation mode.
# NOTE(review): newer torchvision releases deprecate `pretrained=` in favour of
# `weights=`; confirm against the installed version before changing it.
model = tv.models.detection.fasterrcnn_resnet50_fpn(pretrained=True).eval();
# Inference only: disable autograd so no gradient graph is built for the
# forward pass. The input is stacked into a batch of one image.
with torch.no_grad():
    outputs = model(torch.stack([in_tensor]));

In [None]:
# Convert the detections for the first (only) image in the batch to NumPy
# arrays. `.cpu()` matches the segmentation cell and keeps this working if the
# model is ever moved to an accelerator.
scores = outputs[0]["scores"].detach().cpu().numpy();  # confidence score per detection
labels = outputs[0]["labels"].detach().cpu().numpy();  # integer class id per detection (not a name)
boxes = outputs[0]["boxes"].detach().cpu().numpy();    # one box per detection as [x1, y1, x2, y2]

In [None]:
len(scores)

In [None]:
scene.disp(block=None);
# Names for a handful of class ids (presumably COCO category ids -- confirm);
# the detector can also report ids that are not listed here.
classname_dict = {1: "person", 2: "bicycle", 3: "car", 4: "motorcycle", 18: "dog"};
for score, label, box in zip(scores, labels, boxes):
    if score > 0.5:  # only confident detections
        # Fall back to the numeric id rather than raising KeyError when a
        # confident detection belongs to a class missing from the table.
        name = classname_dict.get(int(label), f"class {label}")
        plot_labelbox(name, lbrt=box, filled=True, alpha=0.3,
                      color="yellow", linewidth=2);

## 12.1.5 Summary
# 12.2 Line Features


In [None]:
points5 = Image.Read("5points.png", dtype="float");

In [None]:
square = Image.Squares(number=1, size=256, fg=128).rotate(0.3)

In [None]:
edges = square.canny();

In [None]:
h = edges.Hough();

In [None]:
h.plot_accumulator()

In [None]:
plt.plot(h.votes);
plt.yscale("log");

In [None]:
lines = h.lines(60)

In [None]:
church = Image.Read("church.png", mono=True)
edges = church.canny()
h = edges.Hough();
lines = h.lines_p(100, minlinelength=200, maxlinegap=5, seed=0);

In [None]:
church.disp(block=None);
h.plot_lines(lines, "r--")

## 12.2.1 Summary
# 12.3 Point Features
## 12.3.1 Classical Corner Detectors


In [None]:
view1 = Image.Read("building2-1.png", mono=True);
view1.disp();

In [None]:
harris1 = view1.Harris(nfeat=500)

In [None]:
len(harris1)

In [None]:
harris1[0]

In [None]:
# Only a cell's final expression is echoed, so print both attributes explicitly.
print(harris1[0].p)
print(harris1[0].strength)

In [None]:
# Only a cell's final expression is echoed, so print both attributes explicitly.
print(harris1[:5].p)
print(harris1[:5].strength)

In [None]:
view1.disp(block=None, darken=True);
harris1.plot();

In [None]:
view1.disp(block=None, darken=True);
harris1[::5].plot()

In [None]:
view1.disp(block=None, darken=True);
harris1.subset(20).plot()

In [None]:
harris1 = view1.Harris(nfeat=500, scale=15)

In [None]:
view1.Harris_corner_strength().disp();

In [None]:
view2 = Image.Read("building2-2.png", mono=True);

In [None]:
harris2 = view2.Harris(nfeat=250);
view2.disp(block=None, darken=True);
harris2.plot();

## 12.3.2 Scale-Space Corner Detectors


In [None]:
foursquares = Image.Read("scale-space.png", dtype="float");

In [None]:
G, L, s = foursquares.scalespace(60, sigma=2); 

In [None]:
L[5].disp(colormap="signed");

In [None]:
s[5]

In [None]:
plt.plot(s[:-1], [-Ls.image[63, 63] for Ls in L]);

In [None]:
features = findpeaks3d(np.stack([np.abs(Lk.image) for Lk in L], axis=2), npeaks=4)

In [None]:
# Overlay each peak found by findpeaks3d on the original image.
foursquares.disp(block=None);
for feature in features:
  # feature[0], feature[1] are used as the peak's plot position (black cross)
  plt.plot(feature[0], feature[1], 'k+')
  # feature[2] is used as an index into s, the scale list from scalespace()
  scale = s[int(feature[2])]
  # yellow circle centred on the peak, with radius sqrt(2) times that scale
  plot_circle(radius=scale * np.sqrt(2), centre=feature[:2], color="y")

In [None]:
mona = Image.Read("monalisa.png", dtype="float");

In [None]:
G, L, _ = mona.scalespace(8, sigma=8);

In [None]:
Image.Hstack(G).disp();
Image.Hstack(L).disp();

### 12.3.2.1 Scale-Space Point Feature


In [None]:
sift1 = view1.SIFT(nfeat=200)

In [None]:
sift1[0]

Actually, those SIFT features are very small and are associated with the leaves on the trees along the left-hand edge.  Let's select only the larger and stronger SIFT features.

In [None]:
sift1 = view1.SIFT().filter(percentstrength=50, minscale=5)

In [None]:
view1.disp(block=None, darken=True);
sift1.plot(filled=True, color="y", hand=True, alpha=0.3)

In [None]:
plt.hist(sift1.scale, bins=100);

# 12.4 Applications
## 12.4.1 Character Recognition


In [None]:
if COLAB:
    !sudo apt install tesseract-ocr
    !pip install pytesseract

# pytesseract is optional; if it is missing, print install instructions
# instead of raising here.
try:
    import pytesseract as tess
except ModuleNotFoundError:
    print("please install pytesseract:\n * install tesseract binary, see https://tesseract-ocr.github.io/tessdoc/Installation.html\n * pip install pytesseract")
# NOTE(review): if the import above failed, `tess` is undefined and the OCR
# call below raises NameError right after the hint is printed.
penguins = Image.Read("penguins.png");
# Run OCR on the boolean mask of pixels whose value is below 100 (presumably
# the dark text -- confirm) and return the per-word results as a dict.
ocr = tess.image_to_data(penguins.image < 100, output_type=tess.Output.DICT);

In [None]:
# List every OCR entry whose text is non-blank and whose confidence is positive.
for confidence, text in zip(ocr["conf"], ocr["text"]):
    if not text.strip() or not float(confidence) > 0:
        continue
    print(confidence, text)

In [None]:
# Overlay a labelled box on the image for each confidently recognised word.
penguins.disp(block=None)
for i, (text, confidence) in enumerate(zip(ocr["text"], ocr["conf"])):
  # skip entries that are empty once spaces are removed, or at/below confidence 50
  if text.replace(" ", "") != "" and float(confidence) > 50:
    # i indexes the parallel "left"/"top"/"width"/"height" lists of the OCR dict
    plot_labelbox(text,
       lb=(ocr["left"][i], ocr["top"][i]), wh=(ocr["width"][i], ocr["height"][i]),
       color="y", filled=True, alpha=0.2)

## 12.4.2 Image Retrieval


In [None]:
images = ImageCollection("campus/*.png", mono=True);

In [None]:
# Accumulate the SIFT features of every image in the collection.
features = [];
for image in images:
  features += image.SIFT()
# NOTE(review): sort(by=..., inplace=True) is not the built-in list.sort
# signature, so `features` is presumably a toolbox feature container by this
# point rather than a plain list -- confirm against the toolbox docs.
features.sort(by="scale", inplace=True);

In [None]:
len(features)

In [None]:
features[:10].table()

In [None]:
supports = [];
for feature in features[:400]:
   supports.append(feature.support(images))
Image.Tile(supports, columns=20).disp(plain=True);

In [None]:
feature = features[108]

In [None]:
images[feature.id].disp(block=None);
feature.plot(filled=True, color="y", hand=True, alpha=0.5)

In [None]:
bag = BagOfWords(features, 2_000, seed=0)

In [None]:
w = bag.word(108)

In [None]:
bag.occurrence(w)

In [None]:
bag.contains(w)

In [None]:
bag.exemplars(w, images).disp();

In [None]:
word, freq = bag.wordfreq();

In [None]:
# Only a cell's final expression is echoed, so print both statistics explicitly.
print(np.max(freq))
print(np.median(freq))

In [None]:
plt.bar(word, -np.sort(-freq), width=1);  # sort in descending order

In [None]:
bag = BagOfWords(features, 2_000, nstopwords=50, seed=0)

In [None]:
v10 = bag.wwfv(10);
v10.shape

In [None]:
sim_10 = bag.similarity(v10);

In [None]:
k = np.argsort(-sim_10)

In [None]:
query = ImageCollection("campus/holdout/*.png", mono=True);

In [None]:
S = bag.similarity(query);

In [None]:
Image(S).disp(colorbar=True);

In [None]:
np.argmax(S, axis=1)

In [None]:
# Only a cell's final expression is echoed, so print both retrieval results.
print(bag.retrieve(query[0]))
print(bag.retrieve(query[1]))