In [None]:
import os
import sys
from argparse import ArgumentParser
from glob import glob
from loguru import logger
from time import time
from tqdm import tqdm
from yaml import load, FullLoader

import numpy as np
import rasterio as rio
from matplotlib import pyplot as plt
from skimage import color
from skimage.feature import hog, match_descriptors, plot_matches, SIFT
from sklearn import svm
from sklearn.metrics import classification_report,accuracy_score

from math import floor

In [None]:
# Add the parent directory to the module search path (position 1) so that the
# project-local `functions` package below can be imported from this notebook.
sys.path.insert(1,'..')
import functions.fct_misc as misc

# Rebind `logger` to whatever the project helper returns
# (presumably the same loguru logger with project formatting applied - confirm in fct_misc).
logger = misc.format_logger(logger)

## Functions

In [None]:
def im_list_to_hog(im_list, channel_axis=None):
    """Compute HOG feature vectors and HOG visualisations for a set of images.

    The HOG cell size is chosen per image as one eighth of its smallest
    spatial dimension (rounded down, at least 1 pixel).

    Parameters
    ----------
    im_list : dict
        Mapping from an image name to an image array exposing ``shape``.
    channel_axis : int or None, optional
        Axis holding the colour channels; forwarded unchanged to ``hog``.
        ``None`` (default) means every axis of the image is spatial.

    Returns
    -------
    hog_images : dict
        Image name -> HOG visualisation returned by ``hog``.
    hog_features : dict
        Image name -> HOG feature vector returned by ``hog``.
    """
    hog_images = {}
    hog_features = {}
    for name, image in im_list.items():
        if channel_axis is None:
            spatial_shape = image.shape
        else:
            # The channel axis (e.g. size 3) must not drive the cell size,
            # otherwise floor(3/8) makes pixels_per_cell collapse to 0.
            channel_index = channel_axis % len(image.shape)
            spatial_shape = tuple(
                size for axis, size in enumerate(image.shape) if axis != channel_index
            )
        # Clamp to 1 so that images with a side shorter than 8 pixels still get a usable cell size.
        ppc = max(1, floor(min(spatial_shape)/8))
        fd, hog_image = hog(image, orientations=8, pixels_per_cell=(ppc,ppc), cells_per_block=(4, 4), block_norm= 'L2', visualize=True, channel_axis=channel_axis)
        hog_images[name] = hog_image
        hog_features[name] = fd

    return hog_images, hog_features

## Processing

Argument and parameter specification

In [None]:
# Parse the YAML configuration file and keep only the section stored under 'hog.py'.
with open('../../config/config_symbol_classif.yaml') as config_file:
    full_config = load(config_file, Loader=FullLoader)
cfg = full_config['hog.py']

Load input parameters

In [None]:
# Directories used by the notebook, read from the configuration section.
WORKING_DIR, OUTPUT_DIR, TILE_DIR = (
    cfg[entry] for entry in ('working_dir', 'output_dir', 'tile_dir')
)

In [None]:
# Switch to the working directory first: any relative path used afterwards
# (including OUTPUT_DIR on the next line) is resolved against it.
os.chdir(WORKING_DIR)
# Create the output directory if needed; an already existing one is not an error.
os.makedirs(OUTPUT_DIR, exist_ok=True)

In [None]:
logger.info('Read data...')
# Every file ending in .tif located directly inside the tile directory.
tile_pattern = os.path.join(TILE_DIR, '*.tif')
tile_list = glob(tile_pattern)

In [None]:
# Load every tile into memory, keyed by its file name.
# transpose(1, 2, 0) moves the first axis returned by read() to the last position
# (presumably bands-first -> channels-last; confirm against the rasterio docs).
image_data = {}
for tile_path in tile_list:
    tile_name = os.path.basename(tile_path)
    with rio.open(tile_path) as dataset:
        raster = dataset.read()
    image_data[tile_name] = raster.transpose(1, 2, 0)

In [None]:
# Preview nine sample tiles as loaded, on a 3x3 grid.
image_dict = image_data
f, axarr = plt.subplots(3,3)
preview_tiles = [
    '0_2570184_1148461.tif', '4_2569767_1149331.tif', '1_2571614_1152259.tif',
    '5_2569300_1148157.tif', '0_2570190_1148492.tif', '10_2580845_1165703.tif',
    '4_2569484_1149035.tif', '5_2569282_1148151.tif', '6_2567727_1147671.tif',
]
# axarr.flat walks the grid row by row, matching the order of the list above.
for panel, tile_name in zip(axarr.flat, preview_tiles):
    panel.imshow(image_dict[tile_name])

In [None]:
# Grayscale version of every tile (via color.rgb2gray), keyed like image_data.
data_gray = {}
for tile_name, colour_tile in image_data.items():
    data_gray[tile_name] = color.rgb2gray(colour_tile)

In [None]:
# Preview the grayscale tiles on a common 0-1 colour scale.
image_dict = data_gray
vmin = 0
vmax = 1
f, axarr = plt.subplots(3,3)
preview_tiles = [
    '0_2570184_1148461.tif', '4_2569767_1149331.tif', '1_2571614_1152259.tif',
    '5_2569300_1148157.tif', '0_2570190_1148492.tif', '10_2580845_1165703.tif',
    '4_2569484_1149035.tif', '5_2569282_1148151.tif', '6_2567727_1147671.tif',
]
# axarr.flat walks the grid row by row, matching the order of the list above.
for panel, tile_name in zip(axarr.flat, preview_tiles):
    panel.imshow(image_dict[tile_name], vmin=vmin, vmax=vmax)

In [None]:
# Per-pixel ratio of band index 2 over band index 0
# (presumably blue / red - confirm the band order of the tiles).
# Where band 0 is zero the division is skipped and the band-2 value is kept as is.
data_ratio = {}
for tile_name, tile in image_data.items():
    numerator = tile[:, :, 2]
    denominator = tile[:, :, 0]
    data_ratio[tile_name] = np.divide(
        numerator,
        denominator,
        out=numerator.astype(np.float64),
        where=denominator != 0,
    )

In [None]:
# Min-max stretch of every ratio tile to the 0-255 range.
norm_data_ratio = {}
for tile_name, ratio in data_ratio.items():
    lowest = np.min(ratio)
    highest = np.max(ratio)
    norm_data_ratio[tile_name] = (ratio - lowest) / (highest - lowest) * 255

In [None]:
# Preview the raw ratio tiles; each panel is scaled from 0 to its own maximum.
image_dict = data_ratio
f, axarr = plt.subplots(3,3)
preview_tiles = [
    '0_2570184_1148461.tif', '4_2569767_1149331.tif', '1_2571614_1152259.tif',
    '5_2569300_1148157.tif', '0_2570190_1148492.tif', '10_2580845_1165703.tif',
    '4_2569484_1149035.tif', '5_2569282_1148151.tif', '6_2567727_1147671.tif',
]
# axarr.flat walks the grid row by row, matching the order of the list above.
for panel, tile_name in zip(axarr.flat, preview_tiles):
    panel.imshow(image_dict[tile_name], vmin=0, vmax=image_dict[tile_name].max())

In [None]:
# Preview the min-max normalised ratio tiles on a common 0-255 colour scale.
image_dict = norm_data_ratio
v_max = 255
f, axarr = plt.subplots(3,3)
preview_tiles = [
    '0_2570184_1148461.tif', '4_2569767_1149331.tif', '1_2571614_1152259.tif',
    '5_2569300_1148157.tif', '0_2570190_1148492.tif', '10_2580845_1165703.tif',
    '4_2569484_1149035.tif', '5_2569282_1148151.tif', '6_2567727_1147671.tif',
]
# axarr.flat walks the grid row by row, matching the order of the list above.
for panel, tile_name in zip(axarr.flat, preview_tiles):
    panel.imshow(image_dict[tile_name], vmin=0, vmax=v_max)

In [None]:
# Histogram (25 bins) of the raw ratio values of one sample tile.
sample_tile = data_ratio['10_2580845_1165703.tif']
np.histogram(sample_tile, bins=25)

In [None]:
# Histogram (50 bins) of the min-max normalised ratio of the same sample tile.
sample_tile = norm_data_ratio['10_2580845_1165703.tif']
np.histogram(sample_tile, bins=50)

In [None]:
# Second-highest distinct ratio value of the sample tile (np.unique returns sorted values).
sorted_values = np.unique(data_ratio['10_2580845_1165703.tif'])
sorted_values[-2]

In [None]:
# Same preview of the normalised ratio tiles, except that one tile is displayed
# on a much tighter colour scale (0-5) than the common 0-255 scale.
image_dict = norm_data_ratio
v_max = 255
f, axarr = plt.subplots(3,3)
preview_tiles = [
    '0_2570184_1148461.tif', '4_2569767_1149331.tif', '1_2571614_1152259.tif',
    '5_2569300_1148157.tif', '0_2570190_1148492.tif', '10_2580845_1165703.tif',
    '4_2569484_1149035.tif', '5_2569282_1148151.tif', '6_2567727_1147671.tif',
]
vmax_override = {'10_2580845_1165703.tif': 5}
# axarr.flat walks the grid row by row, matching the order of the list above.
for panel, tile_name in zip(axarr.flat, preview_tiles):
    panel.imshow(image_dict[tile_name], vmin=0, vmax=vmax_override.get(tile_name, v_max))

In [None]:
# Rescale every ratio tile to 0-255, using its second-highest distinct value as the upper bound.
# Pixels above that bound are not divided: they keep the prefilled 1 and end up at 255.
second_norm_data = {}
for tile_name, ratio in data_ratio.items():
    lowest = np.min(ratio)
    second_highest = np.unique(ratio)[-2]
    scaled = np.divide(
        ratio - lowest,
        second_highest - lowest,
        out=np.ones_like(ratio),
        where=ratio <= second_highest,
    )
    second_norm_data[tile_name] = scaled * 255

In [None]:
# Histogram (50 bins) of the sample tile after normalisation on the second-highest value.
sample_tile = second_norm_data['10_2580845_1165703.tif']
np.histogram(sample_tile, bins=50)

In [None]:
# Preview the tiles normalised on their second-highest value.
# No vmin/vmax is passed here, so each panel uses matplotlib's default scaling.
image_dict = second_norm_data
v_max = 255
f, axarr = plt.subplots(3,3)
preview_tiles = [
    '0_2570184_1148461.tif', '4_2569767_1149331.tif', '1_2571614_1152259.tif',
    '5_2569300_1148157.tif', '0_2570190_1148492.tif', '10_2580845_1165703.tif',
    '4_2569484_1149035.tif', '5_2569282_1148151.tif', '6_2567727_1147671.tif',
]
# axarr.flat walks the grid row by row, matching the order of the list above.
for panel, tile_name in zip(axarr.flat, preview_tiles):
    panel.imshow(image_dict[tile_name])

In [None]:
# Map ratio values below 2 linearly onto 0-255 (tile minimum -> 0, 2 -> 255).
# Pixels with a ratio of 2 or more are not divided: they keep the prefilled 1 and end up at 255.
third_norm_data = {}
for tile_name, ratio in data_ratio.items():
    lowest = np.min(ratio)
    scaled = np.divide(
        ratio - lowest,
        2 - lowest,
        out=np.ones_like(ratio),
        where=ratio < 2,
    )
    third_norm_data[tile_name] = scaled * 255

In [None]:
# Histogram (50 bins) of one sample tile after the fixed 0-2 rescaling.
sample_tile = third_norm_data['6_2567727_1147671.tif']
np.histogram(sample_tile, bins=50)

In [None]:
# Preview the tiles rescaled from the 0-2 ratio range, on a common 0-255 colour scale.
image_dict = third_norm_data
v_max = 255
f, axarr = plt.subplots(3,3)
preview_tiles = [
    '0_2570184_1148461.tif', '4_2569767_1149331.tif', '1_2571614_1152259.tif',
    '5_2569300_1148157.tif', '0_2570190_1148492.tif', '10_2580845_1165703.tif',
    '4_2569484_1149035.tif', '5_2569282_1148151.tif', '6_2567727_1147671.tif',
]
# axarr.flat walks the grid row by row, matching the order of the list above.
for panel, tile_name in zip(axarr.flat, preview_tiles):
    panel.imshow(image_dict[tile_name], vmin=0, vmax=v_max)

### HOG on scaled data

In [None]:
# HOG visualisations and feature vectors computed on the rescaled ratio tiles.
hog_scaled_images, hog_scaled_features = im_list_to_hog(im_list=third_norm_data)

In [None]:
# Histogram (50 bins) of the HOG visualisation values for one sample tile.
sample_tile = hog_scaled_images['6_2567727_1147671.tif']
np.histogram(sample_tile, bins=50)

In [None]:
# Preview the HOG visualisations of the rescaled ratio tiles, colour scale capped at 25.
image_dict = hog_scaled_images
v_max = 25
f, axarr = plt.subplots(3,3)
preview_tiles = [
    '0_2570184_1148461.tif', '4_2569767_1149331.tif', '1_2571614_1152259.tif',
    '5_2569300_1148157.tif', '0_2570190_1148492.tif', '10_2580845_1165703.tif',
    '4_2569484_1149035.tif', '5_2569282_1148151.tif', '6_2567727_1147671.tif',
]
# axarr.flat walks the grid row by row, matching the order of the list above.
for panel, tile_name in zip(axarr.flat, preview_tiles):
    panel.imshow(image_dict[tile_name], vmin=0, vmax=v_max)

### HOG on grey images

In [None]:
# HOG visualisations and feature vectors computed on the grayscale tiles.
hog_gray_images, hog_gray_features = im_list_to_hog(im_list=data_gray)

In [None]:
# Histogram (50 bins) of the grayscale HOG visualisation values for one sample tile.
sample_tile = hog_gray_images['6_2567727_1147671.tif']
np.histogram(sample_tile, bins=50)

In [None]:
# Preview the HOG visualisations of the grayscale tiles, colour scale capped at 0.25.
image_dict = hog_gray_images
v_max = 0.25
f, axarr = plt.subplots(3,3)
preview_tiles = [
    '0_2570184_1148461.tif', '4_2569767_1149331.tif', '1_2571614_1152259.tif',
    '5_2569300_1148157.tif', '0_2570190_1148492.tif', '10_2580845_1165703.tif',
    '4_2569484_1149035.tif', '5_2569282_1148151.tif', '6_2567727_1147671.tif',
]
# axarr.flat walks the grid row by row, matching the order of the list above.
for panel, tile_name in zip(axarr.flat, preview_tiles):
    panel.imshow(image_dict[tile_name], vmin=0, vmax=v_max)

In [None]:
# Shortest and longest HOG feature vector across all grayscale tiles.
test_list = list(map(len, hog_gray_features.values()))
min(test_list), max(test_list)

### Scale-invariant features (SIFT) on scaled data

In [None]:
# Extract SIFT keypoints and descriptors for every rescaled ratio tile.
# Tiles on which SIFT finds no feature are left out of both dictionaries.
descriptor_extractor = SIFT(n_octaves=1, n_scales=1, n_bins=50)
keypoints = {}
descriptors = {}
for name, image in third_norm_data.items():
    try:
        descriptor_extractor.detect_and_extract(image)
    except RuntimeError as e:
        # Only the "no features" case is tolerated; anything else is a real failure.
        if 'SIFT found no features.' not in str(e):
            raise
        # Report the skipped tile: a later lookup of its name in `keypoints` or
        # `descriptors` raises a KeyError that would otherwise be hard to explain.
        logger.warning(f'SIFT found no features in {name}, tile skipped.')
        continue
    keypoints[name] = descriptor_extractor.keypoints
    descriptors[name] = descriptor_extractor.descriptors

In [None]:
# Match a tile against itself (reference case).
self_descriptors = descriptors['0_2570184_1148461.tif']
test_match_self = match_descriptors(
    self_descriptors,
    self_descriptors,
    max_ratio=0.8,
    cross_check=True,
)

In [None]:
# Match tile 0_2570184_1148461 against tile 10_2580845_1165703.
first_descriptors = descriptors['0_2570184_1148461.tif']
second_descriptors = descriptors['10_2580845_1165703.tif']
test_match_010 = match_descriptors(
    first_descriptors,
    second_descriptors,
    max_ratio=0.99,
    cross_check=True,
)

In [None]:
# Match tile 0_2570184_1148461 against tile 5_2569300_1148157.
first_descriptors = descriptors['0_2570184_1148461.tif']
second_descriptors = descriptors['5_2569300_1148157.tif']
test_match_05 = match_descriptors(
    first_descriptors,
    second_descriptors,
    max_ratio=0.99,
    cross_check=True,
)

In [None]:
# Match tile 0_2570184_1148461 against tile 4_2569484_1149035.
first_descriptors = descriptors['0_2570184_1148461.tif']
second_descriptors = descriptors['4_2569484_1149035.tif']
test_match_04 = match_descriptors(
    first_descriptors,
    second_descriptors,
    max_ratio=0.99,
    cross_check=True,
)

In [None]:
# Match tile 2_2571561_1150040 against tile 2_2571713_1150165.
first_descriptors = descriptors['2_2571561_1150040.tif']
second_descriptors = descriptors['2_2571713_1150165.tif']
test_match_1b = match_descriptors(
    first_descriptors,
    second_descriptors,
    max_ratio=0.99,
    cross_check=True,
)


In [None]:
# Match tile 1_2571614_1152259 against tile 10_2580845_1165703 (stricter ratio of 0.8).
first_descriptors = descriptors['1_2571614_1152259.tif']
second_descriptors = descriptors['10_2580845_1165703.tif']
test_match_110 = match_descriptors(
    first_descriptors,
    second_descriptors,
    max_ratio=0.8,
    cross_check=True,
)

In [None]:
# Draw the SIFT matches of six tile pairs on a 2x3 grid.
fig, ax = plt.subplots(nrows=2, ncols=3, figsize=(8, 5))

# One entry per panel: (grid position, first tile, second tile, matches, title).
# Each panel must use the matches computed for its own pair of tiles, because the
# match indices refer to the keypoints of exactly those two tiles.
match_panels = [
    ((0, 0), '0_2570184_1148461.tif', '0_2570184_1148461.tif', test_match_self,
     "Blue marker vs. self\n" "(all keypoints and matches)"),
    ((0, 1), '0_2570184_1148461.tif', '10_2580845_1165703.tif', test_match_010,
     "Blue marker vs. blue marker"),
    # Fixed: this panel previously drew test_match_010 (matches of another pair).
    ((0, 2), '2_2571561_1150040.tif', '2_2571713_1150165.tif', test_match_1b,
     "Black marker vs. black marker\n" "from the same image"),
    ((1, 0), '0_2570184_1148461.tif', '5_2569300_1148157.tif', test_match_05,
     "Blue marker vs. blue marker\n" " turned into a cross"),
    ((1, 1), '0_2570184_1148461.tif', '4_2569484_1149035.tif', test_match_04,
     "Blue marker vs. blue cross"),
    # Fixed: this panel previously drew test_match_010 (matches of another pair).
    ((1, 2), '1_2571614_1152259.tif', '10_2580845_1165703.tif', test_match_110,
     "Blue marker vs. black marker"),
]

for position, first_tile, second_tile, pair_matches, title in match_panels:
    panel = ax[position]
    plot_matches(
        panel,
        third_norm_data[first_tile],
        third_norm_data[second_tile],
        keypoints[first_tile],
        keypoints[second_tile],
        pair_matches,
    )
    panel.axis('off')
    panel.set_title(title)

plt.tight_layout()