# Histogram Submodule

The purpose of this notebook is to generate color histograms for the images in a directory, compute the distance between every unordered pair of images, and write the results to an output directory.

In [1]:
import matplotlib.pyplot as plt
import numpy as np
import cv2
import glob
import os


## Determine dataset path and output path


In [2]:
# Directory that is searched for input .jpg images.
path = "Datasets/Images-20220614T161502Z-001/Images"
# Directory that receives the result files. The trailing slash is retained so
# the value can also be used directly as a string prefix.
output_dir = "hist_res/"

# Create the output directory itself. Going through os.path.dirname() only
# worked because of the trailing slash: without it dirname() returns "" and
# os.makedirs("") raises FileNotFoundError.
os.makedirs(output_dir, exist_ok=True)

# filename -> flattened, normalized color histogram
index = {}
# filename -> image array converted to RGB channel order
images = {}
# image file names, in the order they were processed
filenames = []


## Iterate through dataset

Load each image into `images` and store its normalized 3D color histogram (8 bins per channel) in `index`; both dictionaries are keyed by the image's file name.

In [8]:
# Start from empty containers so that re-running this cell does not append
# duplicate entries to `filenames` (duplicates would later be written out as
# repeated pairs and as pairs of an image with itself).
index = {}
images = {}
filenames = []

# glob returns matches in arbitrary order; sorting keeps the processing order
# (and therefore the pair order in the output files) reproducible.
for imagePath in sorted(glob.glob(os.path.join(path, "*.jpg"))):
    # basename() honours the platform's path separator. Searching for "\\"
    # only worked on Windows; elsewhere the whole path became the "filename".
    filename = os.path.basename(imagePath)

    # cv2.imread signals an unreadable or corrupt file by returning None
    # instead of raising, so fail here with a message that names the file.
    image = cv2.imread(imagePath)
    if image is None:
        raise OSError("Could not read image: {}".format(imagePath))

    filenames.append(filename)
    images[filename] = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # 3D histogram over all three channels, 8 bins per channel over [0, 256).
    # It is computed on the array as loaded (before the RGB conversion above),
    # so the bin axes follow that array's channel order.
    hist = cv2.calcHist([image], [0, 1, 2], None, [8, 8, 8], [0, 256, 0, 256, 0, 256])
    # Normalize, then flatten the 8x8x8 cube into a 512-element vector.
    hist = cv2.normalize(hist, hist).flatten()
    index[filename] = hist

[3.59001219e-01 4.92417037e-01 2.63277981e-02 6.04444067e-04
 6.28784110e-05 0.00000000e+00 0.00000000e+00 0.00000000e+00
 4.29398660e-03 2.11245090e-01 2.98418909e-01 3.57778147e-02
 2.02833576e-06 0.00000000e+00 0.00000000e+00 0.00000000e+00
 0.00000000e+00 1.00199785e-03 2.25368384e-02 3.40172201e-02
 1.54762017e-03 0.00000000e+00 0.00000000e+00 0.00000000e+00
 0.00000000e+00 0.00000000e+00 3.04250370e-05 9.93884532e-05
 9.12751071e-04 1.01416790e-05 0.00000000e+00 0.00000000e+00
 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
 2.43400282e-05 2.839670

[5.90315700e-01 1.38888285e-01 7.70427613e-03 6.64336432e-04
 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
 1.98721979e-03 2.18232334e-01 1.84002370e-02 3.47365451e-04
 1.01314918e-05 0.00000000e+00 0.00000000e+00 0.00000000e+00
 0.00000000e+00 5.53758396e-03 5.72081916e-02 3.65457381e-03
 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
 0.00000000e+00 0.00000000e+00 4.48680366e-05 4.00772877e-03
 2.02629835e-05 0.00000000e+00 0.00000000e+00 0.00000000e+00
 0.00000000e+00 0.00000000e+00 0.00000000e+00 4.34206777e-06
 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
 0.00000000e+00 0.000000

[7.30141461e-01 3.92812818e-01 3.89109589e-02 0.00000000e+00
 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
 1.63158096e-04 6.27500042e-02 2.68130124e-01 6.69846265e-03
 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
 0.00000000e+00 7.48431603e-06 1.55673770e-03 2.26325728e-03
 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
 1.04780429e-05 1.077741

[9.5879734e-01 2.1986030e-01 2.9519466e-03 5.9293047e-06 0.0000000e+00
 0.0000000e+00 0.0000000e+00 0.0000000e+00 1.4603030e-03 9.3573742e-02
 1.1310064e-01 2.6004235e-04 0.0000000e+00 0.0000000e+00 0.0000000e+00
 0.0000000e+00 0.0000000e+00 6.8610527e-05 9.9527615e-04 4.0149863e-04
 0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00
 0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00
 0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00
 0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00
 0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00
 0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00
 0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00
 0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00
 0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00 6.9457565e-05
 1.5755009e-04 3.4728782e-05 8.4704352e-06 0.0000000e+00 0.0000000e+00
 0.000

[9.78718400e-01 1.72131032e-01 4.48386651e-03 2.31550675e-05
 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
 4.41440148e-04 5.92127331e-02 4.58507687e-02 3.45085195e-04
 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
 0.00000000e+00 3.73468829e-05 7.03615253e-04 1.26979403e-05
 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
 1.09052897e-04 4.698237


## Write to output

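For each comparison method (correlation, intersection, chi-square, Hellinger), compare every unordered pair of histograms and write the results to `pair_dist_<method>.txt` in the output directory, one pair per line in the form `<file_a> <file_b> <value>`. Note that correlation and intersection are similarity measures (higher means more alike), whereas chi-square and Hellinger are distances (lower means more alike).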

In [4]:
# Histogram comparison methods to evaluate. The key is the label used in the
# output file name; the value is the OpenCV flag passed to cv2.compareHist.
DIST_METHOD = {
    "Correlation"  : cv2.HISTCMP_CORREL,
    "Intersection" : cv2.HISTCMP_INTERSECT,
    "Chi-Sq"       : cv2.HISTCMP_CHISQR,
    "Hellinger"    : cv2.HISTCMP_BHATTACHARYYA
}

# Take the image names from the keys of `index`: dict keys are unique and keep
# insertion order, so this matches `filenames` while staying free of duplicate
# entries even if the histogram cell was executed more than once.
pair_names = list(index)

for method, compare_flag in DIST_METHOD.items():
    # os.path.join works whether or not output_dir ends with a separator.
    result_path = os.path.join(output_dir, 'pair_dist_' + method + '.txt')
    with open(result_path, 'w') as f:
        # Visit each unordered pair exactly once (second index > first index).
        for i, first in enumerate(pair_names):
            for second in pair_names[i + 1:]:
                d = cv2.compareHist(index[first], index[second], compare_flag)

                # One line per pair: "<file_a> <file_b> <value>"
                f.write('{} {} {}\n'.format(first, second, d))