# Preparation

## Import Used Libraries

In [411]:
import matplotlib.pyplot as plt
import matplotlib.image as img
import os
import math
import json

## Read Image

In [412]:
def readimage(path):
    """Load the image file at ``path`` with ``matplotlib.image.imread``.

    Returns:
        Whatever ``imread`` produces for that file, unchanged.
    """
    pixels = img.imread(path)
    return pixels

def showimage(image):
    """Display ``image`` by handing it to ``plt.imshow``."""
    plt.imshow(image)
    return None

## RGB to Hue Conversion

In [413]:
def RGB2Hue(r, g, b):
    """Convert one RGB triple to its hue angle in whole degrees.

    Each channel is divided by 255 before use. The result is rounded to
    the nearest degree and wrapped into the range 0..359.

    Args:
        r: Red channel value.
        g: Green channel value.
        b: Blue channel value.

    Returns:
        The hue in degrees, or 0 when all three channels are equal
        (a gray pixel has no defined hue).
    """
    red, green, blue = r / 255, g / 255, b / 255

    brightest = max(red, green, blue)
    spread = brightest - min(red, green, blue)

    # Gray pixel: avoid dividing by a zero spread.
    if spread == 0:
        return 0

    # Position on the six-sector colour wheel, roughly in [-1, 5].
    if brightest == red:
        sector = (green - blue) / spread
    elif brightest == green:
        sector = 2.0 + (blue - red) / spread
    else:
        sector = 4.0 + (red - green) / spread

    # Adding 360 before the modulo folds negative angles into 300..359.
    return (round(sector * 60) + 360) % 360

## Calculate Histogram

In [414]:
def histogram(image):
    """Compute normalised red, green, blue and hue histograms of an image.

    Every pixel contributes ``1 / (rows * columns)`` to one bin in each of
    the four histograms. The channel values are used directly as indices
    into the 256-entry colour histograms, and the value returned by
    ``RGB2Hue`` indexes the 360-entry hue histogram.

    Args:
        image: A sequence of rows, each row a sequence of pixels that
            unpack into exactly three channel values.

    Returns:
        A tuple ``(R, G, B, H)`` of lists with 256, 256, 256 and 360
        entries respectively.
    """
    red_bins, green_bins, blue_bins = ([0] * 256 for _ in range(3))
    hue_bins = [0] * 360

    # Weight of a single pixel, so that each histogram sums to about 1.
    height, width = len(image), len(image[0])
    weight = 1.0 / (height * width)

    for scanline in image:
        for red, green, blue in scanline:
            angle = RGB2Hue(red, green, blue)
            red_bins[red] += weight
            green_bins[green] += weight
            blue_bins[blue] += weight
            hue_bins[angle] += weight

    return (red_bins, green_bins, blue_bins, hue_bins)

## Calculate Distances   

In [415]:
def eucdist(vecA, vecB):
    """Return the Euclidean distance between two numeric sequences.

    The sequences are paired with ``zip``, so any elements beyond the
    length of the shorter one are ignored.
    """
    squared_gaps = ((left - right) ** 2 for left, right in zip(vecA, vecB))
    return math.sqrt(sum(squared_gaps))

def histdist(histsA, histsB):
    """Return the Euclidean distance between each pair of histograms.

    ``histsA`` and ``histsB`` are walked in lockstep; the i-th result is
    ``eucdist(histsA[i], histsB[i])``.

    Returns:
        A list with one distance per histogram pair.
    """
    return [eucdist(first, second) for first, second in zip(histsA, histsB)]

def imagedist(histsA, histsB):
    """Reduce two histogram sets to an RGB distance and a hue distance.

    Args:
        histsA: Histograms of the first image, colour channels first and
            the hue histogram last.
        histsB: Histograms of the second image, in the same order.

    Returns:
        A tuple ``(rgbdist, huedist)``. ``rgbdist`` is the Euclidean norm
        of the first three per-histogram distances divided by
        ``sqrt(3)``; ``huedist`` is the distance of the last pair.
    """
    per_channel = histdist(histsA, histsB)

    # Norm of the three colour distances, measured from the origin.
    origin = [0, 0, 0]
    rgb_norm = eucdist(origin, per_channel[:3])

    # The divisor sqrt(3) is the norm of (1, 1, 1).
    # NOTE(review): a single channel distance between two normalised
    # histograms can be as large as sqrt(2), so this value is not strictly
    # capped at 1 - confirm that is acceptable.
    return (rgb_norm / math.sqrt(3), per_channel[-1])

def similarity(histsA, histsB):
    """Turn the two image distances into similarity scores.

    Returns:
        A tuple ``(1 - rgbdist, 1 - huedist)`` built from the result of
        ``imagedist(histsA, histsB)``; larger means more alike.
    """
    rgb_gap, hue_gap = imagedist(histsA, histsB)
    rgb_score = 1 - rgb_gap
    hue_score = 1 - hue_gap
    return (rgb_score, hue_score)

## Test

In [416]:
# img1 = readimage('../images/elephant/image_0001.jpg')
# img2 = readimage('../images/elephant/image_0002.jpg')

# hists1 = histogram(img1)
# hists2 = histogram(img2)

# rgbdist, huedist = imagedist(hists1, hists2)

# print(rgbdist)
# print(huedist)

## Collect Histograms of Train Images

In [417]:
def getfiles(dirpath):
    """List the visible files directly inside ``dirpath`` in sorted order.

    Only the top level of the directory is inspected. Names starting with
    a dot are skipped. If ``os.walk`` yields nothing for ``dirpath`` the
    result is an empty list.
    """
    # The first item from os.walk describes dirpath itself; the fallback
    # tuple covers the case where the walk produces nothing.
    _, _, names = next(os.walk(dirpath), (None, None, []))
    return sorted(name for name in names if not name.startswith('.'))

def train(size=20, root='../images', folders=None, modelpath='model.json'):
    """Build the histogram model from training images and save it as JSON.

    For every category folder under ``root``, the first ``size`` files
    returned by ``getfiles`` are read, their histograms are computed, and
    one record per image is collected. The full list of records is then
    written to ``modelpath``.

    Called with no arguments, this behaves exactly as the original
    hard-coded version did.

    Args:
        size: Maximum number of images taken from each folder.
        root: Directory that contains the category folders.
        folders: Category folder names to process. ``None`` selects the
            six built-in categories; an empty list processes nothing.
        modelpath: File the JSON model is written to.

    Returns:
        None. Progress is printed and the model file is written as side
        effects.
    """
    # ``is None`` rather than truthiness, so an explicit empty list is honoured.
    if folders is None:
        folders = ['elephant', 'flamingo', 'kangaroo', 'leopards', 'octopus', 'seahorse']

    data = []

    for folder in folders:
        dirpath = os.path.join(root, folder)

        for file in getfiles(dirpath)[:size]:
            # Joined onto ``root``, so this is only absolute if ``root`` is.
            abspath = os.path.join(dirpath, file)
            print(f"Training {abspath}")

            data.append({
                'path': abspath,
                'category': folder,
                'histograms': histogram(readimage(abspath)),
            })

    with open(modelpath, 'w') as outfile:
        json.dump(data, outfile)

    print("Training done, model saved.")
    
def loadmodel(path='model.json'):
    """Load a saved model from a JSON file.

    Args:
        path: Location of the JSON file. Defaults to ``'model.json'`` in
            the current working directory.

    Returns:
        The decoded JSON content of the file.
    """
    with open(path) as jsonfile:
        return json.load(jsonfile)

In [418]:
# train()

## Retrieve Most Similar N Images From Model

In [431]:
def retrieve(model, image, n):
    """Rank the model's images by similarity to ``image``.

    Args:
        model: Iterable of records, each with ``'histograms'``, ``'path'``
            and ``'category'`` entries.
        image: The query image, passed to ``histogram``.
        n: Number of entries kept from the front of each ranking.

    Returns:
        A tuple ``(rgb, hue)`` of two lists. Both hold the same kind of
        result records (``'rgb'``, ``'hue'``, ``'path'``, ``'category'``),
        sorted from most to least similar by RGB score and by hue score
        respectively, each cut to its first ``n`` items.
    """
    query_hists = histogram(image)

    def score(entry):
        # Compare one stored record against the query image.
        rgb_score, hue_score = similarity(query_hists, entry['histograms'])
        return {
            'rgb': rgb_score,
            'hue': hue_score,
            'path': entry['path'],
            'category': entry['category'],
        }

    scored = [score(entry) for entry in model]

    by_rgb = sorted(scored, key=lambda item: item['rgb'], reverse=True)
    by_hue = sorted(scored, key=lambda item: item['hue'], reverse=True)

    return (by_rgb[:n], by_hue[:n])
        
# Demo run: load the saved model and read one query image from disk.
model = loadmodel()
image = readimage('../images/elephant/image_0021.jpg')
# retrieve returns two top-5 lists: one ranked by RGB score, one by hue score.
mostsimilars = retrieve(model, image, 5)

# Pretty-print both rankings as indented JSON.
print(json.dumps(mostsimilars, indent=2))

[
  [
    {
      "rgb": 0.9261940321506392,
      "hue": 0.8795608348124292,
      "path": "../images/seahorse/image_0013.jpg",
      "category": "seahorse"
    },
    {
      "rgb": 0.9040790920303821,
      "hue": 0.7955747892785371,
      "path": "../images/elephant/image_0010.jpg",
      "category": "elephant"
    },
    {
      "rgb": 0.9010390034147857,
      "hue": 0.7954804421905373,
      "path": "../images/seahorse/image_0004.jpg",
      "category": "seahorse"
    },
    {
      "rgb": 0.8869140118132379,
      "hue": 0.7901952821625763,
      "path": "../images/elephant/image_0018.jpg",
      "category": "elephant"
    },
    {
      "rgb": 0.8802197348152281,
      "hue": 0.8431860925677345,
      "path": "../images/seahorse/image_0019.jpg",
      "category": "seahorse"
    }
  ],
  [
    {
      "rgb": 0.8118929921635794,
      "hue": 0.9028346667631683,
      "path": "../images/elephant/image_0008.jpg",
      "category": "elephant"
    },
    {
      "rgb": 0.92619403215

## Collect Histograms of Test Images

In [420]:
# img1 = readimage('../images/elephant/image_0001.jpg')
# img2 = readimage('../images/elephant/image_0002.jpg')

# print(len(img1), len(img1[0]), len(img1[0][0]))

# hists1 = histogram(img1)
# hists2 = histogram(img2)

# print(hists1[0][100:120])

# histpairs = list(zip(hists1, hists2))
# histdists = list(map(lambda p: eucdist(*p), histpairs))

# huedist = histdists[-1]
# rgbdist = eucdist([0,0,0], histdists[:3])

# print(rgbdist)
# print(huedist)    