# Preparation

## Import Used Libraries

In [67]:
import matplotlib.pyplot as plt
import matplotlib.image as img
import os
import math
import json

## Read Image

In [68]:
def readimage(path):
    """Load the image file at ``path`` and return the result of ``img.imread``.

    NOTE(review): ``histogram`` in this notebook uses the channel values as
    indices into 256-entry lists, so it presumably expects 8-bit RGB pixel
    data here -- confirm for inputs other than the JPEG files used below.
    """
    return img.imread(path)

def showimage(image):
    """Hand ``image`` to ``plt.imshow`` for display; returns nothing."""
    plt.imshow(image)

## RGB to Hue Conversion

In [69]:
def RGB2Hue(r, g, b):
    """Convert an RGB triple (each channel scaled by 1/255) to a hue angle.

    Returns an integer number of degrees in ``0..359``. Pixels whose three
    channels are equal have no defined hue and are reported as ``0``.
    """
    red, green, blue = r / 255, g / 255, b / 255

    high = max(red, green, blue)
    spread = high - min(red, green, blue)

    # All channels equal: there is no dominant colour to measure.
    if spread == 0:
        return 0

    # Position on the colour wheel in units of 60 degrees; the dominant
    # channel picks the base sector (red first, then green, then blue).
    if high == red:
        sector = (green - blue) / spread
    elif high == green:
        sector = 2.0 + (blue - red) / spread
    else:
        sector = 4.0 + (red - green) / spread

    # The red sector can be negative, so shift by a full turn before wrapping.
    return (round(sector * 60) + 360) % 360

## Calculate Histogram

In [70]:
def histogram(image):
    """Build normalised red, green, blue and hue histograms for ``image``.

    ``image`` is iterated row by row; every pixel must unpack into exactly
    three values, which are used directly as indices into 256-entry lists.
    Each pixel adds ``1 / (rows * columns)`` to one bin per histogram, where
    the column count is taken from the first row.

    Returns a tuple ``(R, G, B, H)`` of lists with 256, 256, 256 and 360 bins.
    """
    height = len(image)
    width = len(image[0])
    weight = 1.0 / (height * width)

    red_bins, green_bins, blue_bins = [0] * 256, [0] * 256, [0] * 256
    hue_bins = [0] * 360

    for scanline in image:
        for r, g, b in scanline:
            angle = RGB2Hue(r, g, b)
            red_bins[r] += weight
            green_bins[g] += weight
            blue_bins[b] += weight
            hue_bins[angle] += weight

    return (red_bins, green_bins, blue_bins, hue_bins)

## Calculate Distances   

In [71]:
def eucdist(vecA, vecB):
    """Return the Euclidean distance between ``vecA`` and ``vecB``.

    Components are paired with ``zip``, so any extra trailing components of
    the longer vector are ignored.
    """
    squares = ((a - b) ** 2 for a, b in zip(vecA, vecB))
    return math.sqrt(sum(squares))

def histdist(histsA, histsB):
    """Return the list of Euclidean distances between paired histograms.

    The i-th result is ``eucdist(histsA[i], histsB[i])``; pairing stops at
    the shorter of the two inputs.
    """
    return [eucdist(left, right) for left, right in zip(histsA, histsB)]

def imagedist(histsA, histsB):
    """Return ``(rgbdist, huedist)`` for two sets of histograms.

    ``rgbdist`` is the Euclidean norm of the first three per-histogram
    distances divided by ``sqrt(3)``; ``huedist`` is the distance between the
    last histograms of each set.

    NOTE(review): dividing by ``sqrt(3)`` maps the result into ``[0, 1]`` only
    if every per-channel distance is at most 1 -- confirm that bound holds.
    """
    distances = histdist(histsA, histsB)

    # Norm of the three colour-channel distances, measured from the origin.
    origin = [0, 0, 0]
    rgbdist = eucdist(origin, distances[:3]) / math.sqrt(3)

    return (rgbdist, distances[-1])

def similarity(histsA, histsB):
    """Return ``(rgb_similarity, hue_similarity)`` for two histogram sets.

    Each similarity is one minus the matching distance from ``imagedist``.
    """
    rgbdist, huedist = imagedist(histsA, histsB)
    rgbsim = 1 - rgbdist
    huesim = 1 - huedist
    return (rgbsim, huesim)

## Test Distance

In [106]:
# img1 = readimage('../images/seahorse/image_0021.jpg')
# img2 = readimage('../images/octopus/image_0030.jpg')

# hists1 = histogram(img1)
# hists2 = histogram(img2)

# rgbsim, huesim = similarity(hists1, hists2)

# print(rgbsim)
# print(huesim)

0.6794588592400552
0.9045732711694615


## Collect Histograms of Train Images

In [73]:
def getfiles(dirpath):
    """Return the sorted names of the non-hidden files directly in ``dirpath``.

    Only the top level of the directory is inspected. Names starting with
    ``.`` are dropped. If ``os.walk`` yields nothing for ``dirpath`` the
    result is an empty list.
    """
    _, _, names = next(os.walk(dirpath), (None, None, []))
    return sorted(name for name in names if not name.startswith('.'))

def train(root, folders, size, modelpath='model.json'):
    """Compute histograms for the training images and save them as JSON.

    For every name in ``folders`` the first ``size`` files (in sorted order)
    of ``root/<folder>`` are read and turned into histograms.

    Args:
        root: directory that contains one sub-directory per category.
        folders: category sub-directory names; each name is also stored as
            the entry's ``'category'``.
        size: maximum number of files taken from each folder.
        modelpath: file the model is written to. Defaults to ``'model.json'``,
            the location previously hard-coded here.

    The saved model is a list of dicts with the keys ``'path'``,
    ``'category'`` and ``'histograms'``. Nothing is returned.
    """
    data = []

    for folder in folders:
        dirpath = os.path.join(root, folder)

        for file in getfiles(dirpath)[:size]:
            # Despite the name this is ``root`` joined with the file name,
            # so it is only absolute when ``root`` is.
            abspath = os.path.join(dirpath, file)
            print(f"Training {abspath}")

            data.append({
                'path': abspath,
                'category': folder,
                'histograms': histogram(readimage(abspath)),
            })

    with open(modelpath, 'w') as outfile:
        json.dump(data, outfile)

    print("Training done, model saved.")
    
def loadmodel(modelpath='model.json'):
    """Load and return the model stored as JSON in ``modelpath``.

    Args:
        modelpath: file to read. Defaults to ``'model.json'``, the location
            previously hard-coded here.

    Returns:
        The decoded JSON content of the file.
    """
    with open(modelpath) as jsonfile:
        return json.load(jsonfile)

## Retrieve Most Similar N Images From Model

In [74]:
def retrieve(model, image, n):
    """Rank the model entries by similarity to ``image``.

    Every entry of ``model`` must provide ``'histograms'``, ``'path'`` and
    ``'category'``. Each one is scored against the histograms of ``image``.

    Returns a pair ``(by_rgb, by_hue)``: the scored entries sorted by
    descending RGB similarity and by descending hue similarity, each sliced
    with ``[:n]``. Scored entries are dicts with the keys ``'rgb'``,
    ``'hue'``, ``'path'`` and ``'category'``.
    """
    query = histogram(image)
    scored = []

    for entry in model:
        rgbsim, huesim = similarity(query, entry['histograms'])
        scored.append({
            'rgb': rgbsim,
            'hue': huesim,
            'path': entry['path'],
            'category': entry['category']
        })

    def best(metric):
        # Highest similarity first, then keep the leading slice.
        ranked = sorted(scored, key=lambda item: item[metric], reverse=True)
        return ranked[:n]

    return (best('rgb'), best('hue'))

## Test Retrieval

In [107]:
# model = loadmodel()
# image = readimage('../images/octopus/image_0030.jpg')
# mostsimilars = retrieve(model, image, 5)

# print(json.dumps(mostsimilars, indent=2))

[
  [
    {
      "rgb": 0.9230668772861562,
      "hue": 0.903471890964705,
      "path": "../images/seahorse/image_0004.jpg",
      "category": "seahorse"
    },
    {
      "rgb": 0.9203094464494688,
      "hue": 0.8253982267193216,
      "path": "../images/elephant/image_0014.jpg",
      "category": "elephant"
    },
    {
      "rgb": 0.8916960967265211,
      "hue": 0.8649403721435833,
      "path": "../images/elephant/image_0013.jpg",
      "category": "elephant"
    },
    {
      "rgb": 0.857854968529906,
      "hue": 0.769848524494094,
      "path": "../images/seahorse/image_0017.jpg",
      "category": "seahorse"
    },
    {
      "rgb": 0.832654119264067,
      "hue": 0.7515507942995956,
      "path": "../images/elephant/image_0010.jpg",
      "category": "elephant"
    }
  ],
  [
    {
      "rgb": 0.9230668772861562,
      "hue": 0.903471890964705,
      "path": "../images/seahorse/image_0004.jpg",
      "category": "seahorse"
    },
    {
      "rgb": 0.690020423857445,

## Find Most Similar Images of Test Images

In [97]:
def test(root, folders, after):
    """Run retrieval for the held-out images and record the outcome.

    The model is loaded with ``loadmodel()``. In every ``root/<category>``
    directory the first ``after`` files (in sorted order) are skipped; for
    each remaining file the five most similar model entries are retrieved
    per metric. A metric counts as a success when at least one retrieved
    entry has the same category as the query image.

    Returns a list with one dict per tested image, holding ``'path'``,
    ``'category'`` and, under ``'rgb'`` and ``'hue'``, a dict with
    ``'success'`` and ``'results'``.
    """
    model = loadmodel()
    results = []

    for category in folders:
        dirpath = os.path.join(root, category)

        for file in getfiles(dirpath)[after:]:
            abspath = os.path.join(dirpath, file)
            rgb, hue = retrieve(model, readimage(abspath), 5)

            results.append({
                'path': abspath,
                'category': category,
                'rgb': {
                    'success': any(hit['category'] == category for hit in rgb),
                    'results': rgb
                },
                'hue': {
                    'success': any(hit['category'] == category for hit in hue),
                    'results': hue
                }
            })

    return results

## Calculate Test Report

## Main Flow

**Configure Dataset**

In [98]:
# Directory that holds one sub-directory of images per category.
ds_root = '../images'
# Category sub-directories of ds_root; each name doubles as the category label.
ds_folders = ['elephant', 'flamingo', 'kangaroo', 'leopards', 'octopus', 'seahorse']
# Number of leading files (sorted by name) per folder used for training;
# test() is given the same value and skips that many files in each folder.
ds_train_size = 20

**Train Model**

In [99]:
# train(ds_root, ds_folders, ds_train_size)

**Find Results**

In [100]:
# Run retrieval for every file after the first ds_train_size files of each folder.
results = test(ds_root, ds_folders, ds_train_size)

Testing ../images/elephant/image_0021.jpg True True
Testing ../images/elephant/image_0022.jpg False True
Testing ../images/elephant/image_0023.jpg True False
Testing ../images/elephant/image_0024.jpg False False
Testing ../images/elephant/image_0025.jpg True True
Testing ../images/elephant/image_0026.jpg False True
Testing ../images/elephant/image_0027.jpg True True
Testing ../images/elephant/image_0028.jpg True False
Testing ../images/elephant/image_0029.jpg True True
Testing ../images/elephant/image_0030.jpg True False
Testing ../images/flamingo/image_0021.jpg True False
Testing ../images/flamingo/image_0022.jpg False True
Testing ../images/flamingo/image_0023.jpg False False
Testing ../images/flamingo/image_0024.jpg False True
Testing ../images/flamingo/image_0025.jpg False False
Testing ../images/flamingo/image_0026.jpg False True
Testing ../images/flamingo/image_0027.jpg False True
Testing ../images/flamingo/image_0028.jpg False True
Testing ../images/flamingo/image_0029.jpg True 

In [143]:
def analyze(results):
    """Summarise test results overall and per category.

    Each item of ``results`` must provide ``'category'`` plus
    ``['rgb']['success']`` and ``['hue']['success']`` flags.

    Returns a dict with:
        ``'total'``: number of results,
        ``'success'``: success counts per metric (``'rgb'``, ``'hue'``),
        ``'categories'``: per category, in order of first appearance, a dict
        with ``'total'``, ``'success'`` counts and ``'rates'``
        (successes divided by that category's total).
    """
    metrics = ('rgb', 'hue')
    overall = {'rgb': 0, 'hue': 0}
    categories = {}
    report = {'total': 0, 'success': overall, 'categories': categories}

    for result in results:
        report['total'] += 1

        stats = categories.setdefault(result['category'], {
            'total': 0,
            'success': {'rgb': 0, 'hue': 0},
            'rates': {'rgb': 0, 'hue': 0}
        })

        # Booleans become 0/1 so they can be added to the counters.
        hits = {metric: int(result[metric]['success']) for metric in metrics}

        for metric in metrics:
            overall[metric] += hits[metric]
        stats['total'] += 1
        for metric in metrics:
            stats['success'][metric] += hits[metric]

    # Every category was created by at least one result, so total >= 1.
    for stats in categories.values():
        for metric in metrics:
            stats['rates'][metric] = stats['success'][metric] / stats['total']

    return report

**Generate Report**

In [144]:
# Aggregate the per-image results and print the summary as indented JSON.
report = analyze(results)
print(json.dumps(report, indent=2))

{
  "total": 60,
  "success": {
    "rgb": 37,
    "hue": 40
  },
  "categories": {
    "elephant": {
      "total": 10,
      "success": {
        "rgb": 7,
        "hue": 6
      },
      "rates": {
        "rgb": 0.7,
        "hue": 0.6
      }
    },
    "flamingo": {
      "total": 10,
      "success": {
        "rgb": 3,
        "hue": 7
      },
      "rates": {
        "rgb": 0.3,
        "hue": 0.7
      }
    },
    "kangaroo": {
      "total": 10,
      "success": {
        "rgb": 8,
        "hue": 7
      },
      "rates": {
        "rgb": 0.8,
        "hue": 0.7
      }
    },
    "leopards": {
      "total": 10,
      "success": {
        "rgb": 10,
        "hue": 8
      },
      "rates": {
        "rgb": 1.0,
        "hue": 0.8
      }
    },
    "octopus": {
      "total": 10,
      "success": {
        "rgb": 7,
        "hue": 5
      },
      "rates": {
        "rgb": 0.7,
        "hue": 0.5
      }
    },
    "seahorse": {
      "total": 10,
      "success": {
     