<h1>Imports </h1>

In [None]:
import os as os        # for iteration throug directories
import pandas as pd # for Series and DataFrames
import cv2          # for OpenCV 
import numpy as np  # for arrays
import time       # for time calculations
from feature_extraction_try import imgCrawl, getClassLabels #for fetching images
from skimage.feature import hog #calculates HOGs 
from sklearn.cluster import MiniBatchKMeans #for clustering

<h1>HOG computation :</h1>


In this file, we aim to compute a HOG descriptor that has the same dimension for each image. So we used a bag-of-words approach : 
<ul>
    <li>Padding images so that their height and width are multiples of 5 (0 mod 5)</li>
    <li>Splitting each image into 5*5 cells</li>
    <li>Computing local histograms on each cell</li>
    <li>Clustering the local histograms</li>
    <li>Binning the local histograms to create our feature</li>
</ul>

<h2>Resizing images : </h2>

In [None]:
def reSize(image, CELL_DIMENSION):
  """Pad an image so its height and width are multiples of CELL_DIMENSION.

  Missing rows are appended at the bottom and missing columns on the right
  using cv2.BORDER_REPLICATE; the existing pixels are not rescaled.

  Parameters
  ----------
  image : numpy.ndarray
    Image array whose first two axes are (height, width). Both 3-D
    (height, width, channels) and 2-D (height, width) arrays are accepted.
  CELL_DIMENSION : int
    Side length of a cell, in pixels.

  Returns
  -------
  numpy.ndarray
    The very same `image` object when both dimensions are already
    multiples of CELL_DIMENSION, otherwise the array returned by
    cv2.copyMakeBorder.
  """
  # Only the first two axes are needed, which also lets 2-D images through.
  height, width = image.shape[:2]
  # (-n) % d is the distance from n up to the next multiple of d (0 when aligned).
  missingHeightPixels = (-height) % CELL_DIMENSION
  missingWidthPixels = (-width) % CELL_DIMENSION
  if missingHeightPixels == 0 and missingWidthPixels == 0:
    return image
  return cv2.copyMakeBorder(image, 0, missingHeightPixels, 0, missingWidthPixels, cv2.BORDER_REPLICATE)

<h2>Sequencing images : </h2>

In [None]:
def imageSequencing(npImages, CELL_DIMENSION):
  """Load every image and cut it into CELL_DIMENSION x CELL_DIMENSION blocks.

  Parameters
  ----------
  npImages : sequence
    Rows whose element at index 1 is the path of an image file (the
    element at index 0 is not read here).
  CELL_DIMENSION : int
    Side length of a block, in pixels.

  Returns
  -------
  numpy.ndarray
    One entry per image, each entry holding that image's blocks. Blocks are
    ordered column by column (outer loop over x, inner loop over y). When all
    images yield the same number of blocks the result is a regular
    multi-dimensional array; otherwise it is a 1-D object array of per-image
    block arrays.

  Raises
  ------
  IOError
    If an image cannot be read by cv2.imread.
  """
  blocks = []
  for row in npImages:
    imagePath = row[1]
    image = cv2.imread(imagePath)
    # cv2.imread signals failure by returning None instead of raising.
    if image is None:
      raise IOError("Could not read image: " + str(imagePath))
    resizedImage = reSize(image, CELL_DIMENSION)
    height, width = resizedImage.shape[:2]
    # Floor division keeps these integers on Python 3 as well as Python 2.
    nbColumns = width // CELL_DIMENSION
    nbRows = height // CELL_DIMENSION
    blocks.append(np.array([
      resizedImage[
        j*CELL_DIMENSION:(j+1)*CELL_DIMENSION,
        i*CELL_DIMENSION:(i+1)*CELL_DIMENSION]
      for i in range(nbColumns)
      for j in range(nbRows)
    ]))
  # Images of different sizes give different block counts; build the ragged
  # container explicitly rather than relying on np.array's implicit handling.
  if len(set(imageBlocks.shape for imageBlocks in blocks)) > 1:
    sequenced = np.empty(len(blocks), dtype=object)
    for index, imageBlocks in enumerate(blocks):
      sequenced[index] = imageBlocks
    return sequenced
  return np.array(blocks)

<h2>Compute HOG descriptor on each cell : </h2>

In [None]:
def hogAllBlocks(blocks, orientations=8, pixelsPerCell=(5, 5)):
  """Compute a HOG descriptor for every block of every image.

  Each block is converted from BGR to grayscale and passed to
  skimage.feature.hog with cells_per_block=(1, 1).

  Parameters
  ----------
  blocks : sequence
    One entry per image; each entry is a sequence of BGR blocks.
  orientations : int, optional
    Number of orientation bins passed to hog (default 8).
  pixelsPerCell : tuple of int, optional
    Cell size passed to hog as pixels_per_cell (default (5, 5)); it should
    match the block size used when the images were cut.

  Returns
  -------
  numpy.ndarray
    One entry per image holding that image's per-block descriptors. A regular
    array when every image has the same number of blocks, otherwise a 1-D
    object array of per-image arrays.
  """
  # Debug trace of the first block's shape; guarded so empty input is harmless.
  if len(blocks) > 0 and len(blocks[0]) > 0:
    print(blocks[0][0].shape)
  perImage = [
    np.array([
      hog(cv2.cvtColor(block, cv2.COLOR_BGR2GRAY),
          orientations=orientations,
          pixels_per_cell=pixelsPerCell,
          cells_per_block=(1, 1))
      for block in image
    ])
    for image in blocks
  ]
  # Images with different block counts cannot be stacked into a regular array.
  if len(set(imageGradients.shape for imageGradients in perImage)) > 1:
    gradients = np.empty(len(perImage), dtype=object)
    for index, imageGradients in enumerate(perImage):
      gradients[index] = imageGradients
  else:
    gradients = np.array(perImage)
  print(gradients.shape)
  return gradients

<h2>Clustering local HOGs : </h2>

In [None]:
def clusterGradients(gradients, NB_CLUSTERS, MAXITER, randomState=None):
  """Cluster all local HOG descriptors with MiniBatchKMeans.

  Parameters
  ----------
  gradients : sequence
    One entry per image; each entry is a sequence of local descriptors.
  NB_CLUSTERS : int
    Number of clusters (passed as n_clusters).
  MAXITER : int
    Maximum number of iterations (passed as max_iter).
  randomState : int or None, optional
    Forwarded to MiniBatchKMeans as random_state. Pass an integer to get
    reproducible labels; None (default) keeps the estimator's own default.

  Returns
  -------
  hogsLabels : numpy.ndarray
    Cluster label of every descriptor, in the order the descriptors appear
    when iterating over images and then over each image's descriptors.
  sizes : numpy.ndarray
    Number of descriptors of each image, needed to split hogsLabels back
    per image.
  """
  sizes = np.array([len(imageGradients) for imageGradients in gradients])
  # All descriptors from all images in a single 2-D sample matrix.
  flattenedHogs = np.array([block for image in gradients for block in image])
  miniBatchKMeans = MiniBatchKMeans(n_clusters=NB_CLUSTERS, max_iter=MAXITER,
                                    compute_labels=True, random_state=randomState)
  hogsLabels = miniBatchKMeans.fit_predict(flattenedHogs)
  return hogsLabels, sizes

<h2>Binning local HOGs : </h2>

In [1]:
def makeHistograms(labels, NB_CLUSTERS, sizes):
  """Build one cluster-occurrence histogram per image.

  `labels` is the flat list of cluster labels of all local descriptors;
  `sizes[k]` tells how many consecutive labels belong to image k.

  Parameters
  ----------
  labels : sequence of int
    Cluster labels, concatenated image after image.
  NB_CLUSTERS : int
    Number of clusters, i.e. the length of each histogram.
  sizes : sequence of int
    Number of labels belonging to each image, in order.

  Returns
  -------
  numpy.ndarray
    Float array with one row per image and NB_CLUSTERS columns; entry
    [k, c] counts the labels equal to c among image k's labels.
  """
  labels = np.asarray(labels, dtype=np.intp)
  hogs = []
  start = 0
  for size in sizes:
    stop = start + size
    histogram = np.zeros(NB_CLUSTERS)
    # Unbuffered add so that repeated labels are each counted.
    np.add.at(histogram, labels[start:stop], 1)
    hogs.append(histogram)
    # Advance by the full image size (not size-1) so the next image starts
    # exactly after this image's last label.
    start = stop
  return np.array(hogs)

<h1>Test zone</h1>

In [None]:
if __name__ == '__main__':
  # Smoke test of the whole pipeline on a single local image, timing each
  # stage: sequencing -> local HOGs -> clustering -> per-image histograms.

  start = time.time()
  path ='../../03-jeux-de-donnees/101_ObjectCategories'
  # Each row holds the image path at index 1 (index 0 is presumably the
  # class label, as in the commented-out crawl below).
  testNpImages = [ [1,'testImage.jpg'] ]
  CELL_DIMENSION = 5
  NB_CLUSTERS = 12
  MAXITER = 100
  print(testNpImages[0][1])
  print("Fetching Images in " + path)

  # get dictionary to link classLabels Text to Integers
  # sClassLabels = getClassLabels(path)

  # Get all path from all images inclusive classLabel as Integer
  # dfImages = imgCrawl(path, sClassLabels)
  # npImages = dfImages.values
  extractedTime = time.time()
  print("Extracted images in " + str(extractedTime-start) +'sec')
  print("Sequencing Images ...")
  blocks = imageSequencing(testNpImages, CELL_DIMENSION)
  sequencedTime = time.time()
  print("Sequenced images in " + str(sequencedTime-extractedTime) +'sec')
  print("Computing gradient on each block ...")
  gradients = hogAllBlocks(blocks)
  hogedTime = time.time()
  print("Computed gradients in " + str(hogedTime - sequencedTime) + 'sec')
  print("Clustering gradients ...")
  gradientLabels, sizes = clusterGradients(gradients, NB_CLUSTERS, MAXITER)
  clusteredTime = time.time()
  # Clustering duration runs from the end of the HOG stage to now.
  print("Clustered gradients in " + str(clusteredTime - hogedTime) + 'sec')
  print("Computing histograms ...")
  histograms = makeHistograms(gradientLabels, NB_CLUSTERS, sizes)
  end = time.time()
  # Histogram duration is measured from the end of clustering, un-truncated
  # like the other stage timings.
  print("Computed histograms in " + str(end - clusteredTime) + 'sec')
  print("Total time : " + str(end-start) + 'sec')
  print("Histogram shape : " +str(histograms.shape))