<a href="https://colab.research.google.com/github/ldeluigi/supermarket-2077-product-vision/blob/master/Store_Products_Classifier.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Preliminary Operations

### Download dataset

In [None]:
!rm -rf sample_data
!gdown --id 1C-9gj15K2d7nuwgxbE9cZ5lwUpAliLrA -O all.zip # Old dataset: 1fDr4g4wbnSRkuCYyS3wpuJS7Ax22bVB_
!unzip -oq all.zip

%matplotlib inline

### Install dependencies

In [None]:
!pip install opencv-contrib-python==4.4.0.44

### Imports

In [None]:
import csv
import os
from pathlib import Path
import re
import cv2
import matplotlib.pyplot as plt
import numpy as np
import math
import itertools
import shutil
import json
from tqdm.notebook import tqdm
from keras.preprocessing.image import ImageDataGenerator
import tensorflow as tf
from sklearn.metrics import confusion_matrix
from sklearn.linear_model import LogisticRegression
from sklearn.cluster import KMeans
from sklearn.neighbors import NearestNeighbors
from scipy.spatial.distance import cosine
from sklearn.utils.extmath import weighted_mode
from sklearn.metrics.pairwise import chi2_kernel
from scipy.stats import mode

# Data Visualization Utilities

In [None]:
def show_image(img):
  """Draw `img` with pyplot's `imshow` after switching the current axes' decorations off."""
  plt.axis('off')
  plt.imshow(img)

def show_grayscale_image(img):
  """Display a single-channel image by replicating it into three identical channels."""
  three_channels = cv2.merge([img] * 3)
  show_image(three_channels)

def plot_grid(images, columns, show_axis=False, labels=None):
  """Plot `images` in a grid with `columns` columns.

  Args:
    images: sequence of numpy image arrays; nothing is drawn when it is empty.
    columns: number of grid columns; nothing is drawn when it is <= 0.
    show_axis: keep the axis decorations when True.
    labels: optional subplot titles, cycled when shorter than `images`.
      When None or empty, each subplot is titled with its 1-based position.
  """
  if len(images) == 0 or columns <= 0:
    return
  rows = math.ceil(len(images) / columns)
  height = 1 + rows * 2
  width = columns * 4
  # Guard against a zero dpi for images smaller than 2 pixels per side.
  dpi = max(1, max(images[0].shape[0], images[0].shape[1]) // 2)
  fig = plt.figure(figsize=(width, height), dpi=dpi)
  fig.subplots_adjust(hspace=0.4)
  has_labels = labels is not None and len(labels) > 0
  for index, img in enumerate(images, start=1):
    # `dtype.str` is e.g. '<f4' and never contains 'float', so the original
    # check was dead code; test the dtype kind instead.
    if np.issubdtype(img.dtype, np.floating):
      # assumes float images are scaled to [0, 1]; clip so the uint8 cast cannot wrap
      img = (np.clip(img, 0.0, 1.0) * 255).astype('uint8')
    sp = fig.add_subplot(rows, columns, index)
    if not show_axis:
      plt.axis('off')
    plt.imshow(img)
    if has_labels:
      sp.set_title(labels[(index - 1) % len(labels)], fontsize=10)
    else:
      sp.set_title(index, fontsize=10)

def bar_plot(ax, data, colors=None, total_width=0.5, single_width=1, legend=True):
  """Draw a grouped bar chart on `ax`.

  Args:
    ax: axes-like object exposing `bar` and `legend`.
    data: mapping {series name: sequence of bar heights}.
    colors: colors cycled per series; defaults to matplotlib's property cycle.
    total_width: width shared by all the bars of one group.
    single_width: fraction of its slot that each bar occupies.
    legend: when True, add a legend with one entry per non-empty series.
  """
  if colors is None:
    colors = plt.rcParams['axes.prop_cycle'].by_key()['color']

  n_bars = len(data)
  if n_bars == 0:
    # Nothing to draw; also avoids dividing by zero below.
    return
  bar_width = total_width / n_bars
  handles = []
  handle_names = []

  for i, (name, values) in enumerate(data.items()):
    x_offset = (i - n_bars / 2) * bar_width + bar_width / 2
    series_handle = None
    for x, y in enumerate(values):
      bar = ax.bar(x + x_offset, y, width=bar_width * single_width, color=colors[i % len(colors)])
      series_handle = bar[0]
    # An empty series has no bar to use as legend handle: skip it rather than
    # reusing the previous series' handle (or failing on an unbound name).
    if series_handle is not None:
      handles.append(series_handle)
      handle_names.append(name)

  if legend:
    ax.legend(handles, handle_names)

def plot_histogram(histogram):
  """Show one histogram as a 21x9 inch, 120 dpi bar chart without a legend."""
  figure, axes = plt.subplots()
  figure.set_dpi(120)
  figure.set_size_inches(21, 9)
  single_series = {'0': histogram}
  bar_plot(axes, single_series, legend=False)
  plt.show()

def plot_histograms(histograms):
  """Show several histograms side by side; series are named '1', '2', ... in input order."""
  figure, axes = plt.subplots()
  figure.set_dpi(120)
  figure.set_size_inches(21 * len(histograms), 9)
  series = dict((str(position), values) for position, values in enumerate(histograms, start=1))
  bar_plot(axes, series, legend=True)
  plt.show()

def dataset_plot_grid(indexes, columns, dataset, draw_item):
  """Create one subplot per entry of `indexes` and let `draw_item(row, subplot)` render `dataset[index]`."""
  figure = plt.figure(figsize=(12, 6), dpi=120)
  for position, dataset_index in enumerate(indexes, start=1):
    n_rows = math.ceil(len(indexes) / columns)
    subplot = figure.add_subplot(n_rows, columns, position)
    draw_item(dataset[dataset_index], subplot)

# Raw image loading

## Utilities to read raw data from disk

In [None]:
def remove_prefix(text, prefix):
  """Return `text` without a leading `prefix`; `text` is returned unchanged when it does not start with it."""
  return text[len(prefix):] if text.startswith(prefix) else text

In [None]:
# Root directory walked by read_classes / read_training_data.
training_dirname = 'Training'
# Directory holding the store images (*.jpg) and their annotation CSV.
store_dirname = 'Store'
# CSV read by read_store_data (columns: image_index, bounding_box, product_label).
store_csv = os.path.join(store_dirname, 'store.csv')

def create_class_label(class_index, class_name):
  """Return the label for a class; currently just `class_name` (`class_index` is unused)."""
  return class_name

def read_classes():
  """Return {class index: class name} for the training set.

  A class is any directory under `training_dirname` that directly contains
  files; its name is the directory path with the `training_dirname` prefix
  removed. Indexes follow the sorted order of the names.
  """
  prefix = training_dirname + os.path.sep
  class_names = sorted({
    remove_prefix(directory, prefix)
    for directory, _subdirs, filenames in os.walk(training_dirname)
    if len(filenames) > 0
  })
  return dict(enumerate(class_names))


def read_training_data(classes):
  """Load every training image as RGB together with its class index.

  The file name minus its last four characters is parsed as the image index.
  Returns `(images, class_indexes)`, both ordered by image index; indexes are
  looked up as 0..N-1, so a gap in the numbering raises KeyError.
  """
  loaded = {}
  for class_index, class_dirname in classes.items():
    class_dir = os.path.join(training_dirname, class_dirname)
    for raw_name in os.listdir(os.fsencode(class_dir)):
      image_index = int(raw_name[:-4])
      bgr = cv2.imread(os.path.join(class_dir, os.fsdecode(raw_name)))
      loaded[image_index] = (cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB), class_index)
  ordered = [loaded[i] for i in range(len(loaded))]
  return [image for image, _ in ordered], [label for _, label in ordered]

def read_store_data():
  """Load the store images (RGB) and their annotations.

  Returns:
    (store_images, store_data): two lists of equal length, both ordered by the
    numeric index taken from each `<index>.jpg` file name. `store_data[i]` is
    the list of `(bounding_box, product_label)` pairs that `store_csv` declares
    for that image, or `[]` when it has none.
  """
  annotations = {}
  # newline='' is what the csv module asks for when reading from a file object.
  with open(store_csv, 'r', newline='') as csvfile:
    for row in csv.DictReader(csvfile):
      image_index = int(row['image_index'])
      bbox = json.loads(row['bounding_box'])
      product_label = int(row['product_label'])
      annotations.setdefault(image_index, []).append((bbox, product_label))

  images = {}
  for fname in os.listdir(store_dirname):
    path = os.path.join(store_dirname, fname)
    if os.path.isfile(path) and fname.endswith('.jpg'):
      images[int(fname[:-4])] = cv2.cvtColor(cv2.imread(path), cv2.COLOR_BGR2RGB)

  # Pair images and annotations through the same sorted index list so they stay
  # aligned even when the file indexes do not form an exact 0..N-1 range.
  ordered_indexes = sorted(images)
  store_images = [images[i] for i in ordered_indexes]
  store_data = [annotations.get(i, []) for i in ordered_indexes]
  return store_images, store_data

## Prepare products class dictionary

In [None]:
# {class index: class name}, built from the training directory layout.
classes = read_classes()

def class_name(class_index):
  """Return the name of class `class_index`, or None for a negative index."""
  if class_index < 0:
    return None
  return classes[class_index]

classes

## Load training raw images

In [None]:
# products[i] is the RGB image with index i; products_classes[i] is its class index.
products, products_classes = read_training_data(classes)

## Products visualization

In [None]:
def show_products_with_class(indexes, columns, dataset):
  """Plot the `(image, class_index)` rows of `dataset` selected by `indexes`, titled with their class name."""
  def draw_product(row, subplot):
    image, class_index = row
    plt.axis('off')
    plt.imshow(image)
    subplot.set_title(class_name(class_index), fontsize=10)
  dataset_plot_grid(indexes, columns, dataset, draw_product)

show_products_with_class(np.random.randint(0, len(products), 6), 3, list(zip(products, products_classes)))

# Raw Image preprocessing

## Dataset filtering and initial preprocessing

The initial preprocessing and filtering (dataset reduction) were done in the following notebook, hosted on Colab:

https://colab.research.google.com/drive/1kw36rUwx4lOgRAGMTywGsh0AtmVwVPAO

## Image resize

In [None]:
# Gray level used to fill the padding added by pad_image.
background_color = 255

def pad_image(img, size, color=[background_color, background_color, background_color]):
  """Resize `img` to fit inside `size`, keeping its aspect ratio, and pad it to exactly `size`.

  Args:
    img: image array whose first two dimensions are (height, width); both
      2-D grayscale and 3-D color arrays are accepted.
    size: target `(width, height)` in pixels.
    color: border fill value passed to cv2.copyMakeBorder (never mutated here).

  Returns:
    The resized image centered on a `size` canvas; when the padding is odd the
    extra pixel goes to the bottom/right.
  """
  target_w, target_h = size
  # Only the first two dimensions are needed, so grayscale arrays work too.
  original_h, original_w = img.shape[:2]
  target_ar = target_w / target_h
  original_ar = original_w / original_h

  scale_factor = target_h / original_h if target_ar > original_ar else target_w / original_w
  # cv2.resize rejects a zero-sized target, which very elongated inputs would round to.
  scaled_w = max(1, round(original_w * scale_factor))
  scaled_h = max(1, round(original_h * scale_factor))
  resized = cv2.resize(img, (scaled_w, scaled_h))

  delta_h = target_h - scaled_h
  delta_w = target_w - scaled_w
  top    = delta_h // 2
  left   = delta_w // 2
  bottom = delta_h - top
  right  = delta_w - left

  return cv2.copyMakeBorder(resized, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)

def resize_image(img, max_dim):
  """Scale `img` so that its longer side measures `max_dim` pixels, keeping the aspect ratio.

  Args:
    img: image array whose first two dimensions are (height, width); both
      2-D grayscale and 3-D color arrays are accepted.
    max_dim: target length of the longer side, in pixels.

  Returns:
    The resized image produced by cv2.resize.
  """
  # Only the first two dimensions are needed, so grayscale arrays work too.
  original_h, original_w = img.shape[:2]
  scale_factor = max_dim / max(original_h, original_w)
  # cv2.resize rejects a zero-sized target, which very elongated inputs would round to.
  scaled_w = max(1, round(original_w * scale_factor))
  scaled_h = max(1, round(original_h * scale_factor))
  return cv2.resize(img, (scaled_w, scaled_h))

# Demo on one hard-coded product index.
# NOTE(review): raises IndexError if the dataset has 1000 images or fewer; `n` is
# also read again by the descriptor statistics cell further down.
n = 1000
print(f'Index: {n}')
print(f'Class: {class_name(products_classes[n])}')
# Each row shows the original on the left; right column: resize_image, then pad_image.
plot_grid([products[n], resize_image(products[n], 200), products[n], pad_image(products[n], (200, 200))], 2, show_axis=True)

## Dataset preparation



### Image cleaning

In [None]:
def preprocess_image(img):
  """Apply the preprocessing shared by training and query images: resize the longer side to 256 px."""
  # A 3x3 Gaussian blur was tried at this point and is currently disabled.
  return resize_image(img, 256)

def clean_image(img):
  """Prepare a training image; currently identical to `preprocess_image`."""
  # Padding to 256x256 was tried at this point and is currently disabled.
  return preprocess_image(img)

### Prepare dataset

In [None]:
# Cleaned copy of every training image, in the same order as `products`.
all_products_images = [clean_image(image) for image, _ in zip(products, products_classes)]

print(len(all_products_images))

# Computer vision model

## Utilities setup

### Pickle setup

In [None]:
import pickle
# File the model data is pickled to by the save cell and read back from by the load cell.
model_file_name = 'model.pkl'

### Distance metrics

In [None]:
def cosine_distance(x, y):
  """Return scipy's cosine distance between vectors `x` and `y`."""
  return cosine(x, y)

def euclidean_distance(x, y):
  """Return the Euclidean (L2) norm of `x - y`."""
  difference = x - y
  return np.linalg.norm(difference)

def chi2_distance(x, y):
  """Return `1 - k(x, y)`, where `k` is the chi-squared kernel with gamma 0.3."""
  kernel_matrix = chi2_kernel([x], [y], gamma = 0.3)
  similarity = kernel_matrix[0][0]
  return 1 - similarity

## Feature extraction

In [None]:
def extract_dense_keypoints(img, extractor):
  """Yield keypoints on a regular grid, for patch sizes 4, 8 and 16.

  For every size the grid step is twice the size. `extractor` is unused; it is
  accepted so the signature matches `extract_keypoints`.
  """
  for patch_size in (4, 8, 16):
    stride = 2 * patch_size
    columns = range(0, img.shape[1], stride)
    rows = range(0, img.shape[0], stride)
    # product() varies y fastest, like the nested x/y loops it replaces.
    for x, y in itertools.product(columns, rows):
      yield cv2.KeyPoint(x, y, patch_size, _class_id=0)

def extract_keypoints(img, extractor):
  """Return the keypoints that `extractor.detect` finds in `img`."""
  return extractor.detect(img)

def extract_descriptors_from_image(img, extractor, dense=False):
  """Compute keypoints and descriptors of `img`.

  Args:
    img: image to analyze.
    extractor: object exposing `detect` and `compute`.
    dense: sample keypoints on a regular grid instead of detecting them.

  Returns:
    `(keypoints, descriptors)`, or `([], [])` when `compute` yields no descriptors.
  """
  find_keypoints = extract_dense_keypoints if dense else extract_keypoints
  candidates = list(find_keypoints(img, extractor))
  keypoints, descriptors = extractor.compute(img, candidates)
  return ([], []) if descriptors is None else (keypoints, descriptors)

def extract_descriptors(images, extractor, show_progress=False):
  """Return the descriptors of each image, in input order, optionally with a progress bar."""
  source = images
  if show_progress:
    source = tqdm(images, total=len(images))
  all_descriptors = []
  for image in source:
    _, image_descriptors = extract_descriptors_from_image(image, extractor)
    all_descriptors.append(image_descriptors)
  return all_descriptors

# SIFT extractor created with nfeatures=1000; reused for training and query images.
extractor = cv2.SIFT_create(nfeatures = 1000)
# descriptors[i] holds the descriptors of all_products_images[i] ([] when none were found).
descriptors = extract_descriptors(all_products_images, extractor, show_progress=True)

In [None]:
# Summary statistics over the extracted descriptors.
print("Numero di keypoint estratti in totale :", sum([len(d) for d in descriptors]))
# NOTE(review): `n` still holds the value set by an earlier cell; this line fails
# if that image produced no descriptors.
print("Numero di componenti del descrittore per ogni keypoint :", len(descriptors[n][0]))
print("Numero di descrittori estratti in media da ogni immagine :", np.mean(list(map(lambda d: len(d), descriptors))))
print()

# Show keypoints on image
def show_keypoints(img, extractor):
  """Display a copy of `img` with its keypoints drawn in (255, 255, 0); `img` itself is left untouched."""
  canvas = img.copy()
  keypoints, _ = extract_descriptors_from_image(canvas, extractor)
  annotated = cv2.drawKeypoints(canvas, keypoints, canvas, color=(255,255,0))
  show_image(annotated)

# Inspect the keypoints of one hard-coded product image.
n = 420
print("Numero di keypoint estratti dall'immagine", n, ":", len(descriptors[n]))
print("Posizioni dei keypoint dell'immagine", n, ":")
show_keypoints(all_products_images[n], extractor)

## Feature clusterization into Bag of Visual Words histograms

### Compute and save on disk
To skip this step, run the cells in the 'Load model from disk' section instead.

In [None]:
def create_bovw(descriptors, n_clusters, kmeans_max_iter = 150):
  """Cluster all descriptors into `n_clusters` visual words with cv2.kmeans.

  Args:
    descriptors: one collection of descriptors per image.
    n_clusters: number of visual words to compute.
    kmeans_max_iter: iteration cap used as the only termination criterion.

  Returns:
    `(visual_words, labels_grouped_by_image, compactness)`: the cluster centers
    as lists of floats, the cluster label of every descriptor grouped per image
    in input order, and the compactness value reported by cv2.kmeans.
  """
  criteria = (cv2.TERM_CRITERIA_MAX_ITER, kmeans_max_iter, 0.0)
  samples = np.array(list(itertools.chain.from_iterable(descriptors)), dtype = np.float32)
  compactness, labels, centers = cv2.kmeans(samples, n_clusters, None, criteria, 1, cv2.KMEANS_RANDOM_CENTERS)

  # `labels` follows the flattened order, so it is split back with a running offset.
  labels_grouped_by_image = []
  start = 0
  for image_descriptors in descriptors:
    stop = start + len(image_descriptors)
    labels_grouped_by_image.append([labels[i][0] for i in range(start, stop)])
    start = stop

  visual_words = [[float(component) for component in center] for center in centers]
  return visual_words, labels_grouped_by_image, compactness

# Number of visual words in the vocabulary.
n_clusters = 256
# NOTE(review): create_bovw uses KMEANS_RANDOM_CENTERS and no seed is set in the
# visible cells, so the vocabulary may differ between runs.
bovw, bovw_labels, compactness = create_bovw(descriptors, n_clusters)

print("Compattezza =", compactness)
print("Numero di visual words =", len(bovw))
print("Numero di descrittori per ogni visual word =", len(bovw[0]))

#### Histogram functions definitions

In [None]:
def normalize_histogram(histogram):
  """Scale `histogram` so that its bins sum to 1; an all-zero histogram maps to all 0.0."""
  total = np.sum(histogram)
  if total == 0:
    return [0.0] * len(histogram)
  return [count / total for count in histogram]

def create_histogram_for_image(descriptors, bovw, bovw_distance_metric):
  """Build the normalized visual-word histogram of one image.

  Each descriptor votes for the visual word in `bovw` that is closest under
  `bovw_distance_metric`; on ties the lowest word index wins.
  """
  n_words = len(bovw)
  counts = [0] * n_words
  for descriptor in descriptors:
    best_index = 0
    best_distance = bovw_distance_metric(descriptor, bovw[0])
    for word_index in range(1, n_words):
      candidate = bovw_distance_metric(descriptor, bovw[word_index])
      if candidate < best_distance:
        best_index, best_distance = word_index, candidate
    counts[best_index] += 1
  return normalize_histogram(counts)

def create_histograms(descriptors, bovw, show_progress=False):
  """Return one normalized histogram per entry of `descriptors`, matching words by Euclidean distance."""
  per_image = descriptors
  if show_progress:
    per_image = tqdm(descriptors, total = len(descriptors), desc = "Calculating histograms...")
  return [
    create_histogram_for_image(image_descriptors, bovw, euclidean_distance)
    for image_descriptors in per_image
  ]

def create_histogram_from_labels(n_clusters, labels):
  """Count how often each cluster index occurs in `labels` and normalize the counts."""
  counts = [0] * n_clusters
  for cluster_index in labels:
    counts[cluster_index] += 1
  return normalize_histogram(counts)

def create_histograms_from_labels(n_clusters, labels):
  """Return one normalized histogram per group of cluster labels."""
  histograms_per_image = []
  for image_labels in labels:
    histograms_per_image.append(create_histogram_from_labels(n_clusters, image_labels))
  return histograms_per_image

def compute_histograms(images, bovw, extractor, show_progress=False):
  """Preprocess `images`, extract their descriptors and return their visual-word histograms."""
  prepared = list(map(preprocess_image, images))
  image_descriptors = extract_descriptors(prepared, extractor)
  return create_histograms(image_descriptors, bovw, show_progress=show_progress)

#### Compute histograms look-up table

In [None]:
# Training histograms come straight from the k-means labels, so descriptors are
# not matched against the vocabulary a second time.
histograms = create_histograms_from_labels(n_clusters, bovw_labels)

#### Save model on disk

In [None]:
# Persist everything the 'Load model from disk' cell restores.
with open(model_file_name, 'wb') as model_file:
  data = {
    'histograms': histograms,
    'bovw': bovw,
    'bovw_labels': bovw_labels,
    'n_clusters': n_clusters
  }
  pickle.dump(data, model_file, pickle.HIGHEST_PROTOCOL)

## Load model from disk

In [None]:
# Restore the model saved by the cell above.
# NOTE(review): pickle.load can execute arbitrary code; only load a model file
# produced by this notebook or another trusted source.
with open(model_file_name, 'rb') as model_file:
  data = pickle.load(model_file)
  histograms = data['histograms']
  bovw = data['bovw']
  bovw_labels = data['bovw_labels']
  n_clusters = data['n_clusters']

## Histogram and keypoints visualization

In [None]:
# Pick a random training image, color its keypoints by visual word, and show it
# next to its histogram.
n = np.random.randint(0, len(all_products_images))
# Work on a copy: cv2.drawKeypoints below writes into its output image, and
# drawing on all_products_images[n] itself would permanently alter a dataset
# image that later cells read.
img = all_products_images[n].copy()
keypoints, _ = extract_descriptors_from_image(img, extractor)
labels = bovw_labels[n]
keypoints_by_label = {}

# Group the keypoints by the visual word assigned to their descriptor.
# NOTE(review): assumes re-extraction yields keypoints in the same order as the
# descriptors that were clustered — confirm.
for k, l in zip(keypoints, labels):
  keypoints_by_label.setdefault(l, []).append(k)

# One random color per visual word.
for l in keypoints_by_label:
  [r, g, b] = list(np.random.choice(range(256), size=3))
  cv2.drawKeypoints(img, keypoints_by_label[l], img, color = (int(b), int(g), int(r)))

print("Numero di keypoint =", len(keypoints))
show_image(resize_image(img, 256))
print("Histogram: ")
plot_histogram(histograms[n])

# Testing model on stores

In [None]:
def predict(images, bovw, nn, extractor, show_progress=False):
  """Return `(distances, indices)` of the nearest training histograms for each image.

  `nn` must expose `kneighbors`; the images are turned into histograms with
  `compute_histograms` before the query.
  """
  query_histograms = compute_histograms(images, bovw, extractor, show_progress=show_progress)
  neighbor_distances, neighbor_indices = nn.kneighbors(query_histograms)
  return neighbor_distances, neighbor_indices

# Number of neighbors retrieved per query.
k = 10
# Nearest-neighbor index over the training histograms, using the chi-squared distance.
nn = NearestNeighbors(n_neighbors=k, metric=chi2_distance).fit(histograms) # metric=euclidean_distance

In [None]:
# store_bounding_boxes[i] lists the (bounding_box, product_label) pairs of store_images[i].
store_images, store_bounding_boxes = read_store_data()

## Tests on cropped store images

In [None]:
def crop_bounding_boxes(store_images, bounding_boxes):
  """Cut every annotated box out of its store image.

  Args:
    store_images: image arrays indexed as (row, column, ...).
    bounding_boxes: for each image, a list of `(bbox, label)` pairs where
      `bbox` is `[xmin, xmax, ymin, ymax]` expressed as fractions of the
      image width and height.

  Returns:
    `(cropped_images, labels)`, two parallel lists in annotation order.
  """
  crops, crop_labels = [], []
  for picture, annotations in zip(store_images, bounding_boxes):
    height, width = picture.shape[0], picture.shape[1]
    for (xmin, xmax, ymin, ymax), label in annotations:
      left, right = int(xmin * width), int(xmax * width)
      top, bottom = int(ymin * height), int(ymax * height)
      crops.append(picture[top:bottom, left:right])
      crop_labels.append(label)
  return crops, crop_labels

# Only the first 20 store images are cropped for this test.
cropped_images, cropped_images_labels = crop_bounding_boxes(store_images[:20], store_bounding_boxes[:20])
# Preview the first 12 crops.
plot_grid([resize_image(i, 256) for i in cropped_images[:12]], 3)

In [None]:
# For each crop: distances to, and indexes of, its k nearest training histograms.
distances, predictions = predict(cropped_images, bovw, nn, extractor, show_progress=True)

In [None]:
# Score the predictions on cropped images at product level (nearest neighbor is
# exactly the labelled product) and at category level (distance-weighted vote
# over the classes of the k neighbors).
total = len(predictions)
category_hit = 0
product_hit = 0
category_hit_indexes = []
category_miss_indexes = []
product_hit_indexes = []
product_miss_indexes = []


for index, label, prediction, distance_vector in zip(range(total), cropped_images_labels, predictions, distances):
  # Inverse-distance weights; the epsilon avoids dividing by zero on exact matches.
  weights = [1 / (d + 1e-5)  for d in distance_vector]
  predicted_product = prediction[0]
  predicted_classes = [products_classes[p] for p in prediction]
  most_voted_class = weighted_mode(predicted_classes, weights)[0][0]
  #print(products_classes[label], predicted_classes, weights, "->", most_voted_class)
  # `label` is compared with a training-image index and also used to index products_classes.
  if label == predicted_product:
    product_hit += 1
    product_hit_indexes.append(index)
  else:
    product_miss_indexes.append(index)
  if products_classes[label] == most_voted_class:
    category_hit += 1
    category_hit_indexes.append(index)
  else:
    category_miss_indexes.append(index)

# NOTE(review): the accuracy lines divide by `total` and fail when there are no predictions.
print('Numero totale di esempi:', total)
print('Numero di classificazioni di prodotto corrette:', product_hit)
print('Numero di classificazioni di prodotto errate:', total - product_hit)
print('Accuratezza di prodotto:', (product_hit / total) * 100, '%')
print('Numero di classificazioni di categoria corrette:', category_hit)
print('Numero di classificazioni di categoria errate:', total - category_hit)
print('Accuratezza di categoria:', (category_hit / total) * 100, '%')

In [None]:
def compare_histograms(n):
  """Plot crop `n`, its labelled training image and a blurred copy of the latter, with their histograms."""
  crop = cropped_images[n]
  reference = all_products_images[cropped_images_labels[n]]
  reference_blurred = cv2.GaussianBlur(reference, (3, 3), 0)

  images_to_compare = [crop, reference, reference_blurred]
  histograms_to_compare = compute_histograms(images_to_compare, bovw, extractor)
  plot_grid(images_to_compare, 4)
  plot_histograms(histograms_to_compare)

compare_histograms(101)

## Tests on raw store images

In [None]:
def sliding_window(img):
  """Yield the tiles of a 3-column by 2-row split of `img`, row by row from the top left."""
  # Each entry: (columns, rows, column step, row step).
  grids = [(3, 2, 1, 1)]
  for columns, rows, column_step, row_step in grids:
    window_w = img.shape[1] / columns
    window_h = img.shape[0] / rows
    for row in range(0, rows, row_step):
      top = int(row * window_h)
      bottom = int(top + window_h)
      for column in range(0, columns, column_step):
        left = int(column * window_w)
        right = int(left + window_w)
        yield img[top:bottom, left:right]

### Performance evaluation


In [None]:
def predict_with_window(img, bovw, classifier, extractor):
  """Predict the class of a whole store image by voting over its sliding windows.

  Every neighbor returned for every window votes for its class with weight
  1 / distance. Class 0 is skipped as background.

  Args:
    img: store image to classify.
    bovw: visual-word vocabulary.
    classifier: fitted nearest-neighbor model, passed to `predict`.
    extractor: descriptor extractor.

  Returns:
    The class index with the largest total weight (the first one voted wins a
    tie), or None when there is no non-background vote.
  """
  sw_images = list(sliding_window(img))
  distances, indices = predict(sw_images, bovw, classifier, extractor)
  votes = {}
  for vote_indexes, vote_distances in zip(indices, distances):
    for neighbor_index, vote_distance in zip(vote_indexes, vote_distances):
      voted_class = products_classes[neighbor_index]
      if voted_class == 0: # Skip background
        continue
      # The tiny epsilon keeps an exact match (distance 0) from dividing by zero.
      votes[voted_class] = votes.get(voted_class, 0) + 1 / (vote_distance + 1e-20)
  if not votes:
    # Every neighbor was background: report "no prediction" instead of
    # failing with IndexError on an empty ranking.
    return None
  return max(votes, key=votes.get)



# Classify every raw store image and compare with its expected class.
predictions = [predict_with_window(img, bovw, nn, extractor) for img in tqdm(store_images, total=len(store_images))]
# NOTE(review): `store_classes` is not defined in any visible cell, so this line
# raises NameError on a fresh kernel — define it before this cell.
score = [1 if prediction == store_class else 0 for store_class, prediction in zip(store_classes, predictions)]
print("Accuracy:", sum(score) / len(score))

In [None]:
# Debug cell: run the pipeline on the windows of the first store image.
sw_images = list(sliding_window(store_images[0]))
image_descriptors = extract_descriptors(sw_images, extractor)
image_histograms = create_histograms(image_descriptors, bovw, show_progress=True)
# NOTE(review): prints the full descriptor arrays of every window; consider printing only shapes.
print(image_descriptors)
print(len(image_descriptors[0]))
# print(predict(sw_images, bovw, nn, extractor, show_progress=False))

print(histograms[100])