# Descritores de Imagens via Histograma de Cor

Exemplos baseados em https://github.com/xn2333/OpenCV/blob/master/Seminar_Image_Processing_in_Python.ipynb







# Importando Bibliotecas

In [None]:
import numpy as np
import pandas as pd
import cv2 as cv 
from google.colab.patches import cv2_imshow # for image display
from skimage import io
from PIL import Image 
import matplotlib.pylab as plt

# Vamos construir nosso dataset

In [None]:
# Mount Google Drive so that files under /content/drive become readable
# and writable from this notebook.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
def load_zero():
  """Read every decodable image of both groups from the Drive folder.

  The ``loona/`` and ``deukae/`` sub-folders of the project directory are
  listed in that order and each entry is read with ``cv.imread``. Entries
  for which ``cv.imread`` returns ``None`` are skipped.

  Returns:
    dict with four parallel lists: ``"group"`` (the folder name),
    ``"filename"``, ``"image"`` (the value returned by ``cv.imread``) and
    ``"post"`` (initialised to ``False`` for every image).
  """
  import os

  path = '/content/drive/MyDrive/hourlyloonacatcher/'

  dataset = {"group": [], "filename": [], "image": [], "post": []}
  for group in ('loona', 'deukae'):
    folder = path + group + '/'
    for filename in os.listdir(folder):
      img = cv.imread(folder + filename)
      if img is None:
        # Not an image OpenCV could read; leave it out of the dataset.
        continue
      dataset["group"].append(group)
      dataset["filename"].append(filename)
      dataset["image"].append(img)
      dataset["post"].append(False)

  return dataset

# Extraindo características do dataset usando Histograma de Cor

In [None]:
def prepareX(dataset):
  """Turn every image into an L2-normalised colour-histogram descriptor.

  For each image a 256-bin histogram is computed per colour channel with
  ``cv.calcHist``. Within each channel, every image's histogram is scaled
  to unit Euclidean length, and the three channel blocks are concatenated
  in red, green, blue order.

  Args:
    dataset: mapping whose ``"image"`` entry is a sequence of images
      (channel 0 is treated as blue, 1 as green and 2 as red).

  Returns:
    Array of shape ``(n_images, 768)``; ``(0, 768)`` when there are no
    images.
  """
  images = dataset["image"]

  def channel_features(channel):
    # calcHist yields one (256, 1) column per image; store them as rows.
    hists = [
        cv.calcHist([image], [channel], None, [256], [0, 256])
        for image in images
    ]
    feats = np.array(hists).reshape(-1, 256)
    # Euclidean length of each row, kept as a column for broadcasting.
    length = np.sqrt((feats ** 2).sum(axis=1))[:, None]
    return feats / length

  X_b = channel_features(0)
  X_g = channel_features(1)
  X_r = channel_features(2)

  # All three channels must take part in the descriptor.
  return np.concatenate((X_r, X_g, X_b), axis=1)

# Agrupamento de Imagens

In [None]:
from sklearn.cluster import KMeans
import numpy as np

def do_kmeans(X, n_clusters=100):
  """Fit a K-Means model on ``X`` and return the fitted estimator.

  Args:
    X: feature matrix handed unchanged to ``KMeans.fit``.
    n_clusters: number of clusters requested from ``KMeans``.

  Returns:
    The fitted ``KMeans`` object.
  """
  model = KMeans(n_clusters=n_clusters)
  model.fit(X)
  return model

## Escolher a imagem

In [None]:
def get_probs(loona_prob=None, deukae_prob=None):
  """Return the member names of both groups with their selection weights.

  Args:
    loona_prob: weights for the twelve ``loona`` names, in list order.
      ``None`` (the default) yields a fresh uniform list of ``1/12``.
    deukae_prob: weights for the seven ``deukae`` names, in list order.
      ``None`` (the default) yields a fresh uniform list of ``1/7``.

  Returns:
    Tuple ``(loona, loona_prob, deukae, deukae_prob)``. Lists passed in by
    the caller are returned as the same objects.
  """
  loona = [
      'heejin',
      'hyunjin',
      'haseul',
      'yeojin',
      'vivi',
      'kimlip',
      'jinsoul',
      'choerry',
      'yves',
      'chuu',
      'gowon',
      'oliviahye'
  ]

  deukae = [
      "jiu",
      "sua",
      "siyeon",
      "handong",
      "yoohyeon",
      "dami",
      "gahyeon"
  ]

  # Build the defaults per call: a list used as a default argument is shared
  # between calls, so in-place weight updates would leak into later calls.
  if loona_prob is None:
    loona_prob = [1.0 / len(loona)] * len(loona)
  if deukae_prob is None:
    deukae_prob = [1.0 / len(deukae)] * len(deukae)

  return loona, loona_prob, deukae, deukae_prob

In [None]:
def update_probs(loona_prob, deukae_prob, loona, deukae, loona_choice, deukae_choice):
  """Re-weight both groups after a draw, modifying the weight lists in place.

  A name found in its group's choice collection has its weight set back to
  the base value (``1/12`` for loona, ``1/7`` for deukae); every other name
  has that base value added to its current weight.

  Args:
    loona_prob: weights aligned with ``loona``; mutated in place.
    deukae_prob: weights aligned with ``deukae``; mutated in place.
    loona: loona member names.
    deukae: deukae member names.
    loona_choice: collection of loona names that were just drawn.
    deukae_choice: collection of deukae names that were just drawn.

  Returns:
    The same ``(loona_prob, deukae_prob)`` objects that were passed in.
  """
  def _refresh(weights, names, chosen, base):
    for idx, name in enumerate(names):
      weights[idx] = base if name in chosen else weights[idx] + base

  _refresh(loona_prob, loona, loona_choice, 1.0/12)
  _refresh(deukae_prob, deukae, deukae_choice, 1.0/7)

  return loona_prob, deukae_prob

In [None]:
import random 
from sklearn.metrics import silhouette_samples

# Number of K-Means clusters; read by first_run() when it clusters the
# descriptors and when it iterates over the cluster labels.
n_clusters = 50

def first_run():
  """Cluster the image collection and schedule 192 image pairs for posting.

  Steps:
    1. Load the images, build their colour-histogram descriptors and
       cluster them with K-Means into ``n_clusters`` (module-level) clusters.
    2. Order the cluster ids by ascending mean silhouette value.
    3. For each of 192 rounds, take the current pair of member names (one
       per group, drawn by weighted random choice) and walk the ordered
       clusters looking for unposted images whose filename contains each
       name. A complete pair is marked as posted and recorded. A new pair
       of names is then drawn and the weights are updated, whether or not
       the round found a pair.
    4. Write four CSV files (per-image flags, recorded pairs and the two
       weight tables) under ``/content/drive/MyDrive/hourlyloonacatcher_dfs/``.

  Returns:
    None. Results are printed and written to disk.
  """
  dataset = load_zero()
  X = prepareX(dataset)
  kmeans = do_kmeans(X, n_clusters)
  cluster_labels = kmeans.labels_

  sample_silhouette_values = silhouette_samples(X, cluster_labels)

  # Mean silhouette value of the samples assigned to each cluster.
  means_lst = []
  for label in range(n_clusters):
      means_lst.append(sample_silhouette_values[cluster_labels == label].mean())

  # Cluster ids sorted by ascending mean silhouette value.
  clusterList = [x for _,x in sorted(zip(means_lst,range(n_clusters)))]

  images_to_post = {'loona': [], 'deukae': []}

  # Initial pair of names.
  loona, loona_prob, deukae, deukae_prob = get_probs()
  loona_choice = random.choices(loona, weights=loona_prob, k=1)
  deukae_choice = random.choices(deukae, weights=deukae_prob, k=1)
  loona_prob, deukae_prob = update_probs(loona_prob, deukae_prob, loona, deukae, loona_choice, deukae_choice)

  for _ in range(192):
    print(loona_choice[0], deukae_choice[0])
    imageLoona, imageDeukae = -1, -1
    # Look for an image pair for the current names.
    # NOTE(review): imageLoona / imageDeukae are not reset between clusters,
    # so the two images of a pair may come from different clusters. Confirm
    # that this is intended.
    for c in clusterList:
      for image_id, cluster in enumerate(cluster_labels):
        if (not dataset["post"][image_id]
            and cluster == c
            and loona_choice[0] in dataset["filename"][image_id]):
          print(image_id)
          imageLoona = image_id
          break

      for image_id, cluster in enumerate(cluster_labels):
        if (not dataset["post"][image_id]
            and cluster == c
            and deukae_choice[0] in dataset["filename"][image_id]):
          print(image_id)
          imageDeukae = image_id
          break

      if (imageLoona != -1) and (imageDeukae != -1):
        break

    if (imageLoona != -1) and (imageDeukae != -1):
      dataset["post"][imageLoona] = True
      dataset["post"][imageDeukae] = True
      images_to_post['loona'].append(dataset["filename"][imageLoona])
      images_to_post['deukae'].append(dataset["filename"][imageDeukae])
    else:
      print("No more image combinations for the pair")

    # Draw the next pair even when this round found nothing; otherwise a
    # pair with no remaining combination would be retried in every
    # remaining round.
    loona_choice = random.choices(loona, weights=loona_prob, k=1)
    deukae_choice = random.choices(deukae, weights=deukae_prob, k=1)
    loona_prob, deukae_prob = update_probs(loona_prob, deukae_prob, loona, deukae, loona_choice, deukae_choice)

  # Build the data frames to save; the image arrays are dropped first so
  # that only group, filename and post flag are written.
  del dataset['image']
  df_dataset = pd.DataFrame(dataset)

  df_images_to_post = pd.DataFrame(images_to_post)

  df_loona = pd.DataFrame({'name': loona, 'prob': loona_prob})
  df_deukae = pd.DataFrame({'name': deukae, 'prob': deukae_prob})

  df_dataset.to_csv('/content/drive/MyDrive/hourlyloonacatcher_dfs/df_dataset.csv')
  df_images_to_post.to_csv('/content/drive/MyDrive/hourlyloonacatcher_dfs/images_to_post.csv')
  df_loona.to_csv('/content/drive/MyDrive/hourlyloonacatcher_dfs/df_loona.csv')
  df_deukae.to_csv('/content/drive/MyDrive/hourlyloonacatcher_dfs/df_deukae.csv')

In [None]:
# Run this only when no image pairs have been generated previously.
first_run()

jinsoul jiu
1887
3054
yves dami
343
2752
heejin siyeon
1033
2273
gowon gahyeon
760
3033
yves jiu
360
3076
hyunjin siyeon
1152
2314
vivi gahyeon
1559
1595
2907
oliviahye handong
822
2483
heejin siyeon
2009
2347
choerry sua
129
2164
chuu gahyeon
491
552
493
2908
kimlip dami
1830
2859
heejin handong
1048
2022
2398
gowon jiu
810
3160
oliviahye handong
910
835
861
2423
yeojin sua
1493
3206
haseul gahyeon
1293
2922
kimlip gahyeon
1798
1782
2967
jinsoul siyeon
1912
1952
2237
heejin dami
1048
2753
hyunjin yoohyeon
2648
1166
2562
kimlip jiu
1798
3141
yeojin sua
2061
1393
2069
vivi jiu
1559
3046
choerry handong
187
205
138
2531
hyunjin gahyeon
1216
1178
2977
gowon dami
673
2761
yves handong
306
386
2534
oliviahye gahyeon
910
835
904
970
865
2920
haseul yoohyeon
2648
2628
2564
1349
2661
gowon siyeon
717
723
2275
yeojin handong
1403
2527
jinsoul jiu
1912
1933
3086
hyunjin gahyeon
1216
1185
1164
2921
oliviahye dami
910
835
2789
vivi yoohyeon
1591
2648
kimlip siyeon
1825
1838
2301
jinsoul sua
1912
2

In [None]:
# Reload the per-image table saved by first_run().
df = pd.read_csv('/content/drive/MyDrive/hourlyloonacatcher_dfs/df_dataset.csv')
# 'Unnamed: 0' is the index column that to_csv wrote alongside the data.
df = df.drop(columns='Unnamed: 0')
df = df.reset_index().drop(columns='index')

In [None]:
# Number of rows whose 'post' flag is True.
len(df[df['post'] == True])

384