# Comunicação com Google Drive

In [1]:
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


#Bibliotecas

In [2]:
import numpy as np
import cv2 as cv
from skimage import io
import os
import pandas as pd

In [3]:
def extract_feature_dataset(path, extractor='color'):
  images_path = os.listdir(path)
  category = os.path.basename(os.path.normpath(path))
  data = []
  images_name = []
  categories = []
  for n, image in enumerate(images_path):
    category = os.path.basename(os.path.normpath(path))
    img_name = category + image
    print('Extraindo: ', image, ' Category:', category, ' Quantidade: ', n, '/', len(images_path))
    image = cv.imread(os.path.join(path, image))
    image_rgb = cv.cvtColor(image, cv.COLOR_BGR2RGB)
    image_gray = cv.cvtColor(image, cv.COLOR_RGB2GRAY)

    features = []
    if extractor == 'color':
      dataset_hist_r = []
      dataset_hist_g = []
      dataset_hist_b = []

      color = ('r', 'g', 'b')
      counter = 0
      hist = {}
      for i, col in enumerate(color):
        histr = cv.calcHist([image_rgb], [i], None, [256], [0,256])
        if col == 'g':  
          dataset_hist_g.append(histr)
        if col == 'b':  
          dataset_hist_b.append(histr)
        if col == 'r':
          dataset_hist_r.append(histr)
          
      X_r = np.array(dataset_hist_r)
      length = np.sqrt((X_r**2).sum(axis=1))[:, None]
      X_r = X_r / length

      X_g = np.array(dataset_hist_g)
      length = np.sqrt((X_g**2).sum(axis=1))[:, None]
      X_g = X_g / length

      X_b = np.array(dataset_hist_b)
      length = np.sqrt((X_b**2).sum(axis=1))[:, None]
      X_b = X_b / length


      X = np.concatenate((X_r, X_g, X_b), axis=1)
      X.shape
      features = X.max(2)
    elif extractor == 'gray':
      histr = cv.calcHist([image_gray], [0], None, [32], [0,256])
      features = (histr).max(1)

    images_name.append(img_name)
    categories.append(category)
    features = np.append(features, category)
    data.append((features))
  return data, images_name

In [4]:
dataset_path = "DATASET PATH"
dataset_class = images_path = os.listdir(dataset_path)

paths = []

for index, value in enumerate(dataset_class):
  paths.append(os.path.join(dataset_path, value))

dataset = []
dataset_files = []
dataset_categories = []
for index, value in enumerate(paths):
  (extractions, files) = extract_feature_dataset(paths[index], extractor='gray')
  dataset = dataset + extractions
  dataset_files = dataset_files + files

print(dataset_categories)
df_data = pd.DataFrame(dataset)

Extraindo:  3.jpg  Category: peolpe_and_villages_in_Africa  Quantidade:  0 / 100
Extraindo:  6.jpg  Category: peolpe_and_villages_in_Africa  Quantidade:  1 / 100
Extraindo:  9.jpg  Category: peolpe_and_villages_in_Africa  Quantidade:  2 / 100
Extraindo:  2.jpg  Category: peolpe_and_villages_in_Africa  Quantidade:  3 / 100
Extraindo:  1.jpg  Category: peolpe_and_villages_in_Africa  Quantidade:  4 / 100
Extraindo:  4.jpg  Category: peolpe_and_villages_in_Africa  Quantidade:  5 / 100
Extraindo:  5.jpg  Category: peolpe_and_villages_in_Africa  Quantidade:  6 / 100
Extraindo:  8.jpg  Category: peolpe_and_villages_in_Africa  Quantidade:  7 / 100
Extraindo:  0.jpg  Category: peolpe_and_villages_in_Africa  Quantidade:  8 / 100
Extraindo:  7.jpg  Category: peolpe_and_villages_in_Africa  Quantidade:  9 / 100
Extraindo:  62.jpg  Category: peolpe_and_villages_in_Africa  Quantidade:  10 / 100
Extraindo:  53.jpg  Category: peolpe_and_villages_in_Africa  Quantidade:  11 / 100
Extraindo:  68.jpg  Cate

In [5]:
len(dataset_files)

1000

In [6]:
df_data['file'] = dataset_files
df_data = df_data.rename(columns={32: 'category'})
df_data

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,24,25,26,27,28,29,30,31,category,file
0,574.0,2236.0,4637.0,3915.0,3379.0,2639.0,2625.0,2977.0,3028.0,3587.0,...,1909.0,2305.0,2723.0,3589.0,2642.0,34.0,0.0,0.0,peolpe_and_villages_in_Africa,peolpe_and_villages_in_Africa3.jpg
1,1081.0,8599.0,9969.0,9312.0,7402.0,6416.0,6238.0,5939.0,5444.0,5003.0,...,668.0,678.0,606.0,476.0,642.0,178.0,6.0,0.0,peolpe_and_villages_in_Africa,peolpe_and_villages_in_Africa6.jpg
2,708.0,6124.0,6721.0,5634.0,4761.0,4828.0,4520.0,5116.0,5312.0,4768.0,...,754.0,1242.0,2156.0,2187.0,1388.0,289.0,30.0,2.0,peolpe_and_villages_in_Africa,peolpe_and_villages_in_Africa9.jpg
3,1836.0,7185.0,7288.0,6203.0,4913.0,4638.0,4488.0,4612.0,4571.0,4341.0,...,2230.0,1281.0,1192.0,1957.0,2623.0,295.0,22.0,1.0,peolpe_and_villages_in_Africa,peolpe_and_villages_in_Africa2.jpg
4,474.0,5308.0,5365.0,5973.0,4633.0,3769.0,3831.0,4145.0,5076.0,6648.0,...,884.0,782.0,595.0,424.0,203.0,36.0,9.0,0.0,peolpe_and_villages_in_Africa,peolpe_and_villages_in_Africa1.jpg
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,7660.0,9472.0,5368.0,3287.0,2762.0,2396.0,2423.0,2674.0,3820.0,5185.0,...,888.0,1782.0,402.0,329.0,470.0,198.0,88.0,25.0,bus,bus318.jpg
996,19849.0,9179.0,6128.0,4480.0,3345.0,2730.0,2523.0,2446.0,2176.0,2026.0,...,463.0,632.0,1184.0,5786.0,2825.0,904.0,254.0,31.0,bus,bus351.jpg
997,8085.0,5542.0,4570.0,8360.0,5129.0,3658.0,2843.0,2523.0,2832.0,2357.0,...,5892.0,6448.0,4347.0,2595.0,1373.0,535.0,129.0,20.0,bus,bus313.jpg
998,3682.0,6198.0,5449.0,7251.0,10311.0,9382.0,6727.0,5500.0,3976.0,2508.0,...,2282.0,3116.0,1186.0,1480.0,2163.0,571.0,190.0,8.0,bus,bus314.jpg


In [7]:
output_path = "OUTPUT_PATH/name_dataset.csv"
df_data.to_csv(output_path, index=False)