In [1]:
#-----------------------------------------
# DOWNLOAD AND ORGANIZE FLOWERS17 DATASET
#-----------------------------------------
import os
import glob
import datetime
import tarfile
import urllib.request

def download_dataset(filename, url, work_dir):
  """Fetch the dataset archive and unpack it, unless the archive already exists.

  Args:
    filename: archive file name; it is appended to ``url`` to form the
      download address and is also used as the local destination path.
    url: base URL that ``filename`` is appended to.
    work_dir: directory handed to ``untar`` as the extraction target.

  When ``filename`` already exists locally nothing happens at all: neither
  the download nor the extraction is performed.
  """
  # Guard clause: an existing archive short-circuits the whole function.
  if os.path.exists(filename):
    return

  print("[INFO] Downloading flowers17 dataset....")
  filename, _ = urllib.request.urlretrieve(url + filename, filename)
  size_in_bytes = os.path.getsize(filename)
  print("[INFO] Succesfully downloaded " + filename + " " + str(size_in_bytes) + " bytes.")
  untar(filename, work_dir)

def jpg_files(members):
  """Yield, in order, the entries of ``members`` whose name has the extension ``.jpg``.

  Each entry only needs a ``name`` attribute. The comparison is an exact,
  case-sensitive match on the extension reported by ``os.path.splitext``.
  """
  for entry in members:
    _, extension = os.path.splitext(entry.name)
    if extension != ".jpg":
      continue
    yield entry

def untar(fname, path):
  """Extract the ``.jpg`` members of the tar archive ``fname`` into ``path``.

  Args:
    fname: path of the tar archive to open.
    path: directory the selected members are extracted into.

  Only the members selected by ``jpg_files`` are extracted. Errors raised
  by ``tarfile`` propagate to the caller; the success message is printed
  only after extraction completed.
  """
  # The archive is downloaded over plain HTTP, so opt into tarfile's "data"
  # extraction filter (rejects members that would land outside ``path``)
  # whenever the running Python provides it; older versions keep the
  # previous unfiltered behaviour.
  extract_kwargs = {"filter": "data"} if hasattr(tarfile, "data_filter") else {}

  # The context manager closes the archive even when extraction raises.
  with tarfile.open(fname) as tar:
    tar.extractall(path=path, members=jpg_files(tar), **extract_kwargs)
  print("[INFO] Dataset extracted successfully.")

#-------------------------
# DATASET ORGANISATION
#-------------------------
def organize_by_class(train_root, class_names, images_per_class=80):
  """Move the flat ``*.jpg`` files of ``train_root`` into one folder per class.

  The images directly inside ``train_root`` are taken in sorted file-name
  order and cut into consecutive runs of ``images_per_class``; the n-th run
  is moved into ``train_root/<class_names[n]>`` and its files are renamed
  ``1.jpg``, ``2.jpg``, ... in that order.

  Args:
    train_root: directory holding the flat image files.
    class_names: class folder names, in the order of the image runs.
    images_per_class: number of consecutive images belonging to each class.

  Calling it again on an already organised directory is harmless: the class
  folders are reused and there are no flat images left to move.
  """
  # glob gives no ordering guarantee; the class assignment below relies on
  # file-name order, so sort explicitly.
  image_paths = sorted(glob.glob(os.path.join(train_root, "*.jpg")))

  for label, class_name in enumerate(class_names):
    # create a folder for that class (kept if it already exists)
    class_dir = os.path.join(train_root, class_name)
    os.makedirs(class_dir, exist_ok=True)

    # move this class's run of images, renumbering them from 1
    start = label * images_per_class
    class_images = image_paths[start:start + images_per_class]
    for index, image_path in enumerate(class_images, start=1):
      os.rename(image_path, os.path.join(class_dir, str(index) + ".jpg"))

#-------------------------
# MAIN FUNCTION
#-------------------------
if __name__ == '__main__':
  flowers17_url  = "http://www.robots.ox.ac.uk/~vgg/data/flowers/17/"
  flowers17_name = "17flowers.tgz"
  train_dir      = "dataset"

  os.makedirs(train_dir, exist_ok=True)

  download_dataset(flowers17_name, flowers17_url, train_dir)

  # the extracted images end up in "<train_dir>/jpg"; rename that folder to
  # "train". os.path.join keeps the paths valid on every platform instead of
  # hard-coding the Windows separator.
  extracted_dir = os.path.join(train_dir, "jpg")
  train_root    = os.path.join(train_dir, "train")
  if os.path.exists(extracted_dir):
    os.rename(extracted_dir, train_root)

  # get the class label limit
  class_limit = 17

  # flower17 class names, in the order their image runs appear in the dataset
  class_names = ["daffodil", "snowdrop", "lilyvalley", "bluebell", "crocus",
                 "iris", "tigerlily", "tulip", "fritillary", "sunflower",
                 "daisy", "coltsfoot", "dandelion", "cowslip", "buttercup",
                 "windflower", "pansy"]

  # sort the flat images into one folder per class, 80 images each
  organize_by_class(train_root, class_names[:class_limit], images_per_class=80)

[INFO] Downloading flowers17 dataset....
[INFO] Succesfully downloaded 17flowers.tgz 60270631 bytes.
[INFO] Dataset extracted successfully.


In [5]:
#-----------------------------------
# GLOBAL FEATURE EXTRACTION
#-----------------------------------
!pip install opencv-python
!pip install h5py
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import MinMaxScaler
import numpy as np
import mahotas
import cv2
import os
import h5py

#--------------------
# tunable-parameters
#--------------------
# number of images read from every class folder (files 1.jpg .. N.jpg)
images_per_class = 80
# size handed to cv2.resize for every image before feature extraction
fixed_size = (500, 500)
# root folder containing one sub-folder per class
train_path = "dataset/train"
# HDF5 output locations (not referenced by the cells shown in this notebook)
h5_data = 'output/data.h5'
h5_labels = 'output/labels.h5'
# bins per channel used by the color histogram descriptor
bins = 8

Collecting opencv-python
  Downloading opencv_python-4.5.3.56-cp38-cp38-win_amd64.whl (34.9 MB)
Installing collected packages: opencv-python
Successfully installed opencv-python-4.5.3.56
Collecting h5py
  Downloading h5py-3.4.0-cp38-cp38-win_amd64.whl (2.8 MB)
Installing collected packages: h5py
Successfully installed h5py-3.4.0


In [6]:
# feature-descriptor-1: Hu Moments
def fd_hu_moments(image):
    """Return the flattened Hu moments of ``image``.

    The image is converted from BGR to grayscale first; the moments of that
    grayscale image are then turned into Hu moments and flattened to 1-D.
    """
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    raw_moments = cv2.moments(gray)
    return cv2.HuMoments(raw_moments).flatten()

# feature-descriptor-2: Haralick Texture
def fd_haralick(image):
    """Return the Haralick texture feature vector of ``image``.

    The image is converted from BGR to grayscale, passed to
    ``mahotas.features.haralick`` and the result is averaged along axis 0.
    """
    grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    texture = mahotas.features.haralick(grayscale)
    # collapse axis 0 (presumably one row per direction -- confirm in mahotas docs)
    return texture.mean(axis=0)

# feature-descriptor-3: Color Histogram
def fd_histogram(image, mask=None):
    """Return the normalized, flattened 3-D HSV color histogram of ``image``.

    Args:
        image: BGR image (it is converted with ``cv2.COLOR_BGR2HSV``).
        mask: optional mask forwarded to ``cv2.calcHist``; with the default
            ``None`` the whole image contributes to the histogram.

    Returns:
        1-D array with ``bins ** 3`` entries, where ``bins`` is the
        module-level setting.
    """
    # convert the image to HSV color-space
    hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
    # compute the color histogram over all three channels; the caller's mask
    # is honoured (it used to be silently replaced by None)
    hist = cv2.calcHist([hsv], [0, 1, 2], mask, [bins, bins, bins], [0, 256, 0, 256, 0, 256])
    # normalize the histogram in place
    cv2.normalize(hist, hist)
    # return the histogram as a flat vector
    return hist.flatten()

# get the training labels: one per class sub-folder of train_path, sorted.
# Entries that are not directories are skipped so stray files are not
# mistaken for classes.
train_labels = sorted(
    name for name in os.listdir(train_path)
    if os.path.isdir(os.path.join(train_path, name))
)
print(train_labels)

# empty lists to hold feature vectors and labels
global_features = []
labels          = []

# loop over the training data sub-folders
for training_name in train_labels:
    # join the training data path and each species training folder
    # (named class_dir rather than `dir` so the builtin is not shadowed)
    class_dir = os.path.join(train_path, training_name)

    # get the current training label
    current_label = training_name

    # loop over the images in each sub-folder (files are named 1.jpg .. N.jpg)
    for x in range(1, images_per_class + 1):
        # get the image file name
        image_file = os.path.join(class_dir, str(x) + ".jpg")

        # read the image; cv2.imread signals failure by returning None, so
        # fail here with the offending path instead of inside cv2.resize
        image = cv2.imread(image_file)
        if image is None:
            raise FileNotFoundError("[ERROR] could not read image: {}".format(image_file))

        # resize it to a fixed-size
        image = cv2.resize(image, fixed_size)

        ####################################
        # Global Feature extraction
        ####################################
        fv_hu_moments = fd_hu_moments(image)
        fv_haralick   = fd_haralick(image)
        fv_histogram  = fd_histogram(image)

        ###################################
        # Concatenate global features
        ###################################
        global_feature = np.hstack([fv_histogram, fv_haralick, fv_hu_moments])

        # update the list of labels and feature vectors
        labels.append(current_label)
        global_features.append(global_feature)

    print("[STATUS] processed folder: {}".format(current_label))

print("[STATUS] completed Global Feature Extraction...")

['bluebell', 'buttercup', 'coltsfoot', 'cowslip', 'crocus', 'daffodil', 'daisy', 'dandelion', 'fritillary', 'iris', 'lilyvalley', 'pansy', 'snowdrop', 'sunflower', 'tigerlily', 'tulip', 'windflower']
[STATUS] processed folder: bluebell
[STATUS] processed folder: buttercup
[STATUS] processed folder: coltsfoot
[STATUS] processed folder: cowslip
[STATUS] processed folder: crocus
[STATUS] processed folder: daffodil
[STATUS] processed folder: daisy
[STATUS] processed folder: dandelion
[STATUS] processed folder: fritillary
[STATUS] processed folder: iris
[STATUS] processed folder: lilyvalley
[STATUS] processed folder: pansy
[STATUS] processed folder: snowdrop
[STATUS] processed folder: sunflower
[STATUS] processed folder: tigerlily
[STATUS] processed folder: tulip
[STATUS] processed folder: windflower
[STATUS] completed Global Feature Extraction...


In [13]:
# rescale every feature column into the range [0, 1]
scaler = MinMaxScaler(feature_range=(0, 1))
rescaled_features = scaler.fit_transform(global_features)
print("[STATUS] feature vector normalized...")

# report the overall feature matrix size and the number of training labels
feature_shape = np.array(rescaled_features).shape
label_shape = np.array(labels).shape
print("[STATUS] feature vector size {}".format(feature_shape))
print("[STATUS] training Labels {}".format(label_shape))

[STATUS] feature vector normalized...
[STATUS] feature vector size (1360, 532)
[STATUS] training Labels (1360,)


In [52]:
import pandas as pd


# wrap the rescaled feature matrix in a DataFrame (one row per image)
imagesFeatures = pd.DataFrame(np.array(rescaled_features))
# single-column frame of the labels (not used by the cells shown here)
imagesLabels = pd.DataFrame({"nome": np.array(labels)})

# NOTE: `df` is the same object as `imagesFeatures`, so inserting the label
# column below changes both names.
df = imagesFeatures
df.insert(0, 'Nome', np.array(labels))

df

Unnamed: 0,Nome,0,1,2,3,4,5,6,7,8,...,522,523,524,525,526,527,528,529,530,531
0,bluebell,0.010228,0.021757,0.239711,0.221516,0.040204,0.198253,0.432710,0.113347,0.002928,...,0.258128,0.312324,0.998066,0.169249,0.014490,0.002108,0.000083,0.162776,0.052071,0.971890
1,bluebell,0.000027,0.007609,0.007984,0.005197,0.031360,0.061618,0.252850,0.404851,0.000000,...,0.402339,0.437954,0.993650,0.116427,0.004168,0.001204,0.000301,0.162776,0.052075,0.971890
2,bluebell,0.000000,0.010668,0.010380,0.005201,0.039598,0.062982,0.276400,0.425594,0.000000,...,0.387881,0.426567,0.994352,0.116509,0.004225,0.001206,0.000300,0.162776,0.052076,0.971890
3,bluebell,0.000000,0.009452,0.007280,0.009415,0.025756,0.021549,0.164318,0.101586,0.000000,...,0.358897,0.424019,0.994300,0.133263,0.000965,0.000146,0.000091,0.162776,0.052074,0.971890
4,bluebell,0.821640,0.003500,0.023743,0.015770,0.053056,0.071690,0.149807,0.018348,0.010050,...,0.621838,0.569628,0.978109,0.368155,0.168270,0.335273,0.008620,0.163793,0.057370,0.971603
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1355,windflower,0.002483,0.000077,0.000232,0.002056,0.012016,0.044617,0.275577,0.035593,0.005530,...,0.479329,0.536547,0.984312,0.217434,0.035052,0.000713,0.000243,0.162775,0.052152,0.971890
1356,windflower,0.040529,0.000015,0.000219,0.002791,0.019978,0.077251,0.250285,0.326007,0.001430,...,0.471283,0.497904,0.987507,0.231478,0.003318,0.014421,0.001941,0.162758,0.052271,0.971873
1357,windflower,0.000187,0.001294,0.008877,0.000692,0.002391,0.047274,0.073293,0.002872,0.004845,...,0.215830,0.292072,0.997563,0.216032,0.011535,0.007733,0.003105,0.162807,0.052499,0.971872
1358,windflower,0.000000,0.000000,0.000000,0.000185,0.004419,0.031297,0.026558,0.017760,0.000000,...,0.811470,0.864563,0.754924,0.226627,0.001966,0.000801,0.000853,0.162775,0.052147,0.971892


In [53]:
# write the labelled feature table to disk, leaving out the row index
df.to_csv(path_or_buf='flowers_feature.csv', index=False)