In [1]:
import os
import glob
from IPython.display import clear_output
import cv2
import pandas as pd

In [2]:
# Dataset root folders, relative to the notebook's working directory.
# The "_flowers" folders mirror the base folders: the detection one is read
# from and the classification one is written to by create_class_data_flowers.
det_dir = "detection_data"
class_dir = "classification_data"
det_dir_flowers = det_dir + "_flowers"
class_dir_flowers = class_dir + "_flowers"

In [3]:
# Basenames (no directory, no extension) of every detection image.
# glob returns paths in arbitrary order, so sort to keep later slices such as
# `file_names_without_extension[:1]` reproducible between runs.
jpg_files = sorted(glob.glob(os.path.join(det_dir, 'images', '*.jpg')))
# os.path.basename handles both '/' and '\\' separators; splitting on '/' would
# leave the directory attached on Windows.
file_names = [os.path.basename(path) for path in jpg_files]
file_names_without_extension = [os.path.splitext(name)[0] for name in file_names]
len(file_names_without_extension)

3237

In [4]:
# Basenames of every classification crop, plus the detection-image prefix each
# crop name starts with (its first two '_'-separated tokens).
# NOTE: this cell rebinds `jpg_files` / `file_names` from the detection cell.
jpg_files = sorted(glob.glob(os.path.join(class_dir, 'images', '*.jpg')))
# os.path.basename handles both '/' and '\\' separators.
file_names = [os.path.basename(path) for path in jpg_files]
crops_without_extension = [os.path.splitext(name)[0] for name in file_names]
# Joining the first (up to) two tokens gives the same prefix as indexing [0] and
# [1], but does not raise IndexError on a stray file name without an underscore.
cropped_names = ['_'.join(name.split('_', 2)[:2]) for name in crops_without_extension]
len(cropped_names)

39445

In [5]:
# One-image sample for a quick trial run; slicing already yields a new list,
# so the full list is never mutated through `test`.
test = file_names_without_extension[:1]
# Optionally add a specific image to the sample:
# test.append('192_20200715165926')
test

['153_20200618121032']

In [6]:
import functools
import time

def timer(func):
    """Decorator that prints how long each call to ``func`` took, in minutes.

    The wrapped function's arguments and return value are passed through
    unchanged. The elapsed time is printed only when the call returns
    normally; if ``func`` raises, the exception propagates and nothing is
    printed.
    """
    @functools.wraps(func)  # keep func's __name__ / __doc__ on the wrapper
    def wrapper(*args, **kwargs):
        # perf_counter is monotonic, so the measured duration cannot be
        # distorted by system clock adjustments during a long run.
        start_time = time.perf_counter()
        result = func(*args, **kwargs)
        execution_time = time.perf_counter() - start_time
        print(f"Execution time: {round(execution_time/60, 1)} minutes")
        return result
    return wrapper

In [7]:
def _read_image(path, *flags):
    """Read an image with cv2.imread, raising instead of returning None."""
    image = cv2.imread(path, *flags)
    if image is None:
        raise OSError(f'Could not read image: {path}')
    return image


@timer
def create_class_data_flowers(class_dir=class_dir, det_dir=det_dir, det_dir_flowers=det_dir_flowers, class_dir_flowers=class_dir_flowers, file_names_without_extension=file_names_without_extension, crops_without_extension=crops_without_extension):
    """Re-cut every classification crop from the matching "flowers" detection image.

    Each crop is located inside the grayscale version of its detection image
    with template matching (TM_SQDIFF, best match = minimum). The rectangle at
    that position is then copied out of the same-named image under
    ``det_dir_flowers`` and written to ``class_dir_flowers/images`` using the
    crop's own file name.

    Args:
        class_dir: Folder whose ``images`` subfolder holds the existing crops.
        det_dir: Folder whose ``images`` subfolder holds the detection images
            the crops are matched against.
        det_dir_flowers: Folder whose ``images`` subfolder holds the images the
            new crops are cut from (same file names as in ``det_dir``).
        class_dir_flowers: Folder whose ``images`` subfolder receives the new
            crops; it is created if missing.
        file_names_without_extension: Detection image basenames to process.
        crops_without_extension: Crop basenames. A crop belongs to the
            detection image named by its first two '_'-separated tokens.

    Raises:
        OSError: If an input image cannot be read or a crop cannot be written.
    """
    detection_names = set(file_names_without_extension)

    # Group crops by their detection-image prefix in a single pass. An exact
    # prefix match (rather than a substring test) keeps e.g. '53_2020...' from
    # also picking up the crops of '153_2020...'.
    crops_by_file = {}
    for crop in crops_without_extension:
        prefix = '_'.join(crop.split('_', 2)[:2])
        if prefix in detection_names:
            crops_by_file.setdefault(prefix, []).append(crop)

    # Progress totals are taken from the work actually queued, so the
    # percentage always ends at 100 and the denominator is never zero.
    total_files = len(crops_by_file)
    total_anns = sum(len(crops) for crops in crops_by_file.values())
    current_anns = 0

    # cv2.imwrite only returns False when the target folder is missing, so
    # make sure it exists before starting a long run.
    out_dir = os.path.join(class_dir_flowers, 'images')
    os.makedirs(out_dir, exist_ok=True)

    for current_file, file in enumerate(sorted(crops_by_file), start=1):
        # cv2.imread returns BGR channel order; only the grayscale version of
        # the detection image is needed for matching.
        img_bgr = _read_image(os.path.join(det_dir, 'images', file + '.jpg'))
        img_flowers = _read_image(os.path.join(det_dir_flowers, 'images', file + '.jpg'))
        img_gray = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2GRAY)

        for crop in crops_by_file[file]:
            current_anns += 1
            clear_output(wait=True)
            print(f'Processing file {current_file} of {total_files}, (' + str(round( current_anns / total_anns * 100)) + '%)')

            # Read the template (existing crop) as grayscale
            template = _read_image(os.path.join(class_dir, 'images', crop + '.jpg'), 0)
            h, w = template.shape[:2]

            # With TM_SQDIFF the best match is the minimum of the result map
            res = cv2.matchTemplate(img_gray, template, cv2.TM_SQDIFF)
            _, _, min_loc, _ = cv2.minMaxLoc(res)
            x, y = min_loc

            # Cut the same rectangle out of the flowers image and save it
            template_flowers = img_flowers[y:y + h, x:x + w].copy()
            out_path = os.path.join(out_dir, crop + '.jpg')
            if not cv2.imwrite(out_path, template_flowers):
                raise OSError(f'Could not write image: {out_path}')

In [8]:
# Full run with the default (module-level) file and crop lists; the recorded
# run below took about 115 minutes.
create_class_data_flowers()
# Quick trial on the one-image `test` list instead:
# create_class_data_flowers(file_names_without_extension=test)

Processing file 2973 of 2973, (100%)
Execution time: 115.3 minutes
