<a href="https://colab.research.google.com/github/paulokuriki/remove_logo_from_images/blob/main/remove_logo_logo_from_images.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Uses the cv2 library to search for logotypes burned in images, drawing rectangles to cover them

Disclaimer: As with all automated techniques, identifying images is subject to failures. A human double-check is always advisable.

<img src="https://raw.githubusercontent.com/paulokuriki/remove_logo_from_images/main/logo.png">

## How to use this code:

### Step 1

1. First, fill the list of dictionaries ```logos``` with every logotype that will be searched for in the images.
2. Set the ```reading_path``` variable with the folder containing the png files that will be scanned. E.g., this will be the path containing all chest x-rays.
3. Run the script and inspect the reported scores to choose the threshold that best works as a cut-off to classify an image as "with the logo" or "without the logo."
4. Images classified as "with the logo" will have a rectangle applied, covering the logotype. Those images will be stored in the folder set as ```export_path```.
5. You can customize the rectangle color and add an extra border around the rectangle to guarantee the logotype is fully covered.

### Step 2

After choosing the threshold, set it in the dictionary and rerun the test.

### Step 3

If the results are reasonable, you can set ```cap_limit_files``` to 0 so the script will run over all images.

### Optionally

If you have many images to process, you can set ```use_multiprocessing = True```. The script will not show the samples in this mode, prioritizing performance.



In [None]:
%matplotlib inline

# import the necessary packages
import os
import glob
import time
import copy
import errno
import multiprocessing
from multiprocessing import Pool

import cv2
import imutils
import numpy as np
import pandas as pd
from scipy import stats
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors 

# shorthand for matplotlib's single-letter base colors; used below as c['r'], c['k'], ...
c = mcolors.BASE_COLORS

In [None]:
# ---------------------------------------------------------------------------
# User configuration. Every name defined in this cell is read as a module-level
# global by the cells below.
# ---------------------------------------------------------------------------

# uses multiple cores to speed up processing.
# depending on the number of cores it runs much, much faster
use_multiprocessing = False

# a list of dictionaries containing the logos and the
# thresholds used to classify each image as positive or negative.
# an optional "border" key (pixels, default 20) enlarges the covering rectangle
logos = [
    {"filename": 'logotype_1.png', "threshold": 20},
    {"filename": 'logotype_2.png', "threshold": 30},
]

# folder containing the png files to be read
reading_path = '../pngs_dasa'
# used for testing purposes. set 0 to remove the limit
cap_limit_files = 100

# prefix of the folder where anonymized images will be saved to.
# the logo base name is appended to it, one folder per logo
export_path = '../pngs_dasa/anonymized'
# asks for confirmation before erasing previously anonymized files
confirm_before_erase = True

# color used to fill the rectangle that anonymizes the logo
# c['r'] red   c['g'] green  c['b'] blue    c['k'] black
# Note: if this parameter is '' (or None), the mode color of the image is used instead
filling_color = c['k']  # -> black

# color used to draw the rectangle outline. has effect only when use_multiprocessing = False
rectangle_color = c['r']  # -> red

# shows only positive cases based on the threshold
# has effect only when use_multiprocessing = False
show_only_positive = True

# converts the color components to the 0-255 range.
# the loop variable is deliberately not called `c`, to avoid confusion with the palette above,
# and an unset filling color ('' or None) becomes an empty list (meaning "use the mode color")
rectangle_color = [channel * 255 for channel in rectangle_color]
filling_color = [channel * 255 for channel in (filling_color or ())]

In [None]:
def colored_score_msg(score, threshold, img_filename):
    """Build the ANSI-colored, one-line score message for an image.

    The color encodes how close the score is to the threshold:
    blue when ``score >= threshold`` (the image is saved), green when it
    reaches 66% of the threshold, yellow when it reaches 33%, red otherwise.

    Args:
        score: match score computed for the image.
        threshold: cut-off used to classify the image as positive.
        img_filename: name of the image, included verbatim in the message.

    Returns:
        The message wrapped in an ANSI color code and terminated by the ANSI
        reset code, so the color does not bleed into whatever is printed next.
    """
    p_red = '\033[91m'
    p_green = '\033[92m'
    p_yellow = '\033[93m'
    p_blue = '\033[94m'
    p_reset = '\033[0m'

    if score >= threshold:
        text = f'{p_blue}SCORE: {score} >= THRESHOLD: {threshold}   Filename:{img_filename} SAVED'
    else:
        # negative cases share one message; only the color changes
        if score >= threshold * 0.66:
            color = p_green
        elif score >= threshold * 0.33:
            color = p_yellow
        else:
            color = p_red
        text = f'{color}score: {score} < threshold: {threshold}  filename:{img_filename}'

    return text + p_reset

def calculate_mode(img):
    """Return the most frequent value of each color channel as ``[r, g, b]``.

    Optional helper, used when no filling color was provided.

    Args:
        img: 3-channel image array indexed as ``img[row, col, channel]``.
            Channel 0 is treated as blue, 1 as green and 2 as red.

    Returns:
        A list of three Python ints ``[red_mode, green_mode, blue_mode]``.
        When several values are tied, the smallest one is returned.
    """

    def channel_mode(channel):
        # np.unique returns the values sorted ascending and argmax returns the
        # first maximum, so ties resolve to the smallest value. Using numpy
        # directly avoids depending on the shape scipy's `mode` result has,
        # which varies between scipy versions.
        values, counts = np.unique(channel, return_counts=True)
        return int(values[counts.argmax()])

    blue_mode = channel_mode(img[:, :, 0])
    green_mode = channel_mode(img[:, :, 1])
    red_mode = channel_mode(img[:, :, 2])

    # mode as a list in the format [r, g, b]
    return [red_mode, green_mode, blue_mode]

In [None]:
def _find_best_match(gray, logo_img, visualize_iterations=False):
    """Search ``logo_img`` inside ``gray`` at several scales and return the best match.

    Returns a tuple ``(max_val, max_loc, ratio)`` for the scale with the highest
    correlation, where ``ratio`` converts coordinates of the resized image back
    to the original one, or ``None`` when the image is smaller than the logo
    even at the largest scale tried.
    """
    (logo_h, logo_w) = logo_img.shape[:2]
    found = None

    # tries 30 scales, from 30% bigger down to 30% smaller (1.3 -> 0.7)
    for scale in np.linspace(0.7, 1.3, 30)[::-1]:
        # resize the image according to the scale, and keep track
        # of the ratio of the resizing
        resized = imutils.resize(gray, width=int(gray.shape[1] * scale))
        ratio = gray.shape[1] / float(resized.shape[1])

        # scales only get smaller from here, so once the resized image
        # is smaller than the logo no further scale can match
        if resized.shape[0] < logo_h or resized.shape[1] < logo_w:
            break

        # template matching is applied on the raw grayscale pixels (no edge detection)
        result = cv2.matchTemplate(resized, logo_img, cv2.TM_CCOEFF)
        (_, max_val, _, max_loc) = cv2.minMaxLoc(result)

        if visualize_iterations:
            # draws a bounding box around the region detected at this scale.
            # useful just to understand the logic
            clone = np.dstack([resized, resized, resized])
            cv2.rectangle(clone, (max_loc[0], max_loc[1]),
                          (max_loc[0] + logo_w, max_loc[1] + logo_h), rectangle_color, 2)
            plt.imshow(clone)
            plt.show()

        # keeps the scale with the highest correlation value
        if found is None or max_val > found[0]:
            found = (max_val, max_loc, ratio)

    return found


def searches_logo(img_filename, logo_img, threshold, border, export_path_anon, show_only_positive,
                  visualize_iterations=False):
    """Search one image for a logo and save an anonymized copy when it is found.

    This is the core code. Besides its arguments it reads the module-level
    settings ``use_multiprocessing``, ``filling_color`` and ``rectangle_color``.

    Args:
        img_filename: original file that may contain a logo.
        logo_img: grayscale array of the logo that will be searched inside img_filename.
        threshold: score cut-off used to classify the image as positive or negative.
        border: pixels added around the matched region to guarantee coverage.
        export_path_anon: folder where anonymized files will be saved.
        show_only_positive: when true, negative cases are not displayed.
        visualize_iterations: when true, plots the best match of every scale tried.

    Returns:
        A tuple ``(img_filename, png_file, score)``. ``png_file`` is the path of the
        anonymized image, or ``""`` when the score was below the threshold.

    Raises:
        ValueError: if ``img_filename`` cannot be read as an image.
    """
    bgr = cv2.imread(img_filename)
    if bgr is None:
        # cv2.imread signals failure by returning None instead of raising
        raise ValueError(f'Could not read image file: {img_filename}')

    gray = cv2.cvtColor(bgr, cv2.COLOR_BGR2GRAY)
    # OpenCV loads images as BGR while matplotlib (used to display and save) and the
    # configured colors are RGB, so everything drawn/shown/saved below uses an RGB copy
    image = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)

    found = _find_best_match(gray, logo_img, visualize_iterations)
    if found is None:
        # the image is smaller than the logo at every scale: it cannot contain it
        return (img_filename, "", 0)

    # unpack the best match and compute the (x, y) coordinates
    # of the bounding box based on the resized ratio
    (logo_h, logo_w) = logo_img.shape[:2]
    (max_val, max_loc, ratio) = found
    (start_x, start_y) = (int(max_loc[0] * ratio), int(max_loc[1] * ratio))
    (end_x, end_y) = (int((max_loc[0] + logo_w) * ratio), int((max_loc[1] + logo_h) * ratio))
    top_left = (start_x - border, start_y - border)
    bottom_right = (end_x + border, end_y + border)

    # takes the score. it will be used to filter results
    score = int(max_val / 100_000)
    positive_case = (score >= threshold)

    # uses the filling color defined at the beginning, or falls back to the
    # mode color of the image (calculate_mode takes the BGR array and returns [r, g, b])
    if filling_color:
        fill_color = filling_color
    else:
        fill_color = calculate_mode(bgr)

    # copy with the matched region covered by a filled rectangle
    image_filled = image.copy()
    cv2.rectangle(image_filled, top_left, bottom_right, fill_color, -1)

    if not use_multiprocessing and (positive_case or not show_only_positive):
        # firulice mode on. paints the msg with colors depending on the score
        print(colored_score_msg(score, threshold, os.path.basename(img_filename)))

        # copy with only the outline of the matched region
        image_rect = image.copy()
        cv2.rectangle(image_rect, top_left, bottom_right, rectangle_color, 2)

        fig, axes = plt.subplots(1, 3, figsize=(10, 3))
        for ax, picture in zip(axes, (image, image_rect, image_filled)):
            ax.axis('off')
            ax.imshow(picture)
        plt.show()

    # saves the image if the score was above the threshold
    png_file = ""
    if positive_case:
        png_file = os.path.join(export_path_anon, os.path.basename(img_filename))
        plt.imsave(png_file, image_filled, cmap='gray')

    return (img_filename, png_file, score)

In [None]:
def show_logo(logo_img):
    """Display the logo in a small (2x2) figure, in grayscale and without axes."""
    _, logo_axes = plt.subplots(nrows=1, ncols=1, figsize=(2, 2))
    logo_axes.set_axis_off()

    # drawn through pyplot, i.e. on the axes that were just created
    plt.imshow(logo_img, cmap='gray')
    plt.show()

In [None]:
def process_filelist(filelist, logo_file, threshold, border, show_only_positive):
    """Search every image of ``filelist`` for one logo and write a CSV report.

    Besides its arguments, this function reads the module-level settings
    ``use_multiprocessing`` and ``export_path_anon`` (the output folder).

    Args:
        filelist: paths of the images to be scanned.
        logo_file: path of the logo image that will be searched for.
        threshold: score cut-off used to classify an image as positive.
        border: pixels added around the matched region when covering it.
        show_only_positive: when true, negative cases are not displayed.

    Returns:
        None. Results are saved to ``report.csv`` inside ``export_path_anon``
        and a summary is printed.

    Raises:
        ValueError: if ``logo_file`` cannot be read as an image.
    """
    start = time.time()

    logo_img = cv2.imread(logo_file)
    if logo_img is None:
        # cv2.imread signals failure by returning None instead of raising
        raise ValueError(f'Could not read logo file: {logo_file}')
    logo_img = cv2.cvtColor(logo_img, cv2.COLOR_BGR2GRAY)

    print('Searching for the logo:', logo_file)
    show_logo(logo_img)

    if use_multiprocessing:
        # one argument tuple per file
        args = [(file, logo_img, threshold, border, export_path_anon, show_only_positive)
                for file in filelist]

        # leaves 2 cores free, but always uses at least one worker
        # (os.cpu_count() may return None or a value <= 2)
        n_workers = max(1, (os.cpu_count() or 1) - 2)
        with Pool(n_workers) as pool:
            results = pool.starmap(searches_logo, args)
    else:
        # runs individually, possibly to preview images.
        # every result is kept, so the report covers all files
        results = [
            searches_logo(file, logo_img, threshold, border, export_path_anon, show_only_positive)
            for file in filelist
        ]

    # creates a dataframe with results info
    processed = {}
    processed['original_image'] = [r[0] for r in results]
    processed['processed_image'] = [r[1] for r in results]
    processed['seach_score'] = [r[2] for r in results]
    df = pd.DataFrame(processed)
    df.sort_values(by=['original_image'], inplace=True)
    df.reset_index(drop=True, inplace=True)

    # saves a processing report as a csv file
    report_file = os.path.join(export_path_anon, 'report.csv')
    df.to_csv(report_file, index=False)

    # prints information about the results
    qtd_processed = len(df[df["processed_image"] != ''])
    print("Finished in", time.time() - start, 'secs')
    print(f'{len(df)} images processed. {qtd_processed} anonymized')
    print(f'CSV report saved to file:{report_file}\n\n')

    return

In [None]:
# Driver cell: selects the images and runs the search once per configured logo.
filelist = sorted(glob.glob(reading_path + "/*.png"))

if cap_limit_files > 0:
    filelist = filelist[0:cap_limit_files]

print(f'{len(filelist)} images selected for processing\n')

# iterates for each logo
for logo in logos:
    logo_file = logo['filename']
    threshold = logo['threshold']
    border = int(logo.get('border', 20))

    # checks if the logo file exists
    if not os.path.exists(logo_file):
        raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), logo_file)

    # takes the basename w/o extension (a name without extension is kept as is)
    base_logo_name = os.path.splitext(os.path.basename(logo_file))[0]

    # folder where anonymized images will be saved to.
    # process_filelist reads this module-level name
    export_path_anon = export_path + '_' + base_logo_name
    os.makedirs(export_path_anon, exist_ok=True)

    # previously anonymized pngs are erased, unless the user declines when asked
    erase_previous = True
    if confirm_before_erase:
        confirm = input(f'Erasing file in folder {export_path_anon} before starting processing. Confirm? (Y/N)')
        erase_previous = confirm.strip().upper() == 'Y'

    if erase_previous:
        for stale_png in glob.iglob(os.path.join(export_path_anon, '*.png')):
            os.remove(stale_png)

    process_filelist(filelist, logo_file, threshold, border, show_only_positive)


Kudos to Adrian Rosebrock
https://pyimagesearch.com/2015/01/26/multi-scale-template-matching-using-python-opencv/