In [None]:
# keras-ocr will automatically download pretrained
# weights for the detector and recognizer.
# The whole cell is guarded on the `pipeline` global so that re-running it in a
# live kernel does not construct the pipeline a second time. Note that the
# imports sit inside the same guard, so they are skipped as well whenever
# `pipeline` already exists.
if 'pipeline' not in globals():
    # https://youtu.be/3RNPJbUHZKs
    """
    Remove text from images
    """

    # Third-party imaging / OCR dependencies used by the cells below.
    import matplotlib.pyplot as plt
    import keras_ocr
    from PIL import Image
    import cv2
    import math
    import numpy as np

    # File-system and networking helpers.
    from pathlib import Path
    import os
    import glob
    import requests
    import shutil
    
    # Shared keras-ocr pipeline; later cells read this module-level name.
    pipeline = keras_ocr.pipeline.Pipeline()

In [5]:


#General Approach.....
#Use keras OCR to detect text, define a mask around the text, and inpaint the
#masked regions to remove the text.
#To apply the mask we need to provide the coordinates of the starting and 
#the ending points of the line, and the thickness of the line

#The start point will be the mid-point between the top-left corner and 
#the bottom-left corner of the box. 
#the end point will be the mid-point between the top-right corner and the bottom-right corner.
#The following function does exactly that.
def midpoint(x1, y1, x2, y2):
    """Return the integer midpoint of the segment from (x1, y1) to (x2, y2).

    Each coordinate pair is averaged and the result is truncated with
    ``int``, so the returned ``(x, y)`` tuple always holds plain integers.
    """
    def halfway(a, b):
        # Average first, then truncate toward zero.
        return int((a + b) / 2)

    return (halfway(x1, x2), halfway(y1, y2))

#Main function that detects text and inpaints. 
#Inputs are the image path and keras_ocr pipeline
def inpaint_text(img_path, pipeline):
    """Detect text in an image with keras-ocr and inpaint over it.

    For every detected word a thick line is drawn onto a mask between the
    midpoints of two opposite sides of the word's box. Once all words are
    masked, the masked pixels are filled in with a single ``cv2.inpaint``
    call.

    Args:
        img_path: Image location handed to ``keras_ocr.tools.read``.
        pipeline: Object whose ``recognize([img])`` returns one list of
            ``(word, box)`` tuples per image, where ``box`` holds four
            ``(x, y)`` corner points.

    Returns:
        The inpainted image array, or ``False`` when no text was detected
        (callers test ``is False`` to fall back to the untouched file).
    """
    # read the image
    img = keras_ocr.tools.read(img_path)

    # Recognize text (and corresponding regions).
    # Each list of predictions in prediction_groups is a list of
    # (word, box) tuples.
    prediction_groups = pipeline.recognize([img])
    predictions = prediction_groups[0]
    if len(predictions) == 0:
        # Nothing to remove; keep the original sentinel return value.
        return False

    # Define the mask for inpainting
    mask = np.zeros(img.shape[:2], dtype="uint8")
    for prediction in predictions:
        # NOTE(review): keras-ocr presumably orders corners clockwise from
        # top-left (0=TL, 1=TR, 2=BR, 3=BL) -- confirm against its docs.
        (x0, y0), (x1, y1), (x2, y2), (x3, y3) = prediction[1][:4]

        # Midpoints of the 1-2 side and of the 0-3 side: the two ends of the
        # line that runs through the middle of the word.
        x_mid0, y_mid0 = midpoint(x1, y1, x2, y2)
        x_mid1, y_mid1 = midpoint(x0, y0, x3, y3)

        # Line thickness is the length of the 1-2 side of the box (the side
        # perpendicular to the line being drawn). cv2.line rejects a
        # thickness of 0, so degenerate boxes are clamped to one pixel.
        thickness = max(1, int(math.sqrt((x2 - x1)**2 + (y2 - y1)**2)))

        cv2.line(mask, (x_mid0, y_mid0), (x_mid1, y_mid1), 255, thickness)

    # Inpaint once, after every box is on the mask. The source image is never
    # modified inside the loop, so this yields the same result as inpainting
    # after each box while doing the expensive work a single time.
    return cv2.inpaint(img, mask, 7, cv2.INPAINT_NS)

# ================================================

def preprocess_visual_inpaint(inputFile):
  """Write a text-removed copy of ``inputFile`` to ``inputFile + '.inpaint.jpg'``.

  Returns ``False`` when ``inputFile`` is not an existing file. Returns
  ``None`` (doing nothing) when ``inputFile`` is itself an ``.inpaint.jpg`` /
  ``.crop.jpg`` artefact, or when either output for it already exists.
  Otherwise the image is run through ``inpaint_text`` with the module-level
  ``pipeline``; if that reports no text (``False``) the input file is copied
  verbatim, else the inpainted array is written with ``cv2.imwrite``.
  """
  if not os.path.isfile(inputFile):
    return False

  # Never re-process our own intermediate or final artefacts.
  if inputFile.endswith(('.inpaint.jpg', '.crop.jpg')):
    return

  inpaint_path = inputFile + '.inpaint.jpg'
  crop_path = inputFile + '.crop.jpg'

  # Either artefact being present means this image was already handled.
  if any(os.path.isfile(candidate) for candidate in (inpaint_path, crop_path)):
    return

  text_free = inpaint_text(inputFile, pipeline)
  if text_free is not False:
    # NOTE(review): the constant is named BGR2RGB, but it is the same channel
    # swap as RGB2BGR; presumably the array is RGB here and imwrite wants
    # BGR -- confirm against keras_ocr.tools.read.
    cv2.imwrite(inpaint_path, cv2.cvtColor(text_free, cv2.COLOR_BGR2RGB))
  else:
    # No text detected: the "inpainted" result is just the original bytes.
    shutil.copyfile(inputFile, inpaint_path)


Looking for /home/jovyan/.keras-ocr/craft_mlt_25k.h5
Looking for /home/jovyan/.keras-ocr/crnn_kurapan.h5


In [4]:
# Build the autocrop `Cropper` once per kernel: the cell is guarded on the
# `cropper` global, so re-running it leaves an existing cropper untouched.
# The imports sit inside the same guard and are skipped in that case too.
if 'cropper' not in globals():
    from pathlib import Path
    import os
    import glob
    import requests
    import shutil
    from PIL import Image
    from autocrop import Cropper
    import subprocess
    
    # NOTE(review): face_percent=70 is passed straight to autocrop; presumably
    # it controls how much of the output the face fills -- confirm in its docs.
    cropper = Cropper(face_percent=70)


# inputFiles = glob.glob("/2.output/**/*.inpaint.jpg", recursive=True)
# for inputFile in inputFiles:
def preprocess_visual_crop(inputFile):
  """Crop an ``*.inpaint.jpg`` intermediate into a sibling ``*.crop.jpg``.

  The image is cropped with the module-level ``cropper``. When
  ``cropper.crop`` returns ``None`` (per the original comment: no face was
  detected) the external ``smartcrop`` command is used instead to produce a
  256x256 crop. Images narrower than 300 px are first resized to 600x600 and
  cropped from a temporary copy. The ``.inpaint.jpg`` intermediate is deleted
  once cropping has run.

  Args:
    inputFile: Path string ending in ``.inpaint.jpg``.

  Returns:
    ``False`` if ``inputFile`` does not exist, does not end in
    ``.inpaint.jpg``, or cannot be decoded by ``cv2.imread``; otherwise
    ``None``.
  """
  # Local import: the notebook's guarded import cell does not pull in tempfile.
  import tempfile

  if not os.path.isfile(inputFile):
    return False

  if not inputFile.endswith('.inpaint.jpg'):
    return False

  # "<name>.inpaint.jpg" -> "<name>.crop.jpg"
  outputFile = inputFile[:-len('.inpaint.jpg')] + '.crop.jpg'

  if os.path.isfile(outputFile):
    # Already cropped earlier; only the stale intermediate needs removing.
    print('File is exsited: ' + outputFile)
    os.remove(inputFile)
    return

  image = cv2.imread(inputFile)
  if image is None:
    # cv2.imread reports an unreadable file by returning None instead of
    # raising; leave the file in place so the problem can be inspected.
    print('Cannot read image: ' + inputFile)
    return False

  # Keep the file handed to the croppers separate from `inputFile`, so the
  # final cleanup always removes the real intermediate rather than the
  # temporary upscaled copy.
  crop_source = inputFile
  tmp_path = None
  try:
    if image.shape[1] < 300:
      # Upscale small images before cropping; use a unique temp file so
      # concurrent runs cannot overwrite each other's data.
      image = cv2.resize(image, (600, 600), interpolation=cv2.INTER_LINEAR)
      fd, tmp_path = tempfile.mkstemp(suffix='.jpg')
      os.close(fd)
      cv2.imwrite(tmp_path, image)
      crop_source = tmp_path

    cropped_array = cropper.crop(crop_source)

    if cropped_array is None:
      # Fall back to the smartcrop CLI for a 256x256 crop.
      subprocess.run(["smartcrop","-W","256","-H","256", "-i",crop_source, "-o",outputFile])
    else:
      # Channel order is swapped before handing the array to cv2.imwrite.
      cv2.imwrite(outputFile, cv2.cvtColor(cropped_array, cv2.COLOR_BGR2RGB))
  finally:
    # Always drop the temporary upscaled copy, even if cropping raised.
    if tmp_path is not None and os.path.isfile(tmp_path):
      os.remove(tmp_path)

  # The intermediate has served its purpose.
  os.remove(inputFile)

In [2]:
def preprocess_visual(image_path):
    """Run both preprocessing stages for ``image_path``, in order.

    The image is first passed to ``preprocess_visual_inpaint``; the crop
    stage is then invoked on ``image_path + '.inpaint.jpg'``. Neither
    stage's return value is inspected and nothing is returned.
    """
    preprocess_visual_inpaint(image_path)

    # The crop stage consumes the intermediate named after the source image.
    inpainted_path = image_path + '.inpaint.jpg'
    preprocess_visual_crop(inpainted_path)

In [1]:
# preprocess_visual_crop("./input/data/a/10th-Zoom-Meeting.avif.inpaint.jpg")

In [7]:
# !smartcrop --help

usage: smartcrop [-h] -W WIDTH -H HEIGHT -i IMAGE -o OUTPUT [-n]

options:
  -h, --help            show this help message and exit
  -W WIDTH, --width WIDTH
                        Target width
  -H HEIGHT, --height HEIGHT
                        Target height
  -i IMAGE, --image IMAGE
                        Image to crop
  -o OUTPUT, --output OUTPUT
                        Output
  -n, --no-resize       Don't resize image before treating it
