In [None]:
from math import sqrt
from os import environ
from sys import exit

# TF_CPP_MIN_LOG_LEVEL has to be in the environment before TensorFlow is loaded,
# and keras_ocr imports TensorFlow, so set it ahead of the third-party imports.
environ['TF_CPP_MIN_LOG_LEVEL'] = '2' # Suppress warnings (from Tensorflow)

import cv2
import keras_ocr
import numpy as np
from matplotlib import pyplot as plt

# Test F2F with background text removal using Keras-OCR

Source: [Remove Text from Images using CV2 and Keras-OCR](https://towardsdatascience.com/remove-text-from-images-using-cv2-and-keras-ocr-24e7612ae4f4)


In [None]:
# --- Output ---------------------------------------------------------------
OUT_VIDEO_NAME = 'output_F2F_ML.avi'

# --- Switches -------------------------------------------------------------
# When True, rows 300-360 of the augmented layer and of its mask are set to
# BLACK right after they are loaded.
REMOVE_FIRST_TEXT_AL = False

# --- Pixel values ---------------------------------------------------------
BLACK, WHITE = 0, 255

# --- Tunables -------------------------------------------------------------
GAUSSIAN_BLUR = (5, 5)               # kernel size used to blur the augmented layer's mask
THRESHOLD_GOOD_MATCH = 0.6           # ratio applied to the 2nd-best match distance
BG_INTENSITY_MIN, BG_INTENSITY_MAX = 50, 125   # exclusive gray-level bounds for background matching
THRESHOLD_LOGO_MASK = 200            # logo mask values above this are pasted on the frame


def inits():
    """Open the input video and create the writer, SIFT detector and FLANN matcher.

    Returns
    -------
    tuple
        ``(cap, out, w, h, sift, flann)``: the opened ``cv2.VideoCapture``, a
        ``cv2.VideoWriter`` targeting ``OUT_VIDEO_NAME`` with the input's size
        and frame rate, the frame width and height, a SIFT detector and a
        FLANN-based matcher.

    Raises
    ------
    IOError
        If the input video cannot be opened or the output writer cannot be
        created. Without these checks the failure only surfaces later as an
        unrelated error, or as an empty output file.
    """
    in_path = 'imgs/Multiple View.avi'
    cap = cv2.VideoCapture(in_path)
    if not cap.isOpened():
        raise IOError("Could not open input video '%s'" % in_path)

    w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = cap.get(cv2.CAP_PROP_FPS)

    out = cv2.VideoWriter(OUT_VIDEO_NAME, cv2.VideoWriter_fourcc(*'DIVX'), fps, (w,h))
    if not out.isOpened():
        # Do not leak the capture when the writer could not be created
        cap.release()
        raise IOError("Could not open output video '%s' for writing" % OUT_VIDEO_NAME)

    sift = cv2.SIFT_create()
    # algorithm=1 is the value OpenCV's tutorials name FLANN_INDEX_KDTREE
    flann = cv2.FlannBasedMatcher(dict(algorithm=1, trees=5), dict(checks=50))
    return cap, out, w, h, sift, flann

def textBackgroundMatching(augmented_layer, frame=None):
    """Replace the text's background in the augmented layer with pixels of the frame.

    The upper half of a copy of the layer is blacked out, so only pixels in the
    lower half can be selected. Every pixel of that copy whose gray level lies
    strictly between ``BG_INTENSITY_MIN`` and ``BG_INTENSITY_MAX`` is then
    overwritten, in ``augmented_layer`` itself, with the pixel at the same
    position in ``frame``.

    Parameters
    ----------
    augmented_layer : RGB image array; modified in place.
    frame : RGB image array of the same shape, optional. When omitted, the
        notebook-level ``curr_frame`` is used, which is what existing callers
        rely on.

    Returns
    -------
    The same ``augmented_layer`` object, after modification.
    """
    if frame is None:
        # Backward compatibility with callers that pass only the layer
        frame = curr_frame

    text = augmented_layer.copy()
    text[:text.shape[0]//2, :] = BLACK
    text_gray = cv2.cvtColor(text, cv2.COLOR_RGB2GRAY)
    mask = (text_gray > BG_INTENSITY_MIN) & (text_gray < BG_INTENSITY_MAX)

    # Write to the argument, not to a global that merely happens to alias it
    augmented_layer[mask] = frame[mask]
    return augmented_layer

def splitAlMask(mask):
    """Split a mask into its upper (logo) and lower (text) halves.

    Parameters
    ----------
    mask : 2-D (or higher) array whose first axis is the image rows. It is not
        modified.

    Returns
    -------
    tuple
        ``(logo_mask, text_mask)``: two copies of ``mask``. In ``logo_mask`` the
        rows from the vertical midpoint downwards are set to ``BLACK``; in
        ``text_mask`` the rows above the midpoint are set to ``BLACK``.
    """
    # Use the mask's own height rather than the notebook-level `h`, so the
    # function works for any mask and does not depend on hidden state.
    half = mask.shape[0] // 2

    logo_mask = mask.copy()
    logo_mask[half:, :] = BLACK
    text_mask = mask.copy()
    text_mask[:half, :] = BLACK
    return logo_mask, text_mask

def removeAuthorText(img, pipeline=None):
    """Inpaint the author-name words that OCR recognizes in ``img``.

    Parameters
    ----------
    img : image array handed to the OCR pipeline and to ``cv2.inpaint``.
    pipeline : optional object with a ``recognize(images)`` method returning,
        per image, a list of ``(word, box)`` pairs where ``box`` holds four
        ``(x, y)`` corners. When omitted, a ``keras_ocr.pipeline.Pipeline`` is
        created on first use and cached on this function, so the models are
        not rebuilt on every call (i.e. on every video frame).

    Returns
    -------
    The inpainted image, or ``img`` itself when none of the author words was
    recognized.
    """
    if pipeline is None:
        pipeline = getattr(removeAuthorText, '_pipeline', None)
        if pipeline is None:
            pipeline = keras_ocr.pipeline.Pipeline()
            removeAuthorText._pipeline = pipeline

    author_words = {'richard', 'hartley', 'and', 'andrew', 'zisserman'}

    # generate (word, box) tuples
    prediction_groups = pipeline.recognize([img])
    mask = np.zeros(img.shape[:2], dtype="uint8")
    found = False

    for word, corners in prediction_groups[0]:
        if word not in author_words:
            continue

        x0, y0 = corners[0]
        x1, y1 = corners[1]
        x2, y2 = corners[2]
        x3, y3 = corners[3]

        # Midpoints of the 1-2 and 0-3 edges: the line joining them runs
        # through the middle of the word box.
        x_mid0, y_mid0 = int((x1 + x2)/2), int((y1 + y2)/2)
        x_mid1, y_mid1 = int((x0 + x3)/2), int((y0 + y3)/2)

        # Line thickness = length of the 1-2 edge; cv2.line rejects a
        # thickness of 0, so clamp degenerate boxes to 1.
        thickness = max(1, int(sqrt( (x2 - x1)**2 + (y2 - y1)**2 )))

        cv2.line(mask, (x_mid0, y_mid0), (x_mid1, y_mid1), 255, thickness)
        found = True

    # Inpaint once with the complete mask instead of once per word
    if found:
        img = cv2.inpaint(img, mask, 7, cv2.INPAINT_NS)

    return img


# Initializations
cap, out, w, h, sift, flann = inits()

# Everything below runs inside try/finally so the capture and the writer are
# released even when a frame fails to process.
try:
    # Read first frame, augmented layer and masks
    ret, prev_frame = cap.read()
    if not ret or prev_frame is None:
        raise IOError('Could not read the first frame of the input video.')
    prev_frame = cv2.cvtColor(prev_frame, cv2.COLOR_BGR2RGB)

    object_mask = cv2.imread('imgs/ObjectMask.PNG', cv2.IMREAD_GRAYSCALE)
    al = cv2.imread('imgs/AugmentedLayer.PNG')
    al_mask = cv2.imread('imgs/AugmentedLayerMask.PNG', cv2.IMREAD_GRAYSCALE)

    # cv2.imread returns None (it does not raise) when a file cannot be read,
    # so check every input before indexing into it.
    for img_path, loaded in (('imgs/ObjectMask.PNG', object_mask),
                             ('imgs/AugmentedLayer.PNG', al),
                             ('imgs/AugmentedLayerMask.PNG', al_mask)):
        if loaded is None:
            raise IOError('Could not read ' + img_path)

    # Crop the augmented layer and its mask to the frame size
    al = al[:prev_frame.shape[0], :prev_frame.shape[1]]
    al = cv2.cvtColor(al, cv2.COLOR_BGR2RGB)
    al_mask = al_mask[:prev_frame.shape[0], :prev_frame.shape[1]]
    if REMOVE_FIRST_TEXT_AL:
        al[300:361, :] = BLACK
        al_mask[300:361, :] = BLACK

    # Blur augmented layer's mask for smoother edges on result
    al_mask = cv2.GaussianBlur(al_mask, GAUSSIAN_BLUR, 0)

    # Mask reference frame
    prev_frame[object_mask == BLACK] = BLACK

    # Find keypoints and compute descriptions in reference frame
    kp_prev, des_prev = sift.detectAndCompute(prev_frame, None)

    # Define the first homography as the identity matrix
    M = np.eye(3)

    frame_num = 0
    while cap.isOpened():
        ret, curr_frame = cap.read()
        if not ret or curr_frame is None:
            print('End of input video. Exiting...')
            break
        curr_frame = cv2.cvtColor(curr_frame, cv2.COLOR_BGR2RGB)

        # Find keypoints and compute descriptions in current frame
        kp_curr, des_curr = sift.detectAndCompute(curr_frame, None)

        # detectAndCompute yields None descriptors when no keypoint is found,
        # and a k=2 match needs at least two candidates to compare.
        if des_prev is None or des_curr is None or len(des_curr) < 2:
            print("Not enough good matches. Aborting...")
            break

        matches = flann.knnMatch(des_prev, des_curr, k=2)

        # Ratio test; skip entries for which fewer than two neighbours came back
        good = [pair[0] for pair in matches
                if len(pair) == 2
                and pair[0].distance < THRESHOLD_GOOD_MATCH*pair[1].distance]

        if len(good) < 4:
            print("Not enough good matches. Aborting...")
            break

        # building the correspondence arrays of good matches
        src_pts = np.float32([kp_prev[m.queryIdx].pt for m in good ]).reshape(-1,1,2)
        dst_pts = np.float32([kp_curr[m.trainIdx].pt for m in good ]).reshape(-1,1,2)

        # Estimate a robust homography with RANSAC and multiply it with previous homography
        M_new, inlier_mask = cv2.findHomography(src_pts, dst_pts, cv2.RANSAC, 5.0)
        if M_new is None:
            print("Homography estimation failed. Aborting...")
            break
        M = M_new @ M

        # Warp augmented layer and masks
        warped_al = cv2.warpPerspective(al, M, (w,h))
        warped_al_mask = cv2.warpPerspective(al_mask, M, (w,h), flags=cv2.INTER_NEAREST)
        warped_object_mask = cv2.warpPerspective(object_mask, M, (w,h))

        # Split augmented layer's mask into its two parts
        warped_al_logo_mask, warped_al_text_mask = splitAlMask(warped_al_mask)

        curr_frame = removeAuthorText(curr_frame)

        # Match text's background in the augmented layer with the one on the book
        warped_al = textBackgroundMatching(warped_al)

        # NOTE(review): prev_frame is masked here but is not read again below;
        # the keypoints carried into the next iteration come from the unmasked
        # curr_frame detected above. Confirm whether masked keypoints were intended.
        prev_frame = np.copy(curr_frame)
        prev_frame[warped_object_mask==BLACK] = BLACK

        kp_prev = tuple(kp_curr)
        des_prev = np.copy(des_curr)

        # Place both parts of augmented layer on current frame and write to video
        logo_pixels = warped_al_logo_mask > THRESHOLD_LOGO_MASK
        text_pixels = warped_al_text_mask == WHITE
        curr_frame[logo_pixels] = warped_al[logo_pixels]
        curr_frame[text_pixels] = warped_al[text_pixels]

        out.write(cv2.cvtColor(curr_frame, cv2.COLOR_RGB2BGR))

        frame_num += 1
        if frame_num % 15 == 0:
            # NOTE(review): 27*15 hard-codes the clip as 27 s at 15 fps - confirm
            print(round((27*15-frame_num)/15), "sec of video left.")
finally:
    cap.release()
    out.release()

In [None]:
def removeAuthorText(img, pipeline=None):
    """Inpaint the author-name words that OCR recognizes in ``img``.

    Parameters
    ----------
    img : image array handed to the OCR pipeline and to ``cv2.inpaint``.
    pipeline : optional object with a ``recognize(images)`` method returning,
        per image, a list of ``(word, box)`` pairs where ``box`` holds four
        ``(x, y)`` corners. When omitted, a ``keras_ocr.pipeline.Pipeline`` is
        created on first use and cached on this function, so repeated calls
        do not rebuild the models.

    Returns
    -------
    The inpainted image, or ``img`` itself when none of the author words was
    recognized.
    """
    if pipeline is None:
        pipeline = getattr(removeAuthorText, '_pipeline', None)
        if pipeline is None:
            pipeline = keras_ocr.pipeline.Pipeline()
            removeAuthorText._pipeline = pipeline

    author_words = {'richard', 'hartley', 'and', 'andrew', 'zisserman'}

    # generate (word, box) tuples
    prediction_groups = pipeline.recognize([img])
    mask = np.zeros(img.shape[:2], dtype="uint8")
    found = False

    for word, corners in prediction_groups[0]:
        if word not in author_words:
            continue

        x0, y0 = corners[0]
        x1, y1 = corners[1]
        x2, y2 = corners[2]
        x3, y3 = corners[3]

        # Midpoints of the 1-2 and 0-3 edges: the line joining them runs
        # through the middle of the word box.
        x_mid0, y_mid0 = int((x1 + x2)/2), int((y1 + y2)/2)
        x_mid1, y_mid1 = int((x0 + x3)/2), int((y0 + y3)/2)

        # Line thickness = length of the 1-2 edge; cv2.line rejects a
        # thickness of 0, so clamp degenerate boxes to 1.
        thickness = max(1, int(sqrt( (x2 - x1)**2 + (y2 - y1)**2 )))

        cv2.line(mask, (x_mid0, y_mid0), (x_mid1, y_mid1), 255, thickness)
        found = True

    # Inpaint once with the complete mask instead of once per word
    if found:
        img = cv2.inpaint(img, mask, 7, cv2.INPAINT_NS)

    return img

# Load the reference frame (OpenCV reads BGR) and convert it to RGB
img_in = cv2.cvtColor(cv2.imread('imgs/ReferenceFrame.png'), cv2.COLOR_BGR2RGB)

# Inpaint the author names and display the result
out = removeAuthorText(img_in)
plt.imshow(out)

In [None]:
def removeAuthorText(img, pipeline=None):
    """Inpaint the author-name words that OCR recognizes in ``img``.

    This variant also prints the four corners of every matched word box.

    Parameters
    ----------
    img : image array handed to the OCR pipeline and to ``cv2.inpaint``.
    pipeline : optional object with a ``recognize(images)`` method returning,
        per image, a list of ``(word, box)`` pairs where ``box`` holds four
        ``(x, y)`` corners. When omitted, a ``keras_ocr.pipeline.Pipeline`` is
        created on first use and cached on this function, so repeated calls
        do not rebuild the models.

    Returns
    -------
    The inpainted image, or ``img`` itself when none of the author words was
    recognized.
    """
    if pipeline is None:
        pipeline = getattr(removeAuthorText, '_pipeline', None)
        if pipeline is None:
            pipeline = keras_ocr.pipeline.Pipeline()
            removeAuthorText._pipeline = pipeline

    author_words = {'richard', 'hartley', 'and', 'andrew', 'zisserman'}

    # generate (word, box) tuples
    prediction_groups = pipeline.recognize([img])
    mask = np.zeros(img.shape[:2], dtype="uint8")
    found = False

    for word, corners in prediction_groups[0]:
        if word not in author_words:
            continue

        x0, y0 = corners[0]
        x1, y1 = corners[1]
        x2, y2 = corners[2]
        x3, y3 = corners[3]

        # Dump the box corners of the matched word
        print(x0, y0, ' \n', x1, y1, ' \n', x2, y2, ' \n', x3, y3, ' \n',)

        # Midpoints of the 1-2 and 0-3 edges: the line joining them runs
        # through the middle of the word box.
        x_mid0, y_mid0 = int((x1 + x2)/2), int((y1 + y2)/2)
        x_mid1, y_mid1 = int((x0 + x3)/2), int((y0 + y3)/2)

        # Line thickness = length of the 1-2 edge; cv2.line rejects a
        # thickness of 0, so clamp degenerate boxes to 1.
        thickness = max(1, int(sqrt( (x2 - x1)**2 + (y2 - y1)**2 )))

        cv2.line(mask, (x_mid0, y_mid0), (x_mid1, y_mid1), 255, thickness)
        found = True

    # Inpaint once with the complete mask instead of once per word
    if found:
        img = cv2.inpaint(img, mask, 7, cv2.INPAINT_NS)

    return img

# Load the reference frame (OpenCV reads BGR) and convert it to RGB
img_in = cv2.cvtColor(cv2.imread('imgs/ReferenceFrame.png'), cv2.COLOR_BGR2RGB)

# Inpaint the author names and display the result
out = removeAuthorText(img_in)
plt.imshow(out)

In [None]:
# Load the reference frame and convert the freshly loaded BGR image to RGB.
# Converting `img` itself keeps the cell independent of `img_in` from earlier cells.
img = cv2.imread('imgs/ReferenceFrame.png')
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)


def removeAuthorText(img, M, corners_org):
    """Inpaint the quadrilateral obtained by mapping ``corners_org`` through ``M``.

    Parameters
    ----------
    img : image array to inpaint.
    M : transformation matrix passed to ``cv2.perspectiveTransform``.
    corners_org : corner points in the layout ``cv2.perspectiveTransform``
        accepts; the first four transformed points are used.

    Returns
    -------
    The result of ``cv2.inpaint`` over a thick line drawn through the middle
    of the transformed box.
    """
    quad = cv2.perspectiveTransform(corners_org, M)[0]
    (ax, ay), (bx, by) = quad[0], quad[1]
    (cx, cy), (dx, dy) = quad[2], quad[3]

    # Endpoints of the centre line: midpoints of the 1-2 and 0-3 edges
    start = (int((bx + cx)/2), int((by + cy)/2))
    end = (int((ax + dx)/2), int((ay + dy)/2))

    # Length of the 1-2 edge, plus one pixel of margin
    line_width = int(sqrt((cx - bx)**2 + (cy - by)**2)) + 1

    region = np.zeros(img.shape[:2], dtype="uint8")
    cv2.line(region, start, end, 255, line_width)

    return cv2.inpaint(img, region, 7, cv2.INPAINT_NS)

# Identity transform: the corners below are used as given
M = np.identity(3)

# Corners of the author-text box in the reference frame, shaped (1, N, 2)
text_corners = np.float32([[[232.0, 345.0],
                            [455.0, 345.0],
                            [455.0, 361.0],
                            [232.0, 361.0]]])

imgout = removeAuthorText(img, M, text_corners)

plt.imshow(imgout)
plt.show()