In [1]:
import numpy as np
import cv2
from config import *

IMAGE_PATH="images/final.jpg"
img  = cv2.imread(IMAGE_PATH)

In [2]:
# Converting the image from BGR format to HSV
hsv_image = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)

In [3]:
# For white color
max_white = np.array([0,0,255])
min_white = np.array([0,0,230])
final_mask = cv2.inRange(hsv_image, min_white, max_white)

In [4]:
# Fetching the matching components in the original image
result = cv2.bitwise_and(img,img, mask=final_mask)

In [5]:
# Deriving Contours for the Image
contours, hierarchy = cv2.findContours(final_mask, cv2.RETR_EXTERNAL,  cv2.CHAIN_APPROX_NONE)

In [6]:
# Copying the image and checking whether contours are matching.
copy_img = img.copy()
cv2.drawContours(copy_img, contours, -1, (0,255,0), 1)

array([[[39, 47, 76],
        [39, 47, 76],
        [39, 47, 76],
        ...,
        [ 8,  6,  5],
        [ 8,  6,  5],
        [ 8,  6,  5]],

       [[40, 48, 77],
        [40, 48, 77],
        [40, 48, 77],
        ...,
        [ 8,  6,  5],
        [ 8,  6,  5],
        [ 8,  6,  5]],

       [[39, 47, 76],
        [39, 47, 76],
        [39, 47, 76],
        ...,
        [ 8,  6,  5],
        [ 8,  6,  5],
        [ 8,  6,  5]],

       ...,

       [[ 8,  6,  5],
        [ 8,  6,  5],
        [ 8,  6,  5],
        ...,
        [ 8,  6,  5],
        [ 8,  6,  5],
        [ 8,  6,  5]],

       [[ 8,  6,  5],
        [ 8,  6,  5],
        [ 8,  6,  5],
        ...,
        [ 8,  6,  5],
        [ 8,  6,  5],
        [ 8,  6,  5]],

       [[ 8,  6,  5],
        [ 8,  6,  5],
        [ 8,  6,  5],
        ...,
        [ 8,  6,  5],
        [ 8,  6,  5],
        [ 8,  6,  5]]], dtype=uint8)

In [7]:
total = copy_img.shape[0] + copy_img.shape[1]

In [8]:
show_image("contour", copy_img)
cv2.imwrite("contour_final.jpg", copy_img)

True

In [9]:
# Calculating Area for each Contour
area = []
sorted_contours = sorted(contours, key=cv2.contourArea, reverse=True)
for c in sorted_contours:
    area.append(cv2.contourArea(c))

In [10]:
# Getting the top Indices of contoured Images that matter
top_indices = [i for i in range(len(area)) if area[i] > np.mean(area)]

In [11]:
top_indices

[0, 1, 2]

In [12]:
# Cropping out Images that matter
image_data = np.zeros((len(top_indices), 4), dtype=int)
actual_images = []
for i in top_indices:
    x,y,w,h = cv2.boundingRect(sorted_contours[i])
    ROI = img[y:y+h, x:x+w]
    image_data[i] = np.array([x, y, w, h])
    actual_images.append(ROI)

In [13]:
# Calling the OCR Engine
try:
    from PIL import Image
except ImportError:
    import Image
import pytesseract

# If you don't have tesseract executable in your PATH, include the following:
pytesseract.pytesseract.tesseract_cmd = r'D:/Tesseract/tesseract.exe'

In [14]:
image_data

array([[ 191,   55, 1097, 1337],
       [ 113, 1853,  671,  483],
       [1336, 2021,  527,  315]])

In [15]:
(x_total, y_total, _) = img.shape

In [16]:
index = image_data[:,0]/x_total + image_data[:,1]/y_total

In [17]:
image_data = np.column_stack((index, image_data))

In [18]:
import pandas as pd
final_data = pd.DataFrame(image_data, columns=['i','x','y','w','h'])

In [19]:
sortis = final_data.sort_values(by=['i']).index

In [20]:
sortis

Int64Index([0, 1, 2], dtype='int64')

In [21]:
for i in sortis:
    cv2.imwrite('manga_test_images/%d.png'%i,actual_images[i])
    print(pytesseract.image_to_string(actual_images[i], timeout=4))

ATTENTION,
NEW YORKERS--
--ALL

7,927,285
OF YOU.

FOR THOSE UNFAMILIAR
WITH ME, I AM THE 7A/VKER.
I AM THE SOURCE
OF THESE GRAY VERMIN,
MINIATURE REPLICAS OF MY
AWESOME ANPRO/O.

IN CASE
YOU WERE EVER
CURIOUS, ATA
RATIO OF 1-TO-2.879,
I CALCULATE THE
NUMBER OF RATS
IN MANHATTAN WITHIN FOUR

AT 22,822,654. HOURS, AT
THEIR CURRENT
RATE OF DUPLICATION, MY
MINI-ANDROIDS WILL EXCEED THE
RAT POPULATION BY 522.4%--

--UNLESS TAM
PAlO EXACTLY

ONE 8/LL10N
DOLLARS’

WORTH OF
VIBRAMIUM er
MIOMIGHT!
SADLY, IT PREDICT
THE CITY WILL
PAY MY RANSOM

62 MINUTES LATE,
RESULTING IN THE

UNNECESSARY LOSS

OF 382 LIVES

AND £426.9M

IN DAMAGE.
MOST OF YOUR
COSTUMED HEROES,
SUCH AS SP/DER-MAN
AND BAREDEVIL,
WILL BE TOO BUSY
RESCUING (WNOCENTS
TO STOP ME.
ONLY THE NEw,
CAN COME CLOSE

TO OPPOS/ING ME...

 
  
  

POWER LEVELS,
THEIR CHANCE OF
SUCCESS RESTS
AT 2.9%!


In [22]:
sample = img.copy()
image_data = image_data.astype(int)

In [23]:
import re
from PIL import Image, ImageDraw, ImageFont
import textwrap
from googletrans import Translator

translator = Translator()

for i in sortis:
    # Extracting Text from the Image
    some_text = pytesseract.image_to_string(actual_images[i], timeout=4)
    some_text = re.sub(r'\n\s*\n', ' ', some_text) 
    [_, x1, y1, w1, h1] = image_data[i]
    some_text = textwrap.wrap(some_text, width=20)
    whole_text = ""
    for one in some_text:
        whole_text += one + "\n"
    balloon_box = Image.new('RGB', (w1, h1), color = (255, 255, 255))
    
    # Translating Text before writing
    whole_text = translator.translate(whole_text, src='en', dest='fr')
    
    # Writing Text onto the newly created Image
    d = ImageDraw.Draw(balloon_box)
    font = ImageFont.truetype("fonts/SF_Arch_Rival.ttf", size=25)
    d.text((10,10), whole_text.text, fill=(0,0,0), align="left", font=font)
    
    # Converting Image to BGR format suited for Opencv and constructing Borders
    opencvImage = cv2.cvtColor(np.array(balloon_box), cv2.COLOR_RGB2BGR)
    opencvImage = cv2.copyMakeBorder(src=opencvImage,top=5,bottom=5,left=5,right=5,borderType=cv2.BORDER_CONSTANT, value=[0,0,0])
    
    # Superimposing images on the Comic Image
    sample[y1:opencvImage.shape[0]+y1,x1:opencvImage.shape[1]+x1,:] = opencvImage

In [24]:
cv2.imwrite("output.jpg", sample)

True