Detect text dialogue

Detect text dialogue on manga pages using a YOLOv3 model trained on the Manga109 dataset.

Darknet YOLOv3 Object Detection & Manga109 Dataset

Download the trained yolov3_manga109_weights and the configuration_file (yolov3.cfg in the code below).
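Once both files are downloaded, place them next to your script (or adjust the paths in the code). A quick sanity check, assuming the filenames used in the snippet below:

import os

# Assumed local filenames; change them if you saved the downloads elsewhere.
weights_path = "yolov3_manga109_v2_5000.weights"
config_path = "yolov3.cfg"

for path in (weights_path, config_path):
    if not os.path.isfile(path):
        raise FileNotFoundError(f"Missing '{path}', download it before running detection.")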

Use OpenCV's cv2.dnn module to run detection:

import cv2
import numpy as np
import glob
import matplotlib.pyplot as plt

# Load yolov3 model configuration & the weights
net = cv2.dnn.readNet("yolov3_manga109_v2_5000.weights", "yolov3.cfg")

# Get all image paths from the test folder.
images_path = glob.glob(r"test\*.jpg")

layer_names = net.getLayerNames()
# getUnconnectedOutLayers() returns a flat array in recent OpenCV releases and
# an Nx1 array in older ones; flatten so the 1-based indexing works in both cases.
output_layers = [layer_names[i - 1] for i in np.array(net.getUnconnectedOutLayers()).flatten()]

# For each image in test folder
for img_path in images_path:
    # Load image
    img = cv2.imread(img_path)
    img = cv2.resize(img, None, fx=0.5, fy=0.5)
    height, width, channels = img.shape

    # Detecting objects
    blob = cv2.dnn.blobFromImage(img, 0.00392, (512, 512), (0, 0, 0), True, crop=False)

    net.setInput(blob)
    outs = net.forward(output_layers)

    # Collect class ids, confidences and boxes for detections above the confidence threshold
    class_ids = []
    confidences = []
    boxes = []
    for out in outs:
        for detection in out:
            scores = detection[5:]
            class_id = np.argmax(scores)
            confidence = scores[class_id]
            
            if confidence > 0.25:
                # Detection output is normalized (center_x, center_y, width, height);
                # scale back up by the page width/height.
                center_x = int(detection[0] * width)
                center_y = int(detection[1] * height)
                w = int(detection[2] * width)
                h = int(detection[3] * height)

                # Calculate (x,y) to get (x,y,w,h) bbox format
                x = int(center_x - w / 2)
                y = int(center_y - h / 2)

                boxes.append([x, y, w, h])
                confidences.append(float(confidence))
                class_ids.append(class_id)

    # Non-maximum suppression to drop overlapping boxes
    # (arguments: boxes, scores, score_threshold, nms_threshold).
    indexes = cv2.dnn.NMSBoxes(boxes, confidences, 0.45, 0.45)
    for i in range(len(boxes)):
        if i in indexes:
            x, y, w, h = boxes[i]
            cv2.rectangle(img,(x, y),(x + w, y + h),(0, 0, 255), 2)
            cv2.putText(img, 'text', (x, y), cv2.FONT_HERSHEY_PLAIN, 1, (0, 0, 255), 1)
    # OpenCV loads images as BGR; convert to RGB so matplotlib shows the correct colors.
    plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
    plt.show()
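The loop above only previews each page. If you want every detected dialogue region as its own image file, you can crop the boxes that survive NMS. A minimal sketch, assuming the img, img_path, boxes and indexes variables from the loop above (run it before drawing the red rectangles, otherwise they end up in the crops; the crops/ folder and filenames are just illustrative):

import os

os.makedirs("crops", exist_ok=True)
base = os.path.splitext(os.path.basename(img_path))[0]

# np.array(...).flatten() handles both the old Nx1 and the newer flat return shape.
for n, i in enumerate(np.array(indexes).flatten()):
    x, y, w, h = boxes[i]
    # Clamp to the image bounds before slicing.
    x, y = max(x, 0), max(y, 0)
    crop = img[y:y + h, x:x + w]
    if crop.size:
        cv2.imwrite(os.path.join("crops", f"{base}_text_{n}.jpg"), crop)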

Output

Manga: PLANET7, page 7
