In [2]:
%cd open-mantra-dataset

D:\manga_translation\open-mantra-dataset


In [17]:
import json
import os
from shutil import copyfile

def create_yolo_annotation(book_title, page_index, image_path, annotations, image_width, image_height):
    yolo_annotations = []
    for annotation in annotations:
        x, y, w, h = annotation['x'], annotation['y'], annotation['w'], annotation['h']
        # Normalize coordinates
        x_center = (x + w / 2) / image_width
        y_center = (y + h / 2) / image_height
        w_normalized = w / image_width
        h_normalized = h / image_height
        yolo_annotations.append(f"0 {x_center} {y_center} {w_normalized} {h_normalized}")

    return yolo_annotations

def process_books(annotation_data, output_folder):
    for book in annotation_data:
        book_title = book['book_title']
        pages = book['pages']

        for page in pages:
            page_index = page['page_index']
            image_paths = page['image_paths']
            text_annotations = page['text']

            for lang, image_path in image_paths.items():
                # Get image size
                image = Image.open(image_path)
                image_width, image_height = image.size

                # Create YOLO annotation file
                yolo_annotations = create_yolo_annotation(book_title, page_index, image_path, text_annotations, image_width, image_height)

                # Save YOLO annotations to a file
                label_file_path = os.path.join(output_folder, f"{book_title}-{page_index:03d}.txt")
                with open(label_file_path, 'w') as label_file:
                    label_file.write('\n'.join(yolo_annotations))

                # Copy image to images folder
                image_file_path = os.path.join(output_folder, f"{book_title}-{page_index:03d}.jpg")
                copyfile(image_path, image_file_path)

if __name__ == "__main__":
    from PIL import Image

    # Load the annotation data from the JSON file
    with open('annotation.json', 'r', encoding='utf-8') as file:
        annotation_data = json.load(file)

    # Specify the output folder
    output_folder = 'data'

    # Process books and create YOLO annotations
    process_books(annotation_data, output_folder)


In [15]:
import os
import json
from PIL import Image

# Define paths
annotation_file_path = "annotation.json"
output_folder = "data-2"

# Create output folder if it doesn't exist
if not os.path.exists(output_folder):
    os.makedirs(output_folder)

# Read annotation data from the JSON file
with open(annotation_file_path, 'r', encoding='utf-8') as f:
    annotation_data = json.load(f)

# Iterate through the pages and annotations
for page in annotation_data[0]["pages"]:
    page_index = page["page_index"]
    image_path = page["image_paths"]["ja"]
    
    for index, annotation in enumerate(page["text"]):
        x, y, w, h = annotation["x"], annotation["y"], annotation["w"], annotation["h"]
        text_ja = annotation["text_ja"]
        
        # Open the image and crop the bounding box
        image = Image.open(image_path)
        cropped_image = image.crop((x, y, x + w, y + h))
        
        # Save the cropped image
        output_path = os.path.join(output_folder, f"book1-{page_index:03d}-{index + 1:03d}.jpg")
        cropped_image.save(output_path)
        
        # Write the path and Japanese text to the text file
        with open("text_file.txt", 'a', encoding='utf-8') as text_file:
            text_file.write(f"{output_path}\t{text_ja}\n")


In [8]:
annotation_data

[{'book_title': 'tojime_no_siora',
  'pages': [{'page_index': 1,
    'image_paths': {'ja': 'images/tojime_no_siora/ja/000.jpg'},
    'frame': [{'x': 9, 'y': 84, 'w': 733, 'h': 1080}],
    'text': [{'x': 82,
      'y': 952,
      'w': 668,
      'h': 107,
      'text_ja': '綴じ眼のシオラ',
      'text_en': 'bound eye siora',
      'text_zh': '縫眼的希奧拉'},
     {'x': 515,
      'y': 1057,
      'w': 254,
      'h': 85,
      'text_ja': '朽鷹みつき',
      'text_en': 'Mitsuki Kuchitaka',
      'text_zh': '朽鷹美月'}]},
   {'page_index': 2,
    'image_paths': {'ja': 'images/tojime_no_siora/ja/001.jpg'},
    'frame': [{'x': 61, 'y': 0, 'w': 768, 'h': 220},
     {'x': 179, 'y': 242, 'w': 597, 'h': 198},
     {'x': 334, 'y': 460, 'w': 495, 'h': 709},
     {'x': 72, 'y': 470, 'w': 240, 'h': 615}],
    'text': [{'x': 172,
      'y': 194,
      'w': 126,
      'h': 229,
      'text_ja': 'だからっ',
      'text_en': "I'm telling you!!",
      'text_zh': '所以...'},
     {'x': 692,
      'y': 519,
      'w': 98,
      'h'

In [1]:
# Open the file in read mode
with open('open-mantra-dataset/text_file.txt', 'r', encoding='utf-8') as file:
    # Read all lines from the file
    lines = file.readlines()

# Initialize an empty set to store unique characters
unique_characters = set()

# Iterate through each line
for line in lines:
    # Split the line based on the tab character (\t)
    parts = line.split('\t')
    
    # Extract the characters after the tab and before the newline
    if len(parts) > 1:
        characters = parts[1].strip()
        
        # Add each character to the set
        unique_characters.update(characters)

# Print the unique characters
print("Unique characters:", unique_characters)


Unique characters: {'程', '受', '下', '駄', '稽', '緒', 'へ', 'ち', '探', '銃', '力', '長', '療', 'チ', '故', '仕', '死', 'ら', '恩', '好', '大', '郷', '名', '末', '散', '高', '7', 'み', '朽', '荒', '誰', '背', 'ぞ', 'ズ', '比', '夜', '借', 'の', '介', 'る', '席', '契', 'ご', 'う', 'ネ', '美', '瞼', '迎', '書', '俺', '同', '腰', '取', '遅', '思', '捨', '外', 'タ', '辿', 'イ', 'ク', '始', '自', '人', 'は', 'ず', '食', 'ょ', '身', 'ト', '々', '絶', '以', '!', '紙', '心', '宿', '飛', '持', '煮', 'ド', '呼', '見', 'ろ', '馬', 'ノ', 'そ', '世', '患', '鉛', '少', 'た', '畜', 'ッ', '概', '君', 'ャ', '回', '咎', '約', '絵', '薄', '仇', '満', '件', '窓', '私', 'ぁ', '費', '払', '分', '減', '親', '物', '想', '打', '討', 'ミ', '日', '器', 'ヌ', '時', '庫', '買', '冷', 'で', '量', '弟', '勝', '変', '丈', '恐', 'ジ', 'ぉ', '五', 'バ', '音', '天', 'ふ', '勘', '撃', '川', 'く', '哀', '視', '我', '嬉', '厄', '案', '意', 'ー', '七', '明', '使', '帰', '妙', 'め', '貰', '前', '切', 'フ', '互', '昔', '声', '追', 'オ', '巨', '嘘', '鹿', 'ル', '伝', 'あ', '貴', 'テ', 'グ', 'ア', '家', '望', '待', '夫', '匂', '教', '逃', 'に', '棒', '全', '終', '汽', '困', '血', 'づ', 'せ', '可', '旅', '画', '描', 

In [2]:
len(unique_characters)

441

In [12]:
import os
import cv2
from xml.etree import ElementTree as ET

def crop_images(data_path, output_path):
    annotations_folder = os.path.join(data_path, 'annotations')
    images_folder = os.path.join(data_path, 'images')

    if not os.path.exists(output_path):
        os.makedirs(output_path)

    output_file_path = os.path.join(output_path, 'crop_info.txt')
    with open(output_file_path, 'w', encoding='utf-8') as output_file:
        for annotation_file in os.listdir(annotations_folder):
            if annotation_file.endswith('.xml'):
                xml_path = os.path.join(annotations_folder, annotation_file)
                tree = ET.parse(xml_path)
                root = tree.getroot()

                book_title = root.get('title')
                pages = root.findall('.//page')

                for page in pages:
                    page_index = page.get('index')
                    image_path = os.path.join(images_folder, f'{book_title}/{page_index.zfill(3)}.jpg')

                    image = cv2.imread(image_path)
                    if image is None:
                        continue

                    for text_element in page.findall('.//text'):
                        xmin = int(text_element.get('xmin'))
                        ymin = int(text_element.get('ymin'))
                        xmax = int(text_element.get('xmax'))
                        ymax = int(text_element.get('ymax'))

                        cropped_image = image[ymin:ymax, xmin:xmax]
                        cropped_text = text_element.text.strip()

                        if cropped_text and cropped_image.size > 0:
                            crop_output_path = os.path.join(output_path, f'{book_title}_{page_index}_{text_element.get("id")}.jpg')
                            cv2.imwrite(crop_output_path, cropped_image)

                            output_file.write(f'{crop_output_path}\t{cropped_text}\n')

if __name__ == '__main__':
    data_path = 'Manga109s/Manga109s_released_2023_12_07/'
    output_path = 'crops'

    crop_images(data_path, output_path)


In [5]:
tree = ET.parse("Manga109s/Manga109s_released_2023_12_07/annotations/AisazuNihaIrarenai.xml")
root = tree.getroot()

In [6]:
root

<Element 'book' at 0x000002319577C4A0>

In [9]:
book_title = root.get('title')

In [10]:
book_title

'AisazuNihaIrarenai'

In [19]:
import os
import xml.etree.ElementTree as ET
from shutil import copyfile

def parse_annotations(xml_path, image_width, image_height):
    tree = ET.parse(xml_path)
    root = tree.getroot()

    annotations = []
    for page in root.findall('./pages/page'):
        page_index = page.get('index')
        text_elements = page.findall('./text')
        for text_elem in text_elements:
            text = text_elem.text.strip()
            bbox_elem = text_elem.find('../frame')
            xmin = float(bbox_elem.get('xmin'))
            ymin = float(bbox_elem.get('ymin'))
            xmax = float(bbox_elem.get('xmax'))
            ymax = float(bbox_elem.get('ymax'))
            
            # Normalize bounding box coordinates
            normalized_xmin = xmin / image_width
            normalized_ymin = ymin / image_height
            normalized_xmax = xmax / image_width
            normalized_ymax = ymax / image_height

            annotations.append((page_index, text, normalized_xmin, normalized_ymin, normalized_xmax, normalized_ymax))
    
    return annotations

def convert_to_yolo_format(image_folder, annotation_folder, output_folder):
    
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)

    for book_folder in os.listdir(annotation_folder):
        book_annotations_path = os.path.join(annotation_folder, book_folder, f'{book_folder}.xml')
        print("run")
        if os.path.isfile(book_annotations_path):
            file_name = os.path.splitext(book_folder)[0]
            image_path = os.path.join(image_folder, f'{file_name}.jpg')
            image_width, image_height = get_image_dimensions(image_path)
            annotations = parse_annotations(book_annotations_path, image_width, image_height)

            with open(os.path.join(output_folder, f'{file_name}.txt'), 'w') as yolo_file:
                for _, text, normalized_xmin, normalized_ymin, normalized_xmax, normalized_ymax in annotations:
                    yolo_file.write(f'{text} {normalized_xmin} {normalized_ymin} {normalized_xmax} {normalized_ymax}\n')
                    
            copyfile(image_path, os.path.join(output_folder, f'{file_name}.jpg'))

def get_image_dimensions(image_path):
    from PIL import Image
    with Image.open(image_path) as img:
        return img.size

# if __name__ == "__main__":
data_folder = "Manga109s\Manga109s_released_2023_12_07"
new_data_folder = "new_data"

images_folder = os.path.join(data_folder, 'images')
annotations_folder = os.path.join(data_folder, 'annotations')

new_images_folder = os.path.join(new_data_folder, 'images')
new_labels_folder = os.path.join(new_data_folder, 'labels')

if not os.path.exists(new_images_folder):
    os.makedirs(new_images_folder)

if not os.path.exists(new_labels_folder):
    os.makedirs(new_labels_folder)

convert_to_yolo_format(images_folder, annotations_folder, new_data_folder)


run
run
run
run
run
run
run
run
run
run
run
run
run
run
run
run
run
run
run
run
run
run
run
run
run
run
run
run
run
run
run
run
run
run
run
run
run
run
run
run
run
run
run
run
run
run
run
run
run
run
run
run
run
run
run
run
run
run
run
run
run
run
run
run
run
run
run
run
run
run
run
run
run
run
run
run
run
run
run
run
run
run
run
run
run
run
run
