# 라이브러리 설치 (Colab 환경)

In [5]:
!pip install paddleocr
!pip install paddlepaddle-gpu==2.6.2 -f https://www.paddlepaddle.org.cn/whl/mkl/avx/stable.html
!pip install deep-translator
!pip install git+https://github.com/ssut/py-hanspell.git

Collecting paddleocr
  Using cached paddleocr-2.10.0-py3-none-any.whl.metadata (12 kB)
Collecting pyclipper (from paddleocr)
  Using cached pyclipper-1.3.0.post6-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.0 kB)
Collecting lmdb (from paddleocr)
  Using cached lmdb-1.6.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (1.1 kB)
Collecting rapidfuzz (from paddleocr)
  Downloading rapidfuzz-3.13.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting python-docx (from paddleocr)
  Downloading python_docx-1.1.2-py3-none-any.whl.metadata (2.0 kB)
Collecting fire>=0.3.0 (from paddleocr)
  Downloading fire-0.7.0.tar.gz (87 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m87.2/87.2 kB[0m [31m3.9 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Downloading paddleocr-2.10.0-py3-none-any.whl (2.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [

# 가정통신문 이미지에서 OCR을 통해 텍스트 추출 및 번역

In [6]:
import paddle
# Report whether this Paddle build has CUDA support and which device is active.
print(f"GPU 지원 여부: {paddle.is_compiled_with_cuda()}")
print(f"사용 중인 디바이스: {paddle.device.get_device()}")


GPU 지원 여부: True
사용 중인 디바이스: gpu:0


In [7]:
import cv2
from paddleocr import PaddleOCR
from PIL import Image
import numpy as np
from hanspell import spell_checker
from deep_translator import GoogleTranslator

# Image upscaling helper
def resize_image(image_path, scale=2.0, save_path='/content/resized_image.jpg'):
    """Scale an image by `scale` and write the result to `save_path`.

    Args:
        image_path: Path of the image file to read.
        scale: Factor applied to both width and height; must be positive.
        save_path: Destination path for the resized image.

    Returns:
        `save_path`, so the result can be handed directly to the next step.

    Raises:
        ValueError: If `scale` is not positive, or the file exists but cannot
            be decoded as an image.
        FileNotFoundError: If `image_path` does not point to an existing file.
        OSError: If the resized image could not be written to `save_path`.
    """
    import os  # local import so the helper does not depend on other cells

    if scale <= 0:
        raise ValueError(f"scale must be positive, got {scale!r}")
    if not os.path.isfile(image_path):
        raise FileNotFoundError(f"Image file not found: {image_path}")

    img = cv2.imread(image_path)
    # cv2.imread reports failure by returning None rather than raising, which
    # would otherwise surface as an opaque AttributeError on `.shape` below.
    if img is None:
        raise ValueError(f"Could not decode image: {image_path}")

    height, width = img.shape[:2]
    # Clamp to at least 1 px so a very small scale cannot request an empty image.
    new_size = (max(1, int(width * scale)), max(1, int(height * scale)))
    resized = cv2.resize(img, new_size, interpolation=cv2.INTER_LINEAR)

    # cv2.imwrite returns False (instead of raising) when the write fails.
    if not cv2.imwrite(save_path, resized):
        raise OSError(f"Could not write resized image to: {save_path}")
    return save_path

# Upscale the input image before running OCR on it
image_path = '/content/가정통신문3.jpeg'
resized_path = resize_image(image_path)

# Load PaddleOCR
# NOTE(review): `cls=True` is passed to ocr() below, but `use_angle_cls=True`
# is not set here; the angle classifier may therefore be skipped — confirm
# against the PaddleOCR version in use.
ocr = PaddleOCR(
    use_gpu=True,
    lang='korean',
    det_db_box_thresh=0.6,
    rec_algorithm='SVTR_LCNet',
    drop_score=0.5
)

# Run OCR on the resized image. The call returns one entry per input image,
# and that entry is None when no text was detected; normalise it to an empty
# list so the unpacking loop that consumes `results` does not raise TypeError.
ocr_output = ocr.ocr(resized_path, cls=True)
results = (ocr_output[0] if ocr_output else None) or []
if not results:
    print(f"No text detected in {resized_path}")

# Collect each recognized text box as {text, x, y}, where (x, y) is the mean
# of the box's four corner points. Entries with confidence below 0.5 are dropped.
words = [
    {
        'text': text,
        'x': sum(pt[0] for pt in box) / 4,
        'y': sum(pt[1] for pt in box) / 4,
    }
    for box, (text, conf) in results
    if not (conf < 0.5)
]

# Group words into rows by y-center. Words are visited top to bottom; each one
# joins the first existing row whose anchor y (the y of the row's first word)
# is within `threshold` pixels of it, otherwise it starts a new row.
lines = []
threshold = 15
for word in sorted(words, key=lambda w: w['y']):
    target = next(
        (row for row in lines if abs(row['y'] - word['y']) < threshold),
        None,
    )
    if target is None:
        lines.append({'y': word['y'], 'words': [word]})
    else:
        target['words'].append(word)

# Within each row, order the words left to right by x-center and join them
# with single spaces to form one text line per row.
raw_lines = [
    ' '.join(
        item['text']
        for item in sorted(line['words'], key=lambda item: item['x'])
    )
    for line in lines
]

# Spell-check line by line. This is best-effort: when the checker fails or
# returns an empty string, the raw OCR line is kept unchanged.
corrected_lines = []
spell_check_failures = 0
for line in raw_lines:
    try:
        result = spell_checker.check(line)
        corrected = result.checked.strip()
    except Exception:
        # Catch Exception rather than using a bare `except:` so that
        # KeyboardInterrupt / SystemExit can still stop the cell.
        spell_check_failures += 1
        corrected = ''
    corrected_lines.append(corrected if corrected else line)

# Report failures once instead of hiding them or printing one line per failure.
if spell_check_failures:
    print(
        f"Spell check failed for {spell_check_failures}/{len(raw_lines)} "
        "line(s); original text kept for those lines"
    )

# Merge the corrected lines into one paragraph, then translate it.
corrected_paragraph = '\n'.join(corrected_lines)

TRANSLATION_FAILED = "[번역 실패]"
# Keep each request below the translator's per-call length limit.
# NOTE(review): deep-translator's GoogleTranslator is understood to reject
# inputs of 5000+ characters — confirm for the installed version.
MAX_CHARS_PER_REQUEST = 4500


def _split_for_translation(text, limit=MAX_CHARS_PER_REQUEST):
    """Split `text` on line boundaries into chunks of at most `limit` characters.

    A single line longer than `limit` is cut into `limit`-sized pieces; those
    pieces end up separated by a newline once the chunks are re-joined.
    """
    chunks = []
    current = []
    current_len = 0
    for line in text.split('\n'):
        pieces = [line[i:i + limit] for i in range(0, len(line), limit)] or ['']
        for piece in pieces:
            # +1 accounts for the newline that joins this piece to the chunk.
            added = len(piece) + (1 if current else 0)
            if current and current_len + added > limit:
                chunks.append('\n'.join(current))
                current, current_len = [], 0
                added = len(piece)
            current.append(piece)
            current_len += added
    chunks.append('\n'.join(current))
    return chunks


def _translate_from_korean(text, target):
    """Translate Korean `text` into `target`; return the failure marker on error."""
    try:
        translator = GoogleTranslator(source='ko', target=target)
        # `or ''` keeps the join safe should a chunk come back as None.
        return '\n'.join(
            translator.translate(chunk) or ''
            for chunk in _split_for_translation(text)
        )
    except Exception as exc:
        # Catch Exception rather than using a bare `except:` so the cell can
        # still be interrupted, and say which language failed.
        print(f"Translation to {target} failed: {exc!r}")
        return TRANSLATION_FAILED


# Each language is translated independently, so one failure no longer
# discards the translations that succeeded.
vi = _translate_from_korean(corrected_paragraph, 'vi')
zh = _translate_from_korean(corrected_paragraph, 'zh-CN')
en = _translate_from_korean(corrected_paragraph, 'en')

# Assemble the report: the corrected original followed by each translation,
# every section introduced by its heading and separated by a blank line.
report_sections = [
    ("[원문]", corrected_paragraph),
    ("[베트남어 번역]", vi),
    ("[중국어 번역]", zh),
    ("[영어 번역]", en),
]
final_paragraph = "\n\n".join(
    heading + "\n" + body for heading, body in report_sections
)

# Write the report as UTF-8 text.
with open("translated_paragraph.txt", "w", encoding="utf-8") as f:
    f.write(final_paragraph)

print("completed")


download https://paddleocr.bj.bcebos.com/PP-OCRv3/multilingual/Multilingual_PP-OCRv3_det_infer.tar to /root/.paddleocr/whl/det/ml/Multilingual_PP-OCRv3_det_infer/Multilingual_PP-OCRv3_det_infer.tar


100%|██████████| 3762/3762 [00:16<00:00, 227.13it/s] 


download https://paddleocr.bj.bcebos.com/PP-OCRv4/multilingual/korean_PP-OCRv4_rec_infer.tar to /root/.paddleocr/whl/rec/korean/korean_PP-OCRv4_rec_infer/korean_PP-OCRv4_rec_infer.tar


100%|██████████| 23810/23810 [00:18<00:00, 1258.15it/s] 


download https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar to /root/.paddleocr/whl/cls/ch_ppocr_mobile_v2.0_cls_infer/ch_ppocr_mobile_v2.0_cls_infer.tar


100%|██████████| 2138/2138 [00:15<00:00, 134.73it/s]

[2025/05/05 14:52:30] ppocr DEBUG: Namespace(help='==SUPPRESS==', use_gpu=True, use_xpu=False, use_npu=False, use_mlu=False, use_gcu=False, ir_optim=True, use_tensorrt=False, min_subgraph_size=15, precision='fp32', gpu_mem=500, gpu_id=0, image_dir=None, page_num=0, det_algorithm='DB', det_model_dir='/root/.paddleocr/whl/det/ml/Multilingual_PP-OCRv3_det_infer', det_limit_side_len=960, det_limit_type='max', det_box_type='quad', det_db_thresh=0.3, det_db_box_thresh=0.6, det_db_unclip_ratio=1.5, max_batch_size=10, use_dilation=False, det_db_score_mode='fast', det_east_score_thresh=0.8, det_east_cover_thresh=0.1, det_east_nms_thresh=0.2, det_sast_score_thresh=0.5, det_sast_nms_thresh=0.2, det_pse_thresh=0, det_pse_box_thresh=0.85, det_pse_min_area=16, det_pse_scale=1, scales=[8, 16, 32], alpha=1.0, beta=1.0, fourier_degree=5, rec_algorithm='SVTR_LCNet', rec_model_dir='/root/.paddleocr/whl/rec/korean/korean_PP-OCRv4_rec_infer', rec_image_inverse=True, rec_image_shape='3, 48, 320', rec_batch_




[2025/05/05 14:52:36] ppocr DEBUG: dt_boxes num : 228, elapsed : 1.2090449333190918
[2025/05/05 14:52:38] ppocr DEBUG: rec_res num  : 228, elapsed : 1.8112437725067139
completed


# 번역 텍스트를 이미지로 변환

In [8]:
from PIL import Image, ImageDraw, ImageFont

# Heading and translated text for each output image, keyed by language code.
text_blocks = {
    "vi": ("[베트남어 번역]", vi),
    "zh": ("[중국어 번역]", zh),
    "en": ("[영어 번역]", en),
}

# Font settings
# NOTE(review): confirm this font has glyphs for Vietnamese diacritics and
# Simplified Chinese; missing glyphs would render as empty boxes.
font_path = "/content/D2CodingBold-Ver1.3.2-20180524-ligature.ttf"
font_size = 22
font = ImageFont.truetype(font_path, font_size)
line_spacing = 10
margin = 10  # padding on every side of the text, in pixels

# Minimum image width; the canvas grows beyond this when a line is longer.
image_width = 1200

for lang, (title, text) in text_blocks.items():
    # Use a dedicated name so the `lines` list built in the OCR cell is not
    # overwritten when this cell runs.
    text_lines = [title] + text.strip().split('\n')

    # Measure every line once: bounding box is (left, top, right, bottom).
    line_boxes = [font.getbbox(line) for line in text_lines]
    line_heights = [bottom - top for _, top, _, bottom in line_boxes]
    total_height = (
        sum(line_heights) + (len(text_lines) - 1) * line_spacing + 2 * margin
    )

    # No wrapping is applied, so widen the canvas to fit the longest line
    # instead of clipping it at the fixed width.
    widest = max(right for _, _, right, _ in line_boxes)
    canvas_width = max(image_width, int(widest) + 2 * margin)

    # Create the white canvas.
    img = Image.new("RGB", (canvas_width, total_height), color="white")
    draw = ImageDraw.Draw(img)

    # Draw the lines top to bottom, advancing by each line's measured height.
    x, y = margin, margin
    for line, h in zip(text_lines, line_heights):
        draw.text((x, y), line, font=font, fill=(0, 0, 0))
        y += h + line_spacing

    # Save one PNG per language.
    img.save(f"translated_{lang}_text.png")
    print(f"translated_{lang}_text.png completed")


translated_vi_text.png completed
translated_zh_text.png completed
translated_en_text.png completed
