In [5]:
import pandas as pd
with open('./data/machine_data.txt') as f:
    lines = f.readlines()
data = [line.strip().split('\t') for line in lines]
df = pd.DataFrame(data[1:], columns=data[0])
print(df.columns)
df


Index(['Year', 'Month', 'Day', 'Hour', 'Minute', 'operation'], dtype='object')


Unnamed: 0,Year,Month,Day,Hour,Minute,operation
0,2021,1,29,1,14,error
1,2021,1,29,1,14,stopped
2,2021,1,29,1,15,operating
3,2021,1,29,1,15,error
4,2021,1,29,1,15,stopped
...,...,...,...,...,...,...
197,2021,1,30,15,49,stopped
198,2021,1,30,15,50,operating
199,2021,1,30,15,50,stopped
200,2021,1,30,15,51,error


In [6]:
import os
from pathlib import Path

# 데이터 파일들이 있는 폴더 경로
input_folder = Path('./data')
output_folder = Path("./kor_logs")
output_folder.mkdir(exist_ok=True)

# 전처리 함수
def create_sentence(row):
    date = row['Day']
    month = row['Month']
    year = row['Year']
    hour = row['Hour']
    minute = row['Minute']
    operation = row['operation']
    time = f"{year}년 {month}월 {date}일 {hour}시 {minute}분"
    if operation == "error":
        return f"{time}에 기계에서 오류가 발생했습니다."
    elif operation == "stopped":
        return f"{time}에 기계가 정지되었습니다."
    elif operation == "operating":
        return f"{time}에 기계가 정상적으로 작동 중입니다."
    else:
        return f"{time}에 기계 상태가 '{operation}'로 기록되었습니다."

# 처리할 파일 리스트 (simulated log들만 대상으로)
files_to_process = sorted(input_folder.glob("*.txt"))
output_files = []

# 각 파일에 대해 처리
for file_path in files_to_process:
    df = pd.read_csv(file_path, sep="\t")
    df['Sentence'] = df.apply(create_sentence, axis=1)

    out_name = file_path.stem + "_kor.txt"
    out_path = output_folder / out_name
    with open(out_path, "w", encoding="utf-8") as f:
        for line in df['Sentence']:
            f.write(line + "\n")
    output_files.append(str(out_path))

output_files


['kor_logs\\machine_data_kor.txt',
 'kor_logs\\machine_data_simulated_1_kor.txt',
 'kor_logs\\machine_data_simulated_2_kor.txt',
 'kor_logs\\machine_data_simulated_3_kor.txt']

In [7]:
from reportlab.pdfgen import canvas
from reportlab.pdfbase import pdfmetrics
from reportlab.pdfbase.ttfonts import TTFont
from reportlab.lib.pagesizes import A4
from pathlib import Path

# 폰트 등록
pdfmetrics.registerFont(TTFont('NanumGothic', './utils/NanumGothic.ttf'))

# kor_logs 내 txt 파일 PDF로 하나씩 변환
input_dir = Path("./kor_logs")
output_dir = Path("./kor_pdfs")
output_dir.mkdir(exist_ok=True)

for txt_file in input_dir.glob("*.txt"):
    pdf_path = output_dir / (txt_file.stem + ".pdf")
    c = canvas.Canvas(str(pdf_path), pagesize=A4)
    c.setFont("NanumGothic", 12)
    width, height = A4
    y = height - 40

    with open(txt_file, "r", encoding="utf-8") as f:
        for line in f:
            c.drawString(40, y, line.strip())
            y -= 15
            if y < 40:
                c.showPage()
                c.setFont("NanumGothic", 12)
                y = height - 40

    c.save()
    print(f"✅ 생성 완료: {pdf_path}")


TTFError: Not a recognized TrueType font: version=0x1F8B0800