In [3]:
import os
import re


# 1. 각 파일을 읽고
def read_readme_from_directories():
    dir_pattern = r"^\d{2}-"

    result = []
    for dir_name in os.listdir():
        if os.path.isdir(os.path.join(dir_name)) and re.match(dir_pattern, dir_name):
            readme_path = os.path.join(dir_name, "README.md")
            if os.path.exists(readme_path):
                with open(readme_path, "r", encoding="utf-8") as readme_file:
                    content = readme_file.read()
                result.append(split_markdown(content))
    return result


# 2. 헤딩 (#, ##, ### …) 별로 나눈 뒤
def split_markdown(content):
    pattern = r"^(#+\s)"
    sections = re.split(pattern, content, flags=re.MULTILINE)[1:]
    return [sections[i] + sections[i + 1] for i in range(0, len(sections), 2)]


readme_files = read_readme_from_directories()

In [4]:
%pip install python-dotenv openai

Collecting python-dotenv
  Downloading python_dotenv-1.0.1-py3-none-any.whl.metadata (23 kB)
Collecting openai
  Downloading openai-1.48.0-py3-none-any.whl.metadata (24 kB)
Collecting anyio<5,>=3.5.0 (from openai)
  Downloading anyio-4.6.0-py3-none-any.whl.metadata (4.6 kB)
Collecting distro<2,>=1.7.0 (from openai)
  Downloading distro-1.9.0-py3-none-any.whl.metadata (6.8 kB)
Collecting httpx<1,>=0.23.0 (from openai)
  Downloading httpx-0.27.2-py3-none-any.whl.metadata (7.1 kB)
Collecting jiter<1,>=0.4.0 (from openai)
  Downloading jiter-0.5.0-cp312-cp312-macosx_11_0_arm64.whl.metadata (3.6 kB)
Collecting pydantic<3,>=1.9.0 (from openai)
  Downloading pydantic-2.9.2-py3-none-any.whl.metadata (149 kB)
Collecting sniffio (from openai)
  Downloading sniffio-1.3.1-py3-none-any.whl.metadata (3.9 kB)
Collecting tqdm>4 (from openai)
  Downloading tqdm-4.66.5-py3-none-any.whl.metadata (57 kB)
Collecting typing-extensions<5,>=4.11 (from openai)
  Downloading typing_extensions-4.12.2-py3-none-an

In [None]:
def save_to_file(file_path, content):
    os.makedirs(os.path.dirname(file_path), exist_ok=True)

    with open(file_path, "w") as file:
        file.write(content)

In [5]:
from openai import OpenAI
from dotenv import load_dotenv

load_dotenv()
client = OpenAI()


# 3. GPT-4o 를 사용하여 번역
def translate_text(text):
    completion = client.chat.completions.create(
        model="gpt-4o",
        messages=[
            {
                "role": "system",
                "content": "You are an AI expert. Your job is to translate English markdown to Korean markdown.",
            },
            {"role": "user", "content": text},
        ],
    )
    print(completion)
    return completion.choices[0].message.content


for i, file in enumerate(readme_files):
    content = ""
    for j, section in enumerate(file):
        content += translate_text(section) + "\n\n"
    save_to_file(f"translations/ko/{i:02d}.md", content)

ChatCompletion(id='chatcmpl-ABcIc8Bmb06CAcMK7bBxqPEDTR7RQ', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='# LLM 미세 조정\n\n대형 언어 모델을 사용하여 생성형 AI 애플리케이션을 구축하는 것은 새로운 과제를 수반합니다. 주요 문제는 사용자의 요청에 대해 모델이 생성하는 콘텐츠의 품질(정확성과 관련성)을 보장하는 것입니다. 이전 수업에서는 기존 모델에 대한 _프롬프트 입력을 수정_하여 문제를 해결하려고 하는 프롬프트 엔지니어링 및 검색 증강 생성과 같은 기술을 논의했습니다.\n\n오늘 수업에서는 세 번째 기술인 **미세 조정**에 대해 논의합니다. 이는 추가 데이터를 사용하여 _모델 자체를 재교육_ 함으로써 문제를 해결하려고 합니다. 세부 사항을 자세히 살펴보겠습니다.', refusal=None, role='assistant', function_call=None, tool_calls=None))], created=1727332194, model='gpt-4o-2024-05-13', object='chat.completion', service_tier=None, system_fingerprint='fp_f82f5b050c', usage=CompletionUsage(completion_tokens=163, prompt_tokens=149, total_tokens=312, completion_tokens_details=CompletionTokensDetails(reasoning_tokens=0)))
ChatCompletion(id='chatcmpl-ABcIetc9P7elrkEiqlFIqrPIM2JXO', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content=

KeyboardInterrupt: 