In [1]:
import os
import re
from pprint import pprint


def extract_info(file_path):
    """Split a chapter directory name into its number and its title slug.

    The name must start with exactly two digits followed by a hyphen and at
    least one word character or hyphen (for example ``"14-some-title"``).
    Matching is anchored at the start of the string only, so anything after
    the slug (such as a path separator) is ignored.

    Args:
        file_path: Directory name or path string to inspect.

    Returns:
        ``(number, slug)`` where ``number`` is an ``int`` and ``slug`` is the
        text after the first hyphen, or ``(None, None)`` when the string does
        not start with the expected prefix.
    """
    found = re.match(r"(\d{2})-([\w-]+)", file_path)

    # Guard clause: nothing recognisable at the start of the string.
    if found is None:
        return None, None

    number_text, slug = found.groups()
    return int(number_text), slug


# 1. Read each file
def read_readme_from_directories():
    """Collect the English README of every chapter that has no Korean translation yet.

    Scans the current working directory for sub-directories whose names start
    with two digits and a hyphen. A directory is skipped when it already
    contains ``translations/ko/README.md`` or when it has no ``README.md`` to
    translate (previously a missing ``README.md`` aborted the whole scan with
    ``FileNotFoundError``).

    Returns:
        A dict keyed by the chapter number (as returned by ``extract_info``),
        each value being ``{"title": <slug>, "sections": <list of str>}`` where
        the sections come from ``split_markdown``.
    """
    dir_pattern = r"^\d{2}-"

    result = {}
    for dir_name in os.listdir():
        if not (os.path.isdir(dir_name) and re.match(dir_pattern, dir_name)):
            continue

        readme_path = os.path.join(dir_name, "README.md")
        readme_path_ko = os.path.join(dir_name, "translations", "ko", "README.md")

        # Already translated: nothing to do for this chapter.
        if os.path.exists(readme_path_ko):
            continue
        # No source README: skip instead of crashing on open().
        if not os.path.isfile(readme_path):
            continue

        with open(readme_path, "r", encoding="utf-8") as readme_file:
            content = readme_file.read()

        digits, title = extract_info(dir_name)
        result[digits] = {
            "title": title,
            "sections": split_markdown(content),
        }
    return result


# 2. Then split the text by heading (#, ##, ### ...)
def split_markdown(content):
    """Split markdown text into one string per heading-led section.

    A heading is a line that starts at column 0 with one or more ``#``
    followed by a whitespace character. Each returned section starts with its
    heading marker and runs up to (not including) the next heading. Any text
    before the first heading is discarded, and text without headings yields an
    empty list.

    Lines inside fenced code blocks (fences of three or more backticks or
    tildes) are never treated as headings, so a ``# comment`` inside a code
    sample no longer cuts the sample into separate sections.

    Args:
        content: The full markdown document as a single string.

    Returns:
        A list of section strings in document order.
    """
    heading_re = re.compile(r"#+\s")
    # Group 1: the fence run; group 2: the rest of the line (the info string).
    fence_re = re.compile(r"[ \t]*(`{3,}|~{3,})(.*)", re.DOTALL)

    # Split on "\n" only (not str.splitlines) so line boundaries are exactly
    # the positions where a MULTILINE "^" would match.
    pieces = content.split("\n")
    lines = [piece + "\n" for piece in pieces[:-1]]
    if pieces[-1]:
        lines.append(pieces[-1])

    sections = []
    current = None  # Lines of the section being built; None before the first heading.
    open_fence = None  # (fence character, run length) while inside a fenced block.

    for line in lines:
        fence = fence_re.match(line)
        if open_fence is not None:
            # A closing fence uses the same character, is at least as long as
            # the opening one, and has nothing but whitespace after it.
            if (
                fence
                and fence.group(1)[0] == open_fence[0]
                and len(fence.group(1)) >= open_fence[1]
                and not fence.group(2).strip()
            ):
                open_fence = None
        elif fence and not (fence.group(1)[0] == "`" and "`" in fence.group(2)):
            # A backtick run followed by more backticks on the same line is an
            # inline code span, not an opening fence.
            open_fence = (fence.group(1)[0], len(fence.group(1)))
        elif heading_re.match(line):
            if current is not None:
                sections.append("".join(current))
            current = []

        if current is not None:
            current.append(line)

    if current is not None:
        sections.append("".join(current))
    return sections


# Gather the heading-split sections of every chapter README that still lacks a
# Korean translation, then pretty-print the mapping for a quick visual check.
readme_files = read_readme_from_directories()
pprint(readme_files)

{14: {'sections': ['# The Generative AI Application Lifecycle\n'
                   '\n'
                   'An important question for all AI applications is the '
                   'relevance of AI features, as AI is a fast evolving field, '
                   'to ensure that your application remains relevant, '
                   'reliable, and robust, you need to monitor, evaluate, and '
                   'improve it continuously. This is where the generative AI '
                   'lifecycle comes in.\n'
                   '\n'
                   'The generative AI lifecycle is a framework that guides you '
                   'through the stages of developing, deploying, and '
                   'maintaining a generative AI application. It helps you to '
                   'define your goals, measure your performance, identify your '
                   'challenges, and implement your solutions. It also helps '
                   'you to align your application with the ethical an

In [2]:
# Install the third-party packages imported by the later cells (dotenv, openai, pinecone).
# NOTE(review): versions are not pinned here — consider pinning for reproducible runs.
%pip install python-dotenv openai pinecone

Note: you may need to restart the kernel to use updated packages.


In [3]:
def save_to_file(file_path, content):
    """Write ``content`` to ``file_path`` as UTF-8 text, creating parent directories.

    Any existing file at ``file_path`` is overwritten.

    Args:
        file_path: Destination path. May be a bare file name, in which case
            the file is written to the current working directory.
        content: The text to write.
    """
    parent_dir = os.path.dirname(file_path)
    # A bare file name has an empty directory part, and os.makedirs("") raises.
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    # Explicit UTF-8: the content is Korean text, and the platform default
    # encoding is not guaranteed to be able to represent it. This also matches
    # the encoding used when the source README files are read.
    with open(file_path, "w", encoding="utf-8") as file:
        file.write(content)

In [4]:
import asyncio
from openai import AsyncOpenAI
from dotenv import load_dotenv
from pinecone import Pinecone
import os

# Load settings from a .env file (if any) before the API clients are created.
load_dotenv()
# No arguments are passed, so the OpenAI client relies on its own defaults —
# presumably it picks up OPENAI_API_KEY from the environment; TODO confirm.
client = AsyncOpenAI()
# The Pinecone key comes from the environment; os.getenv returns None when
# PINECONE_API_KEY is not set.
pc = Pinecone(api_key=os.getenv("PINECONE_API_KEY"))


# 1. Convert the search text into an embedding
async def to_embedding(text):
    """Request an embedding for ``text`` and return it.

    Uses the module-level ``client`` with the ``text-embedding-3-small`` model
    and the ``float`` encoding format.

    Args:
        text: The input passed to the embeddings endpoint.

    Returns:
        The ``embedding`` attribute of the first item in the response's
        ``data``.
    """
    response = await client.embeddings.create(
        model="text-embedding-3-small",
        input=text,
        encoding_format="float",
    )
    first_item = response.data[0]
    return first_item.embedding


# 2. Look up the embedded query in the existing Pinecone DB (single most similar entry)
def query_embedding(vector):
    """Return the best match for ``vector`` from the ``gen-ai`` index.

    Queries namespace ``ns1`` of the module-level Pinecone client ``pc`` for
    one result, asking for metadata but not for the stored vector values.

    Args:
        vector: The query embedding.

    Returns:
        The first element of the response's ``"matches"`` entry. Indexing an
        empty ``"matches"`` is not guarded against here.
    """
    search_options = {
        "namespace": "ns1",
        "vector": vector,
        "top_k": 1,
        "include_values": False,
        "include_metadata": True,
    }
    response = pc.Index("gen-ai").query(**search_options)
    matches = response["matches"]
    return matches[0]


async def translate_text(text):
    """Translate one English markdown snippet into Korean markdown.

    Embeds ``text``, looks up the closest stored entry with
    ``query_embedding``, and sends a chat completion request to ``gpt-4o``
    containing the system instruction, ``text`` as the user message, and the
    retrieved entry's ``en``/``ko`` metadata as an assistant message.
    Progress, the retrieved entry and the raw completion are printed.

    Args:
        text: The English markdown to translate.

    Returns:
        The ``content`` of the first choice's message in the completion.
    """
    print(f"Searching for translation of: {text}...\n")
    embedding = await to_embedding(text)
    # NOTE(review): query_embedding is a plain (non-async) function, so this
    # call runs synchronously inside the coroutine.
    query = query_embedding(embedding)
    print(f"its query: {query}\n")
    # 3. Pass the query result along as context, then translate
    completion = await client.chat.completions.create(
        model="gpt-4o",
        messages=[
            {
                "role": "system",
                "content": "You are an AI expert. Your job is to translate English markdown to Korean markdown. Output should be a markdown string that can be saved to a file. Omit ```",
            },
            {"role": "user", "content": text},
            # The retrieved reference pair is supplied as an assistant turn;
            # the literal below keeps its line breaks and indentation as-is.
            {
                "role": "assistant",
                "content": f"""Based on the previous translation: 
                English: {query["metadata"]["en"]}
                Korean: {query["metadata"]["ko"]}
            """,
            },
        ],
    )
    print(completion)
    return completion.choices[0].message.content


async def translate_file(file, i):
    """Translate every section of one README and save the joined result.

    All sections are handed to ``translate_text`` and awaited together; the
    translated pieces are joined with a blank line between them and written to
    ``<NN>-<title>/translations/ko/README.md`` via ``save_to_file``.

    Args:
        file: Mapping with a ``"title"`` string and a ``"sections"`` iterable.
        i: Chapter number, formatted as two digits in the output path.
    """
    translated_sections = await asyncio.gather(
        *(translate_text(section) for section in file["sections"])
    )
    joined = "\n\n".join(translated_sections)
    output_path = f"{i:02d}-{file['title']}/translations/ko/README.md"
    save_to_file(output_path, joined)


# One translate_file coroutine per collected README; the dict key is the
# chapter number and the value holds that chapter's title and sections.
tasks = [translate_file(file, i) for i, file in list(readme_files.items())]
# Run every file translation concurrently; as the cell's last expression, the
# gathered results (one per file) are shown as the cell output.
await asyncio.gather(*tasks)

  from tqdm.autonotebook import tqdm


Searching for translation of: # The Generative AI Application Lifecycle

An important question for all AI applications is the relevance of AI features, as AI is a fast evolving field, to ensure that your application remains relevant, reliable, and robust, you need to monitor, evaluate, and improve it continuously. This is where the generative AI lifecycle comes in.

The generative AI lifecycle is a framework that guides you through the stages of developing, deploying, and maintaining a generative AI application. It helps you to define your goals, measure your performance, identify your challenges, and implement your solutions. It also helps you to align your application with the ethical and legal standards of your domain and your stakeholders. By following the generative AI lifecycle, you can ensure that your application is always delivering value and satisfying your users.

...

Searching for translation of: ## Introduction

In this chapter, you will:

- Understand the Paradigm Shift 

[None]