In [1]:
import mimetypes
import os

import openai
from dotenv import load_dotenv
from openai import OpenAI

# Pull settings from a local .env file (when one exists) into the process environment.
load_dotenv()

# Mirror the key onto the module-level `openai` config; the value is None when
# OPENAI_API_KEY is not set.
openai.api_key = os.environ.get('OPENAI_API_KEY')

# Shared client for every request below, allowing up to 5 retries per call.
# NOTE(review): no api_key is passed here — presumably the client reads
# OPENAI_API_KEY from the environment on its own; confirm.
client = OpenAI(max_retries=5)

In [26]:
# Instruction text sent along with the images for the transcription request.
# It asks the model to copy out all Vietnamese / Chinese / English text as-is
# (no translation, no commentary), to write [unclear] for illegible parts, and
# to wrap the whole result in <transcription> tags.
transcript_prompt = """You will be given an image containing text in various languages. Your task is to transcribe all the content in the image accurately.

Please follow these instructions carefully:

1. Examine the image closely and identify all text present.

2. The text in the image may be in Vietnamese, Chinese, or English. Be prepared to recognize and transcribe text in any of these languages.

3. Transcribe all the text you see in the image, regardless of its language. Do not translate the text; transcribe it in its original language.

4. Maintain the original formatting of the text as much as possible. This includes:
   - Preserving line breaks
   - Keeping text in the same order as it appears in the image
   - Noting any special formatting (e.g., bold, italic, underlined) if it's significant

5. If there are any symbols, numbers, or punctuation marks in the image, include them in your transcription.

6. If any part of the text is unclear or illegible, indicate this by writing [unclear] in place of the unreadable text.

7. Do not include any interpretation or analysis of the text; simply transcribe what you see.

8. Enclose your entire transcription within <transcription> tags.

Here's an example of how your output might be structured:

<transcription>
content
</transcription>

Please proceed with the transcription based on the provided image."""

In [27]:
import base64
# Folder holding the input images. make_transcript_prompt declares the same
# path as its own default, and the call further down does not pass this variable.
image_folder = "./images"

def image_to_base64(image_path):
    """Return the contents of the file at `image_path` as base64 text.

    The file is opened in binary mode and read in a single call; the base64
    bytes are then decoded as UTF-8 so the result is a plain `str`.
    """
    with open(image_path, "rb") as handle:
        raw_bytes = handle.read()
    return base64.b64encode(raw_bytes).decode("utf-8")


def make_transcript_prompt(text_content, image_folder="./images"):
    """Build the chat `messages` payload for the transcription request.

    The payload is a one-element list holding a single user message. Its
    content starts with the instruction text and is followed by one
    `image_url` part per image file in `image_folder`, each embedded as a
    base64 data URL.

    Args:
        text_content: Instruction text placed first in the message content.
        image_folder: Directory to read images from. Only regular, non-hidden
            files directly inside it are attached.

    Returns:
        list[dict]: `[{"role": "user", "content": [text_part, *image_parts]}]`.

    Raises:
        OSError: If `image_folder` cannot be listed or a file cannot be read.
    """
    content_parts = [{"type": "text", "text": text_content}]

    # os.listdir makes no ordering guarantee; sort by name so multi-page input
    # is attached in the same, predictable order on every run.
    for entry_name in sorted(os.listdir(image_folder)):
        image_path = os.path.join(image_folder, entry_name)

        # Sub-directories cannot be opened as files, and dotfiles such as
        # .DS_Store or .ipynb_checkpoints are never images.
        if entry_name.startswith(".") or not os.path.isfile(image_path):
            continue

        # Label the data URL with the real image type (png, webp, ...); fall
        # back to JPEG when the extension is missing or not an image type.
        mime_type, _ = mimetypes.guess_type(image_path)
        if not (mime_type and mime_type.startswith("image/")):
            mime_type = "image/jpeg"

        base64_image = image_to_base64(image_path)
        content_parts.append({
            "type": "image_url",
            "image_url": {"url": f"data:{mime_type};base64,{base64_image}"},
        })

    return [{"role": "user", "content": content_parts}]

In [28]:
# Assemble the instruction text plus every image into one user message,
# then send it as a single chat-completion request.
prompt = make_transcript_prompt(text_content=transcript_prompt)
transcription = client.chat.completions.create(
    model="gpt-4o",
    messages=prompt,
)

In [29]:
# Show the text of the first choice so the transcription can be checked by eye.
print(transcription.choices[0].message.content)

<transcription>
1. 现在 xiànzài （名） (hiện tại) now bây giờ

2. 跟 gēn （介，动）(căn) with; to follow cùng, với

3. 一起 yìqǐ （副）(nhất khởi) together cùng nhau

4. 咱们 zánmen （代）(ta môn) we; us chúng ta, chúng mình

5. 走 zǒu （动）(tẩu) to walk; to go đi

6. 常（常）cháng （副）(thường thường) often; frequently thường, thông thường

7. 借 jiè （动）(tá) to borrow; to lend vay, mượn

8. 有时候 yǒu shíhou （hữu thời hậu）sometimes; now and then có lúc, có khi

时候 shíhou （名）(thời hậu) time thời gian, khi, lúc

9. 上网 shàng wǎng （动）(thượng võng) to get online; to surf the Internet lên mạng

网 wǎng （名）(võng) net mạng, Internet

10. 查 chá （动）(tra) to consult; to look up kiểm tra, tìm kiếm

11. 资料 zīliào （名）(tư liệu) material; data tư liệu, tài liệu

12. 总（是）zǒng (shì) （副）(tổng (thị)) always tổng, luôn luôn

13. 宿舍 sùshè （名）(túc xá) dormitory kí túc xá

14. 安静 ānjìng （形）(yên tĩnh) quiet; peaceful yên tĩnh, yên lặng

15. 晚上 wǎnshang （名）(vãn thượng) evening buổi tối
</transcription>


In [35]:
# Template for the flashcard-generation request. It is filled in with
# str.format, so {CONTENT} is the only placeholder and the literal braces of
# the JSON example are doubled ({{ }}) to survive formatting.
# NOTE(review): the template asks for a <content_analysis> section and also
# says the entire output must be JSON only — these two instructions conflict,
# while the parsing step further down expects JSON (optionally inside a ``` fence).
flashcard_prompt = '''You are an expert in creating Chinese language learning flashcards. Your task is to generate flashcards from the given Chinese content and output them in JSON format. Here's the content you'll be working with:

<chinese_content>
{CONTENT}
</chinese_content>

Before creating the flashcards, take a moment to analyze the content and plan your approach. Write your analysis inside <content_analysis> tags.

<content_analysis>
1. Read through the content carefully, identifying key Chinese words or phrases that would be valuable for learners.
2. For each identified word or phrase:
   - Write down the Chinese characters
   - Note the Pinyin (romanization) of the characters
   - Provide the English meaning or translation
   - Assess the difficulty level (beginner, intermediate, advanced)
   - Consider the part of speech (noun, verb, adjective, etc.)
3. Ensure a diverse range of vocabulary is covered, including different parts of speech and difficulty levels.
4. Group similar concepts or related words together.
5. Identify any idiomatic expressions or culturally significant terms that might require additional explanation.
6. Plan how to structure each flashcard as a JSON object with "front" and "back" fields, ensuring each captures a single, clear concept to aid in effective learning.
7. Aim for a balanced set of flashcards that covers the main ideas and key vocabulary from the content.

It's OK for this section to be quite long to ensure a thorough analysis.
</content_analysis>

Now, create the flashcards based on your analysis. Follow these guidelines:

1. For each flashcard:
   - Set the "front" field to contain only the Chinese characters.
   - Set the "back" field to contain the Pinyin followed by the English meaning, separated by a dash (-).
2. Ensure each flashcard represents a single, clear idea or concept.
3. Format your output as a JSON array of objects. Each object should have two fields:
   - "front": The Chinese characters
   - "back": The Pinyin and English meaning

Your entire output must be valid JSON. Do not include any additional text, explanations, or commentary outside of the JSON structure. Here's an example of the expected format:

[
  {{
    "front": "你好",
    "back": "nǐ hǎo - hello"
  }},
  {{
    "front": "谢谢",
    "back": "xiè xiè - thank you"
  }}
]

Remember, your output should only contain the JSON array of flashcard objects. Do not include any other text or formatting.'''

In [36]:
# Input for the flashcard step: the transcription reply exactly as returned
# (nothing is stripped from it here).
CONTENT = transcription.choices[0].message.content
def make_flashcard_prompt(content):
    """Wrap `content` in a user chat message built from `flashcard_prompt`.

    `content` is substituted for the CONTENT placeholder of the module-level
    template via `str.format`. The returned dict has role "user" and a content
    list holding a single text part with the filled-in prompt.
    """
    filled_prompt = flashcard_prompt.format(CONTENT=content)
    text_part = {"type": "text", "text": filled_prompt}
    return {"role": "user", "content": [text_part]}

# Single-turn request: the only message is the filled-in flashcard prompt.
flashcard_messages = [make_flashcard_prompt(CONTENT)]
flashcards = client.chat.completions.create(
    messages=flashcard_messages,
    model="gpt-4o",
    temperature=0.3,
)

In [37]:
# Show the raw reply text of the first choice before any parsing is attempted.
print(flashcards.choices[0].message.content)

```json
[
  {
    "front": "现在",
    "back": "xiànzài - now"
  },
  {
    "front": "跟",
    "back": "gēn - with; to follow"
  },
  {
    "front": "一起",
    "back": "yìqǐ - together"
  },
  {
    "front": "咱们",
    "back": "zánmen - we; us"
  },
  {
    "front": "走",
    "back": "zǒu - to walk; to go"
  },
  {
    "front": "常",
    "back": "cháng - often; frequently"
  },
  {
    "front": "借",
    "back": "jiè - to borrow; to lend"
  },
  {
    "front": "有时候",
    "back": "yǒu shíhou - sometimes; now and then"
  },
  {
    "front": "时候",
    "back": "shíhou - time"
  },
  {
    "front": "上网",
    "back": "shàng wǎng - to get online; to surf the Internet"
  },
  {
    "front": "网",
    "back": "wǎng - net"
  },
  {
    "front": "查",
    "back": "chá - to consult; to look up"
  },
  {
    "front": "资料",
    "back": "zīliào - material; data"
  },
  {
    "front": "总",
    "back": "zǒng - always"
  },
  {
    "front": "宿舍",
    "back": "sùshè - dormitory"
  },
  {
    "front": "安静",
    "ba

In [38]:
import json
# The reply may arrive wrapped in a Markdown code fence (```json ... ```),
# so peel the backticks and surrounding whitespace off before parsing.
raw_reply = flashcards.choices[0].message.content
json_data = raw_reply.strip('` \n')

# Once the backticks are gone, a fenced reply still starts with its "json"
# language tag; drop it when present.
if json_data.startswith('json'):
    json_data = json_data[4:].lstrip()

# Parse the cleaned text in every case, not only when a language tag was
# found, so a plain ``` fence never reaches json.loads.
print(json_data)
flashcards_json = json.loads(json_data)  # json.JSONDecodeError if the reply is not valid JSON

# ensure_ascii=False stores hanzi and tone-marked pinyin as readable UTF-8
# text instead of \uXXXX escapes, matching the utf-8 encoding of the file.
with open("flashcards.json", "w", encoding='utf-8') as f:
    json.dump(flashcards_json, f, ensure_ascii=False)
    


[
  {
    "front": "现在",
    "back": "xiànzài - now"
  },
  {
    "front": "跟",
    "back": "gēn - with; to follow"
  },
  {
    "front": "一起",
    "back": "yìqǐ - together"
  },
  {
    "front": "咱们",
    "back": "zánmen - we; us"
  },
  {
    "front": "走",
    "back": "zǒu - to walk; to go"
  },
  {
    "front": "常",
    "back": "cháng - often; frequently"
  },
  {
    "front": "借",
    "back": "jiè - to borrow; to lend"
  },
  {
    "front": "有时候",
    "back": "yǒu shíhou - sometimes; now and then"
  },
  {
    "front": "时候",
    "back": "shíhou - time"
  },
  {
    "front": "上网",
    "back": "shàng wǎng - to get online; to surf the Internet"
  },
  {
    "front": "网",
    "back": "wǎng - net"
  },
  {
    "front": "查",
    "back": "chá - to consult; to look up"
  },
  {
    "front": "资料",
    "back": "zīliào - material; data"
  },
  {
    "front": "总",
    "back": "zǒng - always"
  },
  {
    "front": "宿舍",
    "back": "sùshè - dormitory"
  },
  {
    "front": "安静",
    "back": "ā