In [13]:
from gtts import gTTS
import os.path

def download_sound(word, dirpath="public/sounds", slow=False):
    """Generate a zh-TW text-to-speech clip for ``word`` and cache it on disk.

    The clip is stored as ``{dirpath}/{word}.mp3``. If that file already
    exists nothing is done, so repeated calls are cheap.

    Args:
        word: Text to synthesise; also used verbatim as the file stem.
        dirpath: Directory that receives the ``.mp3`` (created if missing).
        slow: Forwarded to ``gTTS`` to request the slower reading speed.

    Returns:
        None. Failures are reported with ``print`` rather than raised, so a
        batch run keeps going when a single word cannot be synthesised.
    """
    temp_file = f"{dirpath}/{word}.mp3"
    if os.path.isfile(temp_file):
        return

    # Write to a side file first: the save target is opened before the audio
    # has been fetched, so a failed request would otherwise leave an empty or
    # truncated .mp3 that the isfile() check above treats as a valid cache hit.
    partial_file = f"{temp_file}.part"
    try:
        if dirpath:
            os.makedirs(dirpath, exist_ok=True)
        # Create a gTTS object with Chinese language
        tts = gTTS(text=word, lang='zh-TW', slow=slow)
        tts.save(partial_file)
        # Publish the finished clip under its final name in one step.
        os.replace(partial_file, temp_file)
    except Exception as e:
        print(f"An error occurred: {e}")
        if os.path.isfile(partial_file):
            try:
                os.remove(partial_file)
            except OSError:
                # Best-effort cleanup; the original error was already reported.
                pass
        
# Try the helper on a single character; nothing is fetched if public/sounds/號.mp3 already exists.
download_sound("號")

In [38]:
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin

def _save_remote_file(url, dest_path, headers, timeout):
    """Download ``url`` and write the response body to ``dest_path``.

    Returns True on success. On any failure the error is printed and False is
    returned, so one missing asset does not abort the remaining downloads.
    """
    try:
        response = requests.get(url, headers=headers, timeout=timeout)
        response.raise_for_status()
        os.makedirs(os.path.dirname(dest_path), exist_ok=True)
        with open(dest_path, "wb") as f:
            f.write(response.content)
        return True
    except Exception as e:
        print(f"Error downloading {url}: {e}")
        return False


def extract_stroke_order(character="天", dirpath="public", timeout=10):
    """Fetch the stroke-order images for a character from strokeorder.com.

    For a single character the page ``/chinese/{character}`` is scraped and
    the images found inside ``div.stroke-article-content`` are saved as
    ``{dirpath}/animation/{character}.gif`` and
    ``{dirpath}/stroke/{character}.png``. When both files already exist no
    request is made.

    Args:
        character: One character, or a string of several characters, in which
            case each character is processed in turn.
        dirpath: Root directory that holds the ``animation`` and ``stroke``
            sub-directories (created on demand).
        timeout: Seconds to wait on each HTTP request before giving up.

    Returns:
        For a single character: the list of ``src`` values of the images found
        on the page, or ``[]`` when both files were already present or the
        page could not be fetched. For an empty or multi-character string:
        ``None``.
    """
    if len(character) > 1:
        for c in character:
            # Forward dirpath/timeout so every character honours the caller's
            # settings instead of falling back to the defaults.
            extract_stroke_order(c, dirpath, timeout)
        return
    if len(character) == 0:
        return

    animation_path = f"{dirpath}/animation/{character}.gif"
    stroke_path = f"{dirpath}/stroke/{character}.png"

    if os.path.isfile(animation_path) and os.path.isfile(stroke_path):
        return []

    base_url = "https://www.strokeorder.com"
    url = f"{base_url}/chinese/{character}"
    # Maps the site's asset path prefix to the local file it should be saved as.
    destinations = (
        ("/assets/bishun/animation/", animation_path),
        ("/assets/bishun/stroke/", stroke_path),
    )
    try:
        # Fetch the webpage content
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
        }
        response = requests.get(url, headers=headers, timeout=timeout)
        response.raise_for_status()  # Raise an exception for HTTP errors

        # Parse the HTML content
        soup = BeautifulSoup(response.text, 'html.parser')

        # Collect the src of every image inside the stroke-order article divs
        image_sources = []
        for div in soup.find_all('div', class_='stroke-article-content'):
            for img in div.find_all('img'):
                src = img.get('src')
                if src:  # Only add if src exists
                    image_sources.append(src)

        for src in image_sources:
            for prefix, dest_path in destinations:
                if src.startswith(prefix):
                    _save_remote_file(urljoin(base_url, src), dest_path, headers, timeout)
                    break

        return image_sources

    except requests.exceptions.RequestException as e:
        print(f"Error fetching the webpage: {e}")
        return []
    except Exception as e:
        print(f"An error occurred: {e}")
        return []
    
# Run with the defaults (character "天", output under public/).
extract_stroke_order()

[]

In [40]:
import json

def import_decks(filepath="decks.txt", public_dir="public"):
    """Convert a tab-separated deck listing into ``{public_dir}/decks.json``.

    Each usable line of ``filepath`` has exactly five tab-separated fields:
    word, meaning, deck name, pinyin, and the characters whose stroke images
    are needed. Lines starting with ``#`` and blank lines are ignored; any
    other line without five fields is printed and skipped.

    For every row the pronunciation clip and per-character stroke images are
    fetched (via ``download_sound`` / ``extract_stroke_order``). A row is only
    written to the JSON when its ``.mp3`` and every character's ``.png`` and
    ``.gif`` are present on disk afterwards.

    Args:
        filepath: Path of the tab-separated input file (UTF-8).
        public_dir: Directory holding ``sounds/``, ``stroke/`` and
            ``animation/`` and receiving ``decks.json``.

    Returns:
        None. The result is written to ``{public_dir}/decks.json`` as
        ``{"decks": [{"name": ..., "items": [...]}, ...]}``.
    """
    grouped = {}  # deck name -> list of card dicts, in first-seen order
    with open(filepath, "r", encoding="utf-8") as f:
        for raw_line in f:
            line = raw_line.rstrip("\n")
            if not line.strip() or line.startswith("#"):
                continue

            segments = line.split("\t")
            if len(segments) != 5:
                print(line)
                continue

            # Strip once so the downloaded file names and the existence checks
            # below are guaranteed to refer to the same paths.
            word, means, deck_name, pinyin, chars_text = (s.strip() for s in segments)
            chars = list(chars_text)
            try:
                download_sound(word, dirpath=f"{public_dir}/sounds")
                for c in chars:
                    extract_stroke_order(c, dirpath=public_dir)

                if not os.path.isfile(f"{public_dir}/sounds/{word}.mp3"):
                    continue

                is_complete = all(
                    os.path.isfile(f"{public_dir}/stroke/{c}.png")
                    and os.path.isfile(f"{public_dir}/animation/{c}.gif")
                    for c in chars
                )
                if not is_complete:
                    continue

                grouped.setdefault(deck_name, []).append({
                    "mp3": f"sounds/{word}.mp3",
                    "word": word,
                    "means": means,
                    "pinyin": pinyin,
                    "stroke": [f"stroke/{c}.png" for c in chars],
                    "animation": [f"animation/{c}.gif" for c in chars]
                })
            except Exception as e:
                # Keep importing the remaining rows, but say what went wrong.
                print(f"Error {line}: {e}")

    decks = {
        "decks": [{"name": name, "items": items} for name, items in grouped.items()]
    }
    os.makedirs(public_dir, exist_ok=True)
    with open(f"{public_dir}/decks.json", "w", encoding="utf-8") as f:
        json.dump(decks, f, indent=4, ensure_ascii=False)
    
# Rebuild public/decks.json from the default decks.txt.
import_decks()