<a href="https://colab.research.google.com/github/ss1111119/Indigenous-Language-e-Paradise/blob/main/%E6%97%8F%E8%AA%9Ee%E6%A8%82%E5%9C%92%E9%AB%98%E4%B8%AD.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# 基本詞彙
import pandas as pd
import requests
from xml.etree import ElementTree
import time

def fetch_and_parse_xml(url, timeout=30):
    """Download *url* and parse the response body as XML.

    Args:
        url: Address of the XML document to fetch.
        timeout: Seconds to wait for the server. ``requests.get`` has no
            default timeout, so without one a stalled server would block
            this call forever.

    Returns:
        The root element of the parsed document, or ``None`` when the
        request raises, the status code is not 200, or parsing fails.
    """
    try:
        response = requests.get(url, timeout=timeout)
        if response.status_code == 200:
            return ElementTree.fromstring(response.content)
        # Report non-200 answers instead of returning None silently.
        print(f"Failed to fetch XML data from {url}: HTTP {response.status_code}")
    except Exception as e:
        # Best-effort: every failure is reported to stdout, never raised.
        print(f"Error fetching or parsing XML data: {e}")
    return None

def collect_word_info(xml_element):
    """Build one row per ``<item>`` child of *xml_element*.

    Args:
        xml_element: Parsed XML element whose direct ``item`` children each
            carry ``autoId``, ``classNo``, ``wordOrder``, ``wordAb`` and
            ``wordCh`` sub-elements.

    Returns:
        A list of dicts with the keys ``AutoId``, ``ClassNo``,
        ``Word Order``, ``Amis``, ``Chinese`` and ``MP3 URL``. A missing or
        empty sub-element contributes an empty string instead of raising.
    """
    def text_of(item, tag):
        # findtext() gives None for a missing tag and '' for an empty one;
        # both collapse to '' so .strip() can never hit None.
        return (item.findtext(tag) or "").strip()

    data = []
    for item in xml_element.findall('item'):
        classNo = text_of(item, 'classNo')
        wordOrder = text_of(item, 'wordOrder')
        # The audio file name is derived from the class and word order.
        mp3Url = f"https://klokah.tw/extension/sp_senior/sound/2/1word/{classNo}_{wordOrder}.mp3"
        data.append({
            "AutoId": text_of(item, 'autoId'),
            "ClassNo": classNo,
            "Word Order": wordOrder,
            "Amis": text_of(item, 'wordAb'),
            "Chinese": text_of(item, 'wordCh'),
            "MP3 URL": mp3Url,
        })
    return data

# Accumulates the rows collected from every vocabulary XML file.
all_word_info = []

# Numbers of the XML files to process: 1 through 15 plus 211, 212 and 213.
url_numbers = list(range(1, 16)) + [211, 212, 213]

for number in url_numbers:
    xml_url = f"https://web.klokah.tw/extension/sp_data/senior/2/{number}.xml"
    xml_element = fetch_and_parse_xml(xml_url)
    if xml_element is not None:
        word_info = collect_word_info(xml_element)
        all_word_info.extend(word_info)  # append this file's rows to the overall list
    time.sleep(15)  # pause 15 seconds between requests
    # NOTE(review): this message is printed even when the fetch returned None.
    print(f"Processed file {number}.")

# Save every collected row into a single Excel workbook.
df = pd.DataFrame(all_word_info)
df.to_excel("all_word_info.xlsx", index=False)
print("All word information has been saved to all_word_info.xlsx.")

In [None]:
# 生活百句
import pandas as pd
import requests
from xml.etree import ElementTree
import time

def fetch_and_parse_xml(url, timeout=30):
    """Fetch the XML document at *url* and return its root element.

    Args:
        url: Address of the XML document.
        timeout: Seconds to wait for the server; ``requests.get`` would
            otherwise wait without limit.

    Returns:
        The parsed root element, or ``None`` if the request raised, the
        status code was not 200, or the body could not be parsed.
    """
    try:
        response = requests.get(url, timeout=timeout)
        if response.status_code == 200:
            return ElementTree.fromstring(response.content)
        # Make a non-200 answer visible instead of failing silently.
        print(f"Failed to fetch XML data from {url}: HTTP {response.status_code}")
    except Exception as e:
        # Best-effort: failures are printed and turned into None.
        print(f"Error fetching XML data: {e}")
    return None

def collect_dialogue_info(xml_element):
    """Build one row per usable sentence part of every ``<item>``.

    For each ``sentenceOrder`` child of an item, the parts ``A``, ``B`` and
    ``C`` are read from ``sentence{part}Ab`` / ``sentence{part}Ch``. A part
    is kept only when both elements exist with non-empty text and the
    ``Ch`` text is not the template placeholder string.

    Args:
        xml_element: Parsed XML element whose direct children are ``item``.

    Returns:
        A list of dicts with the keys ``AutoId``, ``Part``, ``Amis``,
        ``Chinese`` and ``MP3 URL``. Missing or empty ``classNo``,
        ``autoId`` or ``sentenceOrder`` values become empty strings
        instead of raising ``AttributeError``.
    """
    # Placeholder text left in unused slots of the source data.
    placeholder = "如果會話只有兩句本欄位請勿更動"

    def text_of(node):
        # Tolerate both a missing element and an element without text.
        if node is None or node.text is None:
            return ''
        return node.text.strip()

    data = []
    for item in xml_element.findall('item'):
        classNo = text_of(item.find('classNo'))
        # Looked up once per item; it does not depend on the part.
        autoId = text_of(item.find('autoId'))
        for sentenceOrder in item.findall('sentenceOrder'):
            order = text_of(sentenceOrder)
            # All parts of one sentence order share the same audio file.
            mp3_url = f"https://klokah.tw/extension/sp_senior/sound/2/2sentence/{classNo}_{order}.mp3"
            for part in ('A', 'B', 'C'):
                ab_tag = item.find(f'sentence{part}Ab')
                ch_tag = item.find(f'sentence{part}Ch')
                if ab_tag is None or ch_tag is None:
                    continue
                if not ab_tag.text or not ch_tag.text:
                    continue
                ch_text = ch_tag.text.strip()
                if ch_text == placeholder:
                    continue
                data.append({
                    "AutoId": autoId,
                    "Part": part,
                    "Amis": ab_tag.text.strip(),
                    "Chinese": ch_text,
                    "MP3 URL": mp3_url,
                })
    return data

# Accumulates the rows collected from every sentence XML file.
all_data = []

url_numbers = list(range(16, 20)) + [214, 215]  # files 16 through 19 plus 214 and 215

for number in url_numbers:
    xml_url = f"https://web.klokah.tw/extension/sp_data/senior/2/{number}.xml"
    xml_element = fetch_and_parse_xml(xml_url)
    if xml_element is not None:
        dialogue_info = collect_dialogue_info(xml_element)
        all_data.extend(dialogue_info)
    time.sleep(15)  # Sleep to avoid hitting the server too frequently
    # NOTE(review): this message is printed even when the fetch returned None.
    print(f"Processed file: {number}.xml")

# Write every collected row to one Excel workbook.
df = pd.DataFrame(all_data)
df.to_excel("all_dialogue_info.xlsx", index=False)
print("All dialogue information has been saved to all_dialogue_info.xlsx.")


In [None]:
# 看圖識字
import pandas as pd
import requests
from xml.etree import ElementTree
import time

def fetch_and_parse_xml(url, timeout=30):
    """Retrieve *url* over HTTP and parse the body as XML.

    Args:
        url: Address of the XML document.
        timeout: Seconds before the request is abandoned; without it
            ``requests.get`` can wait indefinitely.

    Returns:
        The root element on success, otherwise ``None`` (request error,
        non-200 status code, or malformed XML).
    """
    try:
        response = requests.get(url, timeout=timeout)
        if response.status_code == 200:
            return ElementTree.fromstring(response.content)
        # Surface unexpected status codes rather than dropping them.
        print(f"Failed to fetch XML data from {url}: HTTP {response.status_code}")
    except Exception as e:
        # Best-effort: report the failure and fall through to None.
        print(f"Error fetching XML data: {e}")
    return None

def collect_recognize_info(xml_element):
    """Build one row per ``<item>`` child of *xml_element*.

    Args:
        xml_element: Parsed XML element whose ``item`` children carry
            ``autoId``, ``classNo``, ``recognizeOrder``, ``recognizeAb``
            and ``recognizeCh`` sub-elements.

    Returns:
        A list of dicts with the keys ``AutoId``, ``Class No``, ``Order``,
        ``Amis``, ``Chinese``, ``MP3 URL`` and ``Image URL``. Missing or
        empty sub-elements yield empty strings.
    """
    def text_of(item, tag):
        # An element can exist with text None (e.g. <recognizeCh/>), so
        # checking only for the element is not enough; findtext() covers
        # both the missing and the empty case.
        return (item.findtext(tag) or '').strip()

    data = []
    for item in xml_element.findall('item'):
        class_no = text_of(item, 'classNo')
        recognize_order = text_of(item, 'recognizeOrder')

        # Audio and image share the "<class>_<order>" file stem.
        mp3_url = f"https://klokah.tw/extension/sp_senior/sound/2/3recognize/{class_no}_{recognize_order}.mp3"
        image_url = f"https://klokah.tw/extension/sp_senior/graphics_100x100/recognize/{class_no}_{recognize_order}.png"

        data.append({
            "AutoId": text_of(item, 'autoId'),
            "Class No": class_no,
            "Order": recognize_order,
            "Amis": text_of(item, 'recognizeAb'),
            "Chinese": text_of(item, 'recognizeCh'),
            "MP3 URL": mp3_url,
            "Image URL": image_url,
        })
    return data

# Accumulates the rows collected from every XML file.
all_data = []

# Numbers of the XML files to process.
url_numbers = [20, 21, 22, 23, 24]

for number in url_numbers:
    xml_url = f"https://web.klokah.tw/extension/sp_data/senior/2/{number}.xml"
    xml_element = fetch_and_parse_xml(xml_url)
    if xml_element is not None:
        recognize_info = collect_recognize_info(xml_element)
        all_data.extend(recognize_info)
    time.sleep(15)  # avoid sending requests to the server too frequently
    # NOTE(review): this message is printed even when the fetch returned None.
    print(f"Processed file: {number}.xml")

# Save every collected row into a single Excel file.
df = pd.DataFrame(all_data)
df.to_excel("all_recognize_info.xlsx", index=False)
print("All recognize information has been saved to all_recognize_info.xlsx.")


In [None]:
# 選擇題(一)
import pandas as pd
import requests
from xml.etree import ElementTree
import time

def fetch_and_parse_xml(url, timeout=30):
    """Download and parse the XML document located at *url*.

    Args:
        url: Address of the XML document.
        timeout: Upper bound in seconds for the HTTP request, which would
            otherwise have no time limit.

    Returns:
        The root element of the document, or ``None`` on any request
        error, non-200 status code, or parse failure.
    """
    try:
        response = requests.get(url, timeout=timeout)
        if response.status_code == 200:
            return ElementTree.fromstring(response.content)
        # A non-200 status is reported so the skipped file is traceable.
        print(f"Failed to fetch XML data from {url}: HTTP {response.status_code}")
    except Exception as e:
        # Best-effort: print the problem and return None below.
        print(f"Error fetching XML data: {e}")
    return None

def collect_choice_one_info(xml_element):
    """Build one row per answer option of every ``<item>``.

    Options ``A``, ``B`` and ``C`` are read from ``choiceOne{label}Ab`` and
    ``choiceOne{label}Ch``. Option ``C`` is dropped when its ``Ab`` text is
    empty; ``A`` and ``B`` are always emitted.

    Args:
        xml_element: Parsed XML element whose direct children are ``item``.

    Returns:
        A list of dicts with the keys ``AutoId``, ``Class No``, ``Order``,
        ``Label``, ``Amis``, ``Chinese``, ``Audio URL`` and ``Image URL``.
    """
    def clean(node):
        # Missing element or missing text both count as an empty string.
        if node is None or node.text is None:
            return ''
        return node.text.strip()

    rows = []
    for entry in xml_element.findall('item'):
        class_no = entry.find('classNo').text.strip()
        order = entry.find('choiceOneOrder').text.strip()
        auto_id = clean(entry.find('autoId'))

        # Every option of an item points at the same audio and image file.
        audio = f"https://klokah.tw/extension/sp_senior/sound/2/4choiceOne/{class_no}_{order}.mp3"
        image = f"https://klokah.tw/extension/sp_senior/graphics_100x100/choiceOne/{class_no}_{order}.png"

        for label in ('A', 'B', 'C'):
            amis = clean(entry.find(f'choiceOne{label}Ab'))
            # Skip option "C" when it carries no data.
            if label == 'C' and not amis:
                continue
            rows.append({
                "AutoId": auto_id,
                "Class No": class_no,
                "Order": order,
                "Label": label,
                "Amis": amis,
                "Chinese": clean(entry.find(f'choiceOne{label}Ch')),
                "Audio URL": audio,
                "Image URL": image,
            })
    return rows

# Accumulates the rows collected from every XML file.
all_data = []

# Numbers of the XML files to process.
url_numbers = range(26, 31)  # i.e. files 26 through 30

for number in url_numbers:
    xml_url = f"https://web.klokah.tw/extension/sp_data/senior/2/{number}.xml"
    xml_element = fetch_and_parse_xml(xml_url)
    if xml_element is not None:
        choice_one_info = collect_choice_one_info(xml_element)
        all_data.extend(choice_one_info)
    time.sleep(2)  # avoid sending requests to the server too frequently
    # NOTE(review): this message is printed even when the fetch returned None.
    print(f"Processed file: {number}.xml")

# Save every collected row into a single Excel file.
df = pd.DataFrame(all_data)
df.to_excel("all_choice_one_info.xlsx", index=False)
print("All choice one information has been saved to all_choice_one_info.xlsx.")


In [None]:
# 選擇題(二)
import pandas as pd
import requests
from xml.etree import ElementTree as ET
import time

def fetch_and_parse_xml(url, timeout=30):
    """Download *url* and parse the response body as XML.

    Args:
        url: Address of the XML document.
        timeout: Seconds to wait for the server; ``requests.get`` applies
            no timeout on its own and could hang forever.

    Returns:
        The root element, or ``None`` if the request raised, returned a
        status other than 200, or the body was not parseable.
    """
    try:
        response = requests.get(url, timeout=timeout)
        if response.status_code == 200:
            return ET.fromstring(response.content)
        # Report non-200 answers instead of returning None silently.
        print(f"Failed to fetch XML data from {url}: HTTP {response.status_code}")
    except Exception as e:
        # Best-effort: every failure is printed, never raised.
        print(f"Error fetching or parsing XML data: {e}")
    return None

def collect_choice_two_info(xml_element):
    """Build one row per answer option of every ``<item>``.

    Options ``A``, ``B`` and ``C`` are emitted only when both the
    ``choiceTwo{label}Ab`` and ``choiceTwo{label}Ch`` elements exist;
    elements without text contribute empty strings.

    Args:
        xml_element: Parsed XML element whose direct children are ``item``.

    Returns:
        A list of dicts with the keys ``AutoId``, ``Class No``, ``Order``,
        ``Label``, ``Amis``, ``Chinese`` and ``Audio URL``.
    """
    def stripped(node):
        # An element without text counts as an empty string.
        return node.text.strip() if node.text else ""

    rows = []
    for entry in xml_element.findall('item'):
        class_no = entry.find('classNo').text.strip()
        order = entry.find('choiceTwoOrder').text.strip()
        auto_id = entry.find('autoId').text

        for label in ('A', 'B', 'C'):
            ab_node = entry.find(f'choiceTwo{label}Ab')
            ch_node = entry.find(f'choiceTwo{label}Ch')
            if ab_node is None or ch_node is None:
                continue
            rows.append({
                "AutoId": auto_id,
                "Class No": class_no,
                "Order": order,
                "Label": label,
                "Amis": stripped(ab_node),
                "Chinese": stripped(ch_node),
                # Each option has its own audio file, keyed by the label.
                "Audio URL": f"https://klokah.tw/extension/sp_senior/sound/2/5choiceTwo/{class_no}_{order}_{label}.mp3",
            })
    return rows

# Accumulates the rows collected from every XML file.
all_data = []

# Process each XML file in the given range.
for number in range(31, 36):  # files 31 through 35 inclusive
    xml_url = f"https://web.klokah.tw/extension/sp_data/senior/2/{number}.xml"
    xml_element = fetch_and_parse_xml(xml_url)
    if xml_element is not None:
        choice_two_info = collect_choice_two_info(xml_element)
        all_data.extend(choice_two_info)
    time.sleep(2)  # avoid sending requests to the server too frequently
    # NOTE(review): this message is printed even when the fetch returned None.
    print(f"Processed file: {number}.xml")

# Save every collected row into a single Excel file.
df = pd.DataFrame(all_data)
df.to_excel("all_choice_two_info.xlsx", index=False)
print("All choice two information has been saved to all_choice_two_info.xlsx.")


In [None]:
# 選擇題(三)
import pandas as pd
import requests
from xml.etree import ElementTree as ET
import time

def fetch_and_parse_xml(url, timeout=30):
    """Fetch the XML document at *url* and return its root element.

    Args:
        url: Address of the XML document.
        timeout: Seconds to wait before the request is abandoned; the
            request would otherwise have no time limit.

    Returns:
        The parsed root element, or ``None`` on a request error, a
        non-200 status code, or malformed XML.
    """
    try:
        response = requests.get(url, timeout=timeout)
        if response.status_code == 200:
            return ET.fromstring(response.content)
        # Make a non-200 answer visible instead of failing silently.
        print(f"Failed to fetch XML data from {url}: HTTP {response.status_code}")
    except Exception as e:
        # Best-effort: failures are printed and turned into None.
        print(f"Error fetching or parsing XML data: {e}")
    return None

def collect_choice_three_info(xml_element):
    """Build one row per ``<item>`` child of *xml_element*.

    Args:
        xml_element: Parsed XML element whose ``item`` children carry
            ``autoId``, ``classNo``, ``choiceThreeOrder``,
            ``choiceThreeAb`` and ``choiceThreeCh`` sub-elements.

    Returns:
        A list of dicts with the keys ``AutoId``, ``Class No``, ``Order``,
        ``Amis``, ``Chinese``, ``Audio URL`` and ``Image URL``. Missing or
        empty sub-elements yield empty strings.
    """
    def text_of(item, tag):
        # Checking only that the element exists is insufficient: an empty
        # element has text None. findtext() handles both situations.
        return (item.findtext(tag) or "").strip()

    data = []
    for item in xml_element.findall('item'):
        classNo = text_of(item, 'classNo')
        choiceThreeOrder = text_of(item, 'choiceThreeOrder')

        # Audio and image share the "<class>_<order>" file stem.
        audio_url = f"https://klokah.tw/extension/sp_senior/sound/2/7choiceThree/{classNo}_{choiceThreeOrder}.mp3"
        image_url = f"https://klokah.tw/extension/sp_senior/graphics_100x100/choiceThree/{classNo}_{choiceThreeOrder}.png"

        data.append({
            "AutoId": text_of(item, 'autoId'),
            "Class No": classNo,
            "Order": choiceThreeOrder,
            "Amis": text_of(item, 'choiceThreeAb'),
            "Chinese": text_of(item, 'choiceThreeCh'),
            "Audio URL": audio_url,
            "Image URL": image_url,
        })

    return data

# Accumulates the rows collected from every XML file.
all_data = []

# Process each XML file in the given range.
for number in range(37, 42):  # files 37 through 41 (the range end is exclusive)
    xml_url = f"https://web.klokah.tw/extension/sp_data/senior/2/{number}.xml"
    xml_element = fetch_and_parse_xml(xml_url)
    if xml_element is not None:
        choice_three_info = collect_choice_three_info(xml_element)
        all_data.extend(choice_three_info)
    time.sleep(2)  # avoid sending requests to the server too frequently
    # NOTE(review): this message is printed even when the fetch returned None.
    print(f"Processed file: {number}.xml")

# Save every collected row into a single Excel file.
df = pd.DataFrame(all_data)
df.to_excel("all_choice_three_info.xlsx", index=False)
print("All choice three information has been saved to all_choice_three_info.xlsx.")



In [None]:
# 念念看
import requests
import xml.etree.ElementTree as ET
import pandas as pd
import time

def fetch_and_parse_xml(url, timeout=30):
    """Retrieve *url* over HTTP and parse the body as XML.

    Args:
        url: Address of the XML document.
        timeout: Upper bound in seconds for the request; without it
            ``requests.get`` can wait indefinitely.

    Returns:
        The root element on success, otherwise ``None`` (request error,
        non-200 status code, or malformed XML).
    """
    try:
        response = requests.get(url, timeout=timeout)
        if response.status_code == 200:
            return ET.fromstring(response.content)
        # Surface unexpected status codes rather than dropping them.
        print(f"Failed to fetch XML data from {url}: HTTP {response.status_code}")
    except Exception as e:
        # Best-effort: report the failure and fall through to None.
        print(f"Error fetching or parsing XML data: {e}")
    return None

def save_oral_reading_info_to_excel(xml_root, filename):
    """Write the oral-reading rows of *xml_root* to ``./<filename>.xlsx``.

    Every ``<item>`` produces five rows, one for each of the parts ``A``
    through ``E``, read from ``oralReading{part}Ab`` and
    ``oralReading{part}Ch``. Missing or empty elements yield empty strings
    instead of raising ``AttributeError``.

    Args:
        xml_root: Parsed XML element whose direct children are ``item``.
        filename: Output file name without the ``.xlsx`` extension.

    Returns:
        None. The workbook is written with the openpyxl engine and the
        saved path is printed.
    """
    def text_of(item, tag):
        # Checking only that the element exists is insufficient: an empty
        # element has text None. findtext() handles both situations.
        return (item.findtext(tag) or '').strip()

    data = []
    for item in xml_root.findall('item'):
        autoId = text_of(item, 'autoId')
        classNo = text_of(item, 'classNo')
        oralReadingOrder = text_of(item, 'oralReadingOrder')

        for dialogue in ('A', 'B', 'C', 'D', 'E'):
            # Each part has its own audio file, keyed by the part letter.
            audio_url = f"https://klokah.tw/extension/sp_senior/sound/2/8oralReading/{classNo}_{oralReadingOrder}_{dialogue}.mp3"

            data.append({
                "AutoId": autoId,
                "ClassNo": classNo,
                "Oral Reading Order": oralReadingOrder,
                "Dialogue": dialogue,
                "Amis": text_of(item, f'oralReading{dialogue}Ab'),
                "Chinese Translation": text_of(item, f'oralReading{dialogue}Ch'),
                "Audio URL": audio_url,
            })

    df = pd.DataFrame(data)
    filepath = f"./{filename}.xlsx"  # Adjust the file path if necessary
    df.to_excel(filepath, index=False, engine='openpyxl')
    print(f"Data saved to {filepath}")

# Fetch file 42 only and, when it parses, export its rows to
# oral_reading_info_42.xlsx.
xml_url = "https://web.klokah.tw/extension/sp_data/senior/2/42.xml"
xml_root = fetch_and_parse_xml(xml_url)
if xml_root is not None:
    save_oral_reading_info_to_excel(xml_root, "oral_reading_info_42")


Data saved to ./oral_reading_info_42.xlsx


In [None]:
# 簡短對話
import pandas as pd
import requests
from xml.etree import ElementTree as ET
import time

def fetch_and_parse_xml(url, timeout=30):
    """Download and parse the XML document located at *url*.

    Args:
        url: Address of the XML document.
        timeout: Seconds to wait for the server; the request would
            otherwise have no time limit.

    Returns:
        The root element of the document, or ``None`` on any request
        error, non-200 status code, or parse failure.
    """
    try:
        response = requests.get(url, timeout=timeout)
        if response.status_code == 200:
            return ET.fromstring(response.content)
        # Report the status code so a skipped file is traceable.
        print(f"讀取 XML 數據失敗，狀態碼: {response.status_code}")
    except Exception as e:
        # Best-effort: print the problem and return None below.
        print(f"讀取或解析 XML 數據時出錯: {e}")
    return None

def save_dialogue_info_to_excel(xml_element, filename):
    """Write the dialogue rows of *xml_element* to ``<filename>.xlsx``.

    For every ``<item>``, the parts ``A`` through ``E`` are emitted when
    both ``dialogue{part}Ab`` and ``dialogue{part}Ch`` exist; elements
    without text contribute empty strings.

    Args:
        xml_element: Parsed XML element whose direct children are ``item``.
        filename: Output file name without the ``.xlsx`` extension.

    Returns:
        None. The workbook is written and a confirmation is printed.
    """
    def stripped(node):
        # An element without text counts as an empty string.
        return node.text.strip() if node.text else ''

    rows = []
    for entry in xml_element.findall('item'):
        auto_id = entry.find('autoId').text.strip()
        class_no = entry.find('classNo').text.strip()
        order = entry.find('dialogueOrder').text.strip()

        # Up to five dialogue parts are looked for.
        for label in ('A', 'B', 'C', 'D', 'E'):
            ab_node = entry.find(f'dialogue{label}Ab')
            ch_node = entry.find(f'dialogue{label}Ch')
            if ab_node is None or ch_node is None:
                continue
            rows.append({
                "AutoId": auto_id,
                "班級號碼": class_no,
                "對話順序": f"{order}{label}",
                "阿美語": stripped(ab_node),
                "中文翻譯": stripped(ch_node),
                # The audio URL is built from classNo, dialogueOrder and the part.
                "音檔 URL": f"https://klokah.tw/extension/sp_senior/sound/2/9dialogue/{class_no}_{order}_{label}.mp3",
            })

    pd.DataFrame(rows).to_excel(f"{filename}.xlsx", index=False)
    print(f"檔案已保存為 {filename}.xlsx")

# URL of the XML data to fetch (file 43).
xml_url = "https://web.klokah.tw/extension/sp_data/senior/2/43.xml"
xml_element = fetch_and_parse_xml(xml_url)
if xml_element is not None:
    filename = f"dialogue_info_43_{int(time.time())}"  # timestamp suffix avoids file-name collisions
    save_dialogue_info_to_excel(xml_element, filename)


In [None]:
# 看圖說話
import requests
import xml.etree.ElementTree as ET
import pandas as pd

def fetch_and_parse_xml(url, timeout=30):
    """Fetch the XML document at *url* and return its root element.

    Args:
        url: Address of the XML document.
        timeout: Seconds to wait for the server; ``requests.get`` applies
            no timeout on its own and could hang forever.

    Returns:
        The parsed root element, or ``None`` when the request raises, the
        status code is not 200, or the body cannot be parsed.
    """
    try:
        response = requests.get(url, timeout=timeout)
        if response.status_code == 200:
            return ET.fromstring(response.content)
        else:
            print(f"獲取 XML 資料失敗。狀態碼: {response.status_code}")
    except Exception as e:
        # Best-effort: every failure is printed, never raised.
        print(f"獲取或解析 XML 資料時出錯: {e}")
    return None

def xml_to_excel(xml_root, filename):
    """Write the picture-talk rows of *xml_root* to ``<filename>.xlsx``.

    Each ``<item>`` yields one row built from its ``autoId``,
    ``pictureTalkOrder``, ``pictureTalkAb`` and ``pictureTalkCh``
    sub-elements; a missing element or missing text becomes ``''``.

    Args:
        xml_root: Parsed XML element whose direct children are ``item``.
        filename: Output file name without the ``.xlsx`` extension.

    Returns:
        None. The workbook is written and a confirmation is printed.
    """
    def clean(node):
        # Missing element or missing text both count as an empty string.
        if node is None or node.text is None:
            return ''
        return node.text.strip()

    rows = []
    for entry in xml_root.findall('item'):
        order = clean(entry.find('pictureTalkOrder'))
        rows.append({
            "AutoId": clean(entry.find('autoId')),
            "看圖識字順序": order,
            "阿美語描述": clean(entry.find('pictureTalkAb')),
            "中文翻譯": clean(entry.find('pictureTalkCh')),
            "音檔 URL": f"https://klokah.tw/extension/sp_senior/sound/2/10pictureTalk/{order}_1.mp3",
            # The image is addressed by the order value alone.
            "圖片 URL": f"https://klokah.tw/extension/sp_senior/graphics_100x100/pictureTalk/{order}.png",
        })

    pd.DataFrame(rows).to_excel(f"{filename}.xlsx", index=False)
    print(f"檔案已保存為 {filename}.xlsx")

# URL of the XML data to fetch (file 44).
xml_url = "https://web.klokah.tw/extension/sp_data/senior/2/44.xml"

# Fetch and parse the XML data.
xml_root = fetch_and_parse_xml(xml_url)
if xml_root is not None:
    # Export the parsed XML data to an Excel file.
    xml_to_excel(xml_root, "picture_talk_info_44")
