## Import Packages & Function Definitions

In [1]:
from transformers import AutoModelForCausalLM, AutoTokenizer, TextGenerationPipeline
import torch
import accelerate

def get_pipeline(path:str, tokenizer:AutoTokenizer, accelerator:accelerate.Accelerator) -> TextGenerationPipeline:
    """Load a causal LM from ``path`` and wrap it in a text-generation pipeline.

    Args:
        path: Model identifier or local directory handed to
            ``AutoModelForCausalLM.from_pretrained``.
        tokenizer: Tokenizer paired with the model. Its EOS and PAD token ids
            are passed to the pipeline as end-of-sequence ids, and its PAD
            token id as the padding id.
        accelerator: Only used to size the pipeline's ``num_workers``
            (four workers per process reported by ``accelerator.state``).

    Returns:
        A ``TextGenerationPipeline`` built around the half-precision model.
    """
    # NOTE(review): trust_remote_code=True lets the checkpoint repository run
    # its own Python code at load time; only use it with repositories you trust.
    model = AutoModelForCausalLM.from_pretrained(
        path, torch_dtype=torch.float16, device_map='auto', trust_remote_code=True)

    # A tokenizer may lack a PAD (or EOS) token, in which case the id is None.
    # Keep only real ids, without duplicates, preserving EOS-then-PAD order.
    terminators = []
    for token_id in (tokenizer.eos_token_id, tokenizer.pad_token_id):
        if token_id is not None and token_id not in terminators:
            terminators.append(token_id)

    pipeline = TextGenerationPipeline(
        model=model,
        tokenizer=tokenizer,
        num_workers=accelerator.state.num_processes * 4,
        pad_token_id=tokenizer.pad_token_id,
        # An empty list is not a usable stop-id set; pass None so the model's
        # own generation defaults apply instead.
        eos_token_id=terminators or None,
    )

    return pipeline

## Taigi-Llama-2-series: Causal Language Modeling for Taigi

In [2]:
# Checkpoint used for the causal-LM demo cells below.
model_dir = "Bohanlu/Taigi-Llama-2-7B" # or Bohanlu/Taigi-Llama-2-13B for the 13B model
tokenizer = AutoTokenizer.from_pretrained(
    model_dir,
    use_fast=False,
)

# The accelerator is only needed by get_pipeline to size its worker count.
accelerator = accelerate.Accelerator()
pipe = get_pipeline(path=model_dir, tokenizer=tokenizer, accelerator=accelerator)

Detected kernel version 3.10.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

In [3]:
# CausalLM example: Taigi text generation (open-ended continuation of a sentence prefix)
clm_prompt = "我真愛食台灣的"

# Few-shot example: sentiment analysis
# Three labelled examples, then a fourth input whose label is left for the model to fill in.
sentiment_prompt = """Example 1:
輸入：這齣電影真正是蓋讚啦！
情感：正面

Example 2:
輸入：今仔日的天氣真䆀。
情感：負面

Example 3:
輸入：這間餐廳的服務一般般仔爾爾。
情感：中性

Example 4:
輸入：我拄買彼支手機仔的螢幕誠大塊。
情感："""

# Few-shot example: question answering
# Two answered questions, then a third question with the answer left blank.
qa_prompt = """Example 1:
問題：台北101有偌懸？
答案：台北101的高度是五百空八公尺。

Example 2:
問題：台灣上長的溪仔是佗一條？
答案：台灣上長的溪仔是濁水溪，規个長度有百八公里遐爾長。

Example 3:
問題：臺灣上懸的山是啥物？
答案："""

# Few-shot example: Taigi translation (Chinese -> Taigi)
# Three translated pairs, then a fourth Chinese sentence with the Taigi side left blank.
translation_prompt = """Example 1:
中文：你好嗎？
台語：你好無？

Example 2:
中文：我很喜歡吃水果。
台語：我真愛食水果。

Example 3:
中文：是否有人會講台語？
台語：敢有人會曉講臺語？

Example 4:
中文：請問這裡怎麼走到火車站？
台語："""

In [4]:
# Feed all four prompts to the pipeline in a single call; with
# return_full_text=False the displayed results omit the prompt text.
pipe(
    [
        clm_prompt,
        sentiment_prompt,
        qa_prompt,
        translation_prompt,
    ],
    return_full_text=False,
)

[[{'generated_text': '水果，'}],
 [{'generated_text': '正面'}],
 [{'generated_text': '臺灣上懸的山是玉山，伊的懸度是三千九百五十二公尺。'}],
 [{'generated_text': '請問這欲按怎去到火車站？'}]]

## Taigi-Llama-2-Translator-series: A Comprehensive Translator for Traditional Chinese, English, and Taigi (POJ, Hanzi and Hanlo)

In [5]:
# Checkpoint used for the translator demo cells below; rebinds model_dir,
# tokenizer, accelerator and pipe from the earlier causal-LM section.
model_dir = "Bohanlu/Taigi-Llama-2-Translator-7B" # or Bohanlu/Taigi-Llama-2-Translator-13B for the 13B model
tokenizer = AutoTokenizer.from_pretrained(
    model_dir,
    use_fast=False,
)

# The accelerator is only needed by get_pipeline to size its worker count.
accelerator = accelerate.Accelerator()
pipe = get_pipeline(path=model_dir, tokenizer=tokenizer, accelerator=accelerator)

Detected kernel version 3.10.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

In [6]:
# Prompt layout sent to the translator: the source text wrapped in
# [TRANS]...[/TRANS], followed by the bracketed target-language tag.
PROMPT_TEMPLATE = "[TRANS]\n{source_sentence}\n[/TRANS]\n[{target_language}]\n"
def translate(source_sentence:str, target_language:str) -> str:
    """Translate ``source_sentence`` with the module-level ``pipe``.

    Args:
        source_sentence: Text to translate.
        target_language: Tag inserted into the prompt as ``[<tag>]``; this
            notebook uses "HAN", "POJ", "ZH", "HL" and "EN".

    Returns:
        The generated text up to (not including) the first ``"[/"`` marker,
        with surrounding whitespace stripped. If the marker is absent, the
        whole generated text (stripped) is returned.
    """
    prompt = PROMPT_TEMPLATE.format(source_sentence=source_sentence, target_language=target_language)
    out = pipe(prompt, return_full_text=False, repetition_penalty=1.1, do_sample=False)[0]['generated_text']
    # Use partition rather than find()+slice: when "[/" is missing, find()
    # returns -1 and out[:-1] would silently drop the last character.
    translation, _, _ = out.partition("[/")
    return translation.strip()

# English input, translated into each supported written form in turn.
source_sentence = "How are you today？"

print(f"{source_sentence=}\n")
for label, target in (
    ("To Hanzi: ", "HAN"),
    ("To POJ: ", "POJ"),
    ("To Traditional Chinese: ", "ZH"),
    ("To Hanlo: ", "HL"),
):
    print(label + translate(source_sentence, target))

source_sentence='How are you today？'





To Hanzi: 你今仔日好無？
To POJ: Lí kin-á-ji̍t án-chóaⁿ?
To Traditional Chinese: 你今天好嗎？
To Hanlo: 你今仔日好無？


In [7]:
# POJ (romanised Taigi) input, translated into the other written forms.
source_sentence = "Thài-khong pêng-iú, lín hó! Lín chia̍h-pá--bē?"

print(f"{source_sentence=}\n")
for label, target in (
    ("To Hanzi: ", "HAN"),
    ("To English: ", "EN"),
    ("To Traditional Chinese: ", "ZH"),
    ("To Hanlo: ", "HL"),
):
    print(label + translate(source_sentence, target))

source_sentence='Thài-khong pêng-iú, lín hó! Lín chia̍h-pá--bē?'

To Hanzi: 太空朋友，恁好！恁食飽未？
To English: Space friends, you guys are great! Have you eaten yet?
To Traditional Chinese: 太空朋友，你們好！你們吃飽了嗎？
To Hanlo: 太空朋友，lín好！Lín食飽--未？


## Taigi-Llama-2-Chat Series: A Comprehensive Chat Model for Taigi (To be completed soon)