In [3]:
from transformers import pipeline

# 推薦使用 Whisper ，這個模型在多語言和不同口音的語音識別任務中表現更好
transcriber = pipeline(model="openai/whisper-large-v2")

# 轉換
result = transcriber("demo.flac")
# 輸出
print(result['text'])

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


你好,我是小朱,請多多指教


檢查緩存位置

In [None]:
from transformers import TRANSFORMERS_CACHE

print(f"緩存位置在：{TRANSFORMERS_CACHE}")

輸入多個音頻

In [None]:
from transformers import pipeline

# 推薦使用 Whisper ，這個模型在多語言和不同口音的語音識別任務中表現更好
transcriber = pipeline(
    model="openai/whisper-large-v2"
)

# 轉換
results = transcriber(
    ["demo.wav", "demo.flac"]
)
# 遍歷結果並打印
for result in results:
    print(result['text'])

指定使用 GPU

In [None]:
from transformers import pipeline

# 推薦使用 Whisper ，這個模型在多語言和不同口音的語音識別任務中表現更好
transcriber = pipeline(
    model="openai/whisper-large-v2",
    device=0
)

# 轉換
result = transcriber("demo.flac")
# 輸出
print(result['text'])

自動檢測系統硬件配置，包括 CPU 和 GPU，並決定如何分配模型的不同部分。

In [None]:
from transformers import pipeline

# 推薦使用 Whisper ，這個模型在多語言和不同口音的語音識別任務中表現更好
transcriber = pipeline(
    model="openai/whisper-large-v2",
    device_map="auto"
)

# 轉換
result = transcriber("demo.flac")
# 輸出
print(result['text'])

設置批次大小以提高推理速度

In [8]:
from transformers import pipeline

# 推薦使用 Whisper ，這個模型在多語言和不同口音的語音識別任務中表現更好
transcriber = pipeline(
    model="openai/whisper-large-v2",
    batch_size=4
)

# 轉換
result = transcriber("demo.flac")
# 輸出
print(result['text'])

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


你好,我是小朱,請多多指教


處理大量數據集

In [None]:
from transformers import pipeline

# 假設 my_data 是一個包含 1000 筆文本數據的列表
my_data = [f"My example {i}" for i in range(1000)]

# 創建 GPT-2 文本生成 pipeline
pipe = pipeline(model="openai-community/gpt2")

# 定義數據生成器
def data_generator(data):
    for item in data:
        yield item

# 使用 pipeline 處理數據生成器
generated_characters = 0
for output in pipe(data_generator(my_data)):
    # 假設輸出是包含生成文本的字典
    generated_text = output['generated_text']
    generated_characters += len(generated_text)

print(f"Total generated characters: {generated_characters}")


假設是文本也是相同

In [None]:
from transformers import pipeline

# 假設 my_data 是一個包含 1000 筆文本數據的列表
my_data = [f"My example {i}" for i in range(1000)]

# 創建 GPT-2 文本生成 pipeline
pipe = pipeline(model="openai-community/gpt2")

# 定義數據生成器
def data_generator(data):
    for item in data:
        yield item

# 使用 pipeline 處理數據生成器
generated_characters = 0
for output in pipe(data_generator(my_data)):
    # 假設輸出是包含生成文本的字典
    generated_text = output['generated_text']
    generated_characters += len(generated_text)

print(f"Total generated characters: {generated_characters}")


圖像分類

In [12]:
from transformers import pipeline

vision_classifier = pipeline(
    model="google/vit-base-patch16-224"
)

results = vision_classifier(images="tiger.jpeg")
# 優化輸出格式
formatted_results = [
    {"Label": result["label"], "Confidence": round(result["score"], 4)}
    for result in results
]
# 輸出結果
for res in formatted_results:
    print(f"Label: {res['Label']}, Confidence: {res['Confidence']}")

Label: tiger, Panthera tigris, Confidence: 0.959
Label: tiger cat, Confidence: 0.0306
Label: jaguar, panther, Panthera onca, Felis onca, Confidence: 0.0028
Label: leopard, Panthera pardus, Confidence: 0.0017
Label: lion, king of beasts, Panthera leo, Confidence: 0.0011


多張相片

In [19]:
from transformers import pipeline

# 創建圖像分類的 pipeline
vision_classifier = pipeline(model="google/vit-base-patch16-224")

# 處理多張圖像
image_files = ["tiger.jpeg", "lion.jpeg", "cat.jpeg"]
results = vision_classifier(images=image_files)

# 優化輸出格式
for i, image_results in enumerate(results):
    print(f"Results for {image_files[i]}:")
    formatted_results = [
        {"Label": result["label"], "Confidence": round(result["score"], 4)}
        for result in image_results
    ]
    for res in formatted_results:
        print(f"  Label: {res['Label']}, Confidence: {res['Confidence']}")
    # 添加一個空行以便區分不同圖片的結果
    print()


Results for tiger.jpeg:
  Label: tiger, Panthera tigris, Confidence: 0.959
  Label: tiger cat, Confidence: 0.0306
  Label: jaguar, panther, Panthera onca, Felis onca, Confidence: 0.0028
  Label: leopard, Panthera pardus, Confidence: 0.0017
  Label: lion, king of beasts, Panthera leo, Confidence: 0.0011

Results for lion.jpeg:
  Label: lion, king of beasts, Panthera leo, Confidence: 0.758
  Label: leopard, Panthera pardus, Confidence: 0.0372
  Label: cougar, puma, catamount, mountain lion, painter, panther, Felis concolor, Confidence: 0.0365
  Label: lynx, catamount, Confidence: 0.0261
  Label: cheetah, chetah, Acinonyx jubatus, Confidence: 0.0248

Results for cat.jpeg:
  Label: Egyptian cat, Confidence: 0.669
  Label: tabby, tabby cat, Confidence: 0.1815
  Label: tiger cat, Confidence: 0.1011
  Label: lynx, catamount, Confidence: 0.0097
  Label: Persian cat, Confidence: 0.0073



展示相片的腳本

In [18]:
from PIL import Image, ImageDraw, ImageFont

# 圖片文件路徑和標註名稱
image_files = [
    ("tiger.jpeg", "Tiger"),
    ("lion.jpeg", "Lion"),
    ("cat.jpeg", "Cat")
]

# 設定統一的圖片大小
target_size = 300

# 定義函數來調整和裁剪圖片
def resize_and_crop(image, size):
    width, height = image.size
    # 取最小邊作為裁剪基準
    new_size = min(width, height)
    left = (width - new_size) / 2
    top = (height - new_size) / 2
    right = (width + new_size) / 2
    bottom = (height + new_size) / 2

    # 裁剪圖片
    image = image.crop((left, top, right, bottom))
    # 調整圖片大小
    image = image.resize((size, size), Image.ANTIALIAS)
    return image

# 打開所有圖片並調整大小
images = [
    resize_and_crop(Image.open(file), target_size)
    for file, _ in image_files
]

# 計算總寬度和最大高度
total_width = target_size * len(images)
max_height = target_size

# 創建一個新的空白圖像（考慮到標註文字的高度）
combined_image = Image.new(
    'RGB',
    (total_width, max_height + 30),
    (255, 255, 255)
)

# 設置字體（你可以選擇系統中的其他字體）
font = ImageFont.load_default()

# 初始化繪圖對象
draw = ImageDraw.Draw(combined_image)

# 將每張圖片貼到新圖像上
x_offset = 0
for img, (file, name) in zip(images, image_files):
    combined_image.paste(img, (x_offset, 0))
    draw.text(
        (x_offset + target_size // 2 - font.getsize(name)[0] // 2, max_height + 5),
        name,
        font=font,
        fill=(0, 0, 0)
    )
    x_offset += target_size

# 保存最終合成的圖像
combined_image.save('combined_image.jpeg')

# 顯示合成的圖像
combined_image.show()


  image = image.resize((size, size), Image.ANTIALIAS)
  draw.text((x_offset + target_size // 2 - font.getsize(name)[0] // 2, max_height + 5), name, font=font, fill=(0, 0, 0))


檢查圖像並回答問題

In [5]:
from transformers import pipeline

# 確保你已經安裝了 pytesseract
import pytesseract

# 創建文件問答的 pipeline
vqa = pipeline(model="impira/layoutlm-document-qa")

# 處理圖片並提出問題
result = vqa(
    image="invoice.png",
    question="請問這一張發票的號碼？"
)

# 格式化輸出結果
if result:
    for answer in result:
        text = answer.get('answer', 'N/A')
        confidence = answer.get('score', 'N/A')
        start = answer.get('start', 'N/A')
        end = answer.get('end', 'N/A')
        print(
            f"Answer: {text}\nConfidence: {confidence}\n"
            f"start: {start}\nend: {end}\n"
        )
else:
    print("No answer found.")

Answer: us-001
Confidence: 0.0035401417408138514
start: 15
end: 15



問題組

In [15]:
from transformers import pipeline

# 確保你已經安裝了 pytesseract
import pytesseract

# 創建文件問答的 pipeline
vqa = pipeline(model="impira/layoutlm-document-qa")

# 定義圖片和問題組
image_path = "invoice.png"
questions = [
    "請問 Bill To 誰？",
    "請問 Invoice Date？",
    "請問 Invoice Number？"
]

# 處理每個問題並格式化輸出結果
for question in questions:
    result = vqa(image=image_path, question=question)

    if result:
        print(f"Question: {question}")
        for answer in result:
            text = answer.get('answer', 'N/A')
            confidence = answer.get('score', 'N/A')
            start = answer.get('start', 'N/A')
            end = answer.get('end', 'N/A')
            print(
                f"Answer: {text}\nConfidence: {confidence}\n"
                f"Start: {start}\nEnd: {end}\n"
            )
    else:
        print(f"No answer found for question: {question}")
    # 分隔不同問題的結果
    print("\n" + "="*50 + "\n")


Question: 請問 Bill To 誰？
Answer: John Smith
Confidence: 0.9599351286888123
Start: 16
End: 17



Question: 請問 Invoice Date？
Answer: 1102/2019
Confidence: 0.9988793730735779
Start: 22
End: 22



Question: 請問 Invoice Number？
Answer: us-001
Confidence: 0.9999278783798218
Start: 15
End: 15





自然語言處理

In [5]:
from transformers import pipeline

classifier = pipeline(
    model="facebook/bart-large-mnli"
)
result = classifier(
    "I have a problem with my iPhone and need it fixed ASAP!",
    candidate_labels=[
        "urgent", "not urgent", "phone", "tablet", "computer"
    ]
)
# 格式化輸出結果
# 檢查結果格式是否為字典，然後提取每個標籤及其對應的分數
if isinstance(result, dict):
    formatted_results = sorted(
        [
            # 根據分數對結果進行排序並四捨五入，以便顯示最相關的標籤
            {"label": label, "score": round(score, 4)}
            for label, score in zip(result["labels"], result["scores"])
        ],
        key=lambda x: x["score"],
        reverse=True
    )

    # 打印格式化的結果
    print("分類結果：")
    for res in formatted_results:
        print(f"Label: {res['label']}, Score: {res['score']}")
else:
    print("Unexpected result format:", result)

分類結果：
Label: urgent, Score: 0.6274
Label: phone, Score: 0.365
Label: computer, Score: 0.0037
Label: not urgent, Score: 0.0027
Label: tablet, Score: 0.0012


在 Gradio 上創建 Web

In [1]:
import gradio as gr
from transformers import pipeline

pipe = pipeline(
    "image-classification",
    model="google/vit-base-patch16-224"
)
gr.Interface.from_pipeline(pipe).launch()

Running on local URL:  http://127.0.0.1:7860

To create a public link, set `share=True` in `launch()`.


