#### 如果有Iprogress not found的錯誤,更新ipywidgets

`conda install -c conda-forge ipywidgets`

#### 查看pipeline支援的任務類型

In [2]:
from transformers.pipelines import SUPPORTED_TASKS

for k, v in SUPPORTED_TASKS.items():
    print(k,' -> ' ,v['type'])

audio-classification  ->  audio
automatic-speech-recognition  ->  multimodal
text-to-audio  ->  text
feature-extraction  ->  multimodal
text-classification  ->  text
token-classification  ->  text
question-answering  ->  text
table-question-answering  ->  text
visual-question-answering  ->  multimodal
document-question-answering  ->  multimodal
fill-mask  ->  text
summarization  ->  text
translation  ->  text
text2text-generation  ->  text
text-generation  ->  text
zero-shot-classification  ->  text
zero-shot-image-classification  ->  multimodal
zero-shot-audio-classification  ->  multimodal
image-classification  ->  image
image-feature-extraction  ->  image
image-segmentation  ->  multimodal
image-to-text  ->  multimodal
object-detection  ->  multimodal
zero-shot-object-detection  ->  multimodal
depth-estimation  ->  image
video-classification  ->  video
mask-generation  ->  multimodal
image-to-image  ->  image


#### 如何查看支援類型的細節說明

In [3]:
from pprint import pprint
for k, value in SUPPORTED_TASKS.items():
    print(f'支援任務:{k}')
    pprint(value)
    break

#default->是使用的預設模型
#impl->對應的class
#pt->支援pytorch模型
#tf->支援TensorFlow模型

支援任務:audio-classification
{'default': {'model': {'pt': ('superb/wav2vec2-base-superb-ks', '372e048')}},
 'impl': <class 'transformers.pipelines.audio_classification.AudioClassificationPipeline'>,
 'pt': (<class 'transformers.models.auto.modeling_auto.AutoModelForAudioClassification'>,),
 'tf': (),
 'type': 'audio'}


#### 依據任務類型直接建立Pipeline,預設都是英文模型

In [4]:
from transformers import pipeline
#沒有指定模型會有警告,並且告知預設的模型->
pipe = pipeline('text-classification')

No model was supplied, defaulted to distilbert/distilbert-base-uncased-finetuned-sst-2-english and revision 714eb0f (https://huggingface.co/distilbert/distilbert-base-uncased-finetuned-sst-2-english).
Using a pipeline without specifying a model name and revision in production is not recommended.


In [5]:
pipe("very good!")

[{'label': 'POSITIVE', 'score': 0.9998525381088257}]

#### 指定任務類型, 再指定模型, 建立基於指定模型的pipeline


In [6]:
pipe = pipeline("text-classification", model="uer/roberta-base-finetuned-dianping-chinese",)
pipe("我覺得有待改進")

[{'label': 'negative (stars 1, 2 and 3)', 'score': 0.9404445886611938}]

#### 預先下載模型,再建立pipeline
- 這種方式必需同時指定model和tokenizer

In [7]:
from transformers import AutoModelForSequenceClassification
from transformers import AutoTokenizer

model = AutoModelForSequenceClassification.from_pretrained('uer/roberta-base-finetuned-dianping-chinese')
tokenizer = AutoTokenizer.from_pretrained('uer/roberta-base-finetuned-dianping-chinese')
pipe = pipeline('text-classification', model=model, tokenizer = tokenizer)
pipe("我覺得很好吃")

[{'label': 'positive (stars 4 and 5)', 'score': 0.9163680076599121}]

#### 檢查pipeline是在什麼樣的CPU上面執行

In [8]:
pipe.model.device

device(type='cpu')

In [9]:
#計算在cpu上跑的時間
import torch
import time
times = []
for i in range(100):
    #torch.cuda.synchronize()
    start = time.time()
    pipe("我覺得還不錯")
    #torch.cuda.synchronize()
    end = time.time()
    times.append(end - start)

print(sum(times)/ 100)

0.32208051919937136


##### 使用GPU處理


In [10]:
#有GPU的處理方式
pipe = pipeline("text-classification", model="uer/roberta-base-finetuned-dianping-chinese",device="0")
pipe("我覺得有待改進")

RuntimeError: Invalid device string: '0'

#### pipeline實體參數的查詢

In [11]:
qa_pipe = pipeline("question-answering", model='uer/roberta-base-chinese-extractive-qa')

In [12]:
qa_pipe

<transformers.pipelines.question_answering.QuestionAnsweringPipeline at 0x7ffe91bc5100>

In [13]:
from transformers import QuestionAnsweringPipeline
#按右鍵移至類型定義
#找尋def __call__()
QuestionAnsweringPipeline

transformers.pipelines.question_answering.QuestionAnsweringPipeline

In [14]:
qa_pipe(question='台灣的首都是哪裏?',context='台灣的首都是台北')

{'score': 0.2752968966960907, 'start': 6, 'end': 8, 'answer': '台北'}

In [15]:
#限定回答長度:1
qa_pipe(question='台灣的首都是哪裏?',context='台灣的首都是台北',max_answer_len=1)


{'score': 0.0016681747511029243, 'start': 7, 'end': 8, 'answer': '北'}

#### 其他pipline範例
- 零樣本的測試

In [16]:
checkpoint = "google/owlvit-base-patch32"
detector = pipeline(model=checkpoint, task="zero-shot-object-detection")

config.json:   0%|          | 0.00/4.42k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/613M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/775 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.06M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/525k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/460 [00:00<?, ?B/s]

preprocessor_config.json:   0%|          | 0.00/392 [00:00<?, ?B/s]