In [None]:
# base
import os
from transformers import pipeline

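# Optional: redirect the Hugging Face cache to another volume. If enabled, these lines
# should run before `transformers` is imported so the new locations are picked up.
#os.environ['HF_HOME']='/mnt/new_volume/hf'
#os.environ['HF_HUB_CACHE']='/mnt/new_volume/hf/hub'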

### **使用 transformers 的 pipeline API 实现 text classification 任务**

In [None]:
# Text-classification pipeline pinned to an explicit checkpoint (135M per the original note).
# Passing only the task name makes the library fall back to a default model,
# which is not recommended:
#   pipe = pipeline("sentiment-analysis")
pipe = pipeline(
    "text-classification",
    model="tabularisai/multilingual-sentiment-analysis",
)

In [None]:
# Score both Chinese sentences in one batched call so that both results are displayed.
# Written as two bare calls, only the last expression of the cell is rendered and the
# first prediction is silently dropped.
pipe(
    [
        "今儿上海可真冷啊",
        "我觉得这家店蒜泥白肉的味道一般",
    ]
)

In [None]:
# Background: the task-default model, distilbert-base-uncased-finetuned-sst-2-english,
# has not been trained much on Chinese, so its results on Chinese text classification
# may be unsatisfying.
# Note that `pipe` as created in this notebook loads
# tabularisai/multilingual-sentiment-analysis instead, not that default.
pipe("你学东西真的好快，理论课一讲就明白了")

In [None]:
# Same sentence in English. The original note reports that classification quality improves
# immediately after switching to English.
# NOTE(review): that observation presumably refers to the English-only default model;
# re-check it against the checkpoint `pipe` actually loads.
pipe("You learn things really quickly. You understand the theory class as soon as it is taught.")

In [None]:
# English sentence about the cold weather in Shanghai.
pipe("Today Shanghai is really cold.")

In [None]:
# Batched inference: pass a list of texts to the pipeline in a single call.

text_list = [
    "Today Shanghai is really cold.",
    "I think the taste of the garlic mashed pork in this store is average.",
    "You learn things really quickly. You understand the theory class as soon as it is taught."
]

# Last expression of the cell, so the pipeline's return value is displayed.
pipe(text_list)

### **使用Pipeline API调用更多预定义任务**

#### Natural Language Processing NLP

In [None]:
#### Token Classification

# Task-default variant, kept for reference:
#   classifier = pipeline(task="ner")
# The checkpoint is pinned explicitly instead (108M per the original note).
classifier = pipeline(
    "ner",
    model="dslim/bert-base-NER",
)

In [None]:
# Run NER on a sample sentence and keep one record per prediction, with the score
# rounded to 4 decimals for readability.
preds = [
    {
        "entity": item["entity"],
        "score": round(item["score"], 4),
        "index": item["index"],
        "word": item["word"],
        "start": item["start"],
        "end": item["end"],
    }
    for item in classifier("Hugging Face is a French company based in New York City.")
]
# End the cell on the value so the predictions are displayed; the cell previously
# ended on an assignment and rendered nothing.
preds

In [None]:
# Merge the pieces that belong to the same entity into a single span.
# `grouped_entities=True` is deprecated in transformers in favour of
# `aggregation_strategy`; "simple" is the strategy the old flag mapped to.
# Task-default variant, kept for reference:
#   classifier = pipeline(task="ner", aggregation_strategy="simple")
classifier = pipeline(
    task="ner",
    model="dslim/bert-base-NER",
    aggregation_strategy="simple",
)

In [None]:

# Run the sample sentence through the current `classifier` and display the result.
classifier("Hugging Face is a French company based in New York City.")

In [None]:
### QA task

# Task-default variant, kept for reference:
#   question_answerer = pipeline(task="question-answering")
# Explicit checkpoint (124M per the original note).
question_answerer = pipeline(
    "question-answering",
    model="deepset/roberta-base-squad2",
)

In [None]:

# Ask a question whose answer appears verbatim in the context, then print the rounded
# score, the start/end values and the answer taken from the pipeline's result.
preds = question_answerer(
    question="What is the name of the repository?",
    context="The name of the repository is huggingface/transformers",
)
print(
    f"score:{round(preds['score'],4)}, start:{preds['start']}, end: {preds['end']}, answer: {preds['answer']}"
)

In [None]:
# Second QA example. The context mentions Beijing but never states outright that it is
# the capital, so this probes what the model picks from the passage.
preds = question_answerer(
    context="On 1 October 1949, CCP Chairman Mao Zedong formally proclaimed the People's Republic of China in Tiananmen Square, Beijing.",
    question="What is the capital of China?",
)
print(f"score: {round(preds['score'], 4)}, start: {preds['start']}, end: {preds['end']}, answer: {preds['answer']}")

In [None]:
### summarization

# Summarization pipeline on facebook/bart-large-cnn (406M per the original note),
# created with min_length=8 and max_length=32.
summarizer = pipeline(
    task="summarization",
    # Alternative checkpoint, kept for reference:
    # model="t5-base",
    model="facebook/bart-large-cnn",  # the comma after this argument was missing (SyntaxError)
    min_length=8,
    max_length=32,
)

In [None]:
# Summarize an English paragraph about the Transformer model.
# The summary length is presumably bounded by the min_length/max_length values given
# when `summarizer` was created — verify against the pipeline's behaviour.
summarizer(
    """
    In this work, we presented the Transformer, the first sequence transduction model based entirely on attention, 
    replacing the recurrent layers most commonly used in encoder-decoder architectures with multi-headed self-attention. 
    For translation tasks, the Transformer can be trained significantly faster than architectures based on recurrent or convolutional layers. 
    On both WMT 2014 English-to-German and WMT 2014 English-to-French translation tasks, we achieve a new state of the art. 
    In the former task our best model outperforms even all previously reported ensembles.
    """
)

In [None]:
# Summarize a longer English paragraph about large language models with the same
# `summarizer` pipeline.
summarizer(
    '''
    Large language models (LLM) are very large deep learning models that are pre-trained on vast amounts of data. 
    The underlying transformer is a set of neural networks that consist of an encoder and a decoder with self-attention capabilities. 
    The encoder and decoder extract meanings from a sequence of text and understand the relationships between words and phrases in it.
    Transformer LLMs are capable of unsupervised training, although a more precise explanation is that transformers perform self-learning. 
    It is through this process that transformers learn to understand basic grammar, languages, and knowledge.
    Unlike earlier recurrent neural networks (RNN) that sequentially process inputs, transformers process entire sequences in parallel. 
    This allows the data scientists to use GPUs for training transformer-based LLMs, significantly reducing the training time.
    '''
)

#### Audio 音频处理任务

In [None]:
#### Audio classification

# Install dependencies (run in a shell):
# $ apt update && apt upgrade
# $ apt install -y ffmpeg
# $ pip install ffmpeg ffmpeg-python

In [None]:
# Audio-classification pipeline. This rebinds `classifier`, which earlier cells use
# for the NER pipeline.
# Alternative checkpoint, kept for reference:
#   classifier = pipeline(task="audio-classification", model="superb/hubert-base-superb-er")
# Checkpoint in use (165M per the original note):
#   audeering/wav2vec2-large-robust-12-ft-emotion-msp-dim
# NOTE(review): confirm this checkpoint loads cleanly under the audio-classification task.
classifier = pipeline(
    "audio-classification",
    model="audeering/wav2vec2-large-robust-12-ft-emotion-msp-dim",
)

In [None]:

# Classify a test clip hosted on Hugging Face Datasets; keep only the score (rounded to
# 4 decimals) and the label of each prediction for display.
preds = [
    {"score": round(item["score"], 4), "label": item["label"]}
    for item in classifier("https://huggingface.co/datasets/Narsil/asr_dummy/resolve/main/mlk.flac")
]
preds

In [None]:
# Same classification, this time on a local audio file.
preds = [
    {"score": round(item["score"], 4), "label": item["label"]}
    for item in classifier("data/audio/mlk.flac")
]
preds

In [None]:
### Automatic speech recognition (ASR)

# ASR pipeline example built on an OpenAI Whisper checkpoint.
# Smaller alternative, kept for reference:
#   transcriber = pipeline(task="automatic-speech-recognition", model="openai/whisper-small")
# Sizes from the original note: openai/whisper-tiny 37.8M, openai/whisper-large-v3-turbo 809M.
transcriber = pipeline(
    "automatic-speech-recognition",
    model="openai/whisper-large-v3-turbo",
)

In [None]:

# Transcribe the local audio file and display whatever the pipeline returns.
text = transcriber("data/audio/mlk.flac")
text

#### Computer Vision 计算机视觉

In [None]:
### Image Classification

# Task-default variant, kept for reference:
#   classifier = pipeline(task="image-classification")
# Explicit checkpoint (85.8M per the original note). This rebinds `classifier` again.
classifier = pipeline(
    "image-classification",
    model="Falconsai/nsfw_image_detection",
)

In [None]:

# Classify an image hosted on Hugging Face, then print one prediction per line
# (score rounded to 4 decimals, plus the label).
preds = [
    {"score": round(item["score"], 4), "label": item["label"]}
    for item in classifier(
        "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/pipeline-cat-chonk.jpeg"
    )
]
print("\n".join(str(entry) for entry in preds))

In [None]:
# Classify a local image (the cat picture), printing one prediction per line.
preds = [
    {"score": round(item["score"], 4), "label": item["label"]}
    for item in classifier("data/image/cat-chonk.jpeg")
]
print("\n".join(str(entry) for entry in preds))

In [None]:
# Classify a local image (the panda picture), printing one prediction per line.
preds = [
    {"score": round(item["score"], 4), "label": item["label"]}
    for item in classifier("data/image/panda.jpg")
]
print("\n".join(str(entry) for entry in preds))

In [None]:
### Object Detection

#依赖包安装
!pip install timm

In [None]:
# Object-detection pipeline.
# Task-default variant, kept for reference:
#   detector = pipeline(task="object-detection")
# Explicit checkpoint (28.8M per the original note).
# NOTE(review): the checkpoint name suggests a table detector, while the images passed
# to `detector` in this notebook are named after cats and dogs — confirm the model choice.
detector = pipeline(
    "object-detection",
    model="microsoft/table-transformer-detection",
)

In [None]:

# Detect objects in an image hosted on Hugging Face; keep the rounded score, the label
# and the box of each detection for display.
preds = [
    {"score": round(hit["score"], 4), "label": hit["label"], "box": hit["box"]}
    for hit in detector(
        "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/pipeline-cat-chonk.jpeg"
    )
]
preds

In [None]:
# Same detection on a local image file.
preds = [
    {"score": round(hit["score"], 4), "label": hit["label"], "box": hit["box"]}
    for hit in detector("data/image/cat_dog.jpg")
]
preds