# pipelines

### text classification

In [None]:
from transformers import pipelines as pl

# Build a sentiment-analysis pipeline from an explicitly pinned checkpoint.
analyzer = pl.pipeline(
    task="sentiment-analysis",
    model="distilbert/distilbert-base-uncased-finetuned-sst-2-english",
)

inputs = ["Hello world", "I hate you", "It's a beautiful day"]
preds = analyzer(inputs)

# Show every input sentence next to the prediction produced for it.
for sentence, pred in zip(inputs, preds):
    print("sentence: ", sentence)
    print("inference: ", pred)

### token classification

In [None]:
from transformers import pipeline

# Named-entity-recognition pipeline using the dslim/bert-base-NER checkpoint.
ner = pipeline("ner", model="dslim/bert-base-NER")

inputs = [
    "My name is Wolfgang and I live in Berlin"
]
outputs = ner(inputs)

# For a list input the pipeline is expected to return one result per sentence.
# Print the whole result for each sentence: indexing it with the sentence's
# position would show a single entity only and can run out of range.
for sentence, res in zip(inputs, outputs):
    print("%s\n%s" % (sentence, res))

### question answering

In [None]:
from transformers import pipeline

# Extractive question answering: the question and the passage to search are
# handed to the pipeline together as one dict.
qa = pipeline(
    task="question-answering",
    model="uer/roberta-base-chinese-extractive-qa",
)

inputs = dict(
    question="囚徒的作者是谁",
    context="普希金从那里学习人民的语言，吸取了许多有益的养料，这一切对普希金后来的创作产生了很大的影响。这两年里，普希金创作了不少优秀的作品，如《囚徒》、《致大海》、《致凯恩》和《假如生活欺骗了你》等几十首抒情诗，叙事诗《努林伯爵》，历史剧《鲍里斯·戈都诺夫》，以及《叶甫盖尼·奥涅金》前六章。",
)
outputs = qa(inputs)
print(outputs)

### summarization

In [None]:
from transformers import pipeline

# Summarization pipeline. The names deliberately avoid `sum` and `input`:
# rebinding those builtins at notebook scope would break them in every cell
# executed afterwards.
summarizer = pipeline("summarization", model="csebuetnlp/mT5_multilingual_XLSum")
text = "并查集是一种树型的数据结构，用于处理不相交集合的合并及查询问题，在使用中常常以森林来表示。"
print(summarizer(text))

### translation

In [None]:
from transformers import pipelines as pl

# Chinese sentences to translate into English.
inputs = [
    "你好，世界", "今天是一个好天气", "手写数字识别",
    "将递归函数转换为非递归函数的一般方法是使用栈来模拟递归调用的过程。具体来说，可以将递归函数中的每个递归调用转换为将参数压入栈中，并在循环中取出栈顶元素进行处理，直到栈为空为止。",
]

model_name = "Helsinki-NLP/opus-mt-zh-en"
# Alternative multilingual checkpoint; swap it in to compare results.
# model_name = "facebook/nllb-200-distilled-600M"
tr = pl.pipeline("translation", model=model_name)

# Both language codes use the FLORES-200 convention (`zho_Hans`, `eng_Latn`)
# so they stay valid when the NLLB checkpoint above is selected.
# NOTE(review): the single-direction opus-mt checkpoint is expected to ignore
# these arguments - confirm against the TranslationPipeline documentation.
res = tr(inputs, src_lang="zho_Hans", tgt_lang="eng_Latn")

# Print each source sentence followed by its translation.
for source, translated in zip(inputs, res):
    print("%s\n%s\n" % (source, translated["translation_text"]))


In [None]:
from transformers import AutoTokenizer, TFAutoModelForSeq2SeqLM

# Same translation task as the pipeline cell, done manually with the
# tokenizer and the TensorFlow seq2seq model.
model_name = "Helsinki-NLP/opus-mt-zh-en"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = TFAutoModelForSeq2SeqLM.from_pretrained(model_name)

inputs = [
    "你好，世界", "并查集是一种树型的数据结构，用于处理不相交集合的合并及查询问题，在使用中常常以森林来表示。",
]

# Pad to the longest sentence so both inputs fit in a single tensor batch.
tf_batch = tokenizer(inputs, padding=True, truncation=True, max_length=512, return_tensors="tf")
input_ids = tf_batch.input_ids

# Pass the attention mask explicitly so the padded positions of the shorter
# sentence are masked out instead of relying on the mask being inferred.
# Sampling (do_sample/top_k/top_p) makes the output vary between runs.
outputs = model.generate(
    input_ids,
    attention_mask=tf_batch.attention_mask,
    max_new_tokens=40,
    do_sample=True,
    top_k=30,
    top_p=0.95,
)

# Drop padding / end-of-sequence markers so only the translated text is shown.
for source, output_ids in zip(inputs, outputs):
    print(source)
    print(tokenizer.decode(output_ids, skip_special_tokens=True))
    print()
