## 第三章作业

#### 替换课程示例代码中的模型（ https://github.com/DjangoPeng/LLM-quickstart/blob/main/transformers/pipelines.ipynb ），对比不同模型在相同任务上的性能表现。

#### 你可以在 Hugging Face Models 中找到适合你的模型： https://huggingface.co/models

In [1]:
import os

# huggingface_hub resolves HF_HOME / HF_HUB_CACHE once, at the moment it is
# first imported (which importing transformers triggers). The cache location
# therefore has to be exported *before* the transformers import below;
# setting it in a later cell leaves the default ~/.cache/huggingface in use.
os.environ['HF_HOME'] = '/mnt/sda/hf'
os.environ['HF_HUB_CACHE'] = '/mnt/sda/hf/hub'

from transformers import pipeline, AutoTokenizer, AutoModelForTokenClassification

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Redirect the Hugging Face home directory and hub cache to /mnt/sda/hf.
# NOTE(review): huggingface_hub appears to read these variables when it is
# first imported, so they only take effect if set before the first
# `transformers` import -- confirm the downloads actually land here.
os.environ.update({
    'HF_HOME': '/mnt/sda/hf',
    'HF_HUB_CACHE': '/mnt/sda/hf/hub',
})
# Optional local HTTP(S) proxy; uncomment when the hub is not directly reachable.
# os.environ['http_proxy'] = 'http://127.0.0.1:7890'
# os.environ['https_proxy'] = 'http://127.0.0.1:7890'

#### 1. 情感分析任务

In [3]:
# Sentiment classifier used for every example in section 1. With
# `return_all_scores=True` each input yields one score per label
# (positive / neutral / negative) rather than only the top label.
# NOTE(review): `return_all_scores` is reported as deprecated in favour of
# `top_k=None`; the replacement presumably sorts labels by score, which would
# reorder the outputs shown below -- confirm before migrating.
SENTIMENT_MODEL = 'lxyuan/distilbert-base-multilingual-cased-sentiments-student'

pipe = pipeline(
    task='sentiment-analysis',
    model=SENTIMENT_MODEL,
    return_all_scores=True,
)



In [4]:
# Chinese input, roughly "It is really cold in Shanghai today".
weather_remark = '今儿上海可真冷啊'
pipe(weather_remark)

[[{'label': 'positive', 'score': 0.11399053037166595},
  {'label': 'neutral', 'score': 0.22023959457874298},
  {'label': 'negative', 'score': 0.6657698750495911}]]

In [5]:
# Chinese input, roughly "I think the garlic pork at this place tastes average".
food_review = "我觉得这家店蒜泥白肉的味道一般"
pipe(food_review)

[[{'label': 'positive', 'score': 0.0725812315940857},
  {'label': 'neutral', 'score': 0.6030056476593018},
  {'label': 'negative', 'score': 0.3244131803512573}]]

In [6]:
# Chinese input, roughly "You learn really fast; you get the theory as soon
# as it is taught".
praise = "你学东西真的好快，理论课一讲就明白了"
pipe(praise)

[[{'label': 'positive', 'score': 0.9461327791213989},
  {'label': 'neutral', 'score': 0.03845958411693573},
  {'label': 'negative', 'score': 0.015407565981149673}]]

In [7]:
# English versions of the three Chinese sentences scored above, run as one
# batch so the per-language scores can be compared side by side.
weather_en = "Today Shanghai is really cold."
food_en = "I think the taste of the garlic mashed pork in this store is average."
praise_en = "You learn things really quickly. You understand the theory class as soon as it is taught."

text_list = [weather_en, food_en, praise_en]

pipe(text_list)

[[{'label': 'positive', 'score': 0.09706173837184906},
  {'label': 'neutral', 'score': 0.12048669904470444},
  {'label': 'negative', 'score': 0.7824515104293823}],
 [{'label': 'positive', 'score': 0.3724641501903534},
  {'label': 'neutral', 'score': 0.25003641843795776},
  {'label': 'negative', 'score': 0.3774994909763336}],
 [{'label': 'positive', 'score': 0.7639099359512329},
  {'label': 'neutral', 'score': 0.15310528874397278},
  {'label': 'negative', 'score': 0.0829847902059555}]]

#### 2. NER任务

In [8]:
# Load the token-classification checkpoint and its tokenizer explicitly, then
# wrap both in an NER pipeline (per-token predictions, no grouping).
# NOTE(review): the outputs below carry generic LABEL_n tags, so this
# checkpoint's config presumably has no id2label mapping -- confirm which
# tag set the indices correspond to before interpreting the results.
model_name = 'dslim/distilbert-NER'
model = AutoModelForTokenClassification.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

ner = pipeline(
    task='ner',
    model=model,
    tokenizer=tokenizer,
)

In [9]:
# Run the per-token NER pipeline, keep a fixed set of fields for each token
# and round the score to 4 decimals so every prediction fits on one line.
fields = ("entity", "score", "index", "word", "start", "end")

raw_preds = ner("Hugging Face is a French company based in New York City.")
preds = [
    {
        name: round(token[name], 4) if name == "score" else token[name]
        for name in fields
    }
    for token in raw_preds
]
# One prediction per line.
print("\n".join(map(str, preds)))

{'entity': 'LABEL_0', 'score': 0.7291, 'index': 1, 'word': 'Hu', 'start': 0, 'end': 2}
{'entity': 'LABEL_0', 'score': 0.7136, 'index': 2, 'word': '##gging', 'start': 2, 'end': 7}
{'entity': 'LABEL_4', 'score': 0.9664, 'index': 3, 'word': 'Face', 'start': 8, 'end': 12}
{'entity': 'LABEL_0', 'score': 0.9991, 'index': 4, 'word': 'is', 'start': 13, 'end': 15}
{'entity': 'LABEL_0', 'score': 0.9993, 'index': 5, 'word': 'a', 'start': 16, 'end': 17}
{'entity': 'LABEL_7', 'score': 0.9873, 'index': 6, 'word': 'French', 'start': 18, 'end': 24}
{'entity': 'LABEL_0', 'score': 0.9971, 'index': 7, 'word': 'company', 'start': 25, 'end': 32}
{'entity': 'LABEL_0', 'score': 0.9993, 'index': 8, 'word': 'based', 'start': 33, 'end': 38}
{'entity': 'LABEL_0', 'score': 0.9994, 'index': 9, 'word': 'in', 'start': 39, 'end': 41}
{'entity': 'LABEL_5', 'score': 0.9959, 'index': 10, 'word': 'New', 'start': 42, 'end': 45}
{'entity': 'LABEL_6', 'score': 0.9959, 'index': 11, 'word': 'York', 'start': 46, 'end': 50}
{'e

In [10]:
# Rebuild the NER pipeline so that adjacent tokens sharing a label are merged
# into a single span (word pieces such as 'Hu' + '##gging' become 'Hugging').
# `grouped_entities=True` is deprecated; `aggregation_strategy='simple'` is the
# documented equivalent and avoids the deprecation warning.
ner = pipeline(
    task='ner',
    model=model,
    tokenizer=tokenizer,
    aggregation_strategy='simple',
)
ner("Hugging Face is a French company based in New York City.")



[{'entity_group': 'LABEL_0',
  'score': 0.7213408,
  'word': 'Hugging',
  'start': 0,
  'end': 7},
 {'entity_group': 'LABEL_4',
  'score': 0.9663961,
  'word': 'Face',
  'start': 8,
  'end': 12},
 {'entity_group': 'LABEL_0',
  'score': 0.99920726,
  'word': 'is a',
  'start': 13,
  'end': 17},
 {'entity_group': 'LABEL_7',
  'score': 0.98729867,
  'word': 'French',
  'start': 18,
  'end': 24},
 {'entity_group': 'LABEL_0',
  'score': 0.99861956,
  'word': 'company based in',
  'start': 25,
  'end': 41},
 {'entity_group': 'LABEL_5',
  'score': 0.99593264,
  'word': 'New',
  'start': 42,
  'end': 45},
 {'entity_group': 'LABEL_6',
  'score': 0.99578834,
  'word': 'York City',
  'start': 46,
  'end': 55},
 {'entity_group': 'LABEL_0',
  'score': 0.9983041,
  'word': '.',
  'start': 55,
  'end': 56}]