In [1]:
from transformers import AutoModelForCausalLM, AutoTokenizer, StaticCache
import copy
import torch
from itertools import chain

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Hugging Face Hub id of the checkpoint used for both the model and its tokenizer.
model_name = "Qwen/Qwen2.5-3B-Instruct-GPTQ-Int4"

# device_map="auto" leaves weight placement to the library, so later cells
# should read the actual device from `model.device`.
model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto")
tokenizer = AutoTokenizer.from_pretrained(model_name)

  def forward(ctx, input, qweight, scales, qzeros, g_idx, bits, maxq):
  def backward(ctx, grad_output):
  @custom_fwd(cast_inputs=torch.float16)
CUDA extension not installed.
CUDA extension not installed.
`loss_type=None` was set in the config but it is unrecognised.Using the default loss: `ForCausalLMLoss`.


In [3]:
# KV cache that will hold the shared prefix (system prompt + few-shot turns).
# It is allocated on the model's own device instead of a hard-coded "cuda":
# the model is loaded with device_map="auto" and the prefix inputs are moved
# to `model.device`, so the cache has to live in the same place.
prompt_cache = StaticCache(
    config=model.config,
    batch_size=1,
    # NOTE(review): presumably this must cover prefix + user prompt + generated
    # tokens — confirm 1024 is enough if demonstrations are added.
    max_cache_len=1024,
    device=model.device,
    dtype=torch.float16,
)

In [4]:
# sys_prompt = """根據以下情感元素定義：
# - "方面"是指使用者可以表達意見的產品或服務的特定特徵或屬性。方面詞為給定文字的子字串。
# - "意見"是指使用者對產品或服務的特定方面或特徵所表達的情緒或態度。意見詞為給定文字的子字串。
# - "情感極性"是指對產品或服務的特定方面或功能所表達的意見的正面、負面或中立程度，可用的極性包括 「正面」、「負面」和 「中性」。「中性」表示輕微正面或輕微負面。具有客觀情感極性的三元組應忽略。
# 請仔細遵循指示。確保方面詞彙在評論中被識別為完全匹配。確保評論中的意見詞彙是完全匹配的。確保情感極性來自可用的極性。
# 在給定的輸入文字（評論）中識別出所有情感元素及其對應的方面詞彙、意見詞彙和情感極性。以 Python 元組列表的格式提供您的回應：[("方面", "意見", "情感極性"), ...]。請注意，, ... 表示可能的話，列表中可能會有更多的元組，而且一定不能使用。"""

In [5]:
# Detailed instruction prompt defining aspect term, opinion term and sentiment polarity.
# NOTE(review): this value is never used — the assignment right after it
# overwrites `sys_prompt` before anything reads it.
sys_prompt = """According to the following sentiment elements definition:
- The “aspect term” refers to a specific feature, attribute, or aspect of a product or service on which a user can express an opinion. Explicit aspect terms appear explicitly as a substring of the given text.
- The “opinion term” refers to the sentiment or attitude expressed by a user towards a particular aspect or feature of a product or service. Explicit opinion terms  appear explicitly as a substring of the given text.
- The “sentiment polarity” refers to the degree of positivity, negativity or neutrality expressed in the opinion towards a particular aspect or feature of a product or service, and the available polarities include: “positive”, “negative” and “neutral”. “neutral” means mildly positive or mildly negative. Triplets with objective sentiment polarity should be ignored. 
Please carefully follow the instructions. Ensure that aspect terms are recognized as exact matches in the review. Ensure that opinion terms are recognized as exact matches in the review. Ensure that sentiment polarities are from the available polarities. 
Recognize all sentiment elements with their corresponding aspect terms, opinion terms, and sentiment polarity in the given input text (review)."""
# Short prompt that is actually in effect for the rest of the notebook.
sys_prompt = """The output will be the aspect terms in the sentence followed by their describing words and sentiment polarity."""
# sys_prompt = """你是一个自然语言处理工程师，你的任务是根据给定的文本，识别文本中的情感元素。情感元素包括：方面术语、意见术语和情感极性。方面术语是指用户可以对产品或服务的特定特征、属性或方面发表意见的特定特征、属性或方面。明确的方面术语在给定文本中明确出现。意见术语是指用户对产品或服务的特定特征或方面表达的情感或态度。明确的意见术语在给定文本中明确出现。情感极性是指用户对产品或服务的特定特征或方面表达的情感或态度的积极性、消极性或中立性程度，可用的极性包括：“积极的”、“消极的”和“中性的”。“中性的”表示轻微的积极或轻微的消极。应忽略具有客观情感极性的三元组。请仔细遵循说明。确保方面术语在评论中被识别为确切匹配。确保意见术语在评论中被识别为确切匹配。确保情感极性来自可用的极性。在给定的输入文本（评论）中识别所有情感元素及其相应的方面术语、意见术语和情感极性。以Python元组列表的格式提供您的响应：’情感元素：[(“方面术语”，“意见术语”，“情感极性”)，...]’。请注意，“，...”表示列表中可能有更多的元组（如果适用），并且不得出现在答案中。确保响应中没有额外的文本。"""

# sys_prompt = """
# 执行一个情感三元组提取任务。给定一个句子，从中提取所有情感三元组，其中包括面向、观点和情感極性，并用Python元组列表表示:[(“方面术语”，“意见术语”，“情感极性”)，...]’。请注意，“，...”表示列表中可能有更多的元组（如果适用），并且不得出现在答案中。确保响应中没有额外的文本。。
# """

In [6]:
# Few-shot demonstrations as (review sentence, expected answer) pairs.
# Each answer is a comma-separated list of `aspect:opinion:polarity` items.
# NOTE(review): the name is a misspelling of "demonstration_set"; it is kept
# because the cell that builds `base_messages` refers to it by this name.
demstrantion_set = [
    ("很夠味起司也很香。", "起司:很夠味:positive,起司:很香:positive"),
    ("這款沙拉真是我的愛。", "沙拉:我的愛:positive"),
    ("但可惜熱炒的品質不穩定，且價格也不平價。", "熱炒的品質:不穩定:negative,價格:不平價:negative"),
    ("但裡面的肉吃起來柴柴的。", "肉:柴柴的:negative"),
    ("鮮奶油和水果則是中規中矩。", "鮮奶油:中規中矩:neutral,水果:中規中矩:neutral"),
    ("小菜都沒有雷，但也沒有太印象深刻。", "小菜:沒有雷:neutral,小菜:沒有太印象深刻:neutral"),
]

In [7]:
# One [user turn, assistant turn] pair per demonstration.
tmp = [
    [
        {"role": "user", "content": review},
        {"role": "assistant", "content": answer},
    ]
    for review, answer in demstrantion_set
]
# Shared conversation prefix: the system prompt followed by the flattened pairs.
base_messages = [
    {"role": "system", "content": sys_prompt},
    *chain.from_iterable(tmp),
]

In [8]:
def apply_template(user_input=None):
    """Render the shared few-shot conversation as chat-template text.

    Args:
        user_input: Optional review text. When truthy it is appended as a
            final user turn and the generation prompt is added. When falsy
            (``None`` or an empty string) only the shared prefix from
            ``base_messages`` is rendered, with no generation prompt.

    Returns:
        The result of ``tokenizer.apply_chat_template`` called with
        ``tokenize=False``.
    """
    has_user_turn = bool(user_input)

    # Work on a private copy so the module-level prefix is never modified.
    conversation = copy.deepcopy(base_messages)
    if has_user_turn:
        conversation.append({"role": "user", "content": user_input})

    return tokenizer.apply_chat_template(
        conversation,
        tokenize=False,
        add_generation_prompt=has_user_turn,
    )

In [9]:
# Shared prefix only: no user turn is passed, so no generation prompt is added.
INITIAL_PROMPT = apply_template()
# Tokenise the prefix and move the tensors to the device the model reports.
inputs_initial_prompt = tokenizer(INITIAL_PROMPT, return_tensors="pt").to(model.device)

In [10]:
# Run the shared prefix through the model once, without gradient tracking,
# passing the cache in. `prompt_cache` is then rebound to the
# `past_key_values` returned by that forward pass.
with torch.no_grad():
    prompt_cache = model(
        **inputs_initial_prompt, past_key_values=prompt_cache
    ).past_key_values

In [11]:
# Review sentences to run through the model (one Chinese, one English).
prompts = [
    "麵線很好吃，但是價格有點貴。",
    "We have gone for dinner only a few times but the same great quality and service is given .",
]

In [12]:
# Generate one answer per review, starting each time from the pre-filled
# prefix cache.
responses = []
for user_text in prompts:
    # Full chat text: shared prefix + this review + generation prompt.
    prompt = apply_template(user_text)
    # Send inputs to the model's device (not a hard-coded "cuda") so they
    # match where the prefix inputs were placed.
    new_inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    # Each prompt gets its own copy, so the original prefix cache object is
    # never handed to generate().
    past_key_values = copy.deepcopy(prompt_cache)
    outputs = model.generate(
        **new_inputs,
        past_key_values=past_key_values,
        max_new_tokens=128,
        do_sample=False,
        num_beams=1,
    )
    # Drop the echoed input tokens so only newly generated ids are decoded.
    generated_ids = [
        output_ids[len(input_ids):] for input_ids, output_ids in zip(new_inputs.input_ids, outputs)
    ]

    response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
    responses.append(response)



In [19]:
# Split every response on "," into items, then each item on ":" into its fields.
[item.split(":") for response in responses for item in response.split(",")]

[['麵線', '很好吃', 'positive'],
 ['價格', '有點貴', 'negative'],
 ['dinner', 'great quality', 'positive'],
 ['service', 'great', 'positive']]

In [15]:
# Inspect the first response split on "," into its raw items.
responses[0].split(",")

['麵線:很好吃:positive', '價格:有點貴:negative']

In [20]:
# First response only: split into items on ",", then each item on ":".
[item.split(":") for item in responses[0].split(",")]

[['麵線', '很好吃', 'positive'], ['價格', '有點貴', 'negative']]