In [7]:
import os

os.environ["HF_HOME"] = "/content/models/hf"

In [32]:
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

model_name = "Qwen/Qwen3-0.6B"
# model_name = "Qwen/Qwen3-1.7B"
# model_name = "Qwen/Qwen3-4B-Instruct-2507"

tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    trust_remote_code=True,
    # torch_dtype=torch.float16,  # 使用 FP16 而不是 FP8
    torch_dtype="auto",
    device_map="auto",
)

In [33]:
# prepare the prompt

prompt_template = """
You are given an email ({input_text}) sent to XXX Cloud about a possible infringement complaint.

Your task is to analyze the email and output **only JSON** that fills the following schema.
Read the mail content to generate the summary and fill in each field as required.

{{
  "incident_id": "Extract the incident ID from the complaint, if present. Omit this field if not applicable.",
  "eff_sender_name": "Identify and provide the original sender's name if the email appears to be forwarded. Omit this field otherwise.",
  "eff_sender_mail": "Provide the original sender's email address if the email appears to be forwarded. Omit this field otherwise.",
  "eff_sent_time": "Include the sent time of the original email, converted to Beijing time in the format 'YYYY-MM-DD HH:MM:SS', if the email appears to be forwarded. Omit this field otherwise.",
  "summary": "Summarize the main complaint of the email, including the main points of the complaint and the requested actions. Include any relevant details about the complaint. Omit this field if the email is not a complaint.",
  "summary_zh": "Translate the summary into Chinese.",
  "infringe_ips": "List IP addresses that are accused of infringement, separated by commas. Omit this field if there are no IPs mentioned.",
  "infringe_domains": "List domain names accused of infringing on copyright, separated by commas. These domains, if they exist, should be a part of the infringing URLs mentioned in the mail.",
  "infringed_domains": "List domain names that hold the copyrighted content which is claimed to be infringed upon, separated by commas if multiple. Use this field for domains that are victims of copyright infringement.",
  "copyright_owner": "Provide the name of the copyright owner or the infringed party. Use abbreviations if possible, set to \"Unknown\" if not mentioned in the mail.",
  "spam": "Mark this field as true if the email is obviously not related to infringement, such as spam or marketing emails. Omit this field if the email is related to infringement.",
  "contact_email": "If a contact or reporter email is provided in the email, include it here. Omit this field if not applicable."
}}
"""

input_text = """
Subject: Urgent Copyright Infringement Notice - Incident ID 8888888 - Urgent live stream escalation for URL http://play1nm.aaaa.cn/live/ballbar_24690-1715195519.ts - The Union of European Football Associations (UEFA) - 11.11.11.11

Content:

**发件人:** bounce-md_30134155.663be85b.v1-08d5422e6de84195ad22066429622b97@mail.bbb.com代表Friend MTS
**已发送:** 2024年5月9日 5:02:19 (UTC+08:00)北京，重庆，香港特别行政区，乌鲁木齐
**收件人:** XXX_net_duty(CNOC值班帐户)
**抄送:** operations@bbb.com
**主题:** [Internet]Urgent Copyright Infringement Notice - Incident ID 8888888 - Urgent live stream escalation for URL http://play1nm.aaaa.cn/live/ballbar_24690-1715195519.ts - The Union of European Football Associations (UEFA) - 11.11.11.11

Dear Sir/Madam,

Please find below a copyright infringement notice sent to a site that is making use of streaming servers hosted within your infrastructure. The technical parameters for the stream are as follows, to enable you to trace the specific machine within your infrastructure.

ip address: 11.11.11.11
tcUrl: http://play1nm.aaaa.cn/live/ballbar_24690-1715195519.ts
Playpath:
pageUrl: http://play1nm.aaaa.cn/live/ballbar_24690-1715195519.ts
swfUrl:

We appreciate that the site itself may be hosted separately, and the domain may no longer resolve to one of your IP addresses, however at the time quoted we captured the stream from a stream server at the indicated IP address and current best information resolves this address to yourselves.
Please disable access to all such unauthorized materials of the copyright owner/holder and cease display and distribution immediately, via all means, of all proprietary materials of The Union of European Football Associations (UEFA).

Kind regards

ccc DDD

DDD MTS Limited
Email: uefamonitoring@bbb.com

---------- Forwarded Message ----------

Subject: Urgent Copyright Infringement Notice - The Union of European Football Associations (UEFA): Incident ID 8888888
Date: 2024-05-08 19:27:16

Dear Sir/Madam,

I am writing to you on behalf of our client, The Union of European Football Associations (UEFA).

Through our monitoring programme we have become aware that, on your website play1nm.aaaa.cn, there is copyrighted programming, images and/or hyperlinks, found through the following URL(s)/Channel(s):

URL: http://play1nm.aaaa.cn/live/ballbar_24690.m3u8
Incident ID: 8888888
Date seen (UTC): 2024-05-08 19:27:16

Your actions are an infringement of our client's rights; in particular they constitute copyright infringement under applicable legislation, including that relating to apparatus used for unauthorised reception of transmissions or to online service providers. As a result, your use of our client's copyright protected material is likely to divert custom from our client.

Our client takes infringement of their rights in this way very seriously. This is a serious matter causing, or likely to cause, significant loss to our client, and our client therefore requires that you immediately cease all such infringing and unlawful activity.

We hereby state that we have a good faith belief that the disputed use of the copyrighted material is not authorised by the copyright owner, its agent, or the law (e.g. as a fair use).

We hereby state that the information in this Notice is accurate and, under penalty of perjury, that our client is the owner, or authorised to act on behalf of the owner, of the copyright or of an exclusive right under the copyright that is allegedly infringed.

In the circumstances, our client requires you to remove the channels complained about immediately.

Our client reserves all of their rights in this matter.

Please confirm within 10 minutes by email to uefamonitoring@bbb.com, retaining our Incident ID in the subject line, that you have complied with the above.

Yours sincerely,

ccc DDD

DDD MTS Limited


Email: uefamonitoring@bbb.com
"""


In [34]:
# Inference

messages = [
    {"role": "user", "content": prompt_template.format(input_text=input_text)}
]

formatted_messages = tokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True,
    enable_thinking=False # Switches between thinking and non-thinking modes. Default is True.
)

tokenized_messages = tokenizer([formatted_messages], return_tensors="pt").to(model.device)

import time
start_time = time.time()

generated_ids = model.generate(
    **tokenized_messages,
    use_cache=False,
    max_new_tokens=32768
)
output_ids = generated_ids[0][len(tokenized_messages.input_ids[0]):].tolist()

end_time = time.time()
execution_time_minutes = (end_time - start_time) / 60
print(f"Generating completed in {execution_time_minutes:.2f} minutes.")

# parsing thinking content
try:
    # rindex finding 151668 (</think>)
    index = len(output_ids) - output_ids[::-1].index(151668)
except ValueError:
    index = 0

thinking_content = tokenizer.decode(output_ids[:index], skip_special_tokens=True).strip("\n")
content = tokenizer.decode(output_ids[index:], skip_special_tokens=True).strip("\n")

print("thinking content:", thinking_content)
print("content:", content)


Generating completed in 0.30 minutes.
thinking content: 
content: ```json
{
  "incident_id": "8888888",
  "eff_sender_name": "bounce-md_30134155.663be85b.v1-08d5422e6de84195ad22066429622b97@mail.bbb.com",
  "eff_sender_mail": "bounce-md_30134155.663be85b.v1-08d5422e6de84195ad22066429622b97@mail.bbb.com",
  "eff_sent_time": "2024-05-09 5:02:19",
  "summary": "A copyright infringement notice was sent to XXX_net_duty(CNOC) and operations@bbb.com. The incident ID is 8888888. The URL http://play1nm.aaaa.cn/live/ballbar_24690-1715195519.ts is claimed to be infringing due to unauthorized content. The EU's UEFA is involved.",
  "summary_zh": "关于侵权通知的投诉内容：发件人是Friend MTS代表，发送时间为2024年5月9日，投诉内容涉及侵权。URL地址为http://play1nm.aaaa.cn/live/ballbar_24690-1715195519.ts，涉及未经授权的版权内容。投诉人要求立即停止侵权行为。",
  "infringe_ips": "",
  "infringe_domains": ["play1nm.aaaa.cn"],
  "infringed_domains": ["ballbar_24690-1715195519.ts"],
  "copyright_owner": "UEFA",
  "spam": false,
  "contact_email": "uefamonitoring@bbb.com"
}
