## datasets prepare

In [1]:
# check datasets 

import csv
csv.field_size_limit(500 * 1024 * 1024)

data_list = []
class_number = {'SPAM':0, 'EDM':0, 'HAM':0, 'NOTE':0, 'HACK':0}

with open('spam_data_append_v3.csv', newline='', encoding='utf-8') as csvfile:
    # count = 0
   
    rows = csv.reader(csvfile)
    for i, row in enumerate(rows):
        if row[2] in class_number.keys():
            pass
        else:
            print(i)

In [3]:
# load datasets

import csv
csv.field_size_limit(500 * 1024 * 1024)

data_list = []
class_number = {'SPAM':0, 'EDM':0, 'HAM':0, 'NOTE':0, 'HACK':0}

with open('spam_data_append.csv', newline='', encoding='utf-8') as csvfile:
    
    
   
    rows = csv.reader(csvfile)
    for i, row in enumerate(rows):
        # print(row[0])
        if i != 0:
            context = row[3].replace('Num','')
            class_number[row[2]]+=1
            data_list.append({'index': row[0], 
                              'md5sum': row[1],
                              'label':row[2], 
                              'context':context})

In [4]:
# number of data

len(data_list)

347700

In [5]:
# number of data for each class

print(class_number)

{'SPAM': 36796, 'EDM': 218647, 'HAM': 68952, 'NOTE': 22048, 'HACK': 1257}


In [6]:
data_list[3]

{'index': '4',
 'md5sum': '00010a27a02be1b98537cd22e44d40a4',
 'label': 'EDM',
 'context': 'Read email browser 再生能源 業者 農曆 五月 五日 端午 佳節 到來 古時 稱惡 惡日 這天 驅除 瘟疫 惡運 臺灣 近期 疫情 影響 藉由 機會 驅瘟 去疫 快速 回復 自由 活動 無拘無束 生活 能源 週 呼籲 居家 追劇 喫 肉糉 盡量 外出 常備 酒精 做好 消毒 防疫 臺灣 國際 智慧 能源 週 Energy Taiwan 實體 展     虛擬 展      Tel    Email emailAddress 訂閱 取消 訂閱 Subscribe Unsubscribe'}

In [35]:
# train : val : test = 8:1:1

from sklearn.model_selection import train_test_split

train_data, dev_test_data = train_test_split(data_list, random_state=777, train_size=0.8)
dev_data, test_data = train_test_split(dev_test_data, random_state=777, train_size=0.5)

In [36]:
print(len(train_data))
print(len(dev_data))
print(len(test_data))

278344
34793
34793


In [37]:
from common import save_jsonl

save_jsonl(train_data, 'datasets/train_v3.jsonl')
save_jsonl(dev_data, 'datasets/dev_v3.jsonl')
save_jsonl(test_data, 'datasets/test_v3.jsonl')

Save to Jsonl: datasets/train_v3.jsonl
Save to Jsonl: datasets/dev_v3.jsonl
Save to Jsonl: datasets/test_v3.jsonl


### Training

In [38]:
import os
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"]="0"

In [9]:
from torch.utils.data import Dataset, DataLoader
from datasets import load_metric
import torch.nn as nn
import torch
from tqdm.auto import tqdm
import random
from sklearn.metrics import accuracy_score, f1_score
from sklearn.metrics import classification_report
from common import load_jsonl, save_jsonl

from transformers import (
    AdamW,
    get_scheduler,
    BertTokenizer,
    AutoTokenizer,
    AutoModelForSequenceClassification,
    DataCollatorForTokenClassification
)

In [10]:
class NLIDataset(Dataset):
    def __init__(self, data_list, max_length=512, model_name="bert-base-multilingual-cased"):
        self.d_list = data_list
        self.len = len(self.d_list)
        self.max_length = max_length
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.label2index = {
            'SPAM': 0,
            'EDM': 1,
            'HAM': 2,
            'NOTE': 3,
            'HACK': 4,
        }

    def __getitem__(self, index):
        data = self.d_list[index]
        context = data['context']
        label = data['label']
        
        processed_sample = dict()
        processed_sample['labels'] = torch.tensor(self.label2index[label])
        tokenized_input = self.tokenizer(context,
                                         max_length=self.max_length,
                                         padding='max_length', 
                                         truncation=True,
                                         return_tensors="pt")
        
        input_items = {key: val.squeeze() for key, val in tokenized_input.items()}
        processed_sample.update(input_items)
        return processed_sample

    def __len__(self):
        return self.len

In [41]:
train_list = load_jsonl('datasets/train_v3.jsonl')
dev_list = load_jsonl('datasets/dev_v3.jsonl')

Load Jsonl: datasets/train_v3.jsonl


278344it [00:16, 16685.42it/s]


Load Jsonl: datasets/dev_v3.jsonl


34793it [00:02, 17263.23it/s]


In [42]:
train_dataset = NLIDataset(train_list)
dev_dataset = NLIDataset(dev_list)

In [43]:
train_list[0]

{'index': '182241',
 'md5sum': '8975620867bd9c129fcea8cfe71290fb',
 'label': 'EDM',
 'context': '親愛的 Email emailAddress 訂閱 生活 市集 電子報 封信 誤判為 垃圾信 點選 垃圾郵件 好康 天天 抽 機會 抽到 肯德基 蛋塔 熱門 銷售 排行榜 X TOP  熱銷 No  想給 優秀品質 兼具 親民 價格 衛生紙 非 生活 市集 莫屬 生活 市集 自有 品牌 抽取式 溶水 衛生紙 擁有 五大 保證 柔軟 觸感  原生 紙漿 含 螢光劑  臺灣 製造 溶於 水 衛生紙 搭配 可愛 夯吉 包裝 實用 療癒 物超所值   搶購  日本 幫寶適 特規 增量 尿布   搶購  Switch 主機 健身 環 遊戲組   搶購  Dyson Digital Slim 吸塵   搶購  白熊 溶水 抽取式 衛生紙   搶購  卜蜂 經典 佐 義法 嫩 雞胸肉   搶購  唯潔雅 優質 抽取式 衛生紙   搶購  康乃馨 Hi water 水 濕巾   搶購  水嫩 舒肥 雞胸 即食 隨手包   搶購  臺中 逢甲 夜市 炳 叔 烤 玉米   搶購 搶購 中 倒數  小時 熱銷 No  TZUMii 日式 空間 收納櫃 衣帽架 臺灣 製造 SGS 安心 認證 日式 沉穩 風格 獨家 設計 造型 把手 適合 居家 風格 超大 收納 空間 開放式 收納 置物 取物 一眼 明瞭 衣帽架 衣櫥 任選 高 CP值 收納 生活 環境 整齊 清爽   搶購  日本 大王 迪士尼 褲型 尿布   搶購  灰 絨毛 四層 爬架 貓 跳臺   搶購  復 易 防漏 成人 紙尿褲   搶購  抽屜式 分層 收納 隔板 架   搶購  Kiehl 激光 淡斑 精華   搶購  小妖 機全 摺疊 跑步機 M7   搶購  蝶型 防 駝背 脊椎 護腰 坐墊   搶購  無印 風 純白 抽屜 收納 層櫃   搶購  碎花 蝴蝶結 亞麻 手工 拖鞋   搶購 商品 嚴選 ON SALE 熱銷 No  日本 幫寶適 特規版 拉拉 褲 日本 原裝 進口 特製 吸收 層 快速 吸收 水分 屁屁 乾爽 舒適 穿 拉拉 褲 穿脫 穿 好動 寶寶 超自 孩子 選擇   搶購  泰凱 食堂 麻油 猴頭 杏鮑菇 

In [44]:
train_dataset[0]

{'labels': tensor(1),
 'input_ids': tensor([  101,  7150,  3910,  5718, 11289, 35240, 79515, 10738, 12969, 30743,
          7169,  8148,  5600,  4978,  3600,  8272,  8299,  3350,  3115,  3448,
          2316,  7224,  2549,  5287,  3066,  3039,  2316,  8809,  7779,  3066,
          3039,  7835,  2210,  3240,  3676,  3198,  3198,  4055,  4741,  4459,
          4055,  2555,  6516,  3789,  3099,  6980,  3124,  5351,  8129,  7959,
          2884,  4151,  7069,  4689,   161, 98151,  5351,  7959, 10657,  3898,
          6217,  2419,  5948,  2854,  7457,  2465,  2461,  7150,  4851,  2406,
          4580,  7075,  5600,  6192,  8332,  5600,  4978,  3600,  8272,  6776,
          3502,  5600,  4978,  3600,  8272,  6621,  4461,  2854,  5397,  4055,
          2737,  3709,  5122,  4867,  7075,  5600,  6192,  4245,  4461,  2155,
          3197,  2312,  7276,  4540,  7603,  7167,  3911,  2715,  5600,  6192,
          5177,  2793,  7041,  2432,  2593,  6625,  5255,  7120,  7740,  5122,
          4336,  

In [45]:
# note: 5 classes
model = AutoModelForSequenceClassification.from_pretrained("bert-base-multilingual-cased", num_labels=5)

device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
if torch.cuda.device_count() >1:
    model = nn.DataParallel(model,device_ids=[0])
model.to(device)

Some weights of the model checkpoint at bert-base-multilingual-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model ch

BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(119547, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elemen

In [46]:
train_batch_size=40
learning_rate=2e-5 
train_epochs=5

optimizer = AdamW(model.parameters(), lr=learning_rate)
train_dataloader = DataLoader(train_dataset, shuffle=True, batch_size=train_batch_size)
dev_dataloader = DataLoader(dev_dataset, shuffle=True, batch_size=train_batch_size)
print(len(train_dataloader))
print(len(dev_dataloader))

6959
870


In [47]:
for batch_index, batch_dict in enumerate(train_dataloader):
    print(batch_dict)
    break

{'labels': tensor([1, 1, 2, 1, 1, 3, 1, 1, 3, 1, 0, 2, 1, 1, 1, 1, 1, 1, 1, 2, 1, 2, 1, 2,
        1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2]), 'input_ids': tensor([[ 101, 6252, 2078,  ..., 2341, 5512,  102],
        [ 101, 8148, 7290,  ...,    0,    0,    0],
        [ 101, 5477, 3504,  ...,    0,    0,    0],
        ...,
        [ 101, 6252, 2078,  ...,    0,    0,    0],
        [ 101, 5351, 7454,  ...,    0,    0,    0],
        [ 101, 4280, 4780,  ...,    0,    0,    0]]), 'token_type_ids': tensor([[0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        ...,
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0]]), 'attention_mask': tensor([[1, 1, 1,  ..., 1, 1, 1],
        [1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0],
        ...,
        [1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0]])}


In [48]:
## 進度條
num_training_steps = train_epochs * len(train_dataloader)
progress_bar = tqdm(range(num_training_steps))

## 設定warmup
lr_scheduler = get_scheduler(
  "linear",
  optimizer=optimizer,
  num_warmup_steps=10,
  num_training_steps=num_training_steps
)

## start training
for epoch in range(train_epochs):
    model.train()
    for batch_index, batch_dict in enumerate(train_dataloader):
        
        input_items = {key: val.to(device) for key, val in batch_dict.items()}
#         del input_items['token_type_ids'] ## bart不需要這個
        
        optimizer.zero_grad()
        outputs = model(**input_items)
        
        loss = outputs.loss
        if torch.cuda.device_count() >1: ##多GPU的情況要對loss求平均
            loss = loss.mean()
        loss.backward()
        optimizer.step()
        lr_scheduler.step()
        progress_bar.update(1)
        
        if batch_index % 500 ==0:
            print('epoch: ', epoch, '  loss: ', loss)
            
    model.eval()
    predictions = []
    references = []
    with torch.no_grad():
        for batch_index, batch_dict in enumerate(dev_dataloader):
            input_items = {key: val.to(device) for key, val in batch_dict.items()}
            outputs = model(**input_items)

            predictions += outputs.logits.argmax(dim=-1).tolist()
            references += batch_dict['labels'].tolist()

    accuracy = accuracy_score(references, predictions)
    f1 = f1_score(references, predictions,average='macro')
    print('acc: ', accuracy)
    print('f1: ', f1)
    
    ## save model
    save_path = 'model/notice_v3_epoch_' + str(epoch+1)
    if torch.cuda.device_count() >1:
        model_to_save = model.module if hasattr(model, 'module') else model  # Take care of distributed/parallel training
        model_to_save.save_pretrained(save_path)
    else:
        model.save_pretrained(save_path)

  0%|          | 0/34795 [00:00<?, ?it/s]

epoch:  0   loss:  tensor(1.6502, device='cuda:0', grad_fn=<NllLossBackward0>)
epoch:  0   loss:  tensor(0.2305, device='cuda:0', grad_fn=<NllLossBackward0>)
epoch:  0   loss:  tensor(0.0289, device='cuda:0', grad_fn=<NllLossBackward0>)
epoch:  0   loss:  tensor(0.0947, device='cuda:0', grad_fn=<NllLossBackward0>)
epoch:  0   loss:  tensor(0.0056, device='cuda:0', grad_fn=<NllLossBackward0>)
epoch:  0   loss:  tensor(0.0221, device='cuda:0', grad_fn=<NllLossBackward0>)
epoch:  0   loss:  tensor(0.0394, device='cuda:0', grad_fn=<NllLossBackward0>)
epoch:  0   loss:  tensor(0.0721, device='cuda:0', grad_fn=<NllLossBackward0>)
epoch:  0   loss:  tensor(0.0175, device='cuda:0', grad_fn=<NllLossBackward0>)
epoch:  0   loss:  tensor(0.2245, device='cuda:0', grad_fn=<NllLossBackward0>)
epoch:  0   loss:  tensor(0.0274, device='cuda:0', grad_fn=<NllLossBackward0>)
epoch:  0   loss:  tensor(0.0938, device='cuda:0', grad_fn=<NllLossBackward0>)
epoch:  0   loss:  tensor(0.0391, device='cuda:0', g

# Test

In [19]:
from common import load_jsonl
test_list = load_jsonl('datasets/test_v2.jsonl')
test_dataset = NLIDataset(test_list)
test_dataloader = DataLoader(test_dataset, shuffle=False, batch_size=512)

Load Jsonl: datasets/test_v2.jsonl



0it [00:00, ?it/s][A
1528it [00:00, 15273.86it/s][A
3056it [00:00, 13965.51it/s][A
4793it [00:00, 15435.88it/s][A
6545it [00:00, 16233.21it/s][A
8178it [00:00, 16016.98it/s][A
9902it [00:00, 16416.35it/s][A
11559it [00:00, 16465.23it/s][A
13315it [00:00, 16809.40it/s][A
14999it [00:00, 16593.64it/s][A
16661it [00:01, 16599.27it/s][A
18358it [00:01, 16711.31it/s][A
20052it [00:01, 16779.38it/s][A
21731it [00:01, 16612.61it/s][A
23397it [00:01, 16626.13it/s][A
25061it [00:01, 16547.26it/s][A
26717it [00:01, 16543.47it/s][A
28372it [00:01, 16379.30it/s][A
30011it [00:01, 16137.34it/s][A
31626it [00:01, 15841.23it/s][A
34778it [00:02, 16188.17it/s][A


In [31]:
# note: 5 classes
model_path = 'model/notice_v3_epoch_5'
# model_path = 'models/Mail_Classifier/epoch_5'
model = AutoModelForSequenceClassification.from_pretrained(model_path, num_labels=5)
tokenizer = AutoTokenizer.from_pretrained("bert-base-multilingual-cased")  

device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
model.to(device)

BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(119547, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elemen

In [32]:
model.eval()
predictions = []
references = []
num_steps = len(test_dataloader)
progress_bar = tqdm(range(num_steps))

with torch.no_grad():
    for batch_index, batch_dict in enumerate(test_dataloader):
        input_items = {key: val.to(device) for key, val in batch_dict.items()}
        outputs = model(**input_items)

        predictions += outputs.logits.argmax(dim=-1).tolist()
        references += batch_dict['labels'].tolist()
        progress_bar.update(1)

accuracy = accuracy_score(references, predictions)
f1 = f1_score(references, predictions,average='macro')
print('acc: ', accuracy)
print('f1: ', f1)

  0%|          | 0/68 [00:00<?, ?it/s]

acc:  0.9951981137500718
f1:  0.9814823947571488


In [33]:
print(model_path)
print(classification_report(references, predictions))

model/notice_v3_epoch_5
              precision    recall  f1-score   support

           0       0.98      0.98      0.98      3670
           1       1.00      1.00      1.00     21985
           2       1.00      1.00      1.00      6776
           3       1.00      1.00      1.00      2225
           4       0.93      0.94      0.93       122

    accuracy                           1.00     34778
   macro avg       0.98      0.98      0.98     34778
weighted avg       1.00      1.00      1.00     34778



## find out class 3 (note)

In [34]:
import pandas as pd
import operator
class_number = {'SPAM':0, 'EDM':1, 'HAM':2, 'NOT':3, 'HACKER':4}

count = 0
wrong = 0
right = 0
which_class = 3

print(len(references))

for i, class_num in enumerate(references):
    if class_num == which_class:
        count += 1
        if predictions[i] != which_class:
            wrong += 1
            # print(test_list[i]['context'])
        else:
            right += 1
    
print('in testdatasets , there are %d %s messages ~'%(count, [k for k, v in class_number.items() if v == which_class][0]))
print('the accurate of %s is : %.2f'%([k for k, v in class_number.items() if v == which_class][0], (right - wrong)/count))
       

        
# print('in testdatasets , there are %d notice messages ~')

# print(test_list[i]['context'])
# print(predictions[i])

34778
in testdatasets , there are 2225 NOT messages ~
the accurate of NOT is : 1.00


## find out class 4 (hacker)

In [35]:
import pandas as pd
import operator
class_number = {'SPAM':0, 'EDM':1, 'HAM':2, 'NOT':3, 'HACKER':4}

count = 0
wrong = 0
right = 0
which_class = 4

print(len(references))

for i, class_num in enumerate(references):
    if class_num == which_class:
        count += 1
        if predictions[i] != which_class:
            wrong += 1
            # print(test_list[i]['context'])
        else:
            right += 1
    
print('in testdatasets , there are %d %s messages ~'%(count, [k for k, v in class_number.items() if v == which_class][0]))
print('the accurate of %s is : %.2f'%([k for k, v in class_number.items() if v == which_class][0], (right - wrong)/count))
       


34778
in testdatasets , there are 122 HACKER messages ~
the accurate of HACKER is : 0.89


## predict from batch_data

In [65]:
import pandas as pd
import operator
class_number = {'SPAM':0, 'EDM':1, 'HAM':2, 'NOTE':3, 'HACKER':4}
model.eval()

softmax=torch.nn.Softmax()
csvfile = pd.read_csv('test_data/notice.csv')    # change the value up to the colab limit

predict_list = []
score_list = []
sm_score_list = []
for token in csvfile['context']:

    tokenized_input = tokenizer(token,
                                max_length=20,
                                truncation=True,
                                return_tensors="pt")
    with torch.no_grad():
        input_items = {key: val.to(device) for key, val in tokenized_input.items()}
#         del input_items['token_type_ids'] ## bart不需要這個

        outputs = model(**input_items)
        prediction = outputs.logits.argmax(dim=-1)

        prediction = int(prediction)
        sm = softmax(outputs.logits[0])

        predict_list.append(prediction)
        score_list.append(outputs.logits[0][prediction].item())
        sm_score_list.append(sm[prediction].item())
        
print(predict_list)




[3, 3, 3, 1, 3, 3, 3, 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3]


## predict from each sentence

In [64]:
test_num = 10

# subject = test_list[test_num]['subject']
context = test_list[test_num]['context']
label = test_list[test_num]['label']

tokenized_input = tokenizer(context,
                            max_length=512,
                            truncation=True,
                            return_tensors="pt")
class_number = {'SPAM':0, 'EDM':1, 'HAM':2, 'NOTE':3, 'KACKER':4}
model.eval()
with torch.no_grad():
    input_items = {key: val.to(device) for key, val in tokenized_input.items()}
    del input_items['token_type_ids'] ## bart不需要這個
    
    outputs = model(**input_items)
    prediction = outputs.logits.argmax(dim=-1)
    print(type(int(prediction)))
    
    
    # print('主旨: ', subject)
    print('內文: ', context)
    print('label: ', class_number[label], label)
    print('predict: ', int(prediction), list(class_number.keys())[list(class_number.values()).index(int(prediction))])
    
    # if int(prediction) == 0:
    #     print('predict: ham')
    # else:
    #     print('predict: spam')

<class 'int'>
內文:  EyeCloud  親愛的  林  淵博  設備  狀態  切換  資訊  設備  名稱  華府  總部  伺服器  型號  UR    隸屬  羣組  設備  狀態  切換  信件  EyeCloud  自動  發送  回覆  本信件  登入  EyeCloud  查看  管理員  謝謝  UrlText
label:  3 NOTE
predict:  3 NOTE
