In [15]:
type_num=16
cuda_num = str(3)
date='12.02'
trainingSet_path0= "/data/fuwen/SuWen/Bert-classification/data/task2/train_"+date+".txt"
valSet_path0= "/data/fuwen/SuWen/Bert-classification/data/task2/test_"+date+".txt"
zeng_path0= "../data/sentiment/quan_9/zeng.txt"
model_save_path='../result2/classifier_'+date
type_=['ICT', '新能源汽车', '生物医药', '医疗器械', '钢铁', 
       '能源', '工业机器人', '先进轨道交通', '其他', '汽车', 
       '数控机床', '工业软件', '高端装备', '半导体', '人工智能', '稀土']

In [16]:
"""
手动实现transformer.models.bert.BertForSequenceClassification()函数
根据论文[How to Fine-Tune BERT for Text Classification（2019）](https://www.aclweb.org/anthology/P18-1031.pdf)
在分类问题上，把最后四层进行concat然后maxpooling 输出的结果会比直接输出最后一层的要好
这里进行实现测试

"""
import torch
import torch.nn as nn
from transformers import BertModel,BertTokenizer
import torch.nn.functional as F


class bert_lr_last4layer_Config(nn.Module):
    def __init__(self):
        self.bert_path = "../chinese-bert-wwm"
        self.config_path = "../chinese-bert-wwm/config.json"

        # self.tokenizer = BertTokenizer.from_pretrained(self.bert_path)
        self.hidden_size = 768
        self.num_labels = type_num
        # self.dropout_bertout = 0.2
        self.dropout_bertout = 0.5
        self.mytrainedmodel = "../result/bert_clf_model.bin"
        """
        current loss: 0.4363991916179657 	 current acc: 0.8125
        current loss: 0.1328232882924341 	 current acc: 0.9527363184079602
        current loss: 0.11797185830000853 	 current acc: 0.9585411471321695
        train loss:  0.11880445411248554 	 train acc: 0.9583704495516361
        valid loss:  0.1511497257672476 	 valid acc: 0.9431549028896258
        """

class bert_lr_last4layer(nn.Module):

    def __init__(self,config):
        super(bert_lr_last4layer, self).__init__()
        self.bert = BertModel.from_pretrained(config.bert_path,config = config.config_path)
        self.dropout_bertout = nn.Dropout(config.dropout_bertout)
        self.num_labels = config.num_labels
        self.classifier = nn.Linear(config.hidden_size, config.num_labels)

        for param in self.bert.parameters():
            param.requires_grad = True

    def forward(
        self,
        input_ids=None,
        attention_mask=None,
        token_type_ids=None,
        position_ids=None,
        head_mask=None,
        inputs_embeds=None,
        labels=None,
        output_attentions=None,
        output_hidden_states=True,
        return_dict=None,
    ):
        outputs = self.bert(
            input_ids,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
            position_ids=position_ids,
            head_mask=head_mask,
            inputs_embeds=inputs_embeds,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
        )

        # outputs = outputs[2] # [1]是pooled的结果 # [3]是hidden_states 12层
        hidden_states = outputs.hidden_states
        nopooled_output = torch.cat((hidden_states[9],hidden_states[10],hidden_states[11],hidden_states[12]),1)
        batch_size = nopooled_output.shape[0] # 32
        # print(batch_size)
        # print(nopooled_output.shape) # torch.Size([32, 400, 768])
        kernel_hight = nopooled_output.shape[1]
        pooled_output = F.max_pool2d(nopooled_output,kernel_size = (kernel_hight,1))
        # print(pooled_output.shape) # torch.Size([32, 1, 768])

        flatten = pooled_output.view(batch_size,-1)
        # print(flatten.shape) # [32,768]

        flattened_output = self.dropout_bertout(flatten)

        logits = self.classifier(flattened_output)

        loss = None
        if labels is not None:
            if self.num_labels == 1:
                #  We are doing regression
                loss_fct = nn.MSELoss()
                loss = loss_fct(logits.view(-1), labels.view(-1))
            else:
                loss_fct = nn.CrossEntropyLoss()
                loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))

        if not return_dict:
            output = (logits,) + outputs[2:]
            return ((loss,) + output) if loss is not None else output

        return loss,logits

In [17]:
class Config(object):
    def __init__(self):
        self.config_dict = {
            "data_path": {
                # "trainingSet_path": "../data/sentiment/sentiment.train0.data",
                # "valSet_path": "../data/sentiment/sentiment.valid0.data",
                "trainingSet_path": trainingSet_path0,
                "valSet_path": valSet_path0,
                "testingSet_path": "../data/sentiment/sentiment.test0.data",
                "zeng_path": zeng_path0
            },

            "BERT_path": {
                "file_path": '../chinese-bert-wwm/',
                "config_path": '../chinese-bert-wwm/',
                "vocab_path": '../chinese-bert-wwm/',
            },

            "training_rule": {
                "max_length": 300,  # 输入序列长度，别超过512
                "hidden_dropout_prob": 0.3,
                "num_labels": type_num,  # 几分类个数
                "learning_rate": 1e-5,
                "weight_decay": 1e-2,
                "batch_size": 16
            },

            "result": {
                "model_save_path": '../result/bert_clf_model.bin',
                "config_save_path": '../result/bert_clf_config.json',
                "vocab_save_path": '../result/bert_clf_vocab.txt'
            }
        }

    def get(self, section, name):
        return self.config_dict[section][name]

In [18]:
import torch
from torch.utils.data import Dataset
import pandas as pd

class SentimentDataset(Dataset):
    def __init__(self, path_to_file):
#         print(path_to_file)
        self.dataset = pd.read_csv(path_to_file, sep="\t", names=["text", "label"])
    def __len__(self):
        return len(self.dataset)
    def __getitem__(self, idx):
        text = self.dataset.loc[idx, "text"]
        label = self.dataset.loc[idx, "label"]
        sample = {"text": text, "label": label}
        # print(sample)
        return sample

def convert_text_to_ids(tokenizer, text, max_len=100):
    if isinstance(text, str):
        tokenized_text = tokenizer.encode_plus(text, max_length=max_len, add_special_tokens=True, truncation=True)
        input_ids = tokenized_text["input_ids"]
        token_type_ids = tokenized_text["token_type_ids"]
    elif isinstance(text, list):
        input_ids = []
        token_type_ids = []
        for t in text:
            tokenized_text = tokenizer.encode_plus(t, max_length=max_len, add_special_tokens=True, truncation=True)
            input_ids.append(tokenized_text["input_ids"])
            token_type_ids.append(tokenized_text["token_type_ids"])
    else:
        print("Unexpected input")
    return input_ids, token_type_ids

def seq_padding(tokenizer, X):
    pad_id = tokenizer.convert_tokens_to_ids("[PAD]")
    if len(X) <= 1:
        return torch.tensor(X)
    L = [len(x) for x in X]
    ML = max(L)
    X = torch.Tensor([x + [pad_id] * (ML - len(x)) if len(x) < ML else x for x in X])
    return X

In [19]:
import random

import torch
import torch.nn as nn
from torch.utils.data import DataLoader
import numpy as np
from sklearn import metrics
import numpy
import transformers
from transformers import AdamW

In [20]:
class transformers_bert_binary_classification(object):
    def __init__(self):
        self.config = Config()
        self.device_setup()

    def device_setup(self):
        """
        设备配置并加载BERT模型
        :return:
        """
        self.freezeSeed()
        # 使用GPU，通过model.to(device)的方式使用
        device_s = "cuda:" + cuda_num
        self.device = torch.device(device_s if torch.cuda.is_available() else "cpu")

        # import os
        # result_dir = "../result"
        # MODEL_PATH = self.config.get("BERT_path", "file_path")
        # config_PATH = self.config.get("BERT_path", "config_path")
        vocab_PATH = self.config.get("BERT_path", "vocab_path")

        # num_labels = self.config.get("training_rule", "num_labels")
        # hidden_dropout_prob = self.config.get("training_rule", "hidden_dropout_prob")

        # 通过词典导入分词器
        self.tokenizer = transformers.BertTokenizer.from_pretrained(vocab_PATH)
        # self.model_config = BertConfig.from_pretrained(config_PATH, num_labels=num_labels,
        #                                                hidden_dropout_prob=hidden_dropout_prob)
        # self.model = BertForSequenceClassification.from_pretrained(MODEL_PATH, config=self.model_config)
        """
        train loss:  0.10704718510208534 	 train acc: 0.9637151849872321
        valid loss:  0.17820182011222863 	 valid acc: 0.9459971577451445
        """
        # 如果想换模型，换成下边这句子
        # bert+lr 跟官方方法差不都
        # self.model = bert_lr(bert_lr_Config())
        # self.model = bert_cnn(bert_cnn_Config())
        self.model = bert_lr_last4layer(bert_lr_last4layer_Config())

        self.model.to(self.device)

    def model_setup(self, zeng=0):
        weight_decay = self.config.get("training_rule", "weight_decay")
        learning_rate = self.config.get("training_rule", "learning_rate")
        print("**model_setup:")
        print("zeng",zeng)
        if zeng == 1:
            learning_rate = learning_rate * 2
        # 定义优化器和损失函数
        # Prepare optimizer and schedule (linear warmup and decay)
        no_decay = ['bias', 'LayerNorm.weight']
        optimizer_grouped_parameters = [
            {'params': [p for n, p in self.model.named_parameters() if not any(nd in n for nd in no_decay)],
             'weight_decay': weight_decay},
            {'params': [p for n, p in self.model.named_parameters() if any(nd in n for nd in no_decay)],
             'weight_decay': 0.0}
        ]
        self.optimizer = AdamW(optimizer_grouped_parameters, lr=learning_rate)
        self.criterion = nn.CrossEntropyLoss(weight=torch.from_numpy(np.array([2.980574324324324, 1.0, 4.089223638470452, 9.748618784530386, 7.02988047808765, 2.204247345409119, 2.892622950819672, 12.694244604316546, 16.966346153846153, 3.529, 3.529, 3.529, 11.80267558528428, 8.503614457831326, 4.433417085427136, 3.529])).float())
        self.criterion.to(self.device)

    def get_data(self):
        """
        读取数据
        :return:
        """
        train_set_path = self.config.get("data_path", "trainingSet_path")
        valid_set_path = self.config.get("data_path", "valSet_path")
        batch_size = self.config.get("training_rule", "batch_size")
        zeng_set_path = self.config.get("data_path", "zeng_path")
        print(train_set_path,valid_set_path,batch_size,zeng_set_path)

        # 数据读入
        # 加载数据集
        sentiment_train_set = SentimentDataset(train_set_path)
        sentiment_train_loader = DataLoader(sentiment_train_set, batch_size=batch_size, shuffle=True, num_workers=2)
        sentiment_valid_set = SentimentDataset(valid_set_path)
        sentiment_valid_loader = DataLoader(sentiment_valid_set, batch_size=batch_size, shuffle=False, num_workers=2)

        sentiment_zeng_set = SentimentDataset(zeng_set_path)
        sentiment_zeng_loader = DataLoader(sentiment_zeng_set, batch_size=batch_size, shuffle=True, num_workers=2)

        return sentiment_train_loader, sentiment_valid_loader, sentiment_zeng_loader

    def train_an_epoch(self, iterator, zeng=0):
        print("**train_an_epoch")
        print("zeng",zeng)
        self.model_setup(zeng)
        epoch_loss = 0
        epoch_acc = 0

        for i, batch in enumerate(iterator):
            label = batch["label"]
            text = batch["text"]
            # print(label)
            input_ids, token_type_ids = convert_text_to_ids(self.tokenizer, text)
            input_ids = seq_padding(self.tokenizer, input_ids)
            token_type_ids = seq_padding(self.tokenizer, token_type_ids)
            # 标签形状为 (batch_size, 1)
            label = label.unsqueeze(1)
            # 需要 LongTensor
            input_ids, token_type_ids, label = input_ids.long(), token_type_ids.long(), label.long()
            # 梯度清零
            self.optimizer.zero_grad()
            # 迁移到GPU
            input_ids, token_type_ids, label = input_ids.to(self.device), token_type_ids.to(self.device), label.to(
                self.device)
            output = self.model(input_ids=input_ids, token_type_ids=token_type_ids, labels=label)  # 这里不需要labels
            # BertForSequenceClassification的输出loss和logits
            # BertModel原本的模型输出是last_hidden_state，pooler_output
            # bert_cnn的输出是[batch_size, num_class]
            # print(numpy.array(torch.tensor(output).cpu()).shape)

            y_pred_prob = output[1]
            y_pred_label = y_pred_prob.argmax(dim=1)

            # 计算loss
            # 这个 loss 和 output[0] 是一样的
            loss = self.criterion(y_pred_prob.view(-1, type_num), label.view(-1))  # 多分类改这里
            # loss = output[0]
            # 计算acc
            acc = ((y_pred_label == label.view(-1)).sum()).item()
            # 反向传播
            loss.backward()
            self.optimizer.step()
            # epoch 中的 loss 和 acc 累加
            epoch_loss += loss.item()
            epoch_acc += acc
            if i % 200 == 0:
                print("current loss:", epoch_loss / (i + 1), "\t", "current acc:", epoch_acc / ((i + 1) * len(label)))
        return epoch_loss / len(iterator), epoch_acc / len(iterator.dataset.dataset)

    def evaluate(self, iterator):
        self.model.eval()
        epoch_loss = 0
        epoch_acc = 0
        y_pred_label_all = []
        label_all = []
        with torch.no_grad():
            for _, batch in enumerate(iterator):
                label = batch["label"]
                text = batch["text"]

                input_ids, token_type_ids = convert_text_to_ids(self.tokenizer, text)
                input_ids = seq_padding(self.tokenizer, input_ids)
                token_type_ids = seq_padding(self.tokenizer, token_type_ids)
                label = label.unsqueeze(1)
                input_ids, token_type_ids, label = input_ids.long(), token_type_ids.long(), label.long()
                input_ids, token_type_ids, label = input_ids.to(self.device), token_type_ids.to(self.device), label.to(
                    self.device)
                output = self.model(input_ids=input_ids, token_type_ids=token_type_ids, labels=label)
                # 更改了以下部分
                # y_pred_label = output[1].argmax(dim=1)
                y_pred_prob = output[1]
                y_pred_label = y_pred_prob.argmax(dim=1)
                loss = output[0]
                # loss = self.criterion(y_pred_prob.view(-1, 2), label.view(-1))
                acc = ((y_pred_label == label.view(-1)).sum()).item()
                y_pred_label_all += y_pred_label.tolist()
                label_all += label.view(-1).tolist()

                epoch_loss += loss.item()
                epoch_acc += acc

        print(metrics.classification_report(y_pred_label_all, label_all))
        print("准确率:", metrics.accuracy_score(y_pred_label_all, label_all))
        return epoch_loss / len(iterator), epoch_acc / len(iterator.dataset.dataset)

    def train(self, epochs, zeng=0):
        sentiment_train_loader, sentiment_valid_loader, sentiment_zeng_loader = self.get_data()

        for i in range(epochs):
            print('____________________________________________________________________________________')
            print('____________________________________________________________________________________')
            print('epochs:', i)
            print('____________________________________________________________________________________')
            print('____________________________________________________________________________________')
            print('____train____')
            if zeng == 0:
                train_loss, train_acc = self.train_an_epoch(sentiment_train_loader)
            else:
                train_loss, train_acc = self.train_an_epoch(sentiment_zeng_loader, 1)
            print("train loss: ", train_loss, "\t", "train acc:", train_acc)
            print('____evaluate____')
            valid_loss, valid_acc = self.evaluate(sentiment_valid_loader)
            print("valid loss: ", valid_loss, "\t", "valid acc:", valid_acc)
        # self.save_model()

    def save_model(self):
        model_save_path = self.config.get("result", "model_save_path")
        config_save_path = self.config.get("result", "config_save_path")
        vocab_save_path = self.config.get("result", "vocab_save_path")

        model_to_save = self.model.module if hasattr(self.model, 'module') else self.model
        torch.save(model_to_save.state_dict(), model_save_path)
        # model_to_save.config.to_json_file(config_save_path) # !!!'bert_lr' object has no attribute 'config'
        # self.tokenizer.save_vocabulary(vocab_save_path)
        print("model saved...")

    def predict(self, sentence):
        # self.model.setup()
        self.model_setup()
        self.model.eval()
        # 转token后padding
        input_ids, token_type_ids = convert_text_to_ids(self.tokenizer, sentence)
        input_ids = seq_padding(self.tokenizer, [input_ids])
        token_type_ids = seq_padding(self.tokenizer, [token_type_ids])
        # 需要 LongTensor
        input_ids, token_type_ids = input_ids.long(), token_type_ids.long()
        # 梯度清零
        self.optimizer.zero_grad()
        # 迁移到GPU
        input_ids, token_type_ids = input_ids.to(self.device), token_type_ids.to(self.device)
        output = self.model(input_ids=input_ids, token_type_ids=token_type_ids)
        # y_pred_prob:各个类别的概率
        y_pred_prob = output[0]
        # 取概率最大的标签
        y_pred_label = y_pred_prob.argmax(dim=1)

        # 将torch.tensor转换回int形式
        return y_pred_prob, y_pred_label.item()

    def freezeSeed(self):
        seed = 1
        torch.manual_seed(seed)
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)
        np.random.seed(seed)  # Numpy module.
        random.seed(seed)  # Python random module.
        torch.manual_seed(seed)
        torch.backends.cudnn.benchmark = False
        torch.backends.cudnn.deterministic = True

In [21]:
classifier = transformers_bert_binary_classification()
classifier.train(3,zeng=0)

/data/fuwen/SuWen/Bert-classification/data/task2/train_12.02.txt /data/fuwen/SuWen/Bert-classification/data/task2/test_12.02.txt 16 ../data/sentiment/quan_9/zeng.txt
____________________________________________________________________________________
____________________________________________________________________________________
epochs: 0
____________________________________________________________________________________
____________________________________________________________________________________
____train____
**train_an_epoch
zeng 0
**model_setup:
zeng 0
current loss: 3.1111135482788086 	 current acc: 0.125
current loss: 2.7351562223624235 	 current acc: 0.1806592039800995
current loss: 2.0994939279377904 	 current acc: 0.3782730673316708
current loss: 1.779056809656632 	 current acc: 0.4728577371048253
train loss:  1.7019093138372112 	 train acc: 0.4950838093454443
____evaluate____
                      precision    recall  f1-score   support

-9223372036854775808      

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


current loss: 1.998249888420105 	 current acc: 0.5625
current loss: 0.773925553136204 	 current acc: 0.7521766169154229
current loss: 0.7508814432600193 	 current acc: 0.7607543640897756
current loss: 0.7381292324940892 	 current acc: 0.7664309484193012
train loss:  0.72553422656691 	 train acc: 0.7690795018260137
____evaluate____
                      precision    recall  f1-score   support

-9223372036854775808       0.00      0.00      0.00         0
                   0       0.70      0.61      0.65       387
                   1       0.82      0.89      0.85       951
                   2       0.95      0.97      0.96       241
                   3       0.91      0.89      0.90       110
                   4       0.99      0.97      0.98       154
                   5       0.82      0.92      0.87       463
                   6       0.77      0.76      0.77       393
                   7       0.95      0.88      0.91        83
                   8       0.47      0.36     

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


current loss: 0.4017607867717743 	 current acc: 0.8125
current loss: 0.4699749868381676 	 current acc: 0.8451492537313433
current loss: 0.45230280880135787 	 current acc: 0.8497506234413965
current loss: 0.4426940717236968 	 current acc: 0.850769550748752
train loss:  0.44067685820251523 	 train acc: 0.8521397134563161
____evaluate____
                      precision    recall  f1-score   support

-9223372036854775808       0.00      0.00      0.00         0
                   0       0.68      0.69      0.69       335
                   1       0.78      0.90      0.84       898
                   2       0.97      0.96      0.96       247
                   3       0.95      0.80      0.87       129
                   4       0.99      0.95      0.97       157
                   5       0.80      0.93      0.86       444
                   6       0.83      0.75      0.78       428
                   7       0.96      0.85      0.90        87
                   8       0.50      0.35

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [23]:
classifier.train(1,zeng=0)

/data/fuwen/SuWen/Bert-classification/data/task2/train_12.02.txt /data/fuwen/SuWen/Bert-classification/data/task2/test_12.02.txt 16 ../data/sentiment/quan_9/zeng.txt
____________________________________________________________________________________
____________________________________________________________________________________
epochs: 0
____________________________________________________________________________________
____________________________________________________________________________________
____train____
**train_an_epoch
zeng 0
**model_setup:
zeng 0
current loss: 0.113947793841362 	 current acc: 0.9375
current loss: 0.2438846579312685 	 current acc: 0.9126243781094527
current loss: 0.23807453212242322 	 current acc: 0.9155236907730673
current loss: 0.24703491033177308 	 current acc: 0.9126455906821963
train loss:  0.24479852008081215 	 train acc: 0.9129131941192996
____evaluate____
                      precision    recall  f1-score   support

-9223372036854775808  

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [24]:
print("model saving...")
torch.save(classifier, model_save_path)
print("model saved")

model saving...
model saved


In [27]:
import torch
import jieba

device = torch.device("cpu")

classifier= torch.load(model_save_path,map_location=device)
classifier.device=torch.device('cpu')
ty = ['ICT', '新能源汽车', '生物医药', '医疗器械', '钢铁', 
       '能源', '工业机器人', '先进轨道交通', '其他', '汽车', 
       '数控机床', '工业软件', '高端装备', '半导体', '人工智能', '稀土']
# print(classifier1.predict("『巴西』圣保罗城际铁路听证会延期至10月15日"))  # 0
# print(classifier1.predict("永恒力叉车入驻京东工业品 载重2吨的叉车设备也能线上采购"))  # 0

def read_list(text_path):
    lsit = []
    with open('%s' % text_path, 'r', encoding="utf8") as f:  # 打开一个文件只读模式
        line = f.readlines()  # 读取文件中的每一行，放入line列表中
        for line_list in line:
            lsit.append(line_list.replace('\n', ''))
    return lsit


def test():
    test_list = read_list('test.txt')
    for i in test_list:
        re = classifier.predict(i)  # 0
        result1 = re[1]
        result2 = re[0].tolist()

        if result2[0][result1] < 3.5:
            print(i, '\n', result1, '***** 其他 ***** 原预测:',ty[result1], result2[0][result1], '\n', result2[0], '\n')
        else:
            print(i, '\n', result1, ty[result1], result2[0][result1], '\n', result2[0], '\n')

test()

**model_setup:
zeng 0
e络盟社区发布新一期3D打印电子书 
 10 数控机床 4.260926246643066 
 [0.03308815136551857, 1.1343685388565063, -2.8630611896514893, -1.061619758605957, -3.376024007797241, -0.10399092733860016, -0.005142254754900932, -2.2104415893554688, 1.009634017944336, -0.8443334698677063, 4.260926246643066, 1.6069154739379883, 2.4495534896850586, -1.3922654390335083, -0.8762305378913879, -0.5523805618286133] 

**model_setup:
zeng 0
2021中国国际消费电子博览会和青岛国际软件融合创新博览会开幕 
 8 其他 3.5968732833862305 
 [0.02243836410343647, 0.33484792709350586, -1.9435091018676758, -0.9808441400527954, -1.961703896522522, 0.4833490550518036, 0.20244036614894867, -1.174248218536377, 3.5968732833862305, -2.058866262435913, 3.4826254844665527, -0.3917062282562256, 3.377153158187866, -1.662366271018982, -2.0469601154327393, -0.8627790212631226] 

**model_setup:
zeng 0
4D打印软体机器人：打印出来即可工作 
 6 工业机器人 5.155308246612549 
 [0.6124314069747925, -0.9994395971298218, -2.608823537826538, -0.572187066078186, -2.851116895675659, -1.643765330

中联农机摆擂台，安徽农机驾驶高手技能大比武 
 12 高端装备 6.310936450958252 
 [-1.14996337890625, -0.3448808789253235, -2.7443904876708984, -1.6743855476379395, -1.6701724529266357, 0.46899375319480896, -0.03157568722963333, -0.22040753066539764, 1.818386197090149, -0.07824014872312546, 1.9036312103271484, 0.9213793873786926, 6.310936450958252, -1.106046438217163, -2.358062744140625, -0.12359105050563812] 

**model_setup:
zeng 0
中国商飞：6架C919国产大飞机正试验试飞，CR929研制进展顺利 
 12 ***** 其他 ***** 原预测: 高端装备 2.972586154937744 
 [0.09799832850694656, 1.2181470394134521, -1.8455843925476074, -0.9421917200088501, -1.4446443319320679, -0.8576133847236633, 0.3876481354236603, 0.3748096823692322, 2.133892774581909, -1.329843521118164, 2.0969600677490234, -1.5337951183319092, 2.972586154937744, -1.2033048868179321, -1.6863709688186646, -1.4079668521881104] 

**model_setup:
zeng 0
库克被曝曾斥资1010万美元在加州购买豪宅面积 
 0 ICT 5.308465957641602 
 [5.308465957641602, 2.325765609741211, -2.0592164993286133, -1.018647313117981, -3.04697847366333, -0.501

猪肉股逆势上涨 牧原股份大涨7% 反攻行情开启？ 
 8 其他 7.464602470397949 
 [0.13274729251861572, -0.06478330492973328, 0.16635847091674805, -0.14782588183879852, 0.022502567619085312, 0.7107023596763611, -0.24629977345466614, -1.7644445896148682, 7.464602470397949, -1.606689453125, -0.2960723638534546, -1.0465366840362549, -1.0477285385131836, -0.9971848130226135, -1.3768640756607056, -0.6237084865570068] 

**model_setup:
zeng 0
十大券商策略：节前仍是布局窗口 今年A股“第三股风”吹向哪些板块？ 
 8 其他 6.797500133514404 
 [0.1660422384738922, 0.28290578722953796, -0.8418099880218506, -1.439254641532898, 0.32249799370765686, 0.4828543961048126, -1.076156735420227, -1.737673044204712, 6.797500133514404, -1.6487292051315308, -0.14494922757148743, -1.414153814315796, -0.4987751245498657, 0.3326707184314728, -2.184338331222534, -0.17382337152957916] 

**model_setup:
zeng 0
三大稀土巨头联手“放大招” 行业格局要变天？ 
 15 稀土 7.974476337432861 
 [0.3104734718799591, 0.7887653112411499, -1.2506428956985474, -0.29095718264579773, -1.1850987672805786, 0.20510552823543549,

禾轩科技年产6万吨电解液材料项目将开工 投资逾亿元 
 12 ***** 其他 ***** 原预测: 高端装备 2.9709975719451904 
 [-0.901229202747345, 1.5871795415878296, -2.8921732902526855, -1.9376909732818604, -1.3693499565124512, 1.5924803018569946, -0.9909111261367798, -1.2069448232650757, -1.2967233657836914, -1.7944105863571167, 2.761965036392212, 0.597094714641571, 2.9709975719451904, 0.6267309784889221, -1.7781929969787598, 1.7479162216186523] 

**model_setup:
zeng 0
因芯片短缺，奔驰新车交付期或超一年 
 1 新能源汽车 4.801406383514404 
 [3.8593590259552, 4.801406383514404, -1.9758334159851074, -1.552641749382019, -2.888871669769287, 0.039338838309049606, -1.0738581418991089, -2.1797103881835938, -0.4314260482788086, -0.45684975385665894, 0.7785373330116272, -1.0632680654525757, -1.2751805782318115, 0.7179789543151855, -1.461222529411316, -0.06743216514587402] 

**model_setup:
zeng 0
马斯克：芯片短缺或于明年结束 
 0 ICT 4.8050713539123535 
 [4.8050713539123535, 3.6657047271728516, -1.8543058633804321, -2.014613151550293, -2.63649582862854, 0.24840253591537476, -0.81