In [2]:
!pip install transformers

Collecting transformers
[?25l  Downloading https://files.pythonhosted.org/packages/50/10/aeefced99c8a59d828a92cc11d213e2743212d3641c87c82d61b035a7d5c/transformers-2.3.0-py3-none-any.whl (447kB)
[K     |████████████████████████████████| 450kB 3.4MB/s 
Collecting sacremoses
[?25l  Downloading https://files.pythonhosted.org/packages/a6/b4/7a41d630547a4afd58143597d5a49e07bfd4c42914d8335b2a5657efc14b/sacremoses-0.0.38.tar.gz (860kB)
[K     |████████████████████████████████| 870kB 49.7MB/s 
Collecting sentencepiece
[?25l  Downloading https://files.pythonhosted.org/packages/74/f4/2d5214cbf13d06e7cb2c20d84115ca25b53ea76fa1f0ade0e3c9749de214/sentencepiece-0.1.85-cp36-cp36m-manylinux1_x86_64.whl (1.0MB)
[K     |████████████████████████████████| 1.0MB 39.7MB/s 
Building wheels for collected packages: sacremoses
  Building wheel for sacremoses (setup.py) ... [?25l[?25hdone
  Created wheel for sacremoses: filename=sacremoses-0.0.38-cp36-none-any.whl size=884629 sha256=79656a5eea2bf36365afb6

In [0]:
import argparse
import glob
import json
import logging
import os
import random

import numpy as np
import torch
from torch.utils.data import DataLoader, RandomSampler, SequentialSampler, TensorDataset
from torch.utils.data.distributed import DistributedSampler
from tqdm import tqdm, trange

In [4]:
from transformers import (
    WEIGHTS_NAME,
    AdamW,
    get_linear_schedule_with_warmup,
    BertConfig,
    BertModel,
    BertPreTrainedModel,
    BertTokenizer,)

In [0]:
from transformers import glue_compute_metrics as compute_metrics
from transformers import glue_convert_examples_to_features as convert_examples_to_features
from transformers import glue_output_modes as output_modes
from transformers import glue_processors as processors

In [0]:
from transformers.data.processors.utils import InputExample, DataProcessor

In [0]:
try:
    from torch.utils.tensorboard import SummaryWriter   #version 1.14 or higher
except ImportError:
    from tensorboardX import SummaryWriter

In [0]:
import code
import os
import pickle
from sklearn.metrics.pairwise import cosine_similarity

In [0]:
import torch
import torch.nn as nn
import torch.nn.functional as F

In [0]:
import numpy as np
import pandas as pd

In [11]:
from google.colab import drive
drive.mount('/content/drive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/drive


In [0]:
logger=logging.getLogger(__name__)
model_path="/content/drive/My Drive/chinese_wwm_ext_pytorch"
#model_path="/Users/valleria_ruka/Desktop/FAQ/BERT/chinese_wwm_ext_pytorch"

In [0]:
MODEL_CLASSES={
    "bert":(BertConfig,BertTokenizer),
}

In [0]:
def set_seed(seed=42):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    #if args.n_gpu>0:
    #    torch.cuda.manual_seed_all(seed)

In [0]:
class FAQProcessor(DataProcessor):
    """Processor for the CoLA data set (GLUE version)."""

    def get_example_from_tensor_dict(self, tensor_dict):
        """See base class."""
        return InputExample(
            tensor_dict["idx"].numpy(),
            tensor_dict["sentence"].numpy().decode("utf-8"),
            None,
            str(tensor_dict["label"].numpy()),
        )

    def get_data(self, file_dir):
        train_df = pd.read_csv(file_dir, sep="\t")
        self.candidate_title = train_df["title"].tolist()
        self.candidate_reply = train_df["reply"].tolist()
        self.candidate_translated = train_df["translated"].tolist()

    def get_train(self):
        return self._create_pos_examples(self.candidate_title, self.candidate_translated, "pos"), self._create_neg_examples(self.candidate_title, "neg")

    def _create_pos_examples(self, lines_a, lines_b, set_type):
        """Creates examples for the training and dev sets."""
        examples = []
        for (i, (line_a, line_b)) in enumerate(zip(lines_a, lines_b)):
            guid = "%s-%s" % (set_type, i)
            examples.append(InputExample(guid=guid, text_a=line_a, text_b=line_b, label=1))
        return examples

    def _create_neg_examples(self, lines, set_type):
        """Creates examples for the training and dev sets."""
        examples = []
        for (i, line_a) in enumerate(zip(lines)):
            guid = "%s-%s" % (set_type, i)
            line_b = random.sample(lines, 1)[0]
            while line_b == line_a:
                line_b = random.sample(1)[0]
            examples.append(InputExample(guid=guid, text_a=line_a, text_b=line_b, label=1))
        return examples

In [0]:
class BertForFAQHinge(BertPreTrainedModel):
    def __init__(self,config):
        super(BertPreTrainedModel,self).__init__(config)
        self.num_labels=config.num_labels
            
        self.bert=BertModel(config)
        self.dropout=nn.Dropout(config.hidden_dropout_prob)
        self.scoring=nn.Linear(config.hidden_size,1)
        
        self.init_weights()
    
    def forward(
        self,
        input_ids=None,
        attention_mask=None,
        token_type_ids=None,
        position_ids=None,
        head_mask=None,
        inputs_embeds=None,
        labels=None,
    ):
        outputs=self.bert(
            input_ids,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
            position_ids=position_ids,
            head_mask=head_mask,
            inputs_embeds=inputs_embeds,
        )
        
        # 对output前面一段进行drop和缩小
        pooled_output=outputs[1]
        pooled_output=self.dropout(pooled_output) 
        score=F.tanh(self.scoring(pooled_output)) #范围太大,-无穷~+无穷,用tanh缩小
        
        outputs=(score,)+outputs[2:]
        
        return outputs
            

In [0]:
def train(args,train_dataset,model,processor,tokenizer):
    no_decay=["bias","LayerNorm.weight"]
    optimizer_grouped_parameters=[
        {
            "params":[p for n,p in model.named_parameters() if not any(nd in n for nd in no_decay)],
            "weight_decay":args["weight_decay"],

        },
        {
            "params": [p for n,p in model.named_parameters() if any(nd in n for nd in no_decay)],
            "weight_decay":0.0
        },
    ]

    
    t_total=len(train_dataset)//args["gradient_accumulation_steps"] * args["num_train_epochs"]
    optimizer=AdamW(optimizer_grouped_parameters,lr=args["learning_rate"],eps=args["adam_epsilon"])
    scheduler=get_linear_schedule_with_warmup(
        optimizer,num_warmup_steps=args["warmup_steps"],num_training_steps=t_total
        )
    
    
    
    # *********************
    logger.info("*****Running training*****")
    logger.info("  Num examples = %d", len(train_dataset))
    logger.info("  Num Epochs = %d", args["num_train_epochs"])


    epochs_trained=0
    global_step=0
    steps_trained_in_current_epoch=0

    tr_loss,logging_loss=0.0,0.0
    model.zero_grad()
    train_iterator=trange(epochs_trained,args["num_train_epochs"],desc="Epoch",disable=False)

    set_seed()

    for k in train_iterator:
        if k!=0:
            dataset,_,_=load_examples(args,args["task_name"],tokenizer,processor)

        train_sampler=SequentialSampler(train_dataset)
        train_dataloader=DataLoader(train_dataset,sampler=train_sampler,batch_size=args["batch_size"])
        epoch_iterator=tqdm(train_dataloader,desc="Iteration",disable=False)

        for step,batch in enumerate(epoch_iterator):
            if steps_trained_in_current_epoch>0:
                steps_traned_in_current_epoch-=1
                continue

            model.train()
            batch=tuple(t.to(args["device"]) for t in batch)
            pos_inputs={"input_ids":batch[0],"attention_mask":batch[1],"token_type_ids":batch[2]}
            neg_inputs={"input_ids":batch[3],"attention_mask":batch[4],"token_type_ids":batch[5]}
            pos_outputs=model(**pos_inputs)
            neg_outputs=model(**neg_inputs)

            pos_score=pos_outputs[0]
            neg_score=neg_outputs[0]

            loss=-(pos_score-neg_score-args["margin"])
            loss[loss<0]=0
            loss=torch.mean(loss)
            loss.backward()

            tr_loss+=loss.item()
            if (step+1)%args["gradient_accumulation_steps"]==0:
                torch.nn.utils.clip_grad_norm_(model.parameters(),args["max_grad_norm"])

                optimizer.step()
                scheduler.step()
                model.zero_grad()
                global_step+=1

        logger.info("average loss:" +str(tr_loss/global_step))

    return global_step,tr_loss/global_step

In [0]:
def evaluate(args,model,eval_dataset):
    scores=[]
    
    eval_sampler = SequentialSampler(eval_dataset)
    eval_dataloader = DataLoader(eval_dataset, sampler=eval_sampler, batch_size=args["batch_size"])
    
    logger.info("  Num examples = %d", len(eval_dataset))
    logger.info("  Batch size = %d", args["batch_size"])
    eval_loss=0.0
    nb_eval_steps=0
    preds=None
    out_label_ids=None
    
    for batch in tqdm(eval_dataloader,desc="Evaluating"):
        model.eval()
        batch=tuple(t.to(args["device"]) for t in batch)
        
        with torch.no_grad():
            inputs={"input_ids": batch[0], "attention_mask": batch[1], "token_type_ids": batch[2]} 
            outputs=model(**inputs)
            
            score=outputs[0]
            scores.append(score)
        
    scores=[score.data.cpu().view(-1) for score in scores]
    scores=torch.cat(scores)
    
    return scores.numpy()

In [0]:
def load_examples(args, task, tokenizer, processor):

    pos_data, neg_data = processor.get_train()
 
    # Load data features from cache or dataset file
    logger.info("Creating features from dataset file at %s", args["data_dir"])
    pos_features = convert_examples_to_features(
        pos_data,
        tokenizer,
        label_list=[1],
        output_mode="classification",
        max_length=args["max_seq_length"],
        pad_on_left=bool(args["model_type"] in ["xlnet"]),  # pad on the left for xlnet
        pad_token=tokenizer.convert_tokens_to_ids([tokenizer.pad_token])[0],
        pad_token_segment_id=4 if args["model_type"] in ["xlnet"] else 0,
    )
    neg_features = convert_examples_to_features(
        neg_data,
        tokenizer,
        label_list=[1],
        output_mode="classification",
        max_length=args["max_seq_length"],
        pad_on_left=bool(args["model_type"] in ["xlnet"]),  # pad on the left for xlnet
        pad_token=tokenizer.convert_tokens_to_ids([tokenizer.pad_token])[0],
        pad_token_segment_id=4 if args["model_type"] in ["xlnet"] else 0,
    )

    # Convert to Tensors and build dataset
    pos_input_ids = torch.tensor([f.input_ids for f in pos_features], dtype=torch.long)
    pos_attention_mask = torch.tensor([f.attention_mask for f in pos_features], dtype=torch.long)
    pos_token_type_ids = torch.tensor([f.token_type_ids for f in pos_features], dtype=torch.long)
    neg_input_ids = torch.tensor([f.input_ids for f in neg_features], dtype=torch.long)
    neg_attention_mask = torch.tensor([f.attention_mask for f in neg_features], dtype=torch.long)
    neg_token_type_ids = torch.tensor([f.token_type_ids for f in neg_features], dtype=torch.long)
    dataset = TensorDataset(pos_input_ids, pos_attention_mask, pos_token_type_ids, neg_input_ids, neg_attention_mask, neg_token_type_ids)

    return dataset, processor.candidate_title, processor.candidate_reply

In [0]:
def main(args):
        
    # Setup logging
    logging.basicConfig(
        format="%(asctime)s - %(levelname)s - %(name)s -   %(message)s",
        datefmt="%m/%d/%Y %H:%M:%S",
        level=logging.INFO,
    )
    
    
    set_seed()
    model_type = args["model_type"]
    
    
    config_class, tokenizer_class = MODEL_CLASSES[model_type]
    model_class=BertForFAQHinge

    config = config_class.from_pretrained(
        args["config_name"],
        finetuning_task=args["task_name"], 
        cache_dir=None,
    )
    tokenizer = tokenizer_class.from_pretrained(
        args["tokenizer_name"],
        do_lower_case=True,
        cache_dir=None,
    )
    model = model_class.from_pretrained(
        args["model_name_or_path"],
        from_tf=bool(".ckpt" in args["model_name_or_path"]),
        config=config,
        cache_dir=None,
    )
    
    
    model.to(args["device"])
    
    logger.info("Training/evaluation parameters %s", args)
    
    processor=FAQProcessor()
    processor.get_data(args["data_dir"])
    
    dataset,candidate_title,candidate_reply=load_examples(args,args["task_name"],tokenizer,processor)
    
    
    if args["do_train"]:
        train(args,dataset,model,processor,tokenizer)
        if not os.path.exists(args["output_dir"]):
            os.makedirs(args["output_dir"])
        
        logger.info("Saving model checkpoint to %s", args["output_dir"])
        
        model.save_pretrained(args["output_dir"])
        tokenizer.save_pretrained(args["output_dir"])
        
        torch.save(args,os.path.join(args["output_dir"],"training_args.bin"))
        
        model=model_class.from_pretrained(args["output_dir"])
        tokenizer=tokenizer_class.from_pretrained(args["output_dir"])
        model.to(args["device"])
    
    
    if args["do_eval"]:
        while True:
            title = input("你的问题是？\n")
            if len(title.strip()) == 0:
                continue

            examples = [InputExample(guid=0, text_a=title, text_b=c, label=1) for c in candidate_title]
            features = convert_examples_to_features(
                examples,
                tokenizer,
                label_list=[1],
                output_mode="classification",
                max_length=args["max_seq_length"],
                pad_on_left=bool(args["model_type"] in ["xlnet"]),  # pad on the left for xlnet
                pad_token=tokenizer.convert_tokens_to_ids([tokenizer.pad_token])[0],
                pad_token_segment_id=4 if args["model_type"] in ["xlnet"] else 0,
            )

            # Convert to Tensors and build dataset
            all_input_ids = torch.tensor([f.input_ids for f in features], dtype=torch.long)
            all_attention_mask = torch.tensor([f.attention_mask for f in features], dtype=torch.long)
            all_token_type_ids = torch.tensor([f.token_type_ids for f in features], dtype=torch.long)
    
            dataset = TensorDataset(all_input_ids, all_attention_mask, all_token_type_ids)
            scores = evaluate(args, model, dataset)
            top5_indices = scores.argsort()[-5:][::-1]

            for index in top5_indices:
                print("可能的答案，参考问题：" + candidate_title[index] + "\t答案：" + candidate_reply[index] + "\t得分：" + str(scores[index]))
                print()
    

In [21]:
args_train={
    "model_type":"bert",
    "data_dir": "/content/drive/My Drive/Data/preprocessed.csv",
    "output_dir":"/content/drive/My Drive/hinge_models/",
    "model_name_or_path": "/content/drive/My Drive/chinese_wwm_ext_pytorch/",
    "config_name": "/content/drive/My Drive/chinese_wwm_ext_pytorch/",
    "tokenizer_name": "/content/drive/My Drive/chinese_wwm_ext_pytorch/",
    "do_train":True,
    "do_eval":False,
    "evaluate_during_training":False,
    "do_lower_case":False,
    "per_gpu_train_batch_size":32,
    "per_gpu_eval_batch_size":32,
    "batch_size":32,
    "gradient_accumulation_steps":1,
    "learning_rate":2e-5,
    "adam_epsilon":1e-8,
    "max_grad_norm":1.0,
    "weight_decay":0.0,
    "max_grad_norm":1.0,
    "max_seq_length":128,
    "weight_decay":0.0,
    "num_train_epochs":10,
    "device":"cpu",
    "margin":5,
    "warmup_steps":0,
    "task_name":"",
    
}

if torch.cuda.is_available():
    args_train["device"]="cuda"


main(args_train)

01/10/2020 14:57:28 - INFO - transformers.configuration_utils -   loading configuration file /content/drive/My Drive/chinese_wwm_ext_pytorch/config.json
01/10/2020 14:57:28 - INFO - transformers.configuration_utils -   Model config {
  "attention_probs_dropout_prob": 0.1,
  "directionality": "bidi",
  "finetuning_task": "",
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "is_decoder": false,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1
  },
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "num_labels": 2,
  "output_attentions": false,
  "output_hidden_states": false,
  "output_past": true,
  "pooler_fc_size": 768,
  "pooler_num_attention_heads": 12,
  "pooler_num_fc_layers": 3,
  "pooler_size_per_head": 128,
  "pooler_type": "first_token_transform",
  "pruned_heads"

In [25]:
args_eval={
    "model_type":"bert",
    "data_dir": "/content/drive/My Drive/Data/preprocessed.csv",
    "output_dir":"/content/drive/My Drive/hinge_models/",
    "model_name_or_path": "/content/drive/My Drive/hinge_models/",
    "config_name": "/content/drive/My Drive/hinge_models/",
    "tokenizer_name": "/content/drive/My Drive/hinge_models/",
    "do_train":False,
    "do_eval":True,
    "evaluate_during_training":False,
    "do_lower_case":False,
    "per_gpu_train_batch_size":32,
    "per_gpu_eval_batch_size":32,
    "batch_size":32,
    "gradient_accumulation_steps":1,
    "learning_rate":2e-5,
    "adam_epsilon":1e-8,
    "max_grad_norm":1.0,
    "weight_decay":0.0,
    "max_grad_norm":1.0,
    "max_seq_length":128,
    "weight_decay":0.0,
    "num_train_epochs":10,
    "device":"cpu",
    "margin":5,
    "warmup_steps":0,
    "task_name":"",
    
    
}

if torch.cuda.is_available():
    args_eval["device"]="cuda"


main(args_eval)

01/10/2020 16:29:38 - INFO - transformers.configuration_utils -   loading configuration file /content/drive/My Drive/hinge_models/config.json
01/10/2020 16:29:38 - INFO - transformers.configuration_utils -   Model config {
  "attention_probs_dropout_prob": 0.1,
  "directionality": "bidi",
  "finetuning_task": "",
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "is_decoder": false,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1
  },
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "num_labels": 2,
  "output_attentions": false,
  "output_hidden_states": false,
  "output_past": true,
  "pooler_fc_size": 768,
  "pooler_num_attention_heads": 12,
  "pooler_num_fc_layers": 3,
  "pooler_size_per_head": 128,
  "pooler_type": "first_token_transform",
  "pruned_heads": {},
  "to

你的问题是？
怎么着律师?


01/10/2020 16:30:05 - INFO - transformers.data.processors.glue -   Writing example 0
01/10/2020 16:30:05 - INFO - transformers.data.processors.glue -   *** Example ***
01/10/2020 16:30:05 - INFO - transformers.data.processors.glue -   guid: 0
01/10/2020 16:30:05 - INFO - transformers.data.processors.glue -   input_ids: 101 2582 720 4708 2526 2360 136 102 1762 3791 2526 704 2137 7032 680 6370 7032 4638 1277 1166 6370 7032 1469 2137 7032 1525 702 1358 102 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
01/10/2020 16:30:05 - INFO - transformers.data.processors.glue -   attention_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
01/10

可能的答案，参考问题：个人与个人纠纷怎么处理	答案：可以进行协商，协商不成可以提起诉讼。	得分：0.99999994

可能的答案，参考问题：怎么判定工伤	答案：第十四条职工有下列情形之一的，应当认定为工伤：(一)在工作时间和工作场所内，因工作原因受到事故伤害的；(二)工作时间前后在工作场所内，从事与工作有关的预备性或者收尾性工作受到事故伤害的；(三)在工作时间和工作场所内，因履行工作职责受到暴力等意外伤害的；(四)患职业病的；(五)因工外出期间，由于工作原因受到伤害或者发生事故下落不明的；(六)在上下班途中，受到非本人主要责任的交通事故或者城市轨道交通、客运轮渡、火车事故伤害的；(七)法律、行政法规规定应当认定为工伤的其他情形。第十五条职工有下列情形之一的，视同工伤：(一)在工作时间和工作岗位，突发疾病死亡或者在48小时之内经抢救无效死亡的；(二)在抢险救灾等维护国家利益、公共利益活动中受到伤害的；(三)职工原在军队服役，因战、因公负伤致残，已取得革命伤残军人证，到用人单位后旧伤复发的。职工有前款第(一)项、第(二)项情形的，按照本条例的有关规定享受工伤保险待遇；职工有前款第(三)项情形的，按照本条例的有关规定享受除一次性伤残补助金以外的工伤保险待遇。第十六条职工符合本条例第十四条、第十五条的规定，但是有下列情形之一的，不得认定为工伤或者视同工伤：(一)故意犯罪的；(二)醉酒或者吸毒的；(三)自残或者自杀的。	得分：0.99999994

可能的答案，参考问题：怎么知道自己被列入失信人上传的法院	答案：失*自然人是指*内欠人钱财却赖着拒不履*法*生效判决的债务人，其中自然人是在自然状态之下而作为*事主体存在的人。抽象的人的概念，代表着人格，代表其有权参加*事活动，享有权利并承担义务。最*人*法*执*局2013年11月14日与中*人*银*征*中*签署合作备忘录，记发布失*自然人名单。可上http://sh##in.court.gov.cn/查询。	得分：0.99999994

可能的答案，参考问题：父母有没有权利扣押儿女的身份证	答案：1、根据《中华人民共和国居民身份证法》第三章第十五条规定“任何组织或者个人不得扣押居民身份证。但是，公安机关依照《中华人民共和国刑事诉讼法》执行监视居住强制措施的情形除外。”该父母扣押孩子的身份证是违法的。2、报警的话，根据《中华人民

01/10/2020 16:32:23 - INFO - transformers.data.processors.glue -   Writing example 0
01/10/2020 16:32:23 - INFO - transformers.data.processors.glue -   *** Example ***
01/10/2020 16:32:23 - INFO - transformers.data.processors.glue -   guid: 0
01/10/2020 16:32:23 - INFO - transformers.data.processors.glue -   input_ids: 101 2582 3416 2823 2526 2360 136 102 1762 3791 2526 704 2137 7032 680 6370 7032 4638 1277 1166 6370 7032 1469 2137 7032 1525 702 1358 102 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
01/10/2020 16:32:23 - INFO - transformers.data.processors.glue -   attention_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
01/1

可能的答案，参考问题：个人与个人纠纷怎么处理	答案：可以进行协商，协商不成可以提起诉讼。	得分：0.99999994

可能的答案，参考问题：请问做伤残鉴定结果一般要等多久	答案：鉴定要最长60天才能出结果一般情况下交通事故致人损伤，3个月以后就可以评定伤残了，但特殊情形还是得依据具体的治疗情况来确定。交通事故赔偿案件中，受害者对伤残评定的时间的把握，影响着伤残评定等级，从而影响赔偿款的获得。一般发生事故主要是原发性损伤的，可在伤后1-3个月进行伤残评定。主要包括组织、器官、肢体、颅骨、颌骨、肋骨残损等。对于发生交通事故影响容貌的，听力、视力障碍以及脊柱骨折的，一般应在伤后3-6个月进行。在发生交通事故遗留肢体功能障碍的，在伤后6-9个月进行鉴定为最佳。而在对于事故造成颅脑损伤导致智力、精神问题的，如语言功能障碍、性功能障碍等，进行伤残评定的时间最好在伤后6-12个月。	得分：0.99999994

可能的答案，参考问题：独生子女遗嘱公证是怎么样的	答案：遗嘱公证程序，是指公证机构办理遗嘱公证，应当由2人共同办理。承办公证员应当全程亲自办理。特殊情况下只能由1名公证员办理时，应当请1名见证人在场，见证人应当在遗嘱和笔录上签名或者盖章。	得分：0.99999994

可能的答案，参考问题：一起买商品房（期房），购房合同的能写两个人的名字吗	答案：房产证可以写两人名字，如果你俩拿了结婚证，就很简单，凭结婚证就可以办理，如果没有结婚证，需要你俩先去公证处，办理协议房产份额手续，做个公证，明确你俩在房产上各自所占份额。	得分：0.99999994

可能的答案，参考问题：丈夫残疾六年了请允许我离婚	答案：如果双方同意，可直接办理离婚手续，如果一方不同意，可诉至法院。一方坚决不离。起诉到法院，法院在6个月后将强制判决离婚。	得分：0.99999994



KeyboardInterrupt: ignored