In [0]:
!pip install transformers

Collecting transformers
[?25l  Downloading https://files.pythonhosted.org/packages/50/10/aeefced99c8a59d828a92cc11d213e2743212d3641c87c82d61b035a7d5c/transformers-2.3.0-py3-none-any.whl (447kB)
[K     |▊                               | 10kB 29.4MB/s eta 0:00:01[K     |█▌                              | 20kB 2.1MB/s eta 0:00:01[K     |██▏                             | 30kB 2.7MB/s eta 0:00:01[K     |███                             | 40kB 2.0MB/s eta 0:00:01[K     |███▋                            | 51kB 2.3MB/s eta 0:00:01[K     |████▍                           | 61kB 2.7MB/s eta 0:00:01[K     |█████▏                          | 71kB 3.0MB/s eta 0:00:01[K     |█████▉                          | 81kB 3.2MB/s eta 0:00:01[K     |██████▋                         | 92kB 3.6MB/s eta 0:00:01[K     |███████▎                        | 102kB 3.3MB/s eta 0:00:01[K     |████████                        | 112kB 3.3MB/s eta 0:00:01[K     |████████▉                       | 122kB 3.3M

In [0]:
import argparse
import glob
import json
import logging
import os
import random

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, RandomSampler, SequentialSampler, TensorDataset
from torch.utils.data.distributed import DistributedSampler
from tqdm import tqdm, trange

In [0]:
from transformers import (
    WEIGHTS_NAME,
    AdamW,
    get_linear_schedule_with_warmup,
    BertConfig,
    BertModel,
    BertPreTrainedModel,
    BertTokenizer,)

In [0]:
from transformers import glue_compute_metrics as compute_metrics
from transformers import glue_convert_examples_to_features as convert_examples_to_features
from transformers import glue_output_modes as output_modes
from transformers import glue_processors as processors

In [0]:
from transformers.data.processors.utils import InputExample, DataProcessor

In [0]:
try:
    from torch.utils.tensorboard import SummaryWriter   #version 1.14 or higher
except ImportError:
    from tensorboardX import SummaryWriter

In [0]:
import code
import os
import pickle
from sklearn.metrics.pairwise import cosine_similarity

In [0]:
from google.colab import drive
drive.mount('/content/drive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/drive


In [0]:
logger=logging.getLogger(__name__)

In [0]:
MODEL_CLASSES={
    "bert":(BertConfig,BertTokenizer),
}

In [0]:
def set_seed(seed=42):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)

In [0]:
class FAQProcessor(DataProcessor):
    def get_data(self, file_dir, neg_file_dir,args,model,tokenizer):
        # 读入原句和正例
        #get original title and translated title(aka, positive samples)
        train_df = pd.read_csv(file_dir, sep="\t")
        self.candidate_title = train_df["title"].tolist()
        self.candidate_reply = train_df["reply"].tolist()
        self.candidate_translated = train_df["translated"].tolist()

        #读入负例,负例不需要sep分割
        #get negative samples
        neg_train_df = pd.read_csv(neg_file_dir)
        self.neg_candidate_title = neg_train_df["title"].tolist()

        #get other pararmeters
        self.args=args
        self.model=model
        self.tokenizer=tokenizer


    def get_train(self,args=None,model=None):
        return self._create_pos_examples(self.candidate_title, self.candidate_translated, "pos"), \
                self._create_neg_examples(self.candidate_title, "neg")

        #return self._create_pos_examples(self.candidate_title, self.candidate_translated, "pos"), \
        #         self._create_neg_examples_new(self.candidate_title, self.neg_candidate_title,"neg",args,model)
        
        #return self._create_pos_examples(self.candidate_title, self.candidate_translated, "pos"), \
        #         self._create_neg_examples_new_2(self.candidate_title, "neg",args,model)


    def _create_pos_examples(self, lines_s, lines_s_dash, set_type):
        """制作同义句(正例), loss里的(s,s')"""
        # 把原句和同义句配对组合即可
        examples = []
        for (i, (line_s, line_s_dash)) in enumerate(zip(lines_s, lines_s_dash)):
            guid = "%s-%s" % (set_type, i)
            examples.append(InputExample(guid=guid, text_a=line_s, text_b=line_s_dash, label=1))
        return examples

    def _create_neg_examples(self, lines, set_type):
        """制作非同义句(负例),loss里的(s,t)"""
        # 方法1:随机选择一个句子作为非同义句(只要不是原句即可))
        examples = []
        for (i, line_s) in enumerate(zip(lines)):
            guid = "%s-%s" % (set_type, i)
            line_t = random.sample(lines, 1)[0]
            while line_t == line_s:
                line_t = random.sample(1)[0]

            examples.append(InputExample(guid=guid, text_a=line_s, text_b=line_t, label=1))
        return examples
    


    def _create_neg_examples_new(self,lines,neg_lines,set_type,args,model):
        """制作非同义句(负例),loss里的(s,t)"""
        # 方法2:通过计算选择分数最接近的句子
        neg_examples=[]

        for (i,line_s) in enumerate(zip(lines)):
            guid = "%s-%s" % (set_type, i)
            examples = [InputExample(guid=0, text_a=line_s, text_b=c, label=1) for c in neg_lines]
            features = convert_examples_to_features(
                    examples,
                    self.tokenizer,
                    label_list=[1],
                    output_mode="classification",
                    max_length=self.args["max_seq_length"],
                    pad_on_left=bool(self.args["model_type"] in ["xlnet"]),  # pad on the left for xlnet
                    pad_token=self.tokenizer.convert_tokens_to_ids([self.tokenizer.pad_token])[0],
                    pad_token_segment_id=4 if self.args["model_type"] in ["xlnet"] else 0,
                )

            # Convert to Tensors and build dataset
            all_input_ids = torch.tensor([f.input_ids for f in features], dtype=torch.long)
            all_attention_mask = torch.tensor([f.attention_mask for f in features], dtype=torch.long)
            all_token_type_ids = torch.tensor([f.token_type_ids for f in features], dtype=torch.long)
    
            dataset = TensorDataset(all_input_ids, all_attention_mask, all_token_type_ids)
            scores = evaluate(self.args, self.model, dataset)

            index=scores.argsort()[::-1][0]
            line_t=self.neg_candidate_title[index]

            neg_examples.append(InputExample(guid=guid, text_a=line_s, text_b=line_t, label=1))
            
        return neg_examples


    def _create_neg_examples_new_2(self,lines,set_type,args,model):
        """制作非同义句(负例),loss里的(s,t)"""
        # 方法3:通过计算选择分数最接近的句子, 但是只在每个batch里选择一个
        neg_examples=[]

        for (i,line_s) in enumerate(zip(lines)):
            # 每次打乱顺序
            neg_lines=np.array(lines.copy())
            np.random.shuffle(neg_lines)
            neg_lines=neg_lines[:self.args["batch_size"]]

            guid = "%s-%s" % (set_type, i)
            examples = [InputExample(guid=0, text_a=line_s, text_b=c, label=1) for c in neg_lines]
            features = convert_examples_to_features(
                    examples,
                    self.tokenizer,
                    label_list=[1],
                    output_mode="classification",
                    max_length=self.args["max_seq_length"],
                    pad_on_left=bool(self.args["model_type"] in ["xlnet"]),  # pad on the left for xlnet
                    pad_token=self.tokenizer.convert_tokens_to_ids([self.tokenizer.pad_token])[0],
                    pad_token_segment_id=4 if self.args["model_type"] in ["xlnet"] else 0,
                )

            # Convert to Tensors and build dataset
            all_input_ids = torch.tensor([f.input_ids for f in features], dtype=torch.long)
            all_attention_mask = torch.tensor([f.attention_mask for f in features], dtype=torch.long)
            all_token_type_ids = torch.tensor([f.token_type_ids for f in features], dtype=torch.long)
    
            dataset = TensorDataset(all_input_ids, all_attention_mask, all_token_type_ids)
            scores = evaluate(self.args, self.model, dataset)

            #找到第一个不为原句的句子
            line_t=None
            for index in scores.argsort()[::-1]:
                if self.candidate_title[index]!=line_s:
                    line_t=self.candidate_title[index]
                    break


            neg_examples.append(InputExample(guid=guid, text_a=line_s, text_b=line_t, label=1))
            
        return neg_examples



In [0]:
class BertForFAQHinge(BertPreTrainedModel):
    def __init__(self,config):
        super(BertPreTrainedModel,self).__init__(config)
        self.num_labels=config.num_labels
            
        self.bert=BertModel(config)
        self.dropout=nn.Dropout(config.hidden_dropout_prob)
        self.scoring=nn.Linear(config.hidden_size,1)
        
        self.init_weights()
    
    def forward(
        self,
        input_ids=None,
        attention_mask=None,
        token_type_ids=None,
        position_ids=None,
        head_mask=None,
        inputs_embeds=None,
        labels=None,
    ):
        outputs=self.bert(
            input_ids,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
            position_ids=position_ids,
            head_mask=head_mask,
            inputs_embeds=inputs_embeds,
        )
        # 这个写法照抄自 BertForSequenceClassification,基本一样,就是名字换了下
        # score就对应BertForSequenceClassification里的logits
        # 取出pooled_output,做了一个dropout,过了一个linear层,维数从hidden_size降为1
        pooled_output=outputs[1]
        pooled_output=self.dropout(pooled_output) 
        score = self.scoring(pooled_output)
        
        outputs=(score,)+outputs[2:]
        
        return outputs
            

In [0]:
def train(args,train_dataset,model,processor,tokenizer):
    no_decay=["bias","LayerNorm.weight"]
    optimizer_grouped_parameters=[
        {
            "params":[p for n,p in model.named_parameters() if not any(nd in n for nd in no_decay)],
            "weight_decay":args["weight_decay"],

        },
        {
            "params": [p for n,p in model.named_parameters() if any(nd in n for nd in no_decay)],
            "weight_decay":0.0
        },
    ]

    
    t_total=len(train_dataset)//args["gradient_accumulation_steps"] * args["num_train_epochs"]
    optimizer=AdamW(optimizer_grouped_parameters,lr=args["learning_rate"],eps=args["adam_epsilon"])
    # bert里的小技巧, bert里的learning rate是不断变化的,先往上升,再往下降,这个scheduler就是用来设置这个
    scheduler=get_linear_schedule_with_warmup(
        optimizer,num_warmup_steps=args["warmup_steps"],num_training_steps=t_total
        )
    
    
    
    # *********************
    logger.info("*****Running training*****")
    logger.info("  Num examples = %d", len(train_dataset))
    logger.info("  Num Epochs = %d", args["num_train_epochs"])


    epochs_trained=0
    global_step=0
    steps_trained_in_current_epoch=0

    tr_loss,logging_loss=0.0,0.0
    model.zero_grad()
    train_iterator=trange(epochs_trained,args["num_train_epochs"],desc="Epoch",disable=False)

    set_seed()

    for k in train_iterator:
        '''
        # 针对方法1,每做一次iterator,期望负例都有所变化,所以每轮都做load example,重新找一个负例
        # 方法2则不用
        '''
        if k!=0:
            train_dataset,_,_=load_examples(args,args["task_name"],tokenizer,processor)
        
        
        train_sampler=RandomSampler(train_dataset)
        #train_sampler=SequentialSampler(train_dataset)
        train_dataloader=DataLoader(train_dataset,sampler=train_sampler,batch_size=args["batch_size"])
        epoch_iterator=tqdm(train_dataloader,desc="Iteration",disable=False)

        for step,batch in enumerate(epoch_iterator):
            if steps_trained_in_current_epoch>0:
                steps_traned_in_current_epoch-=1
                continue

            model.train()
            batch=tuple(t.to(args["device"]) for t in batch)
            pos_inputs={"input_ids":batch[0],"attention_mask":batch[1],"token_type_ids":batch[2]}
            neg_inputs={"input_ids":batch[3],"attention_mask":batch[4],"token_type_ids":batch[5]}
            pos_outputs=model(**pos_inputs)
            neg_outputs=model(**neg_inputs)

            pos_score=pos_outputs[0]
            neg_score=neg_outputs[0]

            loss=-(pos_score-neg_score-args["margin"])
            loss[loss<0]=0
            loss=torch.mean(loss)
            loss.backward()

            tr_loss+=loss.item()
            if (step+1)%args["gradient_accumulation_steps"]==0:
                torch.nn.utils.clip_grad_norm_(model.parameters(),args["max_grad_norm"])

                optimizer.step()
                scheduler.step()
                model.zero_grad()
                global_step+=1

        logger.info("average loss:" +str(tr_loss/global_step))

    return global_step,tr_loss/global_step

In [0]:
def evaluate(args,model,eval_dataset):
    scores=[]
    
    eval_sampler =RandomSampler(eval_dataset)
    eval_dataloader = DataLoader(eval_dataset, sampler=eval_sampler, batch_size=args["batch_size"])
    
    logger.info("  Num examples = %d", len(eval_dataset))
    logger.info("  Batch size = %d", args["batch_size"])
    eval_loss=0.0
    nb_eval_steps=0
    preds=None
    out_label_ids=None
    
    for batch in tqdm(eval_dataloader,desc="Evaluating"):
        model.eval()
        batch=tuple(t.to(args["device"]) for t in batch)
        
        with torch.no_grad():
            inputs={"input_ids": batch[0], "attention_mask": batch[1], "token_type_ids": batch[2]} 
            outputs=model(**inputs)
            
            score=F.tanh(outputs[0])
            scores.append(score)
        
    scores=[score.data.cpu().view(-1) for score in scores]
    scores=torch.cat(scores)
    
    return scores.numpy()

In [0]:
def load_examples(args, task, tokenizer, processor):

    pos_data, neg_data = processor.get_train()
 
    # Load data features from cache or dataset file
    logger.info("Creating features from dataset file at %s", args["data_dir"])
    pos_features = convert_examples_to_features(
        pos_data,
        tokenizer,
        label_list=[1],
        output_mode="classification",
        max_length=args["max_seq_length"],
        pad_on_left=bool(args["model_type"] in ["xlnet"]),  # pad on the left for xlnet
        pad_token=tokenizer.convert_tokens_to_ids([tokenizer.pad_token])[0],
        pad_token_segment_id=4 if args["model_type"] in ["xlnet"] else 0,
    )
    neg_features = convert_examples_to_features(
        neg_data,
        tokenizer,
        label_list=[1],
        output_mode="classification",
        max_length=args["max_seq_length"],
        pad_on_left=bool(args["model_type"] in ["xlnet"]),  # pad on the left for xlnet
        pad_token=tokenizer.convert_tokens_to_ids([tokenizer.pad_token])[0],
        pad_token_segment_id=4 if args["model_type"] in ["xlnet"] else 0,
    )

    # Convert to Tensors and build dataset
    pos_input_ids = torch.tensor([f.input_ids for f in pos_features], dtype=torch.long)
    pos_attention_mask = torch.tensor([f.attention_mask for f in pos_features], dtype=torch.long)
    pos_token_type_ids = torch.tensor([f.token_type_ids for f in pos_features], dtype=torch.long)
    neg_input_ids = torch.tensor([f.input_ids for f in neg_features], dtype=torch.long)
    neg_attention_mask = torch.tensor([f.attention_mask for f in neg_features], dtype=torch.long)
    neg_token_type_ids = torch.tensor([f.token_type_ids for f in neg_features], dtype=torch.long)
    dataset = TensorDataset(pos_input_ids, pos_attention_mask, pos_token_type_ids, neg_input_ids, neg_attention_mask, neg_token_type_ids)

    return dataset, processor.candidate_title, processor.candidate_reply

In [0]:
def main(args):
        
    # Setup logging
    logging.basicConfig(
        format="%(asctime)s - %(levelname)s - %(name)s -   %(message)s",
        datefmt="%m/%d/%Y %H:%M:%S",
        level=logging.INFO,
    )
    
    
    set_seed()
    model_type = args["model_type"]
    
    
    config_class, tokenizer_class = MODEL_CLASSES[model_type]
    model_class=BertForFAQHinge

    config = config_class.from_pretrained(
        args["config_name"],
        finetuning_task=args["task_name"], 
        cache_dir=None,
    )
    tokenizer = tokenizer_class.from_pretrained(
        args["tokenizer_name"],
        do_lower_case=True,
        cache_dir=None,
    )
    model = model_class.from_pretrained(
        args["model_name_or_path"],
        from_tf=bool(".ckpt" in args["model_name_or_path"]),
        config=config,
        cache_dir=None,
    )
    
    
    model.to(args["device"])
    
    logger.info("Training/evaluation parameters %s", args)
    
    processor=FAQProcessor()
    processor.get_data(args["data_dir"],args["neg_data_dir"],args,model,tokenizer)
    
    dataset,candidate_title,candidate_reply=load_examples(args,args["task_name"],tokenizer,processor)
    
    
    if args["do_train"]:
        train(args,dataset,model,processor,tokenizer)
        if not os.path.exists(args["output_dir"]):
            os.makedirs(args["output_dir"])
        
        logger.info("Saving model checkpoint to %s", args["output_dir"])
        
        model.save_pretrained(args["output_dir"])
        tokenizer.save_pretrained(args["output_dir"])
        
        torch.save(args,os.path.join(args["output_dir"],"training_args.bin"))
        
        model=model_class.from_pretrained(args["output_dir"])
        tokenizer=tokenizer_class.from_pretrained(args["output_dir"])
        model.to(args["device"])
    
    
    if args["do_eval"]:
        while True:
            title = input("你的问题是？\n")
            if len(title.strip()) == 0:
                continue
            
            
            # [CLS]用户提问[SEP]已有问题[SEP]  拼到一起,算分数,而不是cosine similarity
            examples = [InputExample(guid=0, text_a=title, text_b=c, label=1) for c in candidate_title]
            features = convert_examples_to_features(
                examples,
                tokenizer,
                label_list=[1],
                output_mode="classification",
                max_length=args["max_seq_length"],
                pad_on_left=bool(args["model_type"] in ["xlnet"]),  # pad on the left for xlnet
                pad_token=tokenizer.convert_tokens_to_ids([tokenizer.pad_token])[0],
                pad_token_segment_id=4 if args["model_type"] in ["xlnet"] else 0,
            )

            # Convert to Tensors and build dataset
            all_input_ids = torch.tensor([f.input_ids for f in features], dtype=torch.long)
            all_attention_mask = torch.tensor([f.attention_mask for f in features], dtype=torch.long)
            all_token_type_ids = torch.tensor([f.token_type_ids for f in features], dtype=torch.long)
    
            dataset = TensorDataset(all_input_ids, all_attention_mask, all_token_type_ids)
            scores = evaluate(args, model, dataset)
            top5_indices = scores.argsort()[-5:][::-1]   # 等价于  scores.argsort()[::-1][:5] (逆序,从大到小)(前5个)

            for index in top5_indices:
                print("可能的答案，参考问题：" + candidate_title[index] + "\t答案：" + candidate_reply[index] + "\t得分：" + str(scores[index]))
                print()
    

In [0]:
args_train={
    "model_type":"bert",
    "data_dir": "/content/drive/My Drive/Data/preprocessed.csv",
    "neg_data_dir": "/content/drive/My Drive/Data/nonghangzhidao_as_neg.csv",
    "output_dir":"/content/drive/My Drive/hinge_models/",
    "model_name_or_path": "/content/drive/My Drive/chinese_wwm_ext_pytorch/",
    "config_name": "/content/drive/My Drive/chinese_wwm_ext_pytorch/",
    "tokenizer_name": "/content/drive/My Drive/chinese_wwm_ext_pytorch/",
    "do_train":True,
    "do_eval":False,
    "evaluate_during_training":False,
    "do_lower_case":False,
    "per_gpu_train_batch_size":32,
    "per_gpu_eval_batch_size":32,
    "batch_size":32,
    "gradient_accumulation_steps":1,
    "learning_rate":2e-5,
    "adam_epsilon":1e-8,
    "max_grad_norm":1.0,
    "weight_decay":0.0,
    "max_grad_norm":1.0,
    "max_seq_length":128,
    "weight_decay":0.0,
    "num_train_epochs":10,
    "device":"cpu",
    "margin":5,
    "warmup_steps":0,
    "task_name":"",
    
}

if torch.cuda.is_available():
    args_train["device"]="cuda"


main(args_train)

01/28/2020 15:20:28 - INFO - transformers.configuration_utils -   loading configuration file /content/drive/My Drive/chinese_wwm_ext_pytorch/config.json
01/28/2020 15:20:28 - INFO - transformers.configuration_utils -   Model config {
  "attention_probs_dropout_prob": 0.1,
  "directionality": "bidi",
  "finetuning_task": "",
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "is_decoder": false,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1
  },
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "num_labels": 2,
  "output_attentions": false,
  "output_hidden_states": false,
  "output_past": true,
  "pooler_fc_size": 768,
  "pooler_num_attention_heads": 12,
  "pooler_num_fc_layers": 3,
  "pooler_size_per_head": 128,
  "pooler_type": "first_token_transform",
  "pruned_heads"

KeyboardInterrupt: ignored

In [0]:
args_eval={
    "model_type":"bert",
    "data_dir": "/content/drive/My Drive/Data/preprocessed.csv",
    "output_dir":"/content/drive/My Drive/hinge_models/",
    "model_name_or_path": "/content/drive/My Drive/hinge_models/",
    "config_name": "/content/drive/My Drive/hinge_models/",
    "tokenizer_name": "/content/drive/My Drive/hinge_models/",
    "do_train":False,
    "do_eval":True,
    "evaluate_during_training":False,
    "do_lower_case":False,
    "per_gpu_train_batch_size":32,
    "per_gpu_eval_batch_size":32,
    "batch_size":32,
    "gradient_accumulation_steps":1,
    "learning_rate":2e-5,
    "adam_epsilon":1e-8,
    "max_grad_norm":1.0,
    "weight_decay":0.0,
    "max_grad_norm":1.0,
    "max_seq_length":128,
    "weight_decay":0.0,
    "num_train_epochs":10,
    "device":"cpu",
    "margin":5,
    "warmup_steps":0,
    "task_name":"",
    
    
}

if torch.cuda.is_available():
    args_eval["device"]="cuda"


main(args_eval)

01/19/2020 10:05:22 - INFO - transformers.configuration_utils -   loading configuration file /content/drive/My Drive/hinge_models/config.json
01/19/2020 10:05:22 - INFO - transformers.configuration_utils -   Model config {
  "attention_probs_dropout_prob": 0.1,
  "directionality": "bidi",
  "finetuning_task": "",
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "is_decoder": false,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1
  },
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "num_labels": 2,
  "output_attentions": false,
  "output_hidden_states": false,
  "output_past": true,
  "pooler_fc_size": 768,
  "pooler_num_attention_heads": 12,
  "pooler_num_fc_layers": 3,
  "pooler_size_per_head": 128,
  "pooler_type": "first_token_transform",
  "pruned_heads": {},
  "to

你的问题是？
怎样找律师?


01/19/2020 10:05:41 - INFO - transformers.data.processors.glue -   Writing example 0
01/19/2020 10:05:41 - INFO - transformers.data.processors.glue -   *** Example ***
01/19/2020 10:05:41 - INFO - transformers.data.processors.glue -   guid: 0
01/19/2020 10:05:41 - INFO - transformers.data.processors.glue -   input_ids: 101 2582 3416 2823 2526 2360 136 102 1762 3791 2526 704 2137 7032 680 6370 7032 4638 1277 1166 6370 7032 1469 2137 7032 1525 702 1358 102 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
01/19/2020 10:05:41 - INFO - transformers.data.processors.glue -   attention_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
01/1

可能的答案，参考问题：非法拘禁罪最多判多少年	答案：最佳答案构成非法拘留罪的，处三年以下有期徒刑、拘役、管制或者剥夺政治权利。具有殴打、侮辱情节的，从重处罚。法律规定：《刑法》第二百三十八条【非法拘禁罪】非法拘禁他人或者以其他方法非法剥夺他人人身自由的，处三年以下有期徒刑、拘役、管制或者剥夺政治权利。具有殴打、侮辱情节的，从重处罚。犯前款罪，致人重伤的，处三年以上十年以下有期徒刑；致人死亡的，处十年以上有期徒刑。使用暴力致人伤残、死亡的，依照本法第二百三十四条、第二百三十二条的规定定罪处罚。为索取债务非法扣押、拘禁他人的，依照前两款的规定处罚。国家机关工作人员利用职权犯前三款罪的，依照前三款的规定从重处罚。	得分：0.99967724

可能的答案，参考问题：怎么开口叫那些欠我钱的人换钱比较好	答案：可以先进行协商，协商不成的可以通过诉讼的方式主张自己的债权。	得分：0.9996763

可能的答案，参考问题：犯毒品700克判刑多少年	答案：【走私、贩卖、运输、制造毒品罪】走私、贩卖、运输、制造毒品，无论数量多少，都应当追究刑事责任，予以刑事处罚。走私、贩卖、运输、制造毒品，有下列情形之一的，处十五年有期徒刑、无期徒刑或者死刑，并处没收财产：(一)走私、贩卖、运输、制造鸦片一千克以上、海洛因或者甲基苯丙胺五十克以上或者其他毒品数量大的；(二)走私、贩卖、运输、制造毒品集团的首要分子；(三)武装掩护走私、贩卖、运输、制造毒品的；(四)以暴力抗拒检查、拘留、逮捕，情节严重的；(五)参与有组织的国际贩毒活动的。走私、贩卖、运输、制造鸦片二百克以上不满一千克、海洛因或者甲基苯丙胺十克以上不满五十克或者其他毒品数量较大的，处七年以上有期徒刑，并处罚金。走私、贩卖、运输、制造鸦片不满二百克、海洛因或者甲基苯丙胺不满十克或者其他少量毒品的，处三年以下有期徒刑、拘役或者管制，并处罚金；情节严重的，处三年以上七年以下有期徒刑，并处罚金。单位犯第二款、第三款、第四款罪的，对单位判处罚金，并对其直接负责的主管人员和其他直接责任人员，依照各该款的规定处罚。利用、教唆未成年人走私、贩卖、运输、制造毒品，或者向未成年人出售毒品的，从重处罚。对多次走私、贩卖、运输、制造毒品，未经处理的，毒品数量累计计算。	得分：0.9996756

可能的答案，参考问题：如果犯了过失决水罪可以取保吗	答案：取保

01/19/2020 10:10:02 - INFO - transformers.data.processors.glue -   Writing example 0
01/19/2020 10:10:02 - INFO - transformers.data.processors.glue -   *** Example ***
01/19/2020 10:10:02 - INFO - transformers.data.processors.glue -   guid: 0
01/19/2020 10:10:02 - INFO - transformers.data.processors.glue -   input_ids: 101 7478 3791 2872 4881 1161 1126 2399 136 102 1762 3791 2526 704 2137 7032 680 6370 7032 4638 1277 1166 6370 7032 1469 2137 7032 1525 702 1358 102 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
01/19/2020 10:10:02 - INFO - transformers.data.processors.glue -   attention_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 

可能的答案，参考问题：后诉讼费给退吗，怎么撤诉	答案：*事案件的原告或者上诉人撤诉后诉讼费用由原告或者上诉人负担。行政案件由被告负担。《诉讼费用交纳办法》第三十四条*事案件的原告或者上诉人申请撤诉，人*法*裁定准许的，案件受理费由原告或者上诉人负担。行政案件的被告改变或者撤销具体行政行为，原告申请撤诉，人*法*裁定准许的，案件受理费由被告负担。	得分：0.99968827

可能的答案，参考问题：捷信变相高利贷公司为什么国家不整治	答案：在法律上认定的高利。贷是指超过银行同期基准利率的4倍，也就是产生法律纠纷时支持同期基准利率四倍以内的利率水平，超过银行同期基准利率的4倍，即为高利。贷，不受法律保护。因为不同期限的基准利率不同，高利，贷的法律认定还与期限相关。同期基准利率是指中央银行公布的基准利率，如一年期，基准利率为6%，4倍为24%，即超过24%为法律认定的高利，贷。就一年期来说，1####元，年利息不超过24%*1####=24##元不算高，利贷。	得分：0.9996864

可能的答案，参考问题：有过吸毒史满了5年申请重考了驾驶证还会被注销吗？	答案：2013.01.01日后有被强制隔离戒毒的都不可以考驾照，强制隔离戒毒后每个月尿检一次，三年后派出所出具无复吸证明才可以考驾照。	得分：0.99968356

可能的答案，参考问题：怎样写上诉状，一审判决我退还定金3万元	答案：上诉状上诉人：被上诉人：（刑事上诉状无被上诉人）上诉人因一案，不服人民法院年月日()字第号，现提出上诉。上诉请求：上诉理由：此致人民法院附：本上诉状副本份上诉人:年月日[填写说明]1、上诉请求。首先要综合叙述案情全貌，接着写明原审裁判结果。其次指明是对原判全部或哪一部分不服。最后写明具体诉讼请求，是要撤销原判、全部改变原判还是部分变更原判。2、上诉理由。主要是针对原审裁判而言，而不是针对对方当事人。针对原审判决、裁定论证不服的理由，主要是以下方面：（1）认定事实不清，主要证据不足；（2）原审确定性质不当；（3）适用实体法不当；（4）违反了法定程序。[注意事项]1、上诉只能采用书面形式。如果当事人仅在一审判决、裁定送达时口头表示上诉而未在法定期间内递交上诉状，则视为未提出上诉。2、上诉是当事人享有诉权，一审原、被告及被判决承担责任的第三人均有权上诉。	得分：0.9996823

可能的答案

KeyboardInterrupt: ignored