In [1]:
import re
import os
import sys
import json
import numpy as np
import pandas as pd
from collections import OrderedDict
from copy import deepcopy

import torch
from transformers import EvalPrediction
from transformers import AutoConfig, AutoModelForSequenceClassification, AutoTokenizer
from transformers import BertConfig, BertForSequenceClassification, BertTokenizer
from transformers import Trainer, TrainingArguments, set_seed
from transformers import GlueDataTrainingArguments, GlueDataset, glue_output_modes, glue_tasks_num_labels



### 1. 导入旧模型

In [23]:
# Directory containing the saved config, vocabulary and weights of the old model.
model_dir = './multilingual_sentiment_vocab20k'

# The tokenizer and the config are loaded independently; the classifier is then
# instantiated from the same directory using that config.
tokenizer = AutoTokenizer.from_pretrained(model_dir)
config = AutoConfig.from_pretrained(model_dir)
model = AutoModelForSequenceClassification.from_pretrained(
    model_dir,
    config=config,
)

In [30]:
# Tokenize one review and wrap the ids in a list to get a batch of size 1.
input_ids = torch.tensor([
    tokenizer.encode('a synthesis of cliches and absurdities that seems positively decadent in its cinematic flash and emptiness ')
])

# Inference only: put the model in eval mode and skip gradient tracking.
model.eval()
with torch.no_grad():
    outputs = model(input_ids)
predictions = outputs[0]  # first element of the model output is used as the logits

# Normalise the logits along the class dimension and show the distribution.
softmax = torch.nn.Softmax(dim=1)
predictions = softmax(predictions)
print(predictions)

# Most probable class index, mapped to its label through the model config.
index_pred = predictions[0].argmax().item()
label_pred = config.id2label[index_pred]
print(label_pred)

tensor([[0.1609, 0.1715, 0.1642, 0.2521, 0.2513]])
4 stars


### 2. 导入训练好的SST-2模型(from Glue)

In [22]:
# Local directory holding the GLUE SST-2 TSV files.
DATA_DIR = '/home/edili/download/downloads/glue/SST-2/'
dev = pd.read_csv(os.path.join(DATA_DIR, 'dev.tsv'), sep='\t')

# Of the first fifty dev rows, show the last five: column 1 first, then column 0.
for row in dev.head(50).tail().values:
    print(row[1], row[0])

0 it offers little beyond the momentary joys of pretty and weightless intellectual entertainment . 
0 a synthesis of cliches and absurdities that seems positively decadent in its cinematic flash and emptiness . 
1 a subtle and well-crafted ( for the most part ) chiller . 
1 has a lot of the virtues of eastwood at his best . 
0 it 's hampered by a lifetime-channel kind of plot and a lead actress who is out of her depth . 


In [5]:
# Checkpoint directory of the model fine-tuned on SST-2.
ckpt_path = '/tmp/SST-2'

# NOTE: this rebinds the notebook-level `config` and `tokenizer` names.
tokenizer = AutoTokenizer.from_pretrained(ckpt_path)
config = AutoConfig.from_pretrained(ckpt_path)
model_ft = AutoModelForSequenceClassification.from_pretrained(
    ckpt_path,
    config=config,
)

In [6]:
# Encode a single SST-2 style sentence as a batch of size 1.
input_ids = torch.tensor([
    tokenizer.encode('a synthesis of cliches and absurdities that seems positively decadent in its cinematic flash and emptiness ')
])

# Run the fine-tuned classifier in eval mode without tracking gradients.
model_ft.eval()
with torch.no_grad():
    outputs = model_ft(input_ids)
predictions = outputs[0]  # first element of the model output is used as the logits

# Turn the logits into class probabilities and print them.
softmax = torch.nn.Softmax(dim=1)
predictions = softmax(predictions)
print(predictions)

# Index of the most probable class; left as the last expression so it is displayed.
index_pred = predictions[0].argmax().item()
index_pred

tensor([[0.9823, 0.0177]])


0

### 3. 导入训练好的新闻情感模型

In [4]:
# Checkpoint directory of the news sentiment model.
ckpt_path = '/tmp/st_news'

# NOTE: this rebinds the notebook-level `config`, `tokenizer` and `model_ft` names.
tokenizer = AutoTokenizer.from_pretrained(ckpt_path)
config = AutoConfig.from_pretrained(ckpt_path)
model_ft = AutoModelForSequenceClassification.from_pretrained(
    ckpt_path,
    config=config,
)
# Switch to eval mode; assigning the return value keeps the cell from echoing it.
_ = model_ft.eval()

In [5]:
def predict(sentence, model=None, encode=None):
    """Classify one sentence and return the winning class with its probability.

    Args:
        sentence: Text to classify.
        model: Optional callable/model to run. It is called with a
            ``(1, seq_len)`` tensor of token ids and the first element of its
            output is treated as the logits. Defaults to the notebook-level
            ``model_ft``.
        encode: Optional callable mapping a string to a list of token ids.
            Defaults to ``tokenizer.encode`` of the notebook-level tokenizer.

    Returns:
        ``(index_pred, prob)``: the argmax class index as a Python int and the
        softmax probability of that class as a NumPy scalar.
    """
    # Fall back to the notebook globals only when no override is supplied, so
    # the same helper can be pointed explicitly at any model/tokenizer pair
    # instead of depending on whichever cell last rebound those names.
    if model is None:
        model = model_ft
    if encode is None:
        encode = tokenizer.encode

    # Wrap the token ids in a list to obtain a batch of size 1.
    input_ids = torch.tensor([encode(sentence)])
    with torch.no_grad():
        logits = model(input_ids)[0]

    # Normalise along the class dimension, then pick the most probable class.
    probs = torch.softmax(logits, dim=1)
    index_pred = torch.argmax(probs[0]).item()
    prob = probs.numpy()[0, index_pred]
    return index_pred, prob

In [6]:
# Example headline passed to predict(); the cell displays the returned
# (class index, probability) pair.
sentence = '【建投中小盘】一周策略回顾与展望2020-04-13'
predict(sentence)

(1, 0.9044954)