In [1]:
import numpy as np
import pandas as pd
from sklearn import metrics
from sklearn.preprocessing import OneHotEncoder
import transformers
import torch
import os
from torch.utils.data import Dataset, DataLoader, RandomSampler, SequentialSampler
from transformers import BertTokenizer, BertModel, BertConfig

In [2]:
# Display the installed transformers version so the environment this notebook
# ran against is recorded alongside the results.
transformers.__version__

'3.0.2'

In [3]:
# class BERTClass(torch.nn.Module):
#     def __init__(self):
#         super(BERTClass, self).__init__()
#         self.l1 = BertModel.from_pretrained('bert-base-chinese', cache_dir = 'D:lzl/模型/Bert/Bert-base-chinese/') #cache_dir 
#         self.l2 = torch.nn.Dropout(0.3)
#         self.l3 = torch.nn.Linear(768, 3)
    
#     def forward(self, ids, mask, token_type_ids):
#         _, output_1= self.l1(ids, attention_mask = mask, token_type_ids = token_type_ids)
#         output_2 = self.l2(output_1)
#         output = self.l3(output_2)
#         return output

In [12]:
class BERTClass(torch.nn.Module):
    """Sentence classifier: a pretrained BERT encoder, dropout, and a linear head.

    Attributes:
        l1: the pretrained ``BertModel`` encoder.
        l2: dropout applied to the encoder's pooled output.
        l3: linear layer mapping the pooled output to class logits.

    The attribute names ``l1``/``l2``/``l3`` are kept as-is because a model
    pickled with this class stores its sub-modules under those names.
    """

    def __init__(self, pretrained_name='bert-base-chinese',
                 cache_dir='D:lzl/模型/Bert/Bert-base-chinese/',
                 num_labels=3, dropout=0.3):
        """Build the encoder and the classification head.

        Args:
            pretrained_name: model identifier passed to ``BertModel.from_pretrained``.
            cache_dir: directory where the pretrained weights are cached.
            num_labels: number of output classes (logits per sentence).
            dropout: dropout probability applied before the linear head.
        """
        super(BERTClass, self).__init__()
        self.l1 = BertModel.from_pretrained(pretrained_name, cache_dir=cache_dir)
        self.l2 = torch.nn.Dropout(dropout)
        # Size the head from the encoder config instead of hard-coding 768 so a
        # different checkpoint width does not silently break the model.
        self.l3 = torch.nn.Linear(self.l1.config.hidden_size, num_labels)

    def forward(self, input_ids, attention_mask=None, token_type_ids=None):
        """Return unnormalised class logits for a batch of token ids.

        Args:
            input_ids: token id tensor fed to the encoder.
            attention_mask: optional mask forwarded to the encoder.
            token_type_ids: optional segment ids forwarded to the encoder.

        Returns:
            The output of the linear head applied to the (dropped-out) pooled
            encoder output.
        """
        encoder_outputs = self.l1(
            input_ids=input_ids,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
        )
        # Take the pooled output by position rather than tuple-unpacking:
        # unpacking a dict-like model output iterates over its *keys*, which
        # hands a string to dropout. Integer indexing works for plain tuples
        # and for dict-like outputs that support positional access.
        pooled_output = encoder_outputs[1]
        return self.l3(self.l2(pooled_output))

In [17]:
# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print(device)

cuda


In [18]:
# Tokenizer matching the 'bert-base-chinese' checkpoint used by BERTClass.
tokenizer = BertTokenizer.from_pretrained('bert-base-chinese')

# torch.load unpickles a complete module object here, so:
#   * the BERTClass definition must already have been executed in this kernel;
#   * the file must come from a trusted source (unpickling can run arbitrary code).
# NOTE(review): 'D:lzl/...' has no slash after the drive letter, so Windows
# resolves it relative to the current directory on drive D — confirm intended.
model = torch.load('D:lzl/模型/中文影评情感分类/中文影评情感分类.pkl', map_location=torch.device(device))

# A pickled module keeps whatever train/eval flag it was saved with. Switch to
# eval mode explicitly so dropout is disabled and predictions are deterministic.
model = model.eval()

In [16]:
# inputs = tokenizer(sentence, return_tensors="pt").to(cuda)
# ids = inputs['input_ids']
# mask = inputs['attention_mask']
# token_type_ids = inputs['token_type_ids']
# model(**inputs)

tensor([[-0.7821,  0.1671, -1.8785]], device='cuda:0', grad_fn=<AddmmBackward>)

In [20]:
# Print floats in plain decimal notation instead of scientific notation.
np.set_printoptions(suppress=True)

def sentclsfy(sentence, verbose=True):
    """Classify the sentiment of one sentence with the loaded model.

    Relies on the notebook-level ``tokenizer``, ``model`` and ``device``.

    Args:
        sentence: the text to classify.
        verbose: when True (default) print the three class probabilities;
            pass False when mapping over many sentences to avoid flooding
            the cell output.

    Returns:
        A ``(pb, res)`` pair where ``pb`` is the softmax probability tensor
        (columns ordered negative, neutral, positive) and ``res`` is the
        predicted label: -100 (negative), 0 (neutral) or 100 (positive).
    """
    label = [-100, 0, 100]
    # Truncate to 512 tokens (the BERT position limit) so that very long
    # reviews are clipped instead of crashing the encoder.
    inputs = tokenizer(
        sentence, return_tensors="pt", truncation=True, max_length=512
    ).to(device)
    # `**inputs` unpacks the encoding's entries into keyword arguments
    # (input_ids=..., attention_mask=..., token_type_ids=...).
    # no_grad: this is pure inference, so skip building the autograd graph.
    with torch.no_grad():
        outputs = model(**inputs)
    pb = torch.softmax(outputs, dim=1)
    if verbose:
        print('正面： {}'.format(pb[0,2]))
        print('中性： {}'.format(pb[0,1]))
        print('负面： {}'.format(pb[0,0]))
    # argmax along the class axis of the single row, converted to a plain int
    # so it can index the Python label list.
    res = label[int(torch.argmax(pb, dim=1)[0])]
    return pb, res

In [21]:
# Smoke-test the classifier on a single lukewarm review ("just so-so...").
sentence = '一般般吧。。。'
# pb: class probabilities; res: predicted label (-100 / 0 / 100).
pb, res = sentclsfy(sentence)
print('判断结果： {}'.format(res))

正面： 0.055074434727430344
中性： 0.6427634358406067
负面： 0.3021621108055115
判断结果： 0


In [13]:
# Load the scraped reviews; only the review text and its star score are used.
test_df = pd.read_excel('G:/lzl/lzl/学校（研究生）/大数据分析机器学习/作业/data/姜子牙10.11.xlsx')

# Take an explicit copy so the label assignment below writes to an independent
# frame rather than to a slice of the frame returned by read_excel.
test_df = test_df[['content', 'score']].copy()

# Map the star score to a sentiment label in ONE pass over the original
# values. Rewriting the column in place step by step is order-dependent: the
# neutral label 0 lies inside the negative range [0, 3), so a reordered step
# would re-label neutral rows as negative.
raw_score = test_df['score']
test_df['score'] = np.select(
    [
        (raw_score >= 4) & (raw_score <= 5),   # positive review
        (raw_score >= 0) & (raw_score < 3),    # negative review
        (raw_score >= 3) & (raw_score < 4),    # neutral review
    ],
    [100, -100, 0],
    # Scores outside [0, 5] (or missing) are left unchanged, as before.
    default=raw_score.values,
)

In [14]:
# Evaluate on the first 50 reviews only to keep inference time short.
test = test_df.iloc[:50]

In [15]:
# Display the evaluation subset (review text and mapped sentiment label).
test

Unnamed: 0,content,score
0,挺好看的，没有大家说的那么不好,100.0
1,不好看，全程只是再听巨大大音响效果,-100.0
2,一般般，剧情说实话只能用拉稀二字来形容，5分给片头的动画和九尾的配音，,-100.0
3,讲道理 给十分满分。\n不管是画面渲染还是背景音效，姜子牙都要比上一部好不止一个level。...,100.0
4,两个人，看困了一对，强打精神没睡着，和哪吒差距较大。画面昏暗，剧情老套，情节冗长，低端创新。...,-100.0
5,故事性不是特别强，剧情走得比较单一，但是动画制作效果没得商量。,100.0
6,看的我都困了，文戏飚不起来，武戏莫名其妙，配角是用来增光添彩的好吗？你这配角完全就是在分主角...,0.0
7,难看，不推荐,-100.0
8,画面比较精美，剧情有些混乱。看得累。,-100.0
9,少了点共鸣。,100.0


In [19]:
# Classify every review in the subset; sentclsfy returns (probabilities, label)
# and only the label is kept.
predict = list(map(lambda review: sentclsfy(review)[1], test['content']))

In [20]:
# Fraction of reviews whose predicted label equals the score-derived label.
metrics.accuracy_score(y_true=test['score'], y_pred=predict)

0.66

In [21]:
# Attach the predictions as a new column. `assign` with a list is positional,
# whereas concat(axis=1) aligns on index labels and would silently misalign
# (or pad with NaN) if `test` ever had a non-default index. A length mismatch
# now raises instead of producing NaN rows.
new = test.assign(predict_score=predict)

In [22]:
# Show only the reviews where the prediction disagrees with the true label.
new.loc[new['score'].ne(new['predict_score'])]

Unnamed: 0,content,score,predict_score
5,故事性不是特别强，剧情走得比较单一，但是动画制作效果没得商量。,100.0,0
6,看的我都困了，文戏飚不起来，武戏莫名其妙，配角是用来增光添彩的好吗？你这配角完全就是在分主角...,0.0,-100
8,画面比较精美，剧情有些混乱。看得累。,-100.0,0
9,少了点共鸣。,100.0,0
10,特效音乐不错，剧情牵强,100.0,0
12,还行…………,100.0,0
13,虽然故事性一般比起讲故事更像是在写论文但是画面精美立意不错比起神话故事更像是逆天以证道的顶级...,100.0,0
15,各种衔接生硬，**都是顿悟啊。故事简单，不大好看，各个人物性格也不太鲜明出彩，反正就想喝了杯白开水,0.0,-100
16,看个电影，特效在好，声音在大，都抵不过你一个塑料袋的声音刺耳，吃个爆米花，踢着前面的座椅，静...,100.0,-100
17,本片中你可以看到：\n轨道炮\n大清洗\n流放西伯利亚\n官大一级压死人\n战胜方一样没有好...,0.0,-100
