In [1]:
#require: pandas, tensorflow_hub, tensorflow_text, tensorflow_addons, sklearn
import random
import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow_hub as hub
from tensorflow import keras
import tensorflow_text as text  # Imports TF ops for preprocessing.
import model.tokenization as tokenization
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.callbacks import Callback, ModelCheckpoint, ReduceLROnPlateau, EarlyStopping,CSVLogger
from tensorflow.keras.layers import Input, Dense,Dropout,Embedding,LSTM,Bidirectional, Masking, TimeDistributed, Conv1D, MaxPooling1D, Flatten, concatenate, GRU

random.seed(0)
np.random.seed(0)
tf.random.set_seed(0)

BERT_src = 'https://tfhub.dev/tensorflow/bert_zh_L-12_H-768_A-12/3'#'https://tfhub.dev/tensorflow/bert_multi_cased_L-12_H-768_A-12/4' 
BERT_LAYER = hub.KerasLayer(BERT_src, trainable=False)

In [2]:
category = ['功能', '品質', '無', '配件', '售後', '外觀', '價位', '音量']
cate2idx = {cate:idx for idx, cate in enumerate(category)}
idx2cate = {idx: cate for cate, idx in cate2idx.items()}
sentiment = ['負向', '正向', '中立']
sent2idx = {sent:idx for idx, sent in enumerate(sentiment)}
idx2sent = {idx: sent for sent, idx in sent2idx.items()}

# Read Data

In [3]:
def twoSentence2BERT(inputs, target_, onlytarget=False): #input as list of dictionary
#     BERT_LAYER = hub.KerasLayer(BERT_src, trainable=False)
    VOCAB_FILE = BERT_LAYER.resolved_object.vocab_file.asset_path.numpy()
    tokenizer = tokenization.FullTokenizer(VOCAB_FILE, True) 
    output={'input_word_ids':[], 'input_mask':[], 'input_type_ids':[]}
    for data in inputs:
        count = 0
        #tokenize origin sentence
        tempword, tempmask, temptype=[], [], []
        #add cls
        tempword.append(tokenizer.convert_tokens_to_ids(['[CLS]'])[0])
        tempmask.append(1)
        temptype.append(0)
        if not onlytarget:
            sentence = data['sentence'] #string
            tokenize_sentence = tokenizer.tokenize(sentence)
            for ts in tokenize_sentence:
                try:
                    token_id = tokenizer.convert_tokens_to_ids([ts.lower()])
                except:
                    token_id = tokenizer.convert_tokens_to_ids(['[UNK]'])
                tempword.append(token_id[0])
                tempmask.append(1)
                temptype.append(0)
                count+=1
            #add sep
            if target_:
                tempword.append(tokenizer.convert_tokens_to_ids(['[SEP]'])[0])
                tempmask.append(1)
                temptype.append(0)
        if target_:
            target = data['target'] #string        
            tokenize_target = tokenizer.tokenize(target)        
            for tt in tokenize_target:
                try:
                    token_id = tokenizer.convert_tokens_to_ids([tt.lower()])
                except:
                    token_id = tokenizer.convert_tokens_to_ids(['[UNK]'])
                tempword.append(token_id[0])
                tempmask.append(1)
                temptype.append(1)
                count+=1
                if count>=128:
                    break
        if len(tempword)>127:
            tempword=tempword[:127]
            tempmask=tempmask[:127]
            temptype=temptype[:127]  
        #add sep
        tempword.append(tokenizer.convert_tokens_to_ids(['[SEP]'])[0])
        tempmask.append(1)
        temptype.append(1)                
        while(len(tempword)<128):
            tempword.append(0)
            tempmask.append(0)
            temptype.append(0)            
        output['input_word_ids'].append(tempword)
        output['input_mask'].append(tempmask)
        output['input_type_ids'].append(temptype)        
    return output

def BERTdata2Traindata(data, target=True, to_cate=True, objective = ['sentiment', 'aspect_category']):
    from tensorflow.keras.utils import to_categorical
    outputx = twoSentence2BERT(data, target)
    outputy_sentiment = []
    outputy_category = []
    category=True
    if 'aspect_category' not in objective:
        category=False
        
    if to_cate:
        for d in data:
            if 'sentiment' in objective:
                outputy_sentiment.append(sent2idx[d['sentiment']])
            elif 'aspect_category' in objective:
                outputy_category.append(cate2idx[d['aspect_category']])
    else:
        for d in data:
            if 'sentiment' in objective:
                outputy_sentiment.append(d['sentiment'])
            elif 'aspect_category' in objective:
                outputy_category.append(d['aspect_category'])
        
    if category:
        if to_cate:
            return outputx, to_categorical(outputy_sentiment, num_classes=len(sent2idx)), to_categorical(outputy_category, num_classes=len(cate2idx))
        else:
            return outputx, outputy_sentiment, outputy_category
            
    else:
        if to_cate:
            return outputx, to_categorical(outputy_sentiment, num_classes=len(sent2idx))
        else:
            return outputx, outputy_sentiment
def transBERTtype(data, toBERT=True):
    if toBERT: #input 每個資料都有三個key，每個key的維度都是128
        return {k:np.array([data[i][k] for i in range(len(data))]) for k in data[0].keys()}
    else: #原本BERT的形式
        return [{k:data[k][i] for k in data.keys()} for i in range(len(data['input_word_ids']))]        

In [4]:
train, test = [], []
import json
for c in category:
    BERT_train = []
    dir_ = './data/homeapp/diff_cate/'+c+'/train.json'
    with open(dir_, 'r', encoding='utf8') as file:
        data = file.readlines()
    for d in data:
        BERT_train.append(json.loads(d))
    train.append(BERT_train)
    
    BERT_test = []
    dir_ = './data/homeapp/diff_cate/'+c+'/test.json'
    with open(dir_, 'r', encoding='utf8') as file:
        data = file.readlines()
    for d in data:
        BERT_test.append(json.loads(d))
    test.append(BERT_test)
all_train, all_test = [], []
for i in train:
    all_train+=i
for i in test:
    all_test+=i

 ## 資料統計

In [48]:
print('cate \t train \t test')
for c in range(len(category)):
    print(category[c],'\t',len(train[c]),'\t', len(test[c]))
print('overall\t{}\t{}'.format(len(all_train), len(all_test)))
print('----------')
print('train')
print('cate\t正向\t負向\t中立')
for c in range(len(category)):
    po, ne, nu = 0, 0, 0
    for i in train[c]:
        if i['sentiment']=='正向':
            po+=1
        elif i['sentiment']=='負向':
            ne+=1
        else:
            nu+=1
    print('{}\t{}\t{}\t{}\t'.format(category[c], round(po/len(train[c]),2), round(ne/len(train[c]),2), round(nu/len(train[c]),2)))
print('test')
print('cate\t正向\t負向\t中立')
for c in range(len(category)):
    po, ne, nu = 0, 0, 0
    for i in test[c]:
        if i['sentiment']=='正向':
            po+=1
        elif i['sentiment']=='負向':
            ne+=1
        else:
            nu+=1
    print('{}\t{}\t{}\t{}\t'.format(category[c], round(po/len(test[c]),2), round(ne/len(test[c]),2), round(nu/len(test[c]),2)))

print('train+test')
print('cate\t正向\t負向\t中立')
for c in range(len(category)):
    po, ne, nu = 0, 0, 0
    for i in test[c]:
        if i['sentiment']=='正向':
            po+=1
        elif i['sentiment']=='負向':
            ne+=1
        else:
            nu+=1
    for i in train[c]:
        if i['sentiment']=='正向':
            po+=1
        elif i['sentiment']=='負向':
            ne+=1
        else:
            nu+=1
    print('{}\t{}\t{}\t{}\t'.format(category[c], round(po/(len(test[c])+len(train[c])),2), round(ne/(len(test[c])+len(train[c])),2), round(nu/(len(test[c])+len(train[c])), 2)))
po, ne, nu = 0, 0, 0
for i in all_train:
    if i['sentiment']=='正向':
        po+=1
    elif i['sentiment']=='負向':
        ne+=1
    else:
        nu+=1
for i in all_test:
    if i['sentiment']=='正向':
        po+=1
    elif i['sentiment']=='負向':
        ne+=1
    else:
        nu+=1
print('{}\t{}\t{}\t{}\t'.format('total', round(po/(len(all_test)+len(all_train)),2), round(ne/(len(all_test)+len(all_train)),2), round(nu/(len(all_test)+len(all_train)), 2)))


cate 	 train 	 test
功能 	 375 	 422
品質 	 235 	 204
無 	 411 	 494
配件 	 106 	 83
售後 	 73 	 50
外觀 	 66 	 69
價位 	 49 	 63
音量 	 30 	 37
overall	1345	1422
----------
train
cate	正向	負向	中立
功能	0.18	0.02	0.81	
品質	0.56	0.39	0.05	
無	0.27	0.12	0.62	
配件	0.31	0.14	0.55	
售後	0.44	0.3	0.26	
外觀	0.15	0.08	0.77	
價位	0.31	0.33	0.37	
音量	0.4	0.6	0.0	
test
cate	正向	負向	中立
功能	0.15	0.03	0.82	
品質	0.59	0.36	0.04	
無	0.3	0.1	0.6	
配件	0.31	0.1	0.59	
售後	0.34	0.38	0.28	
外觀	0.26	0.09	0.65	
價位	0.44	0.16	0.4	
音量	0.51	0.35	0.14	
train+test
cate	正向	負向	中立
功能	0.16	0.02	0.81	
品質	0.57	0.38	0.05	
無	0.28	0.11	0.61	
配件	0.31	0.12	0.57	
售後	0.4	0.33	0.27	
外觀	0.21	0.08	0.71	
價位	0.38	0.23	0.38	
音量	0.46	0.46	0.07	
total	0.31	0.15	0.54	


In [8]:
po, ne, nu = 0, 0, 0
total = 0
for j in train:
    for i in j:
        if i['sentiment']=='正向':
            po+=1
        elif i['sentiment']=='負向':
            ne+=1
        else:
            nu+=1
        total+=1
print(round(po/total, 2), round(ne/total, 2), round(nu/total, 2))
po, ne, nu = 0, 0, 0
total=0
for j in test:
    for i in j:
        if i['sentiment']=='正向':
            po+=1
        elif i['sentiment']=='負向':
            ne+=1
        else:
            nu+=1
        total+=1
print(round(po/total, 2), round(ne/total, 2), round(nu/total, 2))


0.3 0.17 0.53
0.31 0.14 0.55


# Model

In [6]:
#Gradient Reverse Layer
@tf.custom_gradient
def grad_reverse(x):
    y = tf.identity(x)
    def custom_grad(dy):
        return -dy
    return y, custom_grad

class GradReverse(tf.keras.layers.Layer):
    def __init__(self):
        super().__init__()

    def call(self, x):
        return grad_reverse(x)

In [7]:
#sentence base, multi-task approach
def create_classify_model(data_size, batch_size = 16, epochs=10, category_len = 8, sentiment_len = 2):
    import model.optimization as optimization
    input1 = Input(shape=(128,), name='input_word_ids', dtype=tf.int32)
    input2 = Input(shape=(128,),name='input_mask', dtype=tf.int32)
    input3 = Input(shape=(128,),name='input_type_ids', dtype=tf.int32)
    bert_layer = hub.KerasLayer(BERT_src, trainable=True, output_key='pooled_output', name='bert_layer')
    output = bert_layer({'input_word_ids':input1, 'input_mask':input2, 'input_type_ids':input3})
#     output = Dense(128, name = 'presentation_')(output)
    
    sentiment_output = Dense(64, activation='relu', name = 'sentiment_pre', 
                             kernel_initializer=keras.initializers.glorot_normal(0), bias_initializer='zeros')(output)
#     sentiment_output = Dropout(0.2, name='sentiment_drop')(sentiment_output)
    sentiment_output = Dense(sentiment_len, activation='softmax', name = 'sentiment', 
                             kernel_initializer=keras.initializers.glorot_normal(0), bias_initializer='zeros')(sentiment_output) #softmax會讓所有的output總和=1
    
    output_model = Model(inputs = [input1, input2, input3], outputs = sentiment_output)
    optimizer = optimization.create_optimizer(
    5e-5, (data_size//batch_size)*epochs, int((epochs*data_size*0.1)//batch_size), 0.0, 'adamw')
    
    output_model.compile(optimizer=optimizer, 
                         loss={'sentiment':'categorical_crossentropy'})#'categorical_crossentropy'})
    return output_model

In [8]:
import random
def sample_data_(data_list, datasize, random_=True):
    #data_list = [BERT_x, sentiment, (category)]
    if random_:
        samples = random.sample(range(len(data_list[1])), datasize)
    else:
        samples = list(range(datasize))        
    bert_x = data_list[0]
    bert_x = {k:np.array([bert_x[k][i] for i in samples]) for k in bert_x.keys()}
    sentiment = np.array(data_list[1])
    sentiment = np.array([sentiment[i] for i in samples])
    if len(data_list)>2:
        category = np.array(data_list[2])
        category = np.array([category[i] for i in samples])
        return bert_x, sentiment, category
    else:
        return bert_x, sentiment
def sample_data(data_list, datasize, random_=True):
    #data_list = [BERT_x, sentiment]
    if random_:
        samples = random.sample(range(len(data_list[1])), datasize)
    else:
        samples = list(range(datasize))        
    bert_x = data_list[0]
    bert_x = {k:np.array([bert_x[k][i] for i in samples]) for k in bert_x.keys()}
    sentiment = np.array(data_list[1])
    sentiment = np.array([sentiment[i] for i in samples])
    return bert_x, sentiment
def model_get_weight(model, keyword='', not_=False):
    origin_weight = []
    for layer in model.layers:
        if not_:
            if not layer.name.startswith(keyword): 
                origin_weight.append(np.array(layer.get_weights()))
        else:
            if layer.name.startswith(keyword): 
                origin_weight.append(np.array(layer.get_weights()))
    return np.array(origin_weight)

def update_weights(model, update_weight, keyword='', not_=False):
    k=0
    for layer in model.layers:
        if not_:
            if not layer.name.startswith(keyword):
                layer.set_weights(update_weight[k])
                k+=1
        else:
            if layer.name.startswith(keyword):
                layer.set_weights(update_weight[k])
                k+=1
def update_weights_forsame(model, model_src):
    for layer in model.layers:
        flag = False
        for layer_src in model_src.layers:
            if layer.name==layer_src.name and len(layer.get_weights())==len(layer_src.get_weights()) and flag==False:
                try: 
                    layer.set_weights(layer_src.get_weights())
                    flag = True
                except:
                    print('error!')
        if flag==False:
            print('model layer: "', layer.name, '" not in source model')
        

# Experiment

names 這個變數是讓你可以選擇多個已訓練模型 這些模型放在 "Meta-ACS_weight_save" 這個資料夾

meta 就是看要不要用已讀取的pretrain model

In [9]:
from sklearn import metrics
from tensorflow.keras.models import load_model
from sklearn.metrics import f1_score, accuracy_score, classification_report
import model.optimization as optimization
data_size=1000
batch_size=32
epochs=7
optimizer = optimization.create_optimizer(5e-5, (data_size//batch_size)*epochs, int((epochs*data_size*0.1)//batch_size), 0.0, 'adamw')
meta = True
names = ['rep_adv_opt_lamb.h5'] #pre-trained model choice
mamicros = []
total_micros = []
for name in names:
    sent_pred_total, sent_ans_total = [], []
    tmp_model = load_model('./Meta-ACS_weight_save/'+name, custom_objects={'KerasLayer':BERT_LAYER, 'AdamWeightDecay':optimizer})
    for itr in range(5):
        mamicro = []
        total_pred, total_ans = [], []
        print(itr)
        for c in range(len(category)):
            print(category[c])
            epochs=7
            tmp_train, train_sentiment = BERTdata2Traindata(train[c], to_cate=True, objective=['sentiment'])
            tmp_test, test_sentiment = BERTdata2Traindata(test[c], to_cate=True, objective=['sentiment'])
            x, sent = sample_data_([tmp_train, train_sentiment], datasize=len(train_sentiment),random_=False)
            with tf.device('/cpu:0'):
                model = create_classify_model(data_size=len(sent), epochs=epochs, sentiment_len = 3)
            if meta:
                update_weights_forsame(model, tmp_model)                
            history = model.fit(x, sent, batch_size=16, epochs=epochs, verbose=1)
            sent_pred = model.predict([np.array(tmp_test['input_word_ids']), 
                            np.array(tmp_test['input_mask']),
                            np.array(tmp_test['input_type_ids'])])
            sent_predict = [idx2sent[np.argmax(i)] for i in sent_pred]
            sent_ans = [idx2sent[np.argmax(i)] for i in test_sentiment]
            total_pred+=sent_predict
            total_ans+=sent_ans
            mamicro.append(f1_score(sent_ans, sent_predict, average='micro'))
#             print('category:', category[c])
            print('sentiment accuracy:',accuracy_score(sent_ans, sent_predict))
#             print('sentiment f1 score(macro):', f1_score(sent_ans, sent_predict, average='macro'))
#             print('sentiment f1 score(micro):', f1_score(sent_ans, sent_predict, average='micro'))
#             print('sentiment f1 score(weight):', f1_score(sent_ans, sent_predict, average='weighted'))
#             print(classification_report(sent_ans, sent_predict))
#             print('-------------------')
            del model, tmp_train, tmp_test, train_sentiment, test_sentiment
        print('overall:')
        print('sentiment f1 score(ma-micro):', np.mean(mamicro))
        print('sentiment accuracy:',accuracy_score(total_ans, total_pred))
        print('sentiment f1 score(macro):', f1_score(total_ans, total_pred, average='macro'))
        print('sentiment f1 score(micro):', f1_score(total_ans, total_pred, average='micro'))
        print('sentiment f1 score(weight):', f1_score(total_ans, total_pred, average='weighted'))
        print(classification_report(total_ans, total_pred))
        print('-------------------')
        sent_pred_total+=total_pred
        sent_ans_total+=total_ans
        mamicros.append(np.mean(mamicro))
        total_micros.append(f1_score(total_ans, total_pred, average='micro'))
#         np.save('./predict_result/'+name[:-3]+'_multiple_more.npy', sent_pred_total)
        #['功能', '品質', '無', '配件', '售後', '外觀', '價位', '音量']
        np.save('./predict_result/'+name[:-3]+'_multiple.npy', sent_pred_total)
    try:
        del tmp_model
    except:
        ;





0
功能
Epoch 1/7
Epoch 2/7
Epoch 3/7
Epoch 4/7
Epoch 5/7
Epoch 6/7
Epoch 7/7
sentiment accuracy: 0.8293838862559242
品質
Epoch 1/7
Epoch 2/7
Epoch 3/7
Epoch 4/7
Epoch 5/7
Epoch 6/7
Epoch 7/7
sentiment accuracy: 0.8137254901960784
無
Epoch 1/7
Epoch 2/7
Epoch 3/7
Epoch 4/7
Epoch 5/7
Epoch 6/7
Epoch 7/7
sentiment accuracy: 0.694331983805668
配件
Epoch 1/7
Epoch 2/7
Epoch 3/7
Epoch 4/7
Epoch 5/7
Epoch 6/7
Epoch 7/7
sentiment accuracy: 0.6987951807228916
售後
Epoch 1/7
Epoch 2/7
Epoch 3/7
Epoch 4/7
Epoch 5/7
Epoch 6/7
Epoch 7/7
sentiment accuracy: 0.7
外觀
Epoch 1/7
Epoch 2/7
Epoch 3/7
Epoch 4/7
Epoch 5/7
Epoch 6/7
Epoch 7/7
sentiment accuracy: 0.6666666666666666
價位
Epoch 1/7
Epoch 2/7
Epoch 3/7
Epoch 4/7
Epoch 5/7
Epoch 6/7
Epoch 7/7




sentiment accuracy: 0.5714285714285714
音量
Epoch 1/7
Epoch 2/7
Epoch 3/7
Epoch 4/7
Epoch 5/7
Epoch 6/7
Epoch 7/7




sentiment accuracy: 0.5675675675675675
overall:
sentiment f1 score(ma-micro): 0.692737418330421
sentiment accuracy: 0.7419127988748242
sentiment f1 score(macro): 0.6858271942305555
sentiment f1 score(micro): 0.7419127988748242
sentiment f1 score(weight): 0.7354685732783807
              precision    recall  f1-score   support

          中立       0.78      0.87      0.83       788
          正向       0.69      0.58      0.63       441
          負向       0.64      0.56      0.60       193

    accuracy                           0.74      1422
   macro avg       0.70      0.67      0.69      1422
weighted avg       0.74      0.74      0.74      1422

-------------------
1
功能
Epoch 1/7
Epoch 2/7
Epoch 3/7
Epoch 4/7
Epoch 5/7
Epoch 6/7
Epoch 7/7




sentiment accuracy: 0.8341232227488151
品質
Epoch 1/7
Epoch 2/7
Epoch 3/7
Epoch 4/7
Epoch 5/7
Epoch 6/7
Epoch 7/7
sentiment accuracy: 0.8333333333333334
無
Epoch 1/7
Epoch 2/7
Epoch 3/7
Epoch 4/7
Epoch 5/7
Epoch 6/7
Epoch 7/7
sentiment accuracy: 0.6821862348178138
配件
Epoch 1/7
Epoch 2/7
Epoch 3/7
Epoch 4/7
Epoch 5/7
Epoch 6/7
Epoch 7/7
sentiment accuracy: 0.6867469879518072
售後
Epoch 1/7
Epoch 2/7
Epoch 3/7
Epoch 4/7
Epoch 5/7
Epoch 6/7
Epoch 7/7
sentiment accuracy: 0.78
外觀
Epoch 1/7
Epoch 2/7
Epoch 3/7
Epoch 4/7
Epoch 5/7
Epoch 6/7
Epoch 7/7
sentiment accuracy: 0.7246376811594203
價位
Epoch 1/7
Epoch 2/7
Epoch 3/7
Epoch 4/7
Epoch 5/7
Epoch 6/7
Epoch 7/7




sentiment accuracy: 0.5873015873015873
音量
Epoch 1/7
Epoch 2/7
Epoch 3/7
Epoch 4/7
Epoch 5/7
Epoch 6/7
Epoch 7/7




sentiment accuracy: 0.5135135135135135
overall:
sentiment f1 score(ma-micro): 0.7052303201032863
sentiment accuracy: 0.7461322081575246
sentiment f1 score(macro): 0.6926535620293874
sentiment f1 score(micro): 0.7461322081575245
sentiment f1 score(weight): 0.7415790656971359
              precision    recall  f1-score   support

          中立       0.80      0.86      0.83       788
          正向       0.69      0.61      0.65       441
          負向       0.63      0.58      0.60       193

    accuracy                           0.75      1422
   macro avg       0.70      0.68      0.69      1422
weighted avg       0.74      0.75      0.74      1422

-------------------
2
功能


DataLossError: TensorBundle at /tmp/tfhub_modules/be4cff0fcfe466313112ea7bc4d88770583b60b6/variables/variables shard 0 (64905216 bytes): Checksum does not match: stored 114360999 vs. calculated on the restored bytes 2314620158 [Op:RestoreV2]

# Evaluate

In [26]:
cate_split = [0]
for c in range(len(category)):
    tmp_test, test_sentiment = BERTdata2Traindata(test[c], to_cate=True, objective=['sentiment'])
    cate_split.append(cate_split[-1]+len(test_sentiment))
cate_split

[0, 422, 626, 1120, 1203, 1253, 1322, 1385, 1422]

In [None]:
#這邊看你要用的預測結果是剛剛跑出來的 還是之前已經存好的

#之前存好的
#mypred = np.load('./predict_result/dann_multiple.npy')
#剛剛跑出來的
mypred = sent_pred_total

In [11]:
#multiple
from sklearn.metrics import f1_score, accuracy_score, classification_report
cate_split = [0, 422, 626, 1120, 1203, 1253, 1322, 1385, 1422]

myans = [i['sentiment'] for i in all_test]*5
category_perf = {_:[] for _ in category}
macros = []
for i in range(5):
    macro = []
    start = int(i*1422)
    for c in range(len(category)):
        predicts = mypred[start+cate_split[c]:start+cate_split[c+1]]
        answers = myans[start+cate_split[c]:start+cate_split[c+1]]
        macro.append(f1_score(answers, predicts, average='micro'))
#         print(category[c])
#         print('sentiment f1 score(macro):', f1_score(answers, predicts, average='macro'))
#         print('sentiment f1 score(micro):', f1_score(answers, predicts, average='micro'))
        category_perf[category[c]].append(f1_score(answers, predicts, average='micro'))
    macros.append(np.mean(macro))
micro = []
for i in range(5):
    macro = []
    start = int(i*1422)
    predicts = mypred[start:start+1422]
    answers = myans[start:start+1422]
    micro.append(f1_score(answers, predicts, average='micro'))

# print(macros)
print('micro', np.mean(micro))
print('macro',np.mean(macros))
#print(category_perf)

total-micro 0.7257383966244725
ma-micro 0.6324329086588832


## 統計數據

信賴區間、每個實驗的數據(在每個aspect category中的sentiment f1 score)

總之這邊有沒有看其實沒差

In [19]:
for i in category:
    print(i,'的信賴區間',np.std([_*1 for _ in category_perf[i]], ddof=1)*2)
print('micro的單側信賴區間:',np.std([_*1 for _ in micro], ddof=1)*2)
print('macro的單側信賴區間:',np.std([_*1 for _ in macros], ddof=1)*2)

功能 的信賴區間 0.018170890533346792
品質 的信賴區間 0.03053414020077924
無 的信賴區間 0.012930137184349239
配件 的信賴區間 0.03959440174736142
售後 的信賴區間 0.043817804600413325
外觀 的信賴區間 0.03304856304635183
價位 的信賴區間 0.028394513999997317
音量 的信賴區間 0.09044973259827843
micro的單側信賴區間: 0.010784973002914642
macro的單側信賴區間: 0.00868209932538389


In [71]:
#single
import numpy as np
temp = np.load('./predict_result/reptile_single.npy')
from sklearn import metrics
from sklearn.metrics import f1_score, accuracy_score, classification_report, precision_recall_fscore_support
mamicro = []
category_wf = {_:0 for _ in category}
category_perf = {_:0 for _ in category}
category_po, category_ne, category_nu = {_:0 for _ in category}, {_:0 for _ in category}, {_:0 for _ in category} 
for i in range(5):
    micro = []
    for j in category:
        pred, ans = [], []
        for k in range(1422):
            if BERT_test[k]['target'].split('-')[1]==j:
                pred.append(temp[i*1422+k])
                ans.append(BERT_test[k]['sentiment'])
        for label in ['正向', '負向', '中立']:
            p = [1 if l==label else 0 for l in pred]
            a = [1 if l==label else 0 for l in ans]
            if label=='正向':
                category_po[j]+=f1_score(a, p, average='binary')
            elif label=='負向':
                category_ne[j]+=f1_score(a, p, average='binary')
            else:
                category_nu[j]+=f1_score(a, p, average='binary')

        micro.append(f1_score(ans, pred, average='micro'))
        category_perf[j]+=f1_score(ans, pred, average='micro')
        category_wf[j]+=f1_score(ans, pred, average='weighted')
    mamicro.append(np.mean(micro))
for c in category:
    category_perf[c]/=5
    category_po[c]/=5
    category_ne[c]/=5
    category_nu[c]/=5
    category_wf[c]/=5
print('ma-micro')
print(np.mean(mamicro))
print('positive')
print(category_po)

print('negative')
print(category_ne)

print('neutral')
print(category_nu)

print('micro f1 score')
print(category_perf)

print('weighted f1 score')
print(category_wf)
pof, nef, nuf, totalmif, totalmaf, totalwef = 0, 0, 0, 0, 0, 0
for i in range(5):
    pred=temp[i*1422:i*1422+1422]
    ans = [BERT_test[i]['sentiment'] for i in range(len(all_test)) ]   
    totalmif+=f1_score(ans, pred, average='micro')
    totalmaf+=f1_score(ans, pred, average='macro')
    totalwef+=f1_score(ans, pred, average='weighted')
    for label in ['正向', '負向', '中立']:
        p = [1 if l==label else 0 for l in pred]
        a = [1 if l==label else 0 for l in ans]
        if label=='正向':
            pof+=f1_score(a, p, average='binary')
        elif label=='負向':
            nef+=f1_score(a, p, average='binary')
        else:
            nuf+=f1_score(a, p, average='binary')
print('positive:{}\nneutral:{}\nnegative:{}\nmicro:{}\nmacro:{}\nweighted:{}'.format(pof/5, nuf/5, nef/5, totalmif/5, totalmaf/5, totalwef/5))

ma-micro
0.6905102557221909
positive
{'功能': 0.40258436019163935, '品質': 0.8701903643384081, '無': 0.5471333553757458, '配件': 0.56772444946358, '售後': 0.6944807965860595, '外觀': 0.25819548872180453, '價位': 0.6345188104701782, '音量': 0.5987468671679198}
negative
{'功能': 0.028571428571428574, '品質': 0.8151485094631956, '無': 0.3115720021938542, '配件': 0.8099206349206349, '售後': 0.6955038759689923, '外觀': 0.20969696969696966, '價位': 0.39604554865424435, '音量': 0.5620677361853832}
neutral
{'功能': 0.9117929847996477, '品質': 0.07999999999999999, '無': 0.7863647203172357, '配件': 0.7654563449930448, '售後': 0.3332323232323232, '外觀': 0.8291805460029759, '價位': 0.6368816368816369, '音量': 0.0}
micro f1 score
{'功能': 0.842654028436019, '品質': 0.8323529411764706, '無': 0.682591093117409, '配件': 0.710843373493976, '售後': 0.62, '外觀': 0.7014492753623188, '價位': 0.5936507936507937, '音量': 0.5405405405405406}
weighted f1 score
{'功能': 0.8061521297510993, '品質': 0.8153628616922738, '無': 0.6671204965266121, '配件': 0.7078019478322574, '售後'