In [35]:
#require: pandas, tensorflow_hub, tensorflow_text, tensorflow_addons, sklearn
import random
import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow_hub as hub
from tensorflow import keras
import tensorflow_text as text  # Imports TF ops for preprocessing.
import model.tokenization as tokenization
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.callbacks import Callback, ModelCheckpoint, ReduceLROnPlateau, EarlyStopping,CSVLogger
from tensorflow.keras.layers import Input, Dense,Dropout,Embedding,LSTM,Bidirectional, Masking, TimeDistributed, Conv1D, MaxPooling1D, Flatten, concatenate, GRU

random.seed(0)
np.random.seed(0)
tf.random.set_seed(0)

BERT_src = "https://tfhub.dev/tensorflow/bert_en_uncased_L-12_H-768_A-12/4"
BERT_src = 'https://tfhub.dev/tensorflow/bert_zh_L-12_H-768_A-12/3'#'https://tfhub.dev/tensorflow/bert_multi_cased_L-12_H-768_A-12/4' 
BERT_LAYER = hub.KerasLayer(BERT_src, trainable=False)

In [36]:
category = ['功能', '品質','配件','售後','外觀','價位','音量', '無']
cate2idx = {cate:idx for idx, cate in enumerate(category)}
idx2cate = {idx: cate for cate, idx in cate2idx.items()}
sentiment = ['正向', '負向', '中立']
sent2idx = {sent:idx for idx, sent in enumerate(sentiment)}
idx2sent = {idx: sent for sent, idx in sent2idx.items()}

In [37]:
def twoSentence2BERT(inputs, target_, onlytarget=False): #input as list of dictionary
#     BERT_LAYER = hub.KerasLayer(BERT_src, trainable=False)
    VOCAB_FILE = BERT_LAYER.resolved_object.vocab_file.asset_path.numpy()
    tokenizer = tokenization.FullTokenizer(VOCAB_FILE, True) 
    output={'input_word_ids':[], 'input_mask':[], 'input_type_ids':[]}
    for data in inputs:
        count = 0
        #tokenize origin sentence
        tempword, tempmask, temptype=[], [], []
        #add cls
        tempword.append(tokenizer.convert_tokens_to_ids(['[CLS]'])[0])
        tempmask.append(1)
        temptype.append(0)
        if not onlytarget:
            sentence = data['sentence'] #string
            tokenize_sentence = tokenizer.tokenize(sentence)
            for ts in tokenize_sentence:
                try:
                    token_id = tokenizer.convert_tokens_to_ids([ts.lower()])
                except:
                    token_id = tokenizer.convert_tokens_to_ids(['[UNK]'])
                tempword.append(token_id[0])
                tempmask.append(1)
                temptype.append(0)
                count+=1
            #add sep
            if target_:
                tempword.append(tokenizer.convert_tokens_to_ids(['[SEP]'])[0])
                tempmask.append(1)
                temptype.append(0)
        if target_:
            target = data['target'] #string        
            tokenize_target = tokenizer.tokenize(target)        
            for tt in tokenize_target:
                try:
                    token_id = tokenizer.convert_tokens_to_ids([tt.lower()])
                except:
                    token_id = tokenizer.convert_tokens_to_ids(['[UNK]'])
                tempword.append(token_id[0])
                tempmask.append(1)
                temptype.append(1)
                count+=1
                if count>=128:
                    break
        if len(tempword)>127:
            tempword=tempword[:127]
            tempmask=tempmask[:127]
            temptype=temptype[:127]  
        #add sep
        tempword.append(tokenizer.convert_tokens_to_ids(['[SEP]'])[0])
        tempmask.append(1)
        temptype.append(1)                
        while(len(tempword)<128):
            tempword.append(0)
            tempmask.append(0)
            temptype.append(0)            
        output['input_word_ids'].append(tempword)
        output['input_mask'].append(tempmask)
        output['input_type_ids'].append(temptype)        
    return output

def BERTdata2Traindata(data, target=True, to_cate=True, file_name):
    from tensorflow.keras.utils import to_categorical
    outputx = twoSentence2BERT(data, target)
    outputy = []
    if to_cate:
        for d in data:
            if 'Sent' in sent:
                outputy.append(sent2idx[d['sentiment']])
            else:
                outputy.append(cate2idx[d['aspect_category']])
                    
    else:
        for d in data:
            if 'Sent' in sent:
                outputy.append(d['sentiment'])
            else:
                outputy.append(d['aspect_category'])
        
    if to_cate:
        return outputx, to_categorical(outputy)
    else:
        return outputx, outputy
            
        
def transBERTtype(data, toBERT=True):
    if toBERT: #input 每個資料都有三個key，每個key的維度都是128
        return {k:np.array([data[i][k] for i in range(len(data))]) for k in data[0].keys()}
    else: #原本BERT的形式
        return [{k:data[k][i] for k in data.keys()} for i in range(len(data['input_word_ids']))]        

## Read Sentiment

In [38]:
import json
#PB-Sent_BERT_same_aux_NLI-B_train06-16.json
file_name = 'PB-Sent_BERT_same_aux_NLI-M'
type_ = []
BERT_train, BERT_test = [], []
with open('./data/homeapp/Sent_data/'+file_name+'_train06-16.json', 'r', encoding='utf8') as file:
    data = file.readlines()
for d in data:
    BERT_train.append(json.loads(d))
    
with open('./data/homeapp/Sent_data/'+file_name+'_test06-16.json', 'r', encoding='utf8') as file:
    data = file.readlines()
for d in data:
    BERT_test.append(json.loads(d))

print('train:{}, test:{}'.format(len(BERT_train), len(BERT_test)))

train:1345, test:1422


## sentiment only with BN 

In [1]:
#第一部分實驗有提到 用只把產品廠牌(brand name)當成輔助句子的
#這邊注意一下變數名字 train/test 我有另外取名

BERT_train_mod, BERT_test_mod = [], []
for i in BERT_train:
    tmp = i.copy()
    tmp['target'] = i['target'].split('-')[0]
    BERT_train_mod.append(i)
for i in BERT_test:
    tmp = i.copy()
    tmp['target'] = i['target'].split('-')[0]
    BERT_test_mod.append(i)
BERT_train_mod[:3]

NameError: name 'BERT_train' is not defined

# Transfer data into BERT format

In [39]:
#to_cate: 看要不要轉成category的形式 (e.g. 2 -> [0, 1, 0] 3 -> [0, 0, 1]) 
#objective: 看現在的主要目標是sentiment 還是 aspect category 再自己設定
train_x, train_sentiment = BERTdata2Traindata(BERT_train, to_cate=True, file_name)
test_x, test_sentiment = BERTdata2Traindata(BERT_test, to_cate=True, file_name)

# Model

In [40]:
#Gradient Reverse Layer
@tf.custom_gradient
def grad_reverse(x):
    y = tf.identity(x)
    def custom_grad(dy):
        return -dy
    return y, custom_grad

class GradReverse(tf.keras.layers.Layer):
    def __init__(self):
        super().__init__()

    def call(self, x):
        return grad_reverse(x)

In [41]:
#這邊要注意一下 對於不同的輔助句子 要用不同的激活函數跟Loss function 
#多元分類、二元分類 -> categorical crossentropy (我記得這邊如果二元分類 softmax+categorical_crossentropy會跑不出來，只能用categorical)
#多元標籤 -> binary crossentropy

def create_classify_model(data_size, file_name, batch_size = 16, epochs=10):
    if 'Sent' in file_name:
        task = 'sentiment'
    else:
        task = 'aspect_category'
    
    if '-B' in file_name:
        output_len = 2
    elif 'Sent' in file_name:
        output_len = 3
    else:
        output_len = 8
        
    import model.optimization as optimization
    input1 = Input(shape=(128,), name='input_word_ids', dtype=tf.int32)
    input2 = Input(shape=(128,),name='input_mask', dtype=tf.int32)
    input3 = Input(shape=(128,),name='input_type_ids', dtype=tf.int32)
    bert_layer = hub.KerasLayer(BERT_src, trainable=True, output_key='pooled_output', name='bert_layer')
    output = bert_layer({'input_word_ids':input1, 'input_mask':input2, 'input_type_ids':input3})
#     output = Dense(128, name = 'presentation_')(output)
    
    output = Dense(64, activation='relu', name = task+'pre', 
                             kernel_initializer=keras.initializers.glorot_normal(0), bias_initializer='zeros')(output)
#     sentiment_output = Dropout(0.2, name='sentiment_drop')(output)
    if output_len!=8:
        output = Dense(output_len, activation='softmax', name = task, 
                                 kernel_initializer=keras.initializers.glorot_normal(0), bias_initializer='zeros')(output) #softmax會讓所有的output總和=1
    else:
        output = Dense(output_len, activation='sigmoid', name = 'aspect_category', 
                                 kernel_initializer=keras.initializers.glorot_normal(0), bias_initializer='zeros')(output) #softmax會讓所有的output總和=1
        
    
    output_model = Model(inputs = [input1, input2, input3], outputs = sentiment_output)
    optimizer = optimization.create_optimizer(
    5e-5, (data_size//batch_size)*epochs, int((epochs*data_size*0.1)//batch_size), 0.0, 'adamw')
    if output_len!=8:
        output_model.compile(optimizer=optimizer, 
                             loss={task:'categorical_crossentropy'})})
    else:
        output_model.compile(optimizer=optimizer, 
                             loss={task:'binary_crossentropy'})})
        
    return output_model

# 其他設定

In [42]:
import random
def sample_data_(data_list, datasize, random_=True):
    #data_list = [BERT_x, sentiment, (category)]
    if random_:
        samples = random.sample(range(len(data_list[1])), datasize)
    else:
        samples = list(range(datasize))        
    bert_x = data_list[0]
    bert_x = {k:np.array([bert_x[k][i] for i in samples]) for k in bert_x.keys()}
    sentiment = np.array(data_list[1])
    sentiment = np.array([sentiment[i] for i in samples])
    if len(data_list)>2:
        category = np.array(data_list[2])
        category = np.array([category[i] for i in samples])
        return bert_x, sentiment, category
    else:
        return bert_x, sentiment
def sample_data(data_list, datasize, random_=True):
    #data_list = [BERT_x, sentiment]
    if random_:
        samples = random.sample(range(len(data_list[1])), datasize)
    else:
        samples = list(range(datasize))        
    bert_x = data_list[0]
    bert_x = {k:np.array([bert_x[k][i] for i in samples]) for k in bert_x.keys()}
    sentiment = np.array(data_list[1])
    sentiment = np.array([sentiment[i] for i in samples])
    return bert_x, sentiment
def model_get_weight(model, keyword='', not_=False):
    origin_weight = []
    for layer in model.layers:
        if not_:
            if not layer.name.startswith(keyword): 
                origin_weight.append(np.array(layer.get_weights()))
        else:
            if layer.name.startswith(keyword): 
                origin_weight.append(np.array(layer.get_weights()))
    return np.array(origin_weight)

def update_weights(model, update_weight, keyword='', not_=False):
    k=0
    for layer in model.layers:
        if not_:
            if not layer.name.startswith(keyword):
                layer.set_weights(update_weight[k])
                k+=1
        else:
            if layer.name.startswith(keyword):
                layer.set_weights(update_weight[k])
                k+=1
def update_weights_forsame(model, model_src):
    for layer in model.layers:
        flag = False
        for layer_src in model_src.layers:
            if layer.name==layer_src.name and len(layer.get_weights())==len(layer_src.get_weights()) and flag==False:
                try: 
                    layer.set_weights(layer_src.get_weights())
                    flag = True
                except:
                    print('error!')
        if flag==False:
            print('model layer: "', layer.name, '" not in source model')
        

## Load pre-train model

In [23]:
import model.optimization as optimization
try:
    del tmp_model
except:
    ;
src_model = 'rep_adv.h5'
data_size=1000
batch_size=32
epochs=7
optimizer = optimization.create_optimizer(5e-5, (data_size//batch_size)*epochs, int((epochs*data_size*0.1)//batch_size), 0.0, 'adamw')
from tensorflow.keras.models import load_model
print('loading model...')
tmp_model = load_model('./Meta-ACS_weight_save/'+src_model, custom_objects={'KerasLayer':BERT_LAYER, 'AdamWeightDecay':optimizer})
print('done!')

loading model...




done!


# Training

meta這個變數是看要不要用已讀取的pretrain model

In [43]:
# train_x, train_sentiment
# test_x, test_sentiment
from sklearn import metrics
from sklearn.metrics import f1_score, accuracy_score, classification_report, precision_recall_fscore_support

import time
meta = False
epochs = 7
sent_pred_total = []
mamicro = []
micros = []
for itr in range(1): #看要重複跑幾次實驗
    starttime = time.time()
    print(itr)
    try:
        del model
    except:
        ;
    x, sent = sample_data_([train_x, train_sentiment], datasize=len(train_sentiment),random_=False)
    model = create_classify_model(data_size=len(sent), epochs=epochs, sentiment_len = 3)
    if meta:
        update_weights_forsame(model, tmp_model)    
    history = model.fit(x, sent, batch_size=16, epochs=epochs, verbose=1)
    sent_pred = model.predict([np.array(test_x['input_word_ids']), 
                        np.array(test_x['input_mask']),
                        np.array(test_x['input_type_ids'])])
    sent_predict = [sentiment[np.argmax(i)] for i in sent_pred]
    sent_pred_total+=sent_predict
    sent_ans = [idx2sent[np.argmax(i)] for i in test_sentiment]
    micro = []
    for j in category:
        pred, ans = [], []
        for k in range(1422):
            if BERT_test[k]['target'].split('-')[1]==j:
                pred.append(sent_predict[k])
                ans.append(BERT_test[k]['sentiment'])
        micro.append(f1_score(ans, pred, average='micro'))
    micros.append(f1_score(sent_ans, sent_predict, average='micro'))
    mamicro.append(np.mean(micro))
    print('ma-micro:', np.mean(micro))
    print('micro:', f1_score(sent_ans, sent_predict, average='micro'))
    print('spend:', int(time.time()-starttime), 's')
print(len(sent_pred_total))

0
Epoch 1/7
Epoch 2/7
Epoch 3/7
Epoch 4/7
Epoch 5/7
Epoch 6/7
Epoch 7/7
ma-micro: 0.6946830640599477
micro: 0.7433192686357243
spend: 180
1422


# Evaluate of AC

In [97]:
#CATEGORY
#multiple
from sklearn.metrics import f1_score, accuracy_score, classification_report, precision_recall_fscore_support
if '-M' in file_name:
    sent_pred = [j for i in sent_pred for j in i]
    test_sentiment = [j for i in test_sentiment for j in i ]
threds = [.1, .2, .3, .4, .5, .6, .7, .8, .9]
max_ = 0.0
thrd = 0
for thred in threds:
    sent_predict = [1 if i>=thred else 0 for i in sent_pred]
    p, r, f, _ = precision_recall_fscore_support(test_sentiment, sent_predict, average='binary')
    print('threshold:{}\t p:{}\t r:{}\t f1:{}'.format(thred, round(p, 5), round(r, 5), round(f, 5)))
    if f>=max_:
        max_=f
        thrd = thred
        
sent_predict = [1 if i>=thrd else 0 for i in sent_pred]    
acs = []
categories = ['功能', '品質','配件','售後','外觀','價位','音量', '無']
for i in range(8):
    acs.append({'true':0, 'pred':0, 'ans':0})
for i in range(len(sent_predict)//8):
    for j in range(8):
        if test_sentiment[i*8+j]==1:
            acs[j]['ans']+=1
        if sent_predict[i*8+j]==1:
            acs[j]['pred']+=1
        if sent_predict[i*8+j]==1 and test_sentiment[i*8+j]==1:
            acs[j]['true']+=1
f_all = 0
print('每個aspect category各自的準確度')
for i in range(8):
    p = acs[i]['true']/acs[i]['pred']
    r = acs[i]['true']/acs[i]['ans']
    f = 2*p*r/(p+r)
    print('{}: p:{}\t r:{}\t f1:{}'.format(categories[i],round(p,3), round(r,3), round(f,3)))
    f_all+=f
print('----avg. f1:{}-----------'.format(f_all/8))
    

threshold:0.1	0.41101	0.84249	0.55249
threshold:0.2	0.49436	0.70696	0.58185
threshold:0.3	0.60498	0.56996	0.58695
threshold:0.4	0.63636	0.51282	0.56795
threshold:0.5	0.66095	0.47985	0.55603
threshold:0.6	0.67505	0.45201	0.54147
threshold:0.7	0.70303	0.42491	0.52968
threshold:0.8	0.74548	0.36264	0.48793
threshold:0.9	0.84918	0.18974	0.31018
功能:0.674	0.868	0.758
品質:0.5	0.005	0.01
配件:0.487	0.487	0.487
售後:0.411	0.6	0.488
外觀:0.339	0.328	0.333
價位:0.525	0.5	0.512
音量:1.0	0.081	0.15
無:0.622	0.644	0.633
----avg. macro f1:0.4215550392755408-----------


QA-B CATE  
0.59234	0.68293	0.63442
0.60582	0.65854	0.63108
0.62117	0.64206	0.63144
0.6328	0.62821	0.6305
0.64496	0.6203	0.63239
0.6478	0.61107	0.6289
0.65352	0.60053	0.6259
0.66244	0.58603	0.6219
0.69032	0.54515	0.60921
功能:0.7	0.814	0.752

品質:0.405	0.559	0.47

配件:0.5	0.74	0.597

售後:0.528	0.734	0.614

外觀:0.391	0.486	0.433

價位:0.429	0.343	0.381

音量:0.694	0.829	0.756

無:0.671	0.667	0.669
----avg. f1:0.5840626999534889-----------  



NLI-B CATE  
(0.5471421592574499, 0.7382992748846408, 0.6285072951739618, None)  
(0.5929203539823009, 0.6624917600527357, 0.6257783312577833, None)  
(0.6156330749354005, 0.6282135794330916, 0.6218597063621534, None)  
(0.6302864938608458, 0.6090969017798286, 0.6195105598390881, None)  
(0.6396848137535817, 0.5886618325642716, 0.6131136285616203, None)  
(0.6469248291571754, 0.5616348055372445, 0.6012702893436839, None)  
(0.6636661211129297, 0.5346077785102176, 0.5921869295363272, None)  
(0.685981308411215, 0.4838497033618985, 0.5674526478546579, None)  
(0.7284533648170012, 0.4067237969676994, 0.5219966159052454, None)  


NLI-M CATE
threshold:0.1	0.53935	0.85861	0.66252
threshold:0.2	0.61089	0.78095	0.68553
threshold:0.3	0.64913	0.74139	0.6922
threshold:0.4	0.67749	0.7033	0.69015
threshold:0.5	0.70367	0.65934	0.68079
threshold:0.6	0.72917	0.61538	0.66746
threshold:0.7	0.762	0.56996	0.65214
threshold:0.8	0.8016	0.51502	0.62712
threshold:0.9	0.85255	0.42784	0.56976
功能:0.731	0.905	0.809
品質:0.477	0.629	0.543
配件:0.511	0.6	0.552
售後:0.548	0.92	0.687
外觀:0.5	0.531	0.515
價位:0.63	0.468	0.537
音量:0.71	0.595	0.647
無:0.721	0.731	0.726
----avg. macro f1:0.6268860290191695-----------


QA-M CATE
threshold:0.1	0.53996	0.85128	0.66079
threshold:0.2	0.60398	0.77875	0.68032
threshold:0.3	0.63734	0.73773	0.68387
threshold:0.4	0.66643	0.68791	0.677
threshold:0.5	0.69345	0.65128	0.6717
threshold:0.6	0.7342	0.62125	0.67302
threshold:0.7	0.75814	0.56264	0.64592
threshold:0.8	0.79677	0.50549	0.61856
threshold:0.9	0.85952	0.41685	0.56142
功能:0.719	0.887	0.794
品質:0.475	0.614	0.535
配件:0.457	0.6	0.519
售後:0.597	0.8	0.684
外觀:0.534	0.484	0.508
價位:0.66	0.565	0.609
音量:0.71	0.595	0.647
無:0.687	0.747	0.716
----avg. macro f1:0.6264922851780835-----------

# Evaluate of Sentiment

## 1. 只適用於剛剛預測出的sentiment

此部分是在第一部分實驗時，sentiment任務中 binary output / multiple output 的實驗比較

由於兩種方式的輸出略有不同，因此要evaluate方式會有些微差異

In [50]:
#Multiple
from sklearn import metrics
from sklearn.metrics import f1_score, accuracy_score, classification_report, precision_recall_fscore_support

# sent_predict = [sentiment[np.argmax(i)] for i in sent_pred]
# sent_ans = [sentiment[np.argmax(i)] for i in test_sentiment]
if '-B' in file_name:
    for i in range(int(len(test_sentiment)/3)):
        sent_predict.append(sentiments[np.argmax([sent_pred[i*3][0], sent_pred[i*3+1][0], sent_pred[i*3+2][0]])])
        sent_ans.append(sentiments[np.argmax([test_sentiment[i*3], test_sentiment[i*3+1], test_sentiment[i*3+2]])])
else:
    sent_predict = sent_pred_total
    sent_ans = [sentiment[np.argmax(i)] for i in test_sentiment]
    
print('sentiment f1 score(macro):', f1_score(sent_ans, sent_predict, average='macro'))
print('sentiment f1 score(micro):', f1_score(sent_ans, sent_predict, average='micro'))
print('sentiment f1 score(weight):', f1_score(sent_ans, sent_predict, average='weighted'))
print(classification_report(sent_ans, sent_predict))
predict, answer = [], []
for i in range(8):
    predict.append([])
    answer.append([])
for d in range(len(sent_predict)):
    flag = True
    for c in range(len(category)):
        if category[c] in BERT_test[d]['target'] and flag:
            answer[c].append(sent_ans[d])
            predict[c].append(sent_predict[d])
            flag=False
    if flag:
        answer[7].append(sent_ans[d])
        predict[7].append(sent_predict[d])            
            
for c in range(len(category)):
    print(category[c], len(answer[c]))
    print('sentiment f1 score(micro):', f1_score(answer[c], predict[c], average='micro'))
    print('sentiment f1 score(macro):', f1_score(answer[c], predict[c], average='macro'))
    print(classification_report(answer[c], predict[c]))


sentiment accuracy: 0.7481012658227848
sentiment f1 score(macro): 0.6937461105584766
sentiment f1 score(micro): 0.7481012658227848
sentiment f1 score(weight): 0.741133191601366
              precision    recall  f1-score   support

          中立       0.78      0.88      0.83      3940
          正向       0.72      0.59      0.65      2205
          負向       0.65      0.57      0.61       965

    accuracy                           0.75      7110
   macro avg       0.72      0.68      0.69      7110
weighted avg       0.74      0.75      0.74      7110



IndexError: list index out of range

In [63]:
#Binary
from sklearn import metrics
from sklearn.metrics import f1_score, accuracy_score, classification_report, precision_recall_fscore_support
sentiments = ['正向', '負向', '中立']
sent_predict = []
sent_ans = []
    
print('sentiment f1 score(macro):', f1_score(sent_ans, sent_predict, average='macro'))
print('sentiment f1 score(micro):', f1_score(sent_ans, sent_predict, average='micro'))
print('sentiment f1 score(weight):', f1_score(sent_ans, sent_predict, average='weighted'))
print(classification_report(sent_ans, sent_predict))

predict, answer = [], []
for i in range(8):
    predict.append([])
    answer.append([])
for c in range(len(category)):
    for d in range(len(sent_predict)):
        if category[c] in BERT_test[d]['target']:
            answer[c].append(sent_ans[d])
            predict[c].append(sent_predict[d])
        flag=True
        for cc in category:
            if cc in  BERT_test[d]['target']:
                flag=False
        if flag:
            answer[7].append(sent_predict[d])
            predict[7].append(sent_ans[d])
            
for c in range(len(category)):
    print(category[c])
    print('sentiment f1 score(micro):', f1_score(answer[c], predict[c], average='micro'))
    print('sentiment f1 score(macro):', f1_score(answer[c], predict[c], average='macro'))
    print(classification_report(answer[c], predict[c]))


sentiment accuracy: 0.7032348804500703
sentiment f1 score(macro): 0.6452078116106083
sentiment f1 score(micro): 0.7032348804500703
sentiment f1 score(weight): 0.6984181962127978
              precision    recall  f1-score   support

          中立       0.77      0.83      0.80       788
          正向       0.63      0.56      0.59       441
          負向       0.57      0.53      0.55       193

    accuracy                           0.70      1422
   macro avg       0.66      0.64      0.65      1422
weighted avg       0.70      0.70      0.70      1422

功能
sentiment f1 score(micro): 0.7045908183632734
sentiment f1 score(macro): 0.6536575347536633
              precision    recall  f1-score   support

          中立       0.83      0.76      0.79       293
          正向       0.57      0.65      0.61       147
          負向       0.54      0.59      0.56        61

    accuracy                           0.70       501
   macro avg       0.65      0.66      0.65       501
weighted avg       0

## 2. 讀取之前訓練好的模型

此部分是屬於第二部分(transfer learning)的evaluate

我有將每個模型跑出的五次實驗數據存下來 放在 predict_result中

所以這部分可以直接跑evaluate 不用再跑模型

In [None]:
#這邊看你要用的預測結果是剛剛跑出來的 還是之前已經存好的

#之前存好的
predicts = np.load('./predict_result/baseline_single.npy')
#剛剛跑出來的
# predicts = pred_total


In [52]:
import numpy as np
from sklearn import metrics
from sklearn.metrics import f1_score, accuracy_score, classification_report, precision_recall_fscore_support
mamicro = []
micros = []
micro_dict = {_:[] for _ in category}
for i in range(5):
    micro = []
    preds, anss = [], []
    for j in category:
        pred, ans = [], []
        for k in range(1422):
            if BERT_test[k]['target'].split('-')[1]==j:
                pred.append(predicts[i*1422+k])
                ans.append(BERT_test[k]['sentiment'])
#         print('sentiment f1 score(macro):', f1_score(ans, pred, average='macro'))
#         print('sentiment f1 score(micro):', f1_score(ans, pred, average='micro'))
        preds+=pred
        anss+=ans
        micro.append(f1_score(ans, pred, average='micro'))
        micro_dict[j].append(f1_score(ans, pred, average='micro'))
    mamicro.append(np.mean(micro))
    micros.append(f1_score(anss, preds, average='micro'))
print('macro')
print(np.mean(mamicro))
print('micro')
print(np.mean(micros))

print('每個aspect category 在每次實驗中的sentiment f1:')
for i in micro_dict.keys():
    print(i)
    for j in micro_dict[i]:
        print(j)
    print()

macro
0.6863360066977348
micro
0.7364275668073136
