In [8]:
import pandas as pd
import numpy as np
import ast
import os
import re
import openpyxl

from mvp.process import *
from mvp.eval_utils import *

# Specify the location of the result files

In [10]:
# Error-analysis export.
# For every (task, dataset) pair and every `train_zero_{i}` split, this cell:
#   1. loads the pickled result object (index 0 is passed on as predictions,
#      index 1 as gold labels),
#   2. prints precision / recall / F1 from compute_f1_scores,
#   3. builds a per-quad comparison table with error_type,
#   4. attaches the source sentence by sent_id and writes the table to .xlsx.

# Method whose outputs are analysed; it selects the output sub-directory.
method = 'mvp'

task_datasets = [['asqp','rest15'],['asqp','rest16'],['acos','rest16'],['acos','laptop16']]

# Last path component of the output directory for each supported method.
output_subdir_by_method = {
    'mvp': 'top_5_post_data1.0',
    'dlo': 'top_5_post_data1.0',
    'paraphrase': 'post_data1.0',
}
# Fail early on an unsupported method instead of hitting a NameError on
# `output_dir` (or silently reusing a stale value left in the kernel).
if method not in output_subdir_by_method:
    raise ValueError(
        f"unknown method {method!r}; expected one of {sorted(output_subdir_by_method)}")
output_subdir = output_subdir_by_method[method]

# Loop-invariant: resolve the home directory once.
home = os.path.expanduser('~/')

for task_dataset in task_datasets:
    task, dataset = task_dataset[0], task_dataset[1]

    for i in range(0, 11):  # processes train_zero_0 .. train_zero_10 (11 files)

        source_file = f'train_zero_{i}'
        output_file = f'result_{task}_{dataset}_{source_file}_path1_beam1'
        save_file = f'{task}_{dataset}_{source_file}'

        data_dir = os.path.join(home, f'ABSA/data/{task}/{dataset}')
        output_dir = os.path.join(home, f'ABSA/outputs/{method}/{task}/{dataset}/{output_subdir}')

        data_path = os.path.join(data_dir, f'{source_file}.txt')
        output_path = os.path.join(output_dir,f'{output_file}.pickle')
        save_path = os.path.join(output_dir,f'{save_file}.xlsx')

        print('data_path: ',data_path)
        print('output_path: ',output_path)
        print('save_path: ',save_path)
        print()
        # NOTE: unpickling can execute arbitrary code; only load result files
        # produced by your own runs.
        with open(output_path, 'rb') as f:
            loaded_object = pd.read_pickle(f)

        # Predicted quads; every column except the join key gets a '_p' suffix.
        predict = extract_quad(loaded_object[0],'pred')
        predict_df = trans_tuple(predict)
        columns_to_rename = {col: col + '_p' for col in predict_df.columns if col != 'sent_id'}
        predict_df.rename(columns=columns_to_rename, inplace=True)

        # Gold quads; every column except the join key gets a '_t' suffix.
        target = extract_quad(loaded_object[1],'gold')
        target_df = trans_tuple(target)
        columns_to_rename = {col: col + '_t' for col in target_df.columns if col != 'sent_id'}
        target_df.rename(columns=columns_to_rename, inplace=True)

        scores = compute_f1_scores(predict, target, verbose=True)
        exp_results = "{} ==>> precision: {:.2f} recall: {:.2f} F1 = {:.2f}".format(
            output_file, scores['precision'], scores['recall'], scores['f1'])
        print(exp_results)
        print()

        df = error_type(predict_df,target_df)

        # Label each row by which side (gold / prediction) has a quad.
        has_target = df['quad_ord_t'].notna()
        has_predict = df['quad_ord_p'].notna()
        df['match'] = np.select(
            [has_target & has_predict, ~has_target & has_predict, has_target & ~has_predict],
            ['match', 'over prediction', 'missing quad'],
            default='other',
        )

        # Split 'score' position-wise into four columns.
        # NOTE(review): presumably one flag per quad element (aspect, category,
        # opinion, sentiment) -- confirm against error_type.
        df['A_err'] = df['score'].str[0]
        df['C_err'] = df['score'].str[1]
        df['O_err'] = df['score'].str[2]
        df['S_err'] = df['score'].str[3]

        # Attach the source sentence; sent_id is 1-based in input order.
        inputs, _ = split_read(data_path)
        s_df = pd.DataFrame({'sent_id': list(range(1, len(inputs) + 1)),'sentence': inputs})
        result = pd.merge(df, s_df, on = 'sent_id', how='left')

        result.to_excel(save_path)

data_path:  /home/elicer/ABSA/data/asqp/rest15/train_zero_0.txt
output_path:  /home/elicer/ABSA/outputs/mvp/asqp/rest15/top_5_post_data1.0/result_asqp_rest15_train_zero_0_path1_beam1.pickle
save_path:  /home/elicer/ABSA/outputs/mvp/asqp/rest15/top_5_post_data1.0/asqp_rest15_train_zero_0.xlsx

number of gold spans: 6363, predicted spans: 6198, hit: 887
result_asqp_rest15_train_zero_0_path1_beam1 ==>> precision: 14.31 recall: 13.94 F1 = 14.12

init: target= 6363  predict= 6198
ACOS: 884  + target 5479  =  6363   :: predict= 5319
AOS_gold: 1763  + target 3716  =  5479   :: predict= 3574
COS_gold: 116  + target 3600  =  3716   :: predict= 3465
ACS_gold: 463  + target 3137  =  3600   :: predict= 3015
ACO_gold: 65  + target 3072  =  3137   :: predict= 2950
AO_gold: 80  + target 2992  =  3072   :: predict= 2870
AC_gold: 65  + target 2927  =  2992   :: predict= 2806
AS_gold: 935  + target 1992  =  2927   :: predict= 1927
CO_gold: 2  + target 1990  =  1992   :: predict= 1925
CS_gold: 93  + targ