### Low-resource setting: Fine-tuning data sizes are [50, 100, 200, 300, 400]
* For the test set containing company names

In [1]:
import pandas as pd
import os, re, copy
from glob import glob
from reports import get_average_classification_report
pd.options.display.float_format = '{:.2f}'.format
from functools import reduce
pd.options.display.float_format = '{:.2f}'.format
import seaborn as sns
from tqdm import tqdm

In [2]:
# Fine-tuning set sizes evaluated in the low-resource setting.
target_trained_with_nums = [50, 100, 200, 300, 400]
# Result directories for the "testCompanyName=y" runs with seed 4.
# The Windows root is written as a raw string so that the backslashes are taken
# literally instead of being parsed as (invalid) escape sequences such as '\D'.
dirnames = sorted(glob(os.path.join(r'C:\DATA\ComBERT', 'results_spamFiltering', 'testCompanyName=y_*_seed4')))
print(len(dirnames))
dirnames

16


['C:\\DATA\\ComBERT\\results_spamFiltering\\testCompanyName=y_Araci_CM_epoch7_seed4',
 'C:\\DATA\\ComBERT\\results_spamFiltering\\testCompanyName=y_Araci_NoPT_epoch7_seed4',
 'C:\\DATA\\ComBERT\\results_spamFiltering\\testCompanyName=y_Araci_SM_epoch7_seed4',
 'C:\\DATA\\ComBERT\\results_spamFiltering\\testCompanyName=y_Araci_WWM_epoch7_seed4',
 'C:\\DATA\\ComBERT\\results_spamFiltering\\testCompanyName=y_BERT_CM_epoch7_seed4',
 'C:\\DATA\\ComBERT\\results_spamFiltering\\testCompanyName=y_BERT_NoPT_epoch7_seed4',
 'C:\\DATA\\ComBERT\\results_spamFiltering\\testCompanyName=y_BERT_SM_epoch7_seed4',
 'C:\\DATA\\ComBERT\\results_spamFiltering\\testCompanyName=y_BERT_WWM_epoch7_seed4',
 'C:\\DATA\\ComBERT\\results_spamFiltering\\testCompanyName=y_SECBERT_CM_epoch7_seed4',
 'C:\\DATA\\ComBERT\\results_spamFiltering\\testCompanyName=y_SECBERT_NoPT_epoch7_seed4',
 'C:\\DATA\\ComBERT\\results_spamFiltering\\testCompanyName=y_SECBERT_SM_epoch7_seed4',
 'C:\\DATA\\ComBERT\\results_spamFiltering\\

### Precision, Recall, F1-score
* All values are macro average values.

* Download the table below in Excel format, then add a "range" row that indicates the minimum -- maximum values.
    * The term “range” in the table indicates the minimum and maximum values of the average scores obtained through a model.
    * Based on the minimum values indicated in the “range” row, SEC-BERT shows robust performance regardless of the post-training methods, recording the highest value among the minimum performance scores of each model.

In [18]:
# Build one table of 'macro avg' precision / recall / F1-score values (scaled by 100):
# one column per '<model>_<method>' pair, one row per '<measure>_<fine-tuning size>',
# plus a '<measure>_Average' row holding the column-wise mean over all sizes.
measure_names = ('precision', 'recall', 'f1-score')
dfs1, dfs2, dfs3 = [], [], []
dfs_by_measure = dict(zip(measure_names, (dfs1, dfs2, dfs3)))

for dirname in tqdm(dirnames):
    # The directory basename is expected to consist of exactly five '_'-separated fields.
    test_company, model_name, method_name, epoch_num, seed_num = os.path.basename(dirname).split('_')
    column_name = '{}_{}'.format(model_name, method_name)
    # Same configuration with the last field wildcarded, so that every matching
    # sibling directory (presumably one per seed -- TODO confirm) is picked up.
    run_dir_pattern = os.path.join(os.path.dirname(dirname),
                                   '{}_*'.format('_'.join([test_company, model_name, method_name, epoch_num])))

    records_by_measure = {measure: [] for measure in measure_names}
    for target_trained_with_num in target_trained_with_nums:
        filepaths = glob(os.path.join(run_dir_pattern,
                                      'classification_report_train_{}.csv'.format(target_trained_with_num)))
        if not filepaths:
            # Fail with a clear message instead of handing an empty list to the averaging helper.
            raise FileNotFoundError('No classification reports found for {} with fine-tuning size {}'.format(
                column_name, target_trained_with_num))

        averaged_classification_report_df = get_average_classification_report(
            [pd.read_csv(filepath) for filepath in filepaths])
        item = averaged_classification_report_df.loc['macro avg']
        for measure in measure_names:
            records_by_measure[measure].append(
                ('{}_{:03d}'.format(measure, target_trained_with_num), 100*item[measure]))

    for measure in measure_names:
        dfs_by_measure[measure].append(
            pd.DataFrame(records_by_measure[measure], columns=['FinetuningSize_Measure', column_name]))

# Merge the per-model frames of each measure side by side and append the average row.
results_by_measure = {}
for measure in measure_names:
    merged = reduce(lambda df1, df2: pd.merge(df1, df2, on='FinetuningSize_Measure'), dfs_by_measure[measure])
    merged = merged.set_index('FinetuningSize_Measure')
    merged.loc['{}_Average'.format(measure)] = merged.mean()
    results_by_measure[measure] = merged
result1, result2, result3 = (results_by_measure[measure] for measure in measure_names)

# Stack the three measures and order the rows by their label.
result = pd.concat([result1, result2, result3]).sort_index()

filepath = os.path.join('results', 'LowResource_TestSetContainingCompanyNames_PrecisionRecallF1score.csv')
os.makedirs(os.path.dirname(filepath), exist_ok=True)
# Keep the index in the export: it holds the '<measure>_<size>' row labels,
# without which the rows of the CSV cannot be told apart.
result.to_csv(filepath)
print('Created {}'.format(filepath))

result

100%|██████████████████████████████████████████████████████████████████████████████████| 16/16 [00:02<00:00,  6.76it/s]


Created results\LowResource_TestSetContainingCompanyNames_PrecisionRecallF1score.csv


Unnamed: 0_level_0,Araci_CM,Araci_NoPT,Araci_SM,Araci_WWM,BERT_CM,BERT_NoPT,BERT_SM,BERT_WWM,SECBERT_CM,SECBERT_NoPT,SECBERT_SM,SECBERT_WWM,Yang_CM,Yang_NoPT,Yang_SM,Yang_WWM
FinetuningSize_Measure,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
f1-score_050,49.48,28.84,52.55,35.48,47.65,49.15,42.11,49.86,53.65,50.84,53.45,45.57,55.05,42.59,51.43,43.65
f1-score_100,59.77,44.39,65.34,40.29,63.09,62.54,56.34,51.5,62.35,64.93,68.54,60.38,67.57,62.33,66.55,59.98
f1-score_200,76.83,50.56,79.38,59.83,73.02,75.58,76.98,58.94,78.94,76.07,75.23,68.33,77.28,74.16,79.03,68.04
f1-score_300,79.25,61.43,79.32,67.5,79.12,78.51,78.37,65.91,80.52,77.19,76.84,75.89,80.12,79.07,80.81,74.43
f1-score_400,76.7,64.67,77.93,70.64,78.5,76.6,79.26,68.6,80.34,77.43,78.82,76.87,79.69,79.43,79.02,75.22
f1-score_Average,68.4,49.98,70.91,54.75,68.28,68.48,66.61,58.96,71.16,69.29,70.57,65.41,71.94,67.52,71.37,64.26
precision_050,58.22,30.29,69.26,43.14,54.03,57.79,46.99,54.18,62.53,60.62,65.69,51.65,63.57,47.05,60.94,55.96
precision_100,66.19,45.02,75.53,49.42,69.05,65.59,67.76,54.61,74.1,68.58,73.75,62.46,70.67,63.69,70.72,65.04
precision_200,79.22,51.53,81.59,61.54,75.13,77.2,78.62,61.46,80.76,77.76,77.07,73.37,79.25,75.16,81.36,69.94
precision_300,82.52,64.32,81.72,75.0,82.45,80.94,81.43,73.01,82.62,78.58,78.68,77.69,82.61,81.85,83.23,75.74
