In [1]:
import pandas as pd
import os
from sklearn.metrics import classification_report
from tqdm import tqdm

import warnings
warnings.filterwarnings("ignore")

In [2]:
import sys
sys.path.append('../src')

from models.classification_methods import get_classification_report

In [3]:
test_results_path = '../reports/test_results/'

list_df_t = os.listdir(test_results_path)
list_df_t.sort()
list_df_t

['DummyClassifier_bo_top_mentioned_timelines_Texts_test_results.csv',
 'DummyClassifier_bo_users_Stance_test_results.csv',
 'DummyClassifier_bo_users_Timeline_test_results.csv',
 'DummyClassifier_cl_top_mentioned_timelines_Texts_test_results.csv',
 'DummyClassifier_cl_users_Stance_test_results.csv',
 'DummyClassifier_cl_users_Timeline_test_results.csv',
 'DummyClassifier_co_top_mentioned_timelines_Texts_test_results.csv',
 'DummyClassifier_co_users_Stance_test_results.csv',
 'DummyClassifier_co_users_Timeline_test_results.csv',
 'DummyClassifier_gl_top_mentioned_timelines_Texts_test_results.csv',
 'DummyClassifier_gl_users_Stance_test_results.csv',
 'DummyClassifier_gl_users_Timeline_test_results.csv',
 'DummyClassifier_ig_top_mentioned_timelines_Texts_test_results.csv',
 'DummyClassifier_ig_users_Stance_test_results.csv',
 'DummyClassifier_ig_users_Timeline_test_results.csv',
 'DummyClassifier_lu_top_mentioned_timelines_Texts_test_results.csv',
 'DummyClassifier_lu_users_Stance_test_r

In [4]:
# Target list
target_list = [
    'ig',
    'bo', 
    'cl', 
    'co', 
    'gl', 
    'lu'
]

dict_cp = {
    'cl':'Hydrox.',
    'lu':'Lula',
    'co':'Sinovac',
    'ig':'Church',
    'gl':'Globo TV',
    'bo':'Bolsonaro',
}

names = list(dict_cp.values())
names

['Hydrox.', 'Lula', 'Sinovac', 'Church', 'Globo TV', 'Bolsonaro']

## Create complete table

In [5]:
# (vectorizer,estimator, path_sring) 
results_tuples_stance = [
    # Stance
    ("Stance", "-" ,"dummy", "DummyClassifier_{target}_users_Stance_test_results.csv"),
    ("Stance", "tf-idf" ,"xgb", "XGBClassifier_TfidfVectorizer_{target}_users_Stance_test_results.csv"),
    ("Stance", "bertabaporu-base" ,"xgb", "bertimbau_xgb_{target}_users_emb_Stance_test_results.csv"),
    ("Stance", "-" ,"bertabaporu-base", "bert_classifier_pablocosta_bertabaporu_base_uncased_{target}_Stance_test_results.csv"),
    ("Stance", "-",  "llama3:7b", "llama3_{target}_Stance_prompt2_Stance_test_results.csv"),
    
    # Texts
    ("Texts", "-" ,"dummy", "DummyClassifier_{target}_top_mentioned_timelines_Texts_test_results.csv"),
    ("Texts", "tf-idf" ,"xgb", "XGBClassifier_TfidfVectorizer_{target}_top_mentioned_timelines_Texts_test_results.csv"),
    ("Texts", "bertabaporu-base" ,"xgb", "bertimbau_xgb_{target}_top_mentioned_timelines_emb_Texts_test_results.csv"),
    
    # Timeline
    ("Timeline", "-" ,"dummy", "DummyClassifier_{target}_users_Timeline_test_results.csv"),
    ("Timeline", "tf-idf" ,"xgb", "XGBClassifier_TfidfVectorizer_{target}_users_Timeline_test_results.csv"),
    ("Timeline", "bertabaporu-base" ,"xgb", "bertimbau_xgb_{target}_users_emb_Timeline_test_results.csv")
    
]

list_results = []
for text_col, vectorizer, estimator, path_results in results_tuples_stance:
    
    list_cr = []
    
    for target in target_list:
        
        
        path = test_results_path + path_results.format(target = target)
        df_results = pd.read_csv(path)
        df_results_or = df_results.copy()
        
        # get classification report df
        df_classification_report = get_classification_report(df_results.test, df_results.pred, cr_args = {})
        
        # create multindex
        column_indexes = [(metric,dict_cp[target]) for metric in df_classification_report.columns]
        multi_index_cols = pd.MultiIndex.from_tuples(column_indexes, names=['metric', 'target'])
        rows_indexes = [(text_col, vectorizer, estimator, cl) for cl in df_classification_report.index]
        multi_index_rows = pd.MultiIndex.from_tuples(rows_indexes, names=['text_col','vectorizer', 'estimator', 'class'])
        df_classification_report.columns = multi_index_cols
        df_classification_report.index = multi_index_rows
        
        # print(text_col, vectorizer, estimator,target)
        # print(path)
        # display(df_classification_report)
        
        list_cr.append(df_classification_report)
        
    df_results = pd.concat(list_cr, axis = 1)
    
    list_results.append(df_results)
    
df_results_final = pd.concat(list_results)

In [6]:
df_results_final

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,metric,precision,recall,f1-score,support,precision,recall,f1-score,support,precision,recall,...,f1-score,support,precision,recall,f1-score,support,precision,recall,f1-score,support
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,target,Church,Church,Church,Church,Bolsonaro,Bolsonaro,Bolsonaro,Bolsonaro,Hydrox.,Hydrox.,...,Sinovac,Sinovac,Globo TV,Globo TV,Globo TV,Globo TV,Lula,Lula,Lula,Lula
text_col,vectorizer,estimator,class,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2,Unnamed: 23_level_2,Unnamed: 24_level_2
Stance,-,dummy,against,0.565943,1.0,0.722814,339.0,0.861702,1.0,0.925714,162.0,0.503484,1.0,...,0.0,354.0,0.0,0.0,0.0,167.0,0.525735,1.0,0.689157,143.0
Stance,-,dummy,accuracy,0.565943,0.565943,0.565943,0.565943,0.861702,0.861702,0.861702,0.861702,0.503484,0.503484,...,0.542636,0.542636,0.593674,0.593674,0.593674,0.593674,0.525735,0.525735,0.525735,0.525735
Stance,-,dummy,weighted avg,0.320292,0.565943,0.409072,599.0,0.742531,0.861702,0.79769,188.0,0.253496,0.503484,...,0.381754,774.0,0.352449,0.593674,0.44231,411.0,0.276398,0.525735,0.362314,272.0
Stance,-,dummy,macro avg,0.282972,0.5,0.361407,599.0,0.430851,0.5,0.462857,188.0,0.251742,0.5,...,0.351759,774.0,0.296837,0.5,0.372519,411.0,0.262868,0.5,0.344578,272.0
Stance,-,dummy,for,0.0,0.0,0.0,260.0,0.0,0.0,0.0,26.0,0.0,0.0,...,0.703518,420.0,0.593674,1.0,0.745038,244.0,0.0,0.0,0.0,129.0
Stance,tf-idf,xgb,against,0.732394,0.766962,0.74928,339.0,0.881356,0.962963,0.920354,162.0,0.716088,0.785467,...,0.719403,354.0,0.653846,0.508982,0.572391,167.0,0.689922,0.622378,0.654412,143.0
Stance,tf-idf,xgb,accuracy,0.709516,0.709516,0.709516,0.709516,0.856383,0.856383,0.856383,0.856383,0.735192,0.735192,...,0.757106,0.757106,0.690998,0.690998,0.690998,0.690998,0.654412,0.654412,0.654412,0.654412
Stance,tf-idf,xgb,weighted avg,0.708016,0.709516,0.708254,599.0,0.822329,0.856383,0.830449,188.0,0.737273,0.735192,...,0.755474,774.0,0.686106,0.690998,0.682639,411.0,0.657888,0.654412,0.654412,272.0
Stance,tf-idf,xgb,macro avg,0.704312,0.700789,0.702021,599.0,0.667951,0.577635,0.595312,188.0,0.737422,0.734839,...,0.75264,774.0,0.681016,0.662278,0.665243,411.0,0.65615,0.65615,0.654412,272.0
Stance,tf-idf,xgb,for,0.67623,0.634615,0.654762,260.0,0.454545,0.192308,0.27027,26.0,0.758755,0.684211,...,0.785877,420.0,0.708185,0.815574,0.758095,244.0,0.622378,0.689922,0.654412,129.0


In [7]:
df_results_final.to_excel("../reports/table_complete_results.xlsx")

## Create table f1 macro

In [8]:
mask_f1 = [True if  "f1-score" in col else False for col in df_results_final.columns]
mask_macro = [True if  "macro avg" in col else False for col in df_results_final.index]

f1_macro_df = df_results_final.loc[mask_macro,mask_f1]
f1_macro_df[('f1-score','overall')] = f1_macro_df.mean(axis=1)

f1_macro_df

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,metric,f1-score,f1-score,f1-score,f1-score,f1-score,f1-score,f1-score
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,target,Church,Bolsonaro,Hydrox.,Sinovac,Globo TV,Lula,overall
text_col,vectorizer,estimator,class,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
Stance,-,dummy,macro avg,0.361407,0.462857,0.334878,0.351759,0.372519,0.344578,0.371333
Stance,tf-idf,xgb,macro avg,0.702021,0.595312,0.734366,0.75264,0.665243,0.654412,0.683999
Stance,bertabaporu-base,xgb,macro avg,0.853801,0.625289,0.830948,0.808036,0.781858,0.766993,0.777821
Stance,-,bertabaporu-base,macro avg,0.866621,0.739125,0.844778,0.840499,0.862638,0.796338,0.825
Stance,-,llama3:7b,macro avg,0.729458,0.462857,0.638889,0.578779,0.770678,0.699381,0.646674
Texts,-,dummy,macro avg,0.361407,0.462857,0.334878,0.351759,0.372519,0.344578,0.371333
Texts,tf-idf,xgb,macro avg,0.596115,0.50122,0.603727,0.670805,0.54562,0.579427,0.582819
Texts,bertabaporu-base,xgb,macro avg,0.594754,0.495578,0.609524,0.660542,0.534455,0.569597,0.577408
Timeline,-,dummy,macro avg,0.361407,0.462857,0.334878,0.351759,0.372519,0.344578,0.371333
Timeline,tf-idf,xgb,macro avg,0.699122,0.72822,0.898944,0.863813,0.598358,0.75271,0.756861


### Table for docs

In [9]:
f1_report = f1_macro_df.copy()
f1_report

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,metric,f1-score,f1-score,f1-score,f1-score,f1-score,f1-score,f1-score
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,target,Church,Bolsonaro,Hydrox.,Sinovac,Globo TV,Lula,overall
text_col,vectorizer,estimator,class,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
Stance,-,dummy,macro avg,0.361407,0.462857,0.334878,0.351759,0.372519,0.344578,0.371333
Stance,tf-idf,xgb,macro avg,0.702021,0.595312,0.734366,0.75264,0.665243,0.654412,0.683999
Stance,bertabaporu-base,xgb,macro avg,0.853801,0.625289,0.830948,0.808036,0.781858,0.766993,0.777821
Stance,-,bertabaporu-base,macro avg,0.866621,0.739125,0.844778,0.840499,0.862638,0.796338,0.825
Stance,-,llama3:7b,macro avg,0.729458,0.462857,0.638889,0.578779,0.770678,0.699381,0.646674
Texts,-,dummy,macro avg,0.361407,0.462857,0.334878,0.351759,0.372519,0.344578,0.371333
Texts,tf-idf,xgb,macro avg,0.596115,0.50122,0.603727,0.670805,0.54562,0.579427,0.582819
Texts,bertabaporu-base,xgb,macro avg,0.594754,0.495578,0.609524,0.660542,0.534455,0.569597,0.577408
Timeline,-,dummy,macro avg,0.361407,0.462857,0.334878,0.351759,0.372519,0.344578,0.371333
Timeline,tf-idf,xgb,macro avg,0.699122,0.72822,0.898944,0.863813,0.598358,0.75271,0.756861


In [10]:
f1_report.reset_index(drop=False, inplace=True)
f1_report

metric,text_col,vectorizer,estimator,class,f1-score,f1-score,f1-score,f1-score,f1-score,f1-score,f1-score
target,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Church,Bolsonaro,Hydrox.,Sinovac,Globo TV,Lula,overall
0,Stance,-,dummy,macro avg,0.361407,0.462857,0.334878,0.351759,0.372519,0.344578,0.371333
1,Stance,tf-idf,xgb,macro avg,0.702021,0.595312,0.734366,0.75264,0.665243,0.654412,0.683999
2,Stance,bertabaporu-base,xgb,macro avg,0.853801,0.625289,0.830948,0.808036,0.781858,0.766993,0.777821
3,Stance,-,bertabaporu-base,macro avg,0.866621,0.739125,0.844778,0.840499,0.862638,0.796338,0.825
4,Stance,-,llama3:7b,macro avg,0.729458,0.462857,0.638889,0.578779,0.770678,0.699381,0.646674
5,Texts,-,dummy,macro avg,0.361407,0.462857,0.334878,0.351759,0.372519,0.344578,0.371333
6,Texts,tf-idf,xgb,macro avg,0.596115,0.50122,0.603727,0.670805,0.54562,0.579427,0.582819
7,Texts,bertabaporu-base,xgb,macro avg,0.594754,0.495578,0.609524,0.660542,0.534455,0.569597,0.577408
8,Timeline,-,dummy,macro avg,0.361407,0.462857,0.334878,0.351759,0.372519,0.344578,0.371333
9,Timeline,tf-idf,xgb,macro avg,0.699122,0.72822,0.898944,0.863813,0.598358,0.75271,0.756861


In [11]:
f1_report.columns

MultiIndex([(  'text_col',          ''),
            ('vectorizer',          ''),
            ( 'estimator',          ''),
            (     'class',          ''),
            (  'f1-score',    'Church'),
            (  'f1-score', 'Bolsonaro'),
            (  'f1-score',   'Hydrox.'),
            (  'f1-score',   'Sinovac'),
            (  'f1-score',  'Globo TV'),
            (  'f1-score',      'Lula'),
            (  'f1-score',   'overall')],
           names=['metric', 'target'])

In [12]:
new_columns = [col[0] if col[1] == '' else col[1] for col in f1_report.columns]
new_columns

['text_col',
 'vectorizer',
 'estimator',
 'class',
 'Church',
 'Bolsonaro',
 'Hydrox.',
 'Sinovac',
 'Globo TV',
 'Lula',
 'overall']

In [13]:
f1_report.columns = new_columns
f1_report

Unnamed: 0,text_col,vectorizer,estimator,class,Church,Bolsonaro,Hydrox.,Sinovac,Globo TV,Lula,overall
0,Stance,-,dummy,macro avg,0.361407,0.462857,0.334878,0.351759,0.372519,0.344578,0.371333
1,Stance,tf-idf,xgb,macro avg,0.702021,0.595312,0.734366,0.75264,0.665243,0.654412,0.683999
2,Stance,bertabaporu-base,xgb,macro avg,0.853801,0.625289,0.830948,0.808036,0.781858,0.766993,0.777821
3,Stance,-,bertabaporu-base,macro avg,0.866621,0.739125,0.844778,0.840499,0.862638,0.796338,0.825
4,Stance,-,llama3:7b,macro avg,0.729458,0.462857,0.638889,0.578779,0.770678,0.699381,0.646674
5,Texts,-,dummy,macro avg,0.361407,0.462857,0.334878,0.351759,0.372519,0.344578,0.371333
6,Texts,tf-idf,xgb,macro avg,0.596115,0.50122,0.603727,0.670805,0.54562,0.579427,0.582819
7,Texts,bertabaporu-base,xgb,macro avg,0.594754,0.495578,0.609524,0.660542,0.534455,0.569597,0.577408
8,Timeline,-,dummy,macro avg,0.361407,0.462857,0.334878,0.351759,0.372519,0.344578,0.371333
9,Timeline,tf-idf,xgb,macro avg,0.699122,0.72822,0.898944,0.863813,0.598358,0.75271,0.756861


In [14]:
f1_report.drop(['class'],axis = 1, inplace=True)
f1_report

Unnamed: 0,text_col,vectorizer,estimator,Church,Bolsonaro,Hydrox.,Sinovac,Globo TV,Lula,overall
0,Stance,-,dummy,0.361407,0.462857,0.334878,0.351759,0.372519,0.344578,0.371333
1,Stance,tf-idf,xgb,0.702021,0.595312,0.734366,0.75264,0.665243,0.654412,0.683999
2,Stance,bertabaporu-base,xgb,0.853801,0.625289,0.830948,0.808036,0.781858,0.766993,0.777821
3,Stance,-,bertabaporu-base,0.866621,0.739125,0.844778,0.840499,0.862638,0.796338,0.825
4,Stance,-,llama3:7b,0.729458,0.462857,0.638889,0.578779,0.770678,0.699381,0.646674
5,Texts,-,dummy,0.361407,0.462857,0.334878,0.351759,0.372519,0.344578,0.371333
6,Texts,tf-idf,xgb,0.596115,0.50122,0.603727,0.670805,0.54562,0.579427,0.582819
7,Texts,bertabaporu-base,xgb,0.594754,0.495578,0.609524,0.660542,0.534455,0.569597,0.577408
8,Timeline,-,dummy,0.361407,0.462857,0.334878,0.351759,0.372519,0.344578,0.371333
9,Timeline,tf-idf,xgb,0.699122,0.72822,0.898944,0.863813,0.598358,0.75271,0.756861


In [15]:
f1_report.insert(
    1, 
    "classifier", 
    f1_report.apply(
        lambda x: f"{x['vectorizer']} + {x['estimator']}" if x['vectorizer'] != '-' else x['estimator'],
        axis = 1
        ).to_list()

)
f1_report.drop(['estimator', 'vectorizer'],axis =1, inplace = True)
f1_report

Unnamed: 0,text_col,classifier,Church,Bolsonaro,Hydrox.,Sinovac,Globo TV,Lula,overall
0,Stance,dummy,0.361407,0.462857,0.334878,0.351759,0.372519,0.344578,0.371333
1,Stance,tf-idf + xgb,0.702021,0.595312,0.734366,0.75264,0.665243,0.654412,0.683999
2,Stance,bertabaporu-base + xgb,0.853801,0.625289,0.830948,0.808036,0.781858,0.766993,0.777821
3,Stance,bertabaporu-base,0.866621,0.739125,0.844778,0.840499,0.862638,0.796338,0.825
4,Stance,llama3:7b,0.729458,0.462857,0.638889,0.578779,0.770678,0.699381,0.646674
5,Texts,dummy,0.361407,0.462857,0.334878,0.351759,0.372519,0.344578,0.371333
6,Texts,tf-idf + xgb,0.596115,0.50122,0.603727,0.670805,0.54562,0.579427,0.582819
7,Texts,bertabaporu-base + xgb,0.594754,0.495578,0.609524,0.660542,0.534455,0.569597,0.577408
8,Timeline,dummy,0.361407,0.462857,0.334878,0.351759,0.372519,0.344578,0.371333
9,Timeline,tf-idf + xgb,0.699122,0.72822,0.898944,0.863813,0.598358,0.75271,0.756861


In [16]:
f1_report.rename({"text_col":"input"}, axis = 1, inplace=True)
f1_report

Unnamed: 0,input,classifier,Church,Bolsonaro,Hydrox.,Sinovac,Globo TV,Lula,overall
0,Stance,dummy,0.361407,0.462857,0.334878,0.351759,0.372519,0.344578,0.371333
1,Stance,tf-idf + xgb,0.702021,0.595312,0.734366,0.75264,0.665243,0.654412,0.683999
2,Stance,bertabaporu-base + xgb,0.853801,0.625289,0.830948,0.808036,0.781858,0.766993,0.777821
3,Stance,bertabaporu-base,0.866621,0.739125,0.844778,0.840499,0.862638,0.796338,0.825
4,Stance,llama3:7b,0.729458,0.462857,0.638889,0.578779,0.770678,0.699381,0.646674
5,Texts,dummy,0.361407,0.462857,0.334878,0.351759,0.372519,0.344578,0.371333
6,Texts,tf-idf + xgb,0.596115,0.50122,0.603727,0.670805,0.54562,0.579427,0.582819
7,Texts,bertabaporu-base + xgb,0.594754,0.495578,0.609524,0.660542,0.534455,0.569597,0.577408
8,Timeline,dummy,0.361407,0.462857,0.334878,0.351759,0.372519,0.344578,0.371333
9,Timeline,tf-idf + xgb,0.699122,0.72822,0.898944,0.863813,0.598358,0.75271,0.756861


In [17]:
#f1_report.set_index(['input'],inplace=True)
#f1_report.drop('input', axis = 1,inplace=True)

In [18]:
def generate_latex_with_multirow_and_bold(df):
    latex_code = ''
    latex_code += '\\begin{table}[H]'
    latex_code += "\\begin{tabular}{ll|rrrrrrr}\n\\toprule\n"
    latex_code += "input & classifier & Church & Bolsonaro & Hydrox. & Sinovac & Globo TV & Lula & overall \\\\ \n\\midrule\n"

    last_input = None
    multirow_count = 0

    for input_value in df['input'].unique():
        subset = df[df['input'] == input_value]
        max_overall_idx = subset['overall'].idxmax()

        for i, row in subset.iterrows():
            if row['input'] == last_input:
                latex_code += "& "
                multirow_count += 1
            else:
                if multirow_count > 0:
                    latex_code = latex_code.replace(f"multirow{{{multirow_count}}}", f"multirow{{{multirow_count + 1}}}", 1)
                if last_input is not None:
                    latex_code += "\\cmidrule(lr){1-9}\n"
                latex_code += f"\\multirow{{{1}}}{{*}}{{{row['input']}}} & "
                multirow_count = 1

            if i == max_overall_idx:
                row_data = [f"\\textbf{{{row[col]:.2f}}}" if col not in ['input', 'classifier'] else f"\\textbf{{{row[col]}}}" for col in df.columns[1:]]
                latex_code += " & ".join(row_data) + " \\\\ \n"
            else:
                latex_code += " & ".join([f"{row[col]:.2f}" if isinstance(row[col], float) else str(row[col]) for col in df.columns[1:]]) + " \\\\ \n"
            
            last_input = row['input']

    if multirow_count > 0:
        latex_code = latex_code.replace(f"multirow{{{multirow_count}}}", f"multirow{{{multirow_count + 1}}}", 1)

    latex_code += "\\cmidrule(lr){1-9}\n"
    latex_code += "\\bottomrule\n\\end{tabular}"
    latex_code += "\caption{F1 macro results}"
    latex_code += '\label{table:results_f1_macro}\n'
    latex_code += '\end{table}'

    return latex_code

In [19]:
f1_report.input = f1_report.input.map({
    "Stance": "S",
    "Timeline": 'UT',
    "Texts": 'UFT'
})

In [20]:
str_latex = generate_latex_with_multirow_and_bold(f1_report)

In [21]:
print(str_latex)

egin{table}[H]\begin{tabular}{ll|rrrrrrr}
\toprule
input & classifier & Church & Bolsonaro & Hydrox. & Sinovac & Globo TV & Lula & overall \\ 
\midrule
\multirow{1}{*}{S} & dummy & 0.36 & 0.46 & 0.33 & 0.35 & 0.37 & 0.34 & 0.37 \\ 
& tf-idf + xgb & 0.70 & 0.60 & 0.73 & 0.75 & 0.67 & 0.65 & 0.68 \\ 
& bertabaporu-base + xgb & 0.85 & 0.63 & 0.83 & 0.81 & 0.78 & 0.77 & 0.78 \\ 
& \textbf{bertabaporu-base} & \textbf{0.87} & \textbf{0.74} & \textbf{0.84} & \textbf{0.84} & \textbf{0.86} & \textbf{0.80} & \textbf{0.82} \\ 
& llama3:7b & 0.73 & 0.46 & 0.64 & 0.58 & 0.77 & 0.70 & 0.65 \\ 
\cmidrule(lr){1-9}
\multirow{1}{*}{UFT} & dummy & 0.36 & 0.46 & 0.33 & 0.35 & 0.37 & 0.34 & 0.37 \\ 
& \textbf{tf-idf + xgb} & \textbf{0.60} & \textbf{0.50} & \textbf{0.60} & \textbf{0.67} & \textbf{0.55} & \textbf{0.58} & \textbf{0.58} \\ 
& bertabaporu-base + xgb & 0.59 & 0.50 & 0.61 & 0.66 & 0.53 & 0.57 & 0.58 \\ 
\cmidrule(lr){1-9}
\multirow{1}{*}{UT} & dummy & 0.36 & 0.46 & 0.33 & 0.35 & 0.37 & 0.34 & 0.