In [1]:
wd = '/mnt/openfact/users/msawinski/factue-task2'
import sys, os
os.chdir(wd)

In [2]:
import pandas as pd
from pathlib import Path
import os
root = Path("data/llm_output/persuasion")
# Example path template
files = root.rglob("*/*.parquet")

# Read all files and add file path as a column
df_list = []
for f in files:
    df_part = pd.read_parquet(f)
    df_part['source_file'] = f  # add the file path
    df_list.append(df_part)

# Combine into one DataFrame
df = pd.concat(df_list, ignore_index=True)

In [3]:
def normalize_binary(x):
    return 1 if str(x).strip().lower() in {'1', 'true'} else 0

In [4]:
df['source_file'].value_counts().sort_index()
df['gold'] = df['gold'].apply(normalize_binary)
df['pred'] = df['pred'].apply(normalize_binary)
df['split'] = df.source_file.astype(str).str.split('/',expand=True)[7]

In [8]:
df.technique_id.value_counts()

technique_id
Appeal_to_Values              781
Questioning_the_Reputation    566
Loaded_Language               545
Name: count, dtype: int64

In [5]:
from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score
# Define a function to compute metrics for a group
# Assume df has columns: technique_id, text_lang, gold, pred
rows = []

def is_valid_label(x):
    return x in (0, 1)

rows = []

for technique_id in df['technique_id'].unique():
    for text_lang in df.loc[df['technique_id'] == technique_id, 'text_lang'].unique():
        for split in df.loc[(df['technique_id'] == technique_id) & (df['text_lang'] == text_lang), 'split'].unique():
            group = df[
                (df['technique_id'] == technique_id) &
                (df['text_lang'] == text_lang) &
                (df['split'] == split)
            ]
            if len(group) > 0:
                row = {
                    'technique_id': technique_id,
                    'text_lang': text_lang,
                    'split': split,
                    'accuracy': accuracy_score(group['gold'], group['pred']),
                    'precision': precision_score(group['gold'], group['pred'], zero_division=0),
                    'recall': recall_score(group['gold'], group['pred'], zero_division=0),
                    'f1': f1_score(group['gold'], group['pred'], zero_division=0),
                    'support': len(group)
                }
                rows.append(row)

results = pd.DataFrame(rows)
results[['accuracy', 'precision','recall', 'f1']] = results[['accuracy', 'precision','recall', 'f1']].round(2)
results

Unnamed: 0,technique_id,text_lang,split,accuracy,precision,recall,f1,support
0,Questioning_the_Reputation,RU,train,0.57,0.04,1.0,0.08,160
1,Questioning_the_Reputation,PL,train,0.82,0.53,0.74,0.62,140
2,Questioning_the_Reputation,PL,trail,0.8,0.0,0.0,0.0,20
3,Questioning_the_Reputation,SI,train,0.79,0.33,0.67,0.44,48
4,Questioning_the_Reputation,BG,train,0.69,0.36,0.7,0.47,163
5,Questioning_the_Reputation,BG,trail,0.8,0.12,1.0,0.22,35
6,Loaded_Language,BG,train,0.37,0.17,0.83,0.28,163
7,Loaded_Language,BG,trail,0.5,0.17,1.0,0.29,20
8,Loaded_Language,PL,train,0.52,0.34,0.96,0.51,189
9,Loaded_Language,PL,trail,0.43,0.43,1.0,0.6,7


In [6]:
import seaborn as sns
import matplotlib.pyplot as plt

In [7]:
fig, axes = plt.subplots(len(metrics), 1, figsize=(10, 18), sharex=True)

for i, metric in enumerate(metrics):
    ax = axes[i]
    sns.barplot(
        data=results,
        y='technique_id',
        x=metric,
        hue='text_lang',
        ax=ax,
        palette='muted',
        errorbar=None  # fallback for compatibility
    )
    ax.set_title(metric.capitalize())
    ax.set_xlim(0, 1.05)
    ax.set_ylabel('Technique ID')
    ax.set_xlabel('Score')
    ax.legend(title='Text Language', loc='lower right')

plt.tight_layout()
plt.show()

NameError: name 'metrics' is not defined

In [None]:
# # Create a melt of metrics for faceted visualization
# metrics_df = results.melt(
#     id_vars=['technique_id', 'text_lang'],
#     value_vars=['accuracy', 'precision', 'recall', 'f1'],
#     var_name='metric',
#     value_name='score'
# )

# # Plot
# plt.figure(figsize=(12, 6))
# sns.barplot(data=metrics_df, x='technique_id', y='score', hue='text_lang', palette='muted', errorbar=None)
# plt.title('Metrics by Technique and Language')
# plt.ylim(0, 1.05)
# plt.ylabel('Score')
# plt.xlabel('Technique ID')
# plt.legend(title='Text Language')
# plt.grid(axis='y', linestyle='--', alpha=0.7)
# plt.tight_layout()
# plt.show()

# total

In [None]:
# Metrics
acc = accuracy_score(df['gold'], df['pred'])
precision = precision_score(df['gold'], df['pred'], zero_division=0)
recall = recall_score(df['gold'], df['pred'], average='binary')
f1 = f1_score(df['gold'], df['pred'], average='binary')

print(f"Accuracy: {acc:.3f}")
print(f"Precision: {precision:.3f}")
print(f"Recall: {recall:.3f}")
print(f"F1-score: {f1:.3f}")