# Analyse MagicBrush Ratings from VieScore 

In [60]:
import pandas as pd
import re
from ast import literal_eval
from scipy.stats import kendalltau
import numpy as np
import pingouin as pg

Convert the .tsv files to .csv. Keep only the MagicBrush ratings and ignore the other datasets.

In [61]:
def extract_numbers(uid):
    """Pull the two underscore-prefixed integers out of a uid string.

    Searches ``uid`` for the first occurrence of ``_<digits>_<digits>``.

    Args:
        uid: String to search.

    Returns:
        A tuple ``(first, second)`` holding both numbers as ints, or
        ``(None, None)`` when the pattern does not occur in ``uid``.
    """
    found = re.search(r'_(\d+)_(\d+)', uid)
    if found is None:
        return None, None
    first, second = found.groups()
    return int(first), int(second)

In [62]:
def get_rater_df(rater: int):
    """Load one rater's MagicBrush ratings as a tidy DataFrame.

    Reads the tab-separated file ``Text-Guided_IE_rater{rater}.tsv`` and uses
    only its ``uid`` and ``MagicBrush`` columns.

    Args:
        rater: Rater number used to build the file name.

    Returns:
        DataFrame with the columns ``id``, ``turn``, ``SC`` and ``PQ`` (in
        that order), one row per row of the input file. ``id`` and ``turn``
        are the two integers parsed from ``uid`` by ``extract_numbers``;
        ``SC`` and ``PQ`` are the first and second element of the literal
        stored in the ``MagicBrush`` cell.
    """
    raw = pd.read_csv(f"Text-Guided_IE_rater{rater}.tsv", sep="\t")

    # Build the result as a fresh frame instead of adding columns to a column
    # selection of `raw`; that avoids SettingWithCopyWarning and keeps the
    # integer `rater` argument from being rebound to a DataFrame.
    result = pd.DataFrame(
        [extract_numbers(uid) for uid in raw["uid"]],
        index=raw.index,
        columns=["id", "turn"],
    )

    # Each MagicBrush cell is a string holding a Python literal; parse it once
    # and pick the two scores by position.
    scores = raw["MagicBrush"].map(literal_eval)
    return result.assign(
        SC=scores.map(lambda pair: pair[0]),
        PQ=scores.map(lambda pair: pair[1]),
    )

In [63]:
# Load the rating tables of raters 1, 2 and 3 (in that order).
r1, r2, r3 = (get_rater_df(rater_id) for rater_id in (1, 2, 3))

In [64]:
# Write each rater's table to its own CSV file, without the index column.
for csv_name, rater_frame in (("rater1.csv", r1), ("rater2.csv", r2), ("rater3.csv", r3)):
    rater_frame.to_csv(csv_name, index=False)

Compute Kendall's W (coefficient of concordance) for inter-rater agreement.

In [66]:
# Join the three raters on the (turn, id) key. Rater 1 and 2 columns get the
# suffixes _r1/_r2 from the first merge; rater 3's score columns are renamed
# inline so that `r3` itself keeps its original SC/PQ column names. Rebinding
# `r3` here would make a later re-run of the CSV export write renamed headers.
merged_df = (
    r1.merge(r2, on=['turn', 'id'], suffixes=('_r1', '_r2'))
      .merge(r3.rename(columns={'SC': 'SC_r3', 'PQ': 'PQ_r3'}), on=['turn', 'id'])
)

In [67]:
# SC scores of all three raters, one column per rater. The table is transposed
# before being passed to pg.friedman, so raters become rows.
sc_columns = ['SC_r1', 'SC_r2', 'SC_r3']
sc_ratings = merged_df[sc_columns]
kendall_w_sc = pg.friedman(data=sc_ratings.T).round(3)

print(f"Kendall's W for SC: {kendall_w_sc['W'].values[0]}")

Kendall's W for SC: 0.774


In [68]:
# PQ scores of all three raters, one column per rater. The table is transposed
# before being passed to pg.friedman, so raters become rows.
# Uses its own PQ-named variables and label: the original cell reused the SC
# names and printed "SC" for a value computed on the PQ columns.
pq_ratings = merged_df[['PQ_r1', 'PQ_r2', 'PQ_r3']]
kendall_w_pq = pg.friedman(data=pq_ratings.T).round(3)

print(f"Kendall's W for PQ: {kendall_w_pq['W'].values[0]}")

Kendall's W for SC: 0.66


Compute Mean and Standard Deviation for both *Semantic Consistency (SC)* and *Perceptual Quality (PQ)*.

In [72]:
annotator_files = ['rater1.csv', 'rater2.csv', 'rater3.csv']

# Pooled scores across all annotators.
all_sc_values = []
all_pq_values = []

for annotator_no, csv_path in enumerate(annotator_files, start=1):
    ratings = pd.read_csv(csv_path)
    all_sc_values += ratings['SC'].tolist()
    all_pq_values += ratings['PQ'].tolist()

    # Per-annotator spread and average for each rating dimension.
    for metric in ('SC', 'PQ'):
        scores = ratings[metric]
        print(f"annotator {annotator_no} - {metric}:")
        print(f"  std dev: {round(scores.std(), 3)}")
        print(f"  mean: {round(scores.mean(), 3)}")

annotator 1 - SC:
  std dev: 0.404
  mean: 0.506
annotator 1 - PQ:
  std dev: 0.347
  mean: 0.712
annotator 2 - SC:
  std dev: 0.409
  mean: 0.508
annotator 2 - PQ:
  std dev: 0.391
  mean: 0.575
annotator 3 - SC:
  std dev: 0.401
  mean: 0.52
annotator 3 - PQ:
  std dev: 0.353
  mean: 0.656


In [73]:
# Mean and standard deviation over the scores pooled from all annotators.
sc_pooled = pd.Series(all_sc_values)
pq_pooled = pd.Series(all_pq_values)

mean_sc_all, std_sc_all = sc_pooled.mean(), sc_pooled.std()
mean_pq_all, std_pq_all = pq_pooled.mean(), pq_pooled.std()

print(f"Global mean for SC: {round(mean_sc_all, 3)}")
print(f"Global std dev for SC: {round(std_sc_all, 3)}")
print(f"Global mean for PQ: {round(mean_pq_all, 3)}")
print(f"Global std dev for PQ: {round(std_pq_all, 3)}")

Global mean for SC: 0.511
Global std dev for SC: 0.404
Global mean for PQ: 0.648
Global std dev for PQ: 0.368
