In [1]:
!pip install scikit-learn



In [2]:
import pandas as pd
from sklearn.metrics import cohen_kappa_score

In [3]:
#from google.colab import drive
#drive.mount('/content/drive')

In [11]:
# Location of the annotation workbook. The commented-out path below points into a
# mounted Google Drive (presumably used when running on Colab -- confirm); the
# active setting reads the file from the current working directory.
#folder_path = "/content/drive/MyDrive/Progetti Mike-Pier/Talkingraph/evaluation"
folder_path = "."
# Excel workbook read by the cells below, one sheet per model/annotator pair
# (e.g. 'llama-pier', 'llama-mike').
file_name = "talkingraph_eval_mike_pier.xlsx"

In [23]:
def safe_cohen_kappa(ann1, ann2, label):
    """Compute Cohen's kappa between two annotators, guarding degenerate inputs.

    Missing values are replaced with 0 and every value is cast to ``int``
    before the comparison.

    Args:
        ann1: Annotations of the first annotator (anything ``pd.Series`` accepts).
        ann2: Annotations of the second annotator, item-aligned with ``ann1``.
        label: Category name, used only as a prefix in the printed messages.

    Returns:
        The kappa score as a float. ``None`` when the two inputs have different
        lengths or when the computation raises. When both annotators used a
        single constant value (the case in which the chance-corrected formula
        degenerates), ``1.0`` if the two sequences are identical and ``0.0``
        if the constants differ.
    """
    try:
        # Fill NaN with 0 and convert to int
        ann1_filled = pd.Series(ann1).fillna(0).astype(int)
        ann2_filled = pd.Series(ann2).fillna(0).astype(int)

        # Both sequences must have the same length to be compared item by item
        if len(ann1_filled) != len(ann2_filled):
            print(f"[{label}] Lunghezze diverse: {len(ann1_filled)} vs {len(ann2_filled)}")
            return None

        # Degenerate case: neither annotator shows any variation
        if ann1_filled.nunique() <= 1 and ann2_filled.nunique() <= 1:
            print(f"[{label}] Variazione insufficiente: annotazioni troppo uniformi")
            # Being constant is not the same as agreeing: two annotators that are
            # constant at different values (all 0 vs all 1) never agree, so only
            # report perfect agreement when the sequences really are identical.
            # Compare positionally (via numpy) so differing indexes cannot interfere.
            identical = bool((ann1_filled.to_numpy() == ann2_filled.to_numpy()).all())
            return 1.0 if identical else 0.0

        return cohen_kappa_score(ann1_filled, ann2_filled)

    except Exception as e:
        # Best-effort by design: report the problem and let the caller see None
        print(f"[{label}] Errore nel calcolo di Cohen's Kappa: {e}")
        return None

## K di Cohen LLaMA

In [24]:
# Annotation columns compared between the two annotators.
annotation_columns = ['#INCORRECT', '#NOT_CHECKABLE', '#MISLEADING', '#OTHER']

# Read both annotators' LLaMA sheets in one pass over the workbook: passing a
# list of sheet names makes read_excel return a dict keyed by sheet name.
llama_sheets = pd.read_excel(
    f'{folder_path}/{file_name}',
    sheet_name=['llama-pier', 'llama-mike'],
)

# Pier's annotations: keep only the annotation columns, reset the index and
# keep the first 50 rows.
df_pier = llama_sheets['llama-pier'][annotation_columns].reset_index(drop=True).iloc[:50]

# Michael's annotations, prepared the same way.
df_michael = llama_sheets['llama-mike'][annotation_columns].reset_index(drop=True).iloc[:50]

# Extract the annotations per category. Empty cells are counted as 0 *before*
# the integer cast: astype(int) raises on NaN, which would abort the cell
# before safe_cohen_kappa gets the chance to fill the gaps itself.
incorrect_annotations_1 = df_pier['#INCORRECT'].fillna(0).astype(int)
incorrect_annotations_2 = df_michael['#INCORRECT'].fillna(0).astype(int)

not_checkable_annotations_1 = df_pier['#NOT_CHECKABLE'].fillna(0).astype(int)
not_checkable_annotations_2 = df_michael['#NOT_CHECKABLE'].fillna(0).astype(int)

misleading_annotations_1 = df_pier['#MISLEADING'].fillna(0).astype(int)
misleading_annotations_2 = df_michael['#MISLEADING'].fillna(0).astype(int)

other_annotations_1 = df_pier['#OTHER'].fillna(0).astype(int)
other_annotations_2 = df_michael['#OTHER'].fillna(0).astype(int)

# Cohen's kappa for each category (incorrect, not_checkable, misleading, other)
kappa_incorrect = safe_cohen_kappa(incorrect_annotations_1, incorrect_annotations_2, '#INCORRECT')
kappa_not_checkable = safe_cohen_kappa(not_checkable_annotations_1, not_checkable_annotations_2, '#NOT_CHECKABLE')
kappa_misleading = safe_cohen_kappa(misleading_annotations_1, misleading_annotations_2, '#MISLEADING')
kappa_other = safe_cohen_kappa(other_annotations_1, other_annotations_2, '#OTHER')

print(f"Cohen's kappa per LLaMA incorrect: {kappa_incorrect}")
print(f"Cohen's kappa per LLaMA not_checkable: {kappa_not_checkable}")
print(f"Cohen's kappa per LLaMA misleading: {kappa_misleading}")
print(f"Cohen's kappa per LLaMA other: {kappa_other}")

# Start the running sum of kappa scores; the following model cells add to it.
# NOTE: safe_cohen_kappa returns None on failure, in which case this sum raises
# a TypeError instead of silently producing a wrong total.
total = kappa_incorrect + kappa_not_checkable + kappa_misleading + kappa_other

[#NOT_CHECKABLE] Variazione insufficiente: annotazioni troppo uniformi
Cohen's kappa per LLaMA incorrect: 0.8407643312101911
Cohen's kappa per LLaMA not_checkable: 1.0
Cohen's kappa per LLaMA misleading: 1.0
Cohen's kappa per LLaMA other: 1.0


## K di Cohen mistral

In [25]:
# Annotation columns compared between the two annotators.
annotation_columns = ['#INCORRECT', '#NOT_CHECKABLE', '#MISLEADING', '#OTHER']

# Read both annotators' Mistral sheets in one pass over the workbook: passing a
# list of sheet names makes read_excel return a dict keyed by sheet name.
mistral_sheets = pd.read_excel(
    f'{folder_path}/{file_name}',
    sheet_name=['mistral-pier', 'mistral-mike'],
)

# Pier's annotations: keep only the annotation columns, reset the index and
# keep the first 50 rows.
df_pier = mistral_sheets['mistral-pier'][annotation_columns].reset_index(drop=True).iloc[:50]

# Michael's annotations, prepared the same way.
df_michael = mistral_sheets['mistral-mike'][annotation_columns].reset_index(drop=True).iloc[:50]

# Extract the annotations per category. Empty cells are counted as 0 *before*
# the integer cast: astype(int) raises on NaN, which would abort the cell
# before safe_cohen_kappa gets the chance to fill the gaps itself.
incorrect_annotations_1 = df_pier['#INCORRECT'].fillna(0).astype(int)
incorrect_annotations_2 = df_michael['#INCORRECT'].fillna(0).astype(int)

not_checkable_annotations_1 = df_pier['#NOT_CHECKABLE'].fillna(0).astype(int)
not_checkable_annotations_2 = df_michael['#NOT_CHECKABLE'].fillna(0).astype(int)

misleading_annotations_1 = df_pier['#MISLEADING'].fillna(0).astype(int)
misleading_annotations_2 = df_michael['#MISLEADING'].fillna(0).astype(int)

other_annotations_1 = df_pier['#OTHER'].fillna(0).astype(int)
other_annotations_2 = df_michael['#OTHER'].fillna(0).astype(int)

# Cohen's kappa for each category (incorrect, not_checkable, misleading, other)
kappa_incorrect = safe_cohen_kappa(incorrect_annotations_1, incorrect_annotations_2, '#INCORRECT')
kappa_not_checkable = safe_cohen_kappa(not_checkable_annotations_1, not_checkable_annotations_2, '#NOT_CHECKABLE')
kappa_misleading = safe_cohen_kappa(misleading_annotations_1, misleading_annotations_2, '#MISLEADING')
kappa_other = safe_cohen_kappa(other_annotations_1, other_annotations_2, '#OTHER')

# Labels name the model whose sheets were actually read in this cell.
print(f"Cohen's kappa per Mistral incorrect: {kappa_incorrect}")
print(f"Cohen's kappa per Mistral not_checkable: {kappa_not_checkable}")
print(f"Cohen's kappa per Mistral misleading: {kappa_misleading}")
print(f"Cohen's kappa per Mistral other: {kappa_other}")

# Add this model's scores to the running sum started by the LLaMA cell.
# NOTE: this is an in-place accumulation -- it needs `total` to exist already,
# and re-running this cell alone counts the Mistral scores twice. Re-run the
# notebook from the LLaMA cell to get a correct average.
total += kappa_incorrect + kappa_not_checkable + kappa_misleading + kappa_other

[#NOT_CHECKABLE] Variazione insufficiente: annotazioni troppo uniformi
[#OTHER] Variazione insufficiente: annotazioni troppo uniformi
Cohen's kappa per LLaMA + SGA incorrect: 1.0
Cohen's kappa per LLaMA + SGA not_checkable: 1.0
Cohen's kappa per LLaMA + SGA misleading: 1.0
Cohen's kappa per LLaMA + SGA other: 1.0


## K di Cohen qwen

In [26]:
# Annotation columns compared between the two annotators.
annotation_columns = ['#INCORRECT', '#NOT_CHECKABLE', '#MISLEADING', '#OTHER']

# Read both annotators' Qwen sheets in one pass over the workbook: passing a
# list of sheet names makes read_excel return a dict keyed by sheet name.
qwen_sheets = pd.read_excel(
    f'{folder_path}/{file_name}',
    sheet_name=['qwen-pier', 'qwen-mike'],
)

# Pier's annotations: keep only the annotation columns, reset the index and
# keep the first 50 rows.
df_pier = qwen_sheets['qwen-pier'][annotation_columns].reset_index(drop=True).iloc[:50]

# Michael's annotations, prepared the same way.
df_michael = qwen_sheets['qwen-mike'][annotation_columns].reset_index(drop=True).iloc[:50]

# Extract the annotations per category. Empty cells are counted as 0 *before*
# the integer cast for every column (not only '#OTHER'): astype(int) raises on
# NaN, which would abort the cell before safe_cohen_kappa can fill the gaps.
incorrect_annotations_1 = df_pier['#INCORRECT'].fillna(0).astype(int)
incorrect_annotations_2 = df_michael['#INCORRECT'].fillna(0).astype(int)

not_checkable_annotations_1 = df_pier['#NOT_CHECKABLE'].fillna(0).astype(int)
not_checkable_annotations_2 = df_michael['#NOT_CHECKABLE'].fillna(0).astype(int)

misleading_annotations_1 = df_pier['#MISLEADING'].fillna(0).astype(int)
misleading_annotations_2 = df_michael['#MISLEADING'].fillna(0).astype(int)

other_annotations_1 = df_pier['#OTHER'].fillna(0).astype(int)
other_annotations_2 = df_michael['#OTHER'].fillna(0).astype(int)

# Cohen's kappa for each category (incorrect, not_checkable, misleading, other)
kappa_incorrect = safe_cohen_kappa(incorrect_annotations_1, incorrect_annotations_2, '#INCORRECT')
kappa_not_checkable = safe_cohen_kappa(not_checkable_annotations_1, not_checkable_annotations_2, '#NOT_CHECKABLE')
kappa_misleading = safe_cohen_kappa(misleading_annotations_1, misleading_annotations_2, '#MISLEADING')
kappa_other = safe_cohen_kappa(other_annotations_1, other_annotations_2, '#OTHER')

# Labels name the model whose sheets were actually read in this cell.
print(f"Cohen's kappa per Qwen incorrect: {kappa_incorrect}")
print(f"Cohen's kappa per Qwen not_checkable: {kappa_not_checkable}")
print(f"Cohen's kappa per Qwen misleading: {kappa_misleading}")
print(f"Cohen's kappa per Qwen other: {kappa_other}")

# Add this model's scores to the running sum started by the LLaMA cell.
# NOTE: this is an in-place accumulation -- it needs `total` to exist already,
# and re-running this cell alone counts the Qwen scores twice. Re-run the
# notebook from the LLaMA cell to get a correct average.
total += kappa_incorrect + kappa_not_checkable + kappa_misleading + kappa_other

[#NOT_CHECKABLE] Variazione insufficiente: annotazioni troppo uniformi
[#MISLEADING] Variazione insufficiente: annotazioni troppo uniformi
Cohen's kappa per LLaMAntino incorrect: 0.8648648648648649
Cohen's kappa per LLaMAntino not_checkable: 1.0
Cohen's kappa per LLaMAntino misleading: 1.0
Cohen's kappa per LLaMAntino other: 0.8466257668711656


In [27]:
# Average kappa over every score accumulated in `total`:
# three model cells, each contributing four category scores.
n_kappa_scores = 12
mean_kappa = total / n_kappa_scores
print(mean_kappa)

0.9626879135788519
