# Cross-trait correlation analysis 🔬

By default, we look at the agreement between target annotations (representing either model or human annotations) and personality annotations. In this tutorial, we show another way to analyse the traits: *how do they correlate with each other?*

In [3]:
import pandas as pd
import feedback_forensics as ff
import pathlib

# Load a results set from disk (here: the Chatbot Arena results)
data_path = pathlib.Path("../../data/output/results_sets/feedback-forensics-results-paper")
dataset_name = "chatbot_arena.json"
dataset = ff.DatasetHandler()
dataset.add_data_from_path(data_path / dataset_name)
df = dataset.first_handler.df

annotator_metadata = dataset.get_available_annotators()
metrics = dataset.get_annotator_metrics()

# Rank all annotators by their strength metric (highest first), then keep
# the five highest and the five lowest; the bottom five are listed starting
# from the very lowest-ranked annotator.
strength_metrics = metrics["chatbot_arena"]["metrics"]["strength"]
annotators = list(strength_metrics.keys())
top_annotators = sorted(annotators, key=strength_metrics.__getitem__, reverse=True)
top5_annotators = top_annotators[:5]
bottom5_annotators = top_annotators[::-1][:5]

def get_annotator_key(in_row_name: str) -> "str | None":
    """Resolve an annotator name to its key in ``annotator_metadata``.

    Each metadata entry's ``"annotator_in_row_name"`` is compared against
    ``in_row_name``. An exact match always wins. If there is none, the first
    entry (in mapping order) whose name is contained in ``in_row_name`` is
    used. Entries with an empty or missing name are skipped, because an empty
    string is a substring of every string and would match any query.

    Args:
        in_row_name: Annotator name to look up.

    Returns:
        The matching key of the module-level ``annotator_metadata`` mapping,
        or ``None`` when no entry matches.
    """
    first_substring_match = None
    for annotator_key, metadata in annotator_metadata.items():
        candidate = metadata["annotator_in_row_name"]
        if not candidate:
            # Skip empty/None names: "" in <any str> is always True.
            continue
        if candidate == in_row_name:
            return annotator_key
        # Remember only the first substring hit, but keep scanning in case a
        # later entry matches exactly (e.g. one trait name containing another).
        if first_substring_match is None and candidate in in_row_name:
            first_substring_match = annotator_key
    return first_substring_match

# Group the selected annotators by category. Every entry stores the
# annotator's name together with the key resolved by get_annotator_key.
annotators = {}
for category, annotator_names in (("top5", top5_annotators), ("bottom5", bottom5_annotators)):
    annotators[category] = {
        annotator_name: {"key": get_annotator_key(annotator_name), "name": annotator_name}
        for annotator_name in annotator_names
    }

# Attach each annotator's column from df to its entry and, in parallel,
# collect all columns in one flat mapping keyed by annotator name.
full_set_of_annotators = {}
for category, annotator_subset in annotators.items():
    for annotator_name, annotator_entry in annotator_subset.items():
        annotator_data = df[annotator_entry["key"]]
        annotator_entry["data"] = annotator_data
        full_set_of_annotators[annotator_name] = annotator_data

# DataFrame underlying the correlation analysis: one categorical column per
# selected annotator, holding that annotator's full set of annotations.
corr_df = pd.DataFrame(full_set_of_annotators, dtype="category")

📜  | INFO | AnnotatedPairs format version: 2.0
📜  | INFO | Created 20000 annotations for 55 model annotators with 55 reference models in 0.26 seconds
📜  | INFO | Loaded data from path: ../../data/output/results_sets/feedback-forensics-results-paper/chatbot_arena.json


In [4]:
import sklearn.metrics

# Build the pairwise "correlation" matrix for all selected annotators.
# The reported value is Cohen's kappa (chance-corrected agreement) between
# the two annotators' label columns.

# Convert every column to a string array once up front, instead of
# re-converting both columns for each pairwise comparison.
labels_by_annotator = {
    annotator: corr_df[annotator].to_numpy(dtype="str") for annotator in corr_df.columns
}

correlation_matrix = pd.DataFrame(index=corr_df.columns, columns=corr_df.columns)
for i, annotator_1 in enumerate(corr_df.columns):
    # Cohen's kappa is symmetric, so only the upper triangle (including the
    # diagonal) is computed and each result is mirrored into the lower one.
    for annotator_2 in corr_df.columns[i:]:
        val = sklearn.metrics.cohen_kappa_score(
            labels_by_annotator[annotator_1],
            labels_by_annotator[annotator_2],
        )

        # Cells hold strings formatted to two decimals (display-oriented).
        formatted = f"{val:.2f}"
        correlation_matrix.loc[annotator_1, annotator_2] = formatted
        correlation_matrix.loc[annotator_2, annotator_1] = formatted

correlation_matrix
# Optional export of the table:
#correlation_matrix.to_markdown("correlation_matrix.md")

Unnamed: 0,is more verbose,has more structured formatting,makes more confident statements,is more factually correct,more strictly follows the requested output format,is more concise,has a more avoidant tone,refuses to answer the question,ends with a follow-up question,is more polite
is more verbose,1.0,0.44,0.13,0.07,0.07,-0.99,-0.02,-0.02,0.01,0.12
has more structured formatting,0.44,1.0,0.24,0.18,0.12,-0.44,-0.03,-0.02,-0.0,0.21
makes more confident statements,0.13,0.24,1.0,0.55,0.12,-0.13,0.03,0.02,0.01,0.17
is more factually correct,0.07,0.18,0.55,1.0,0.1,-0.07,0.06,0.05,-0.01,0.11
more strictly follows the requested output format,0.07,0.12,0.12,0.1,1.0,-0.07,0.0,0.0,-0.06,0.0
is more concise,-0.99,-0.44,-0.13,-0.07,-0.07,1.0,0.02,0.02,-0.01,-0.11
has a more avoidant tone,-0.02,-0.03,0.03,0.06,0.0,0.02,1.0,0.8,0.06,0.15
refuses to answer the question,-0.02,-0.02,0.02,0.05,0.0,0.02,0.8,1.0,0.05,0.13
ends with a follow-up question,0.01,-0.0,0.01,-0.01,-0.06,-0.01,0.06,0.05,1.0,0.24
is more polite,0.12,0.21,0.17,0.11,0.0,-0.11,0.15,0.13,0.24,1.0
