In [27]:
import json

import pandas
import rich

In [28]:
# Load the "graham_et_al" collection from the moral-foundations JSON file.
# The context manager closes the file handle as soon as parsing finishes
# (a bare open() call would leave that to the garbage collector). Binary mode
# lets json.load detect the UTF-8/16/32 encoding itself rather than relying
# on the platform's default text encoding.
with open("../../data/moral_foundations/collections.json", "rb") as collections_file:
    data = json.load(collections_file)["graham_et_al"]
rich.print(data)

In [42]:
# Flatten the two-level mapping in `data` (outer key -> inner key -> values)
# into a flat list of (country, ideology, values) triples, keeping the
# iteration order of the underlying dicts.
records = [
    (nation, leaning, scores)
    for nation in data
    for leaning, scores in data[nation].items()
]
rich.print(records)

In [48]:
# One row per record: the third element of each triple supplies the row's
# values, and the first two elements become the ("country", "ideology")
# MultiIndex label for that row.
df = pandas.DataFrame(
    [record[2] for record in records],
    index=pandas.MultiIndex.from_tuples(
        [record[:2] for record in records],
        names=["country", "ideology"],
    ),
)
df

Unnamed: 0_level_0,Unnamed: 1_level_0,authority,purity,ingroup,harm,fairness
country,ideology,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
anonymous,liberal,2.2,2.2,2.5,4.25,4.35
anonymous,moderate,2.8,3.0,3.0,4.0,3.95
anonymous,conservative,3.1,3.4,3.12,3.5,3.65
us,liberal,1.9,1.2,2.05,3.6,3.75
us,moderate,2.65,2.2,2.65,3.4,3.45
us,conservative,3.5,3.1,3.4,3.0,3.1
korean,liberal,2.25,2.4,2.3,3.35,3.5
korean,moderate,2.55,2.65,2.65,3.25,3.35
korean,conservative,2.9,2.75,2.8,3.35,3.1


In [79]:
import itertools

import numpy as np


# Mean absolute error (MAE) between two rows.
def mae(row1, row2):
    """Return the mean of the element-wise absolute differences of two rows.

    Args:
        row1: First sequence of numbers (anything ``np.subtract`` accepts).
        row2: Second sequence of numbers, compatible in shape with ``row1``.

    Returns:
        The mean absolute difference ``mean(|row1 - row2|)``.
    """
    difference = np.subtract(row1, row2)
    return np.mean(np.abs(difference))


# Every unordered pair of row labels from df's index (each pair appears once).
index_combinations = [*itertools.combinations(df.index, 2)]

# Score each pair of rows with mae().
# NOTE: the values are mean absolute errors; the "mse"/"MSE" names are kept
# because the cells below refer to them.
mse_results = dict(
    ((label_a, label_b), mae(df.loc[label_a], df.loc[label_b]))
    for label_a, label_b in index_combinations
)

# One row per pair, with the score in a single column.
mse_df = pandas.DataFrame.from_dict(
    mse_results,
    orient="index",
    columns=["MSE"],
)

mse_df

Unnamed: 0,MSE
"((anonymous, liberal), (anonymous, moderate))",0.51
"((anonymous, liberal), (anonymous, conservative))",0.834
"((anonymous, liberal), (us, liberal))",0.6
"((anonymous, liberal), (us, moderate))",0.47
"((anonymous, liberal), (us, conservative))",1.12
"((anonymous, liberal), (korean, liberal))",0.44
"((anonymous, liberal), (korean, moderate))",0.59
"((anonymous, liberal), (korean, conservative))",0.74
"((anonymous, moderate), (anonymous, conservative))",0.324
"((anonymous, moderate), (us, liberal))",0.85


In [80]:
# Show the pairs ordered from the smallest score to the largest.
mse_df.sort_values(by="MSE", ascending=True)

Unnamed: 0,MSE
"((us, moderate), (korean, moderate))",0.16
"((korean, moderate), (korean, conservative))",0.19
"((us, moderate), (korean, liberal))",0.21
"((korean, liberal), (korean, moderate))",0.23
"((us, moderate), (korean, conservative))",0.27
"((anonymous, moderate), (anonymous, conservative))",0.324
"((anonymous, conservative), (korean, conservative))",0.374
"((korean, liberal), (korean, conservative))",0.38
"((us, conservative), (korean, conservative))",0.38
"((anonymous, conservative), (us, conservative))",0.406


In [81]:
# Column-wise extremes of the pairwise scores, displayed as a (min, max) tuple.
(mse_df.min(axis=0), mse_df.max(axis=0))

(MSE    0.16
 dtype: float64,
 MSE    1.22
 dtype: float64)

In [83]:
# Square pairwise table: rows and columns both carry df's index.
# NOTE: the values come from mse_results, i.e. the scores returned by mae();
# the "mse" naming is kept for consistency with the surrounding cells.
mse_matrix = pandas.DataFrame(index=df.index, columns=df.index)

# mse_results stores each pair once, so write every score into both the
# (row, column) cell and its mirror image to make the table symmetric.
for (idx1, idx2), mse_value in mse_results.items():
    for row_label, col_label in ((idx1, idx2), (idx2, idx1)):
        mse_matrix.loc[row_label, col_label] = mse_value

# The frame was created without data, so cast it to float once it is filled.
# Cells that were never assigned (the diagonal) remain NaN.
mse_matrix = mse_matrix.astype(float)
mse_matrix

Unnamed: 0_level_0,country,anonymous,anonymous,anonymous,us,us,us,korean,korean,korean
Unnamed: 0_level_1,ideology,liberal,moderate,conservative,liberal,moderate,conservative,liberal,moderate,conservative
country,ideology,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
anonymous,liberal,,0.51,0.834,0.6,0.47,1.12,0.44,0.59,0.74
anonymous,moderate,0.51,,0.324,0.85,0.48,0.61,0.59,0.46,0.41
anonymous,conservative,0.834,0.324,,0.934,0.484,0.406,0.594,0.464,0.374
us,liberal,0.6,0.85,0.934,,0.57,1.22,0.46,0.69,0.84
us,moderate,0.47,0.48,0.484,0.57,,0.65,0.21,0.16,0.27
us,conservative,1.12,0.61,0.406,1.22,0.65,,0.76,0.53,0.38
korean,liberal,0.44,0.59,0.594,0.46,0.21,0.76,,0.23,0.38
korean,moderate,0.59,0.46,0.464,0.69,0.16,0.53,0.23,,0.19
korean,conservative,0.74,0.41,0.374,0.84,0.27,0.38,0.38,0.19,
