In [1]:
import itertools
import json


import numpy
import pandas
import rich

In [12]:
# Read the survey file through a context manager so the handle is closed as
# soon as parsing finishes; the bare `open(...)` call never closed it.
# JSON text is UTF-8 by specification, so decode it explicitly instead of
# relying on the platform's locale default.
with open("../../../data/moral_foundations/surveys.json", encoding="utf-8") as surveys_file:
    # Only the "graham_et_al" entry of the parsed document is kept.
    data = json.load(surveys_file)["graham_et_al"]
rich.print(data)

In [13]:
# Flatten the two-level mapping in `data` into (outer key, inner key, value)
# triples, one per inner entry, in the mapping's own iteration order.
# presumably data is {country: {ideology: {foundation: score}}} -- confirm
# against surveys.json
records = list(
    itertools.chain.from_iterable(
        ((country, ideology, values) for ideology, values in by_ideology.items())
        for country, by_ideology in data.items()
    )
)

In [14]:
# One row per record: the first two fields of each record become the
# ("country", "ideology") row label, the third field supplies the row's data.
row_labels = pandas.MultiIndex.from_tuples(
    [record[:2] for record in records],
    names=["country", "ideology"],
)
df = pandas.DataFrame([record[2] for record in records], index=row_labels)
df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,authority,purity,ingroup,harm,fairness
country,ideology,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
anonymous,liberal,2.2,2.2,2.5,4.25,4.35
anonymous,moderate,2.8,3.0,3.0,4.0,3.95
anonymous,conservative,3.1,3.4,3.12,3.5,3.65
U.S.,liberal,1.9,1.2,2.05,3.6,3.75
U.S.,moderate,2.65,2.2,2.65,3.4,3.45


In [15]:
# Score every unordered pair of rows in df by the mean absolute difference of
# their columns, then sort ascending so the closest pairs come first.
# NOTE(review): the score is a mean *absolute* difference (abs, no squaring),
# not a mean squared error. The "MSE" label and the `mse_df` name are kept
# as-is because `mse_df` is used by a later cell -- consider renaming both.
pairwise_distance = {
    (first, second): (df.loc[first] - df.loc[second]).abs().mean()
    for first, second in itertools.combinations(df.index, 2)
}
mse_df = pandas.DataFrame.from_dict(
    pairwise_distance, orient="index", columns=["MSE"]
).sort_values("MSE")
mse_df.head()

Unnamed: 0,MSE
"((U.S., moderate), (Korean, moderate))",0.16
"((Korean, moderate), (Korean, conservative))",0.19
"((U.S., moderate), (Korean, liberal))",0.21
"((Korean, liberal), (Korean, moderate))",0.23
"((U.S., moderate), (Korean, conservative))",0.27


In [16]:
# Range of the pairwise scores: the column-wise minimum and maximum of mse_df,
# displayed together as a (min, max) tuple of Series.
mse_df.min(), mse_df.max()

(MSE    0.16
 dtype: float64,
 MSE    1.22
 dtype: float64)

In [17]:
# Symmetric matrix of pairwise row distances, labelled on both axes by df's
# index. Entry [a, b] is the mean absolute difference between rows a and b of
# df; the diagonal is NaN because a row is never compared with itself.
# NOTE(review): despite the `mse_` prefix this is a mean absolute difference,
# not a mean squared error; the name is kept so other cells keep working.
scores = df.to_numpy(dtype=float)

# Broadcasting (n, 1, k) against (1, n, k) produces every row-vs-row
# difference in one step, replacing the per-pair `.loc` reads and the
# scalar-by-scalar writes into an object-dtype frame. nanmean skips missing
# scores, matching the default behaviour of Series.mean.
pairwise = numpy.nanmean(
    numpy.abs(scores[:, None, :] - scores[None, :, :]), axis=-1
)
numpy.fill_diagonal(pairwise, numpy.nan)

mse_matrix = pandas.DataFrame(pairwise, index=df.index, columns=df.index)
mse_matrix

Unnamed: 0_level_0,country,anonymous,anonymous,anonymous,U.S.,U.S.,U.S.,Korean,Korean,Korean
Unnamed: 0_level_1,ideology,liberal,moderate,conservative,liberal,moderate,conservative,liberal,moderate,conservative
country,ideology,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
anonymous,liberal,,0.51,0.834,0.6,0.47,1.12,0.44,0.59,0.74
anonymous,moderate,0.51,,0.324,0.85,0.48,0.61,0.59,0.46,0.41
anonymous,conservative,0.834,0.324,,0.934,0.484,0.406,0.594,0.464,0.374
U.S.,liberal,0.6,0.85,0.934,,0.57,1.22,0.46,0.69,0.84
U.S.,moderate,0.47,0.48,0.484,0.57,,0.65,0.21,0.16,0.27
U.S.,conservative,1.12,0.61,0.406,1.22,0.65,,0.76,0.53,0.38
Korean,liberal,0.44,0.59,0.594,0.46,0.21,0.76,,0.23,0.38
Korean,moderate,0.59,0.46,0.464,0.69,0.16,0.53,0.23,,0.19
Korean,conservative,0.74,0.41,0.374,0.84,0.27,0.38,0.38,0.19,
