In [14]:
import typing
import json
import glob

import numpy
import pandas 

import statsmodels
import statsmodels.stats
import statsmodels.stats.descriptivestats

import scipy
import scipy.spatial.distance


In [48]:
MODELS: typing.List[str] = [
    "llama3.1-8b",
    "llama3.3-70b",
    "mistral-7b",
    "mistral-large-123b",
    "qwen2.5-7b",
    "qwen2.5-72b"
]

In [80]:
raw: typing.Dict[str, pandas.DataFrame]= {
    **{
        model.replace("large-", ""): (
                pandas.concat(
                [
                    pandas.json_normalize(json.load(open(file))).assign(participant=n)
                    for n, file in enumerate(glob.glob(f"data/base/{model}/*.json", recursive=True))
                ],
            )
            .pivot(index="participant", columns="id", values="response")
            .apply(pandas.to_numeric)
        )
        for model in MODELS
    },
    "human": (
        pandas.read_csv("../../../data/humor_styles/survey.csv")
        .rename_axis(index="participant")
        .filter(like='Q', axis=1)
        .pipe(lambda _df: (
            _df
            .rename(columns={col: col[1:] for col in _df.columns})
        ))
        .replace(-1, None)
        .apply(pandas.to_numeric)
    )
}

raw.keys()

dict_keys(['llama3.1-8b', 'llama3.3-70b', 'mistral-7b', 'mistral-123b', 'qwen2.5-7b', 'qwen2.5-72b', 'human'])

In [53]:
for key, values in raw.items():
    print(key)
    print("=" * 32)
    print(values.to_csv(index=False))
    print("=" * 32)

llama3.1-8b
1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32
2.0,3.0,1.0,3.0,2.0,4.0,5.0,3.0,2.0,5.0,4.0,1.0,,3.0,5.0,4.0,3.0,3.0,3.0,5.0,4.0,4.0,5.0,,4.0,4.0,4.0,4.0,3.0,5.0,4.0,5.0
2.0,2.0,1.0,1.0,3.0,4.0,1.0,4.0,2.0,4.0,3.0,4.0,5.0,4.0,5.0,4.0,1.0,3.0,3.0,4.0,4.0,3.0,3.0,4.0,2.0,4.0,5.0,5.0,3.0,5.0,5.0,5.0
1.0,2.0,4.0,4.0,3.0,5.0,4.0,3.0,2.0,4.0,4.0,4.0,5.0,5.0,5.0,4.0,3.0,4.0,3.0,4.0,4.0,5.0,5.0,5.0,4.0,4.0,5.0,1.0,4.0,5.0,5.0,
1.0,1.0,4.0,2.0,4.0,5.0,2.0,2.0,2.0,5.0,5.0,2.0,4.0,5.0,5.0,4.0,4.0,3.0,3.0,4.0,3.0,5.0,4.0,4.0,1.0,5.0,3.0,1.0,4.0,5.0,5.0,4.0
1.0,5.0,5.0,5.0,4.0,5.0,,3.0,2.0,4.0,4.0,5.0,4.0,5.0,5.0,3.0,4.0,4.0,3.0,3.0,4.0,2.0,5.0,5.0,4.0,5.0,2.0,4.0,3.0,4.0,4.0,5.0
5.0,3.0,4.0,2.0,4.0,4.0,3.0,4.0,2.0,4.0,4.0,4.0,4.0,3.0,5.0,4.0,2.0,5.0,4.0,4.0,3.0,3.0,4.0,3.0,4.0,4.0,4.0,4.0,3.0,5.0,5.0,5.0
2.0,5.0,4.0,5.0,1.0,3.0,1.0,5.0,2.0,5.0,4.0,5.0,5.0,5.0,4.0,4.0,4.0,3.0,5.0,5.0,4.0,3.0,4.0,3.0,4.0,5.0,4.0,2.0,4.0,4.0,5.0,3.0
4.0,5.0,2.0,2.0,3

In [55]:
pandas.concat({
    key: statsmodels.stats.descriptivestats.describe(values).T
    for key, values in raw.items()
})

  skew = stats.skew(resids, axis=axis)
  kurtosis = 3 + stats.kurtosis(resids, axis=axis)
  skew = stats.skew(resids, axis=axis)
  kurtosis = 3 + stats.kurtosis(resids, axis=axis)


Unnamed: 0,Unnamed: 1,nobs,missing,mean,std_err,upper_ci,lower_ci,std,iqr,iqr_normal,mad,...,median,1%,5%,10%,25%,50%,75%,90%,95%,99%
llama3.1-8b,1,1000.0,14.0,2.280933,0.036799,2.353058,2.208808,1.155513,2.0,1.482602,0.945256,...,2.0,1.0,1.0,1.0,1.0,2.0,3.0,4.0,4.0,5.0
llama3.1-8b,2,1000.0,11.0,2.984833,0.040777,3.064755,2.904911,1.282383,2.0,1.482602,1.049594,...,3.0,1.0,1.0,1.0,2.0,3.0,4.0,5.0,5.0,5.0
llama3.1-8b,3,1000.0,10.0,2.612121,0.047262,2.704753,2.519489,1.487071,3.0,2.223903,1.363832,...,2.0,1.0,1.0,1.0,1.0,2.0,4.0,5.0,5.0,5.0
llama3.1-8b,4,1000.0,5.0,2.858291,0.041374,2.939383,2.777200,1.305082,2.0,1.482602,1.141808,...,3.0,1.0,1.0,1.0,2.0,3.0,4.0,4.0,5.0,5.0
llama3.1-8b,5,1000.0,11.0,4.029323,0.025395,4.079095,3.979550,0.798626,2.0,1.482602,0.592810,...,4.0,2.0,3.0,3.0,3.0,4.0,5.0,5.0,5.0,5.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
human,28,1071.0,4.0,3.221181,0.039668,3.298929,3.143433,1.295758,2.0,1.482602,1.106672,...,3.0,1.0,1.0,1.0,2.0,3.0,4.0,5.0,5.0,5.0
human,29,1071.0,6.0,2.339906,0.036395,2.411238,2.268574,1.187717,2.0,1.482602,0.989067,...,2.0,1.0,1.0,1.0,1.0,2.0,3.0,4.0,5.0,5.0
human,30,1071.0,8.0,3.983067,0.032354,4.046479,3.919655,1.054849,2.0,1.482602,0.805436,...,4.0,1.0,2.0,2.0,3.0,4.0,5.0,5.0,5.0,5.0
human,31,1071.0,7.0,2.792293,0.039167,2.869060,2.715527,1.277601,2.0,1.482602,1.089982,...,3.0,1.0,1.0,1.0,2.0,3.0,4.0,5.0,5.0,5.0


In [58]:
pandas.concat({
    (key_1, key_2): (
        pandas.Series(
            scipy
            .stats.f_oneway(
                values_1,
                values_2,
                nan_policy="omit"
            )
            .pvalue
            > 0.05,
            name="f_oneway_significant"
        )
        .value_counts()
    )
    for key_1, values_1 in raw.items()
    for key_2, values_2 in raw.items()
}).to_frame()

  res = hypotest_fun_out(*samples, axis=axis, **kwds)
  return result_to_tuple(hypotest_fun_out(*samples, **kwds), n_out)


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,count
Unnamed: 0_level_1,Unnamed: 1_level_1,f_oneway_significant,Unnamed: 3_level_1
llama3.1-8b,llama3.1-8b,True,23
llama3.1-8b,llama3.1-8b,False,9
llama3.1-8b,llama3.3-70b,False,31
llama3.1-8b,llama3.3-70b,True,1
llama3.1-8b,mistral-7b,False,30
...,...,...,...
human,qwen2.5-7b,True,5
human,qwen2.5-72b,False,30
human,qwen2.5-72b,True,2
human,human,True,22


In [97]:
sim = (
    pandas.concat({
        (key_1, key_2): pandas.Series(
            (
                1 - scipy.spatial.distance.cosine(
                    values_1.corr("pearson").fillna(0.0).to_numpy().ravel(), 
                    values_2.corr("pearson").fillna(0.0).to_numpy().ravel()
                )
            ),
            name="similarity"
        )
        for key_1, values_1 in raw.items()
        for key_2, values_2 in raw.items()
    })
    .to_frame()
    .reset_index(level=[0,1])
    .pivot_table(values="similarity", index="level_0", columns="level_1")
    # 
    .style
    .background_gradient(axis=None)
    .format(precision=3)  
)
sim

level_1,human,llama3.1-8b,llama3.3-70b,mistral-123b,mistral-7b,qwen2.5-72b,qwen2.5-7b
level_0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
human,1.0,0.629,0.434,0.598,0.617,0.599,0.629
llama3.1-8b,0.629,1.0,0.673,0.944,0.955,0.938,0.969
llama3.3-70b,0.434,0.673,1.0,0.653,0.662,0.694,0.672
mistral-123b,0.598,0.944,0.653,1.0,0.925,0.909,0.942
mistral-7b,0.617,0.955,0.662,0.925,1.0,0.928,0.959
qwen2.5-72b,0.599,0.938,0.694,0.909,0.928,1.0,0.942
qwen2.5-7b,0.629,0.969,0.672,0.942,0.959,0.942,1.0


In [None]:
human_chunked = {
    f"Sample {n:02d}": chunk
    for n, chunk in enumerate(numpy.array_split(raw["human"], 7), start=1)
}

sim = (
    pandas.concat({
        (key_1, key_2): pandas.Series(
            (
                1 - scipy.spatial.distance.cosine(
                    values_1.corr("pearson").fillna(0.0).to_numpy().ravel(), 
                    values_2.corr("pearson").fillna(0.0).to_numpy().ravel()
                )
            ),
            name="similarity"
        )
        for key_1, values_1 in human_chunked.items()
        for key_2, values_2 in human_chunked.items()
    })
    .to_frame()
    .reset_index(level=[0,1])
    .pivot_table(values="similarity", index="level_0", columns="level_1")
    #
    .style
    .background_gradient(axis=None)
    .format(precision=3)  
)
sim

  return bound(*args, **kwds)


level_1,sample_01,sample_02,sample_03,sample_04,sample_05,sample_06,sample_07
level_0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
sample_01,1.0,0.929,0.941,0.925,0.924,0.928,0.933
sample_02,0.929,1.0,0.92,0.918,0.904,0.915,0.933
sample_03,0.941,0.92,1.0,0.919,0.913,0.918,0.923
sample_04,0.925,0.918,0.919,1.0,0.91,0.919,0.923
sample_05,0.924,0.904,0.913,0.91,1.0,0.924,0.918
sample_06,0.928,0.915,0.918,0.919,0.924,1.0,0.926
sample_07,0.933,0.933,0.923,0.923,0.918,0.926,1.0
