In [135]:
import typing
import json
import glob

import pandas 

import statsmodels
import statsmodels.stats
import statsmodels.stats.descriptivestats

import scipy


In [160]:
# Identifier of the model under analysis; it is interpolated into the
# data/base/<MODEL>/*.json glob used to load the LLM responses.
MODEL: str = "mistral-7b"

In [161]:
def _read_json(path: str) -> typing.Any:
    """Parse one JSON file and close its handle before returning.

    Args:
        path: Location of the JSON file to read.

    Returns:
        The decoded JSON document.
    """
    with open(path, encoding="utf-8") as handle:
        return json.load(handle)


# Response tables keyed by source ("llm" / "human"), one row per participant.
raw: typing.Dict[str, pandas.DataFrame] = {
    # One JSON file per simulated participant; records are pivoted so that each
    # distinct `id` becomes a column holding that participant's `response`.
    "llm": (
        pandas.concat(
            [
                pandas.json_normalize(_read_json(file)).assign(participant=n)
                # glob returns paths in arbitrary order; sorting keeps the
                # participant numbering reproducible between runs.
                for n, file in enumerate(sorted(glob.glob(f"data/base/{MODEL}/*.json")))
            ],
        )
        .pivot(index="participant", columns="id", values="response")
        .apply(pandas.to_numeric)
    ),
    # Survey export: keep the columns whose name contains "Q" and drop the
    # first character of each name (presumably "Q1" -> "1" — verify against
    # the CSV header).
    "human": (
        pandas.read_csv("../../../data/humor_styles/survey.csv")
        .rename_axis(index="participant")
        .filter(like="Q", axis=1)
        .pipe(lambda _df: _df.rename(columns={col: col[1:] for col in _df.columns}))
        # -1 is treated as a missing answer. An explicit NaN is used because
        # older pandas releases ignored an explicit value=None and forward-filled
        # the matched cells instead of blanking them.
        .replace(-1, float("nan"))
        .apply(pandas.to_numeric)
    ),
}

# Number of participants loaded from each source.
len(raw["llm"]), len(raw["human"])

(1000, 1071)

In [162]:
# Emit the LLM response table as CSV text, leaving out the participant index.
print(raw["llm"].to_csv(index=False))

1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32
3.0,3.0,3.0,3.0,3.0,3.0,1.0,3.0,3.0,3.0,4.0,3.0,,3.0,3.0,5.0,3.0,3.0,3.0,3.0,,3.0,5.0,3.0,1.0,3.0,5.0,3.0,,3.0,5.0,3.0
,,3.0,1.0,4.0,3.0,,,3.0,3.0,,3.0,,,5.0,5.0,1.0,3.0,,5.0,,3.0,,,1.0,3.0,,3.0,3.0,,,
5.0,3.0,3.0,,3.0,3.0,1.0,3.0,3.0,3.0,4.0,3.0,3.0,3.0,3.0,5.0,,3.0,,3.0,,3.0,1.0,3.0,1.0,5.0,5.0,3.0,,3.0,4.0,
3.0,,3.0,5.0,5.0,3.0,1.0,3.0,3.0,,,3.0,3.0,,4.0,5.0,1.0,,,3.0,,,5.0,3.0,,5.0,5.0,3.0,,3.0,4.0,3.0
5.0,3.0,3.0,3.0,3.0,3.0,1.0,3.0,,3.0,3.0,3.0,3.0,,5.0,3.0,3.0,3.0,,3.0,3.0,3.0,5.0,3.0,3.0,3.0,1.0,3.0,3.0,,5.0,3.0
5.0,3.0,,3.0,,3.0,1.0,,3.0,,,3.0,3.0,3.0,4.0,,1.0,3.0,3.0,5.0,,3.0,5.0,,1.0,3.0,,3.0,3.0,3.0,5.0,3.0
5.0,3.0,1.0,3.0,3.0,3.0,1.0,,3.0,3.0,3.0,3.0,,3.0,4.0,1.0,1.0,3.0,3.0,,,3.0,,,3.0,3.0,1.0,5.0,3.0,3.0,5.0,3.0
5.0,,,3.0,,3.0,5.0,5.0,,3.0,4.0,,,,,3.0,,3.0,,,,3.0,1.0,,1.0,3.0,5.0,3.0,,3.0,5.0,
3.0,,3.0,3.0,,3.0,,3.0,,3.0,,3.0,3.0,,5.0,1.0,1.0,,3.0,3.0,3.0,3.0,1.0,3.0,3.0,3.0,5.0,3.0,3.

In [159]:
# Emit the human response table as CSV text, leaving out the participant index.
print(raw["human"].to_csv(index=False))

1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32
2.0,2.0,3.0,1.0,4.0,5.0,4.0,3.0,4.0,3.0,3.0,1.0,5.0,4.0,4.0,4.0,2.0,3.0,3.0,1.0,4.0,4.0,3.0,2.0,1.0,3.0,2.0,4.0,2.0,4.0,2.0,2.0
2.0,3.0,2.0,2.0,4.0,4.0,4.0,3.0,4.0,3.0,4.0,3.0,3.0,4.0,5.0,4.0,2.0,2.0,3.0,2.0,3.0,3.0,4.0,2.0,2.0,5.0,1.0,2.0,4.0,4.0,3.0,1.0
3.0,4.0,3.0,3.0,4.0,4.0,3.0,1.0,2.0,4.0,3.0,2.0,4.0,4.0,3.0,3.0,2.0,4.0,2.0,1.0,4.0,2.0,4.0,3.0,2.0,4.0,3.0,3.0,2.0,5.0,4.0,2.0
3.0,3.0,3.0,4.0,3.0,5.0,4.0,3.0,,4.0,2.0,4.0,4.0,5.0,4.0,3.0,3.0,3.0,3.0,3.0,4.0,3.0,2.0,4.0,2.0,4.0,2.0,2.0,4.0,5.0,3.0,3.0
1.0,4.0,2.0,2.0,3.0,5.0,4.0,1.0,4.0,4.0,2.0,2.0,5.0,4.0,4.0,4.0,2.0,3.0,2.0,1.0,5.0,3.0,3.0,1.0,1.0,5.0,2.0,3.0,2.0,5.0,4.0,2.0
3.0,3.0,3.0,2.0,3.0,3.0,4.0,2.0,2.0,1.0,3.0,3.0,4.0,4.0,4.0,3.0,2.0,1.0,4.0,2.0,4.0,4.0,4.0,2.0,2.0,3.0,2.0,4.0,3.0,4.0,3.0,3.0
4.0,1.0,2.0,4.0,2.0,3.0,3.0,3.0,4.0,4.0,4.0,1.0,2.0,1.0,2.0,4.0,4.0,1.0,3.0,1.0,3.0,2.0,2.0,3.0,4.0,3.0,2.0,2.0,3.0,3.0,4.0,4.0
2.0,4.0,4.0,1.0,5.0,

In [129]:
# Descriptive statistics for the LLM responses, transposed so that each row
# corresponds to one column of raw["llm"].
statsmodels.stats.descriptivestats.describe(raw["llm"]).transpose()

Unnamed: 0_level_0,nobs,missing,mean,std_err,upper_ci,lower_ci,std,iqr,iqr_normal,mad,...,median,1%,5%,10%,25%,50%,75%,90%,95%,99%
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,311.0,0.0,2.270096,0.064998,2.397491,2.142702,1.146259,2.0,1.482602,0.934854,...,2.0,1.0,1.0,1.0,1.0,2.0,3.0,4.0,4.0,5.0
2,311.0,0.0,2.987138,0.07372,3.131626,2.84265,1.30006,2.0,1.482602,1.07013,...,3.0,1.0,1.0,1.0,2.0,3.0,4.0,5.0,5.0,5.0
3,311.0,0.0,2.639871,0.085675,2.807791,2.471952,1.510891,3.0,2.223903,1.383526,...,2.0,1.0,1.0,1.0,1.0,2.0,4.0,5.0,5.0,5.0
4,311.0,0.0,2.826367,0.073767,2.970947,2.681786,1.300898,2.0,1.482602,1.139897,...,3.0,1.0,1.0,1.0,2.0,3.0,4.0,4.0,5.0,5.0
5,311.0,0.0,3.990354,0.047,4.082472,3.898235,0.828857,2.0,1.482602,0.631011,...,4.0,2.0,3.0,3.0,3.0,4.0,5.0,5.0,5.0,5.0
6,311.0,0.0,4.199357,0.023588,4.245588,4.153126,0.415971,0.0,0.0,0.329525,...,4.0,4.0,4.0,4.0,4.0,4.0,4.0,5.0,5.0,5.0
7,311.0,0.0,3.199357,0.083341,3.362703,3.036011,1.469738,3.0,2.223903,1.27263,...,3.0,1.0,1.0,1.0,2.0,3.0,5.0,5.0,5.0,5.0
8,311.0,0.0,3.819936,0.053108,3.924025,3.715847,0.936563,1.0,0.741301,0.693727,...,4.0,1.0,2.0,2.0,3.0,4.0,4.0,5.0,5.0,5.0
9,311.0,0.0,2.038585,0.017503,2.072891,2.004279,0.308676,0.0,0.0,0.081864,...,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,3.9
10,311.0,0.0,4.051447,0.042626,4.134993,3.967901,0.751722,1.0,0.741301,0.555102,...,4.0,3.0,3.0,3.0,4.0,4.0,5.0,5.0,5.0,5.0


In [130]:
# Descriptive statistics for the human responses, transposed so that each row
# corresponds to one column of raw["human"].
statsmodels.stats.descriptivestats.describe(raw["human"]).transpose()

Unnamed: 0,nobs,missing,mean,std_err,upper_ci,lower_ci,std,iqr,iqr_normal,mad,...,median,1%,5%,10%,25%,50%,75%,90%,95%,99%
1,993.0,0.0,2.031219,0.033599,2.097072,1.965365,1.058773,2.0,1.482602,0.798859,...,2.0,1.0,1.0,1.0,1.0,2.0,3.0,4.0,4.0,5.0
2,993.0,0.0,3.354481,0.034658,3.42241,3.286553,1.092138,1.0,0.741301,0.918839,...,3.0,1.0,1.0,2.0,3.0,3.0,4.0,5.0,5.0,5.0
3,993.0,0.0,3.082578,0.036762,3.15463,3.010526,1.158432,2.0,1.482602,0.933278,...,3.0,1.0,1.0,1.0,2.0,3.0,4.0,5.0,5.0,5.0
4,993.0,0.0,2.823766,0.036692,2.895681,2.751852,1.156224,2.0,1.482602,0.959145,...,3.0,1.0,1.0,1.0,2.0,3.0,4.0,4.0,5.0,5.0
5,993.0,0.0,3.615307,0.032882,3.679754,3.55086,1.036164,1.0,0.741301,0.861815,...,4.0,1.0,2.0,2.0,3.0,4.0,4.0,5.0,5.0,5.0
6,993.0,0.0,4.178248,0.029947,4.236944,4.119552,0.943703,1.0,0.741301,0.748101,...,4.0,1.0,2.0,3.0,4.0,4.0,5.0,5.0,5.0,5.0
7,993.0,0.0,3.288016,0.034429,3.355495,3.220537,1.084916,1.0,0.741301,0.899353,...,3.0,1.0,1.0,2.0,3.0,3.0,4.0,5.0,5.0,5.0
8,993.0,0.0,2.540785,0.037895,2.615059,2.466512,1.194155,1.0,0.741301,1.032788,...,2.0,1.0,1.0,1.0,2.0,2.0,3.0,4.0,5.0,5.0
9,993.0,0.0,2.596173,0.038381,2.671398,2.520948,1.209449,1.0,0.741301,1.03062,...,2.0,1.0,1.0,1.0,2.0,2.0,3.0,4.0,5.0,5.0
10,993.0,0.0,2.880161,0.037493,2.953647,2.806676,1.181485,2.0,1.482602,0.970276,...,3.0,1.0,1.0,1.0,2.0,3.0,4.0,4.0,5.0,5.0


In [119]:
# One-way ANOVA between the LLM and the human responses, one test per column.
# Both frames are passed whole, so SciPy works along axis 0 and pairs the
# columns by POSITION, not by label.
# NOTE(review): this assumes raw["llm"] and raw["human"] list their columns in
# the same order — confirm before trusting individual results.
# NOTE(review): no multiple-comparison correction is applied across the tests.
ALPHA: float = 0.05  # significance threshold applied to each test

anova = scipy.stats.f_oneway(
    raw["llm"],
    raw["human"],
    nan_policy="omit",  # leave missing responses out of each test
)

# f_oneway tests the null hypothesis of equal group MEANS. A p-value above
# ALPHA therefore only says that a difference in means was not detected; it
# does not show that the two samples follow the same distribution, so the
# result is labelled accordingly.
(
    pandas.Series(anova.pvalue > ALPHA, name="means_not_significantly_different")
    .value_counts()
)

same_distribution
False    29
True      3
Name: count, dtype: int64