In [2]:
import pandas as pd
from factor_analyzer import FactorAnalyzer

from helpers import vignettes_en, foundations, validated_codes

In [21]:
def run_factor_analysis(df, n_factors=7):
    """Fit an exploratory factor analysis and return the loadings as a table.

    A ``FactorAnalyzer`` (minres extraction, promax rotation) is fitted on
    ``df``. Its loadings are then labelled with the metadata imported from
    ``helpers``: ``validated_codes``, ``vignettes_en`` (split on ``"\\n\\n"``)
    and ``foundations``.

    NOTE(review): loadings and metadata are matched purely by position, so the
    columns of ``df`` are assumed to be in the same order as
    ``validated_codes`` / ``foundations`` -- confirm against ``helpers``.

    Parameters
    ----------
    df : pandas.DataFrame
        Response data handed to ``FactorAnalyzer.fit``; each column yields one
        row of loadings.
    n_factors : int, default 7
        Number of factors to extract. Also determines how many
        ``"Factor <i>"`` columns the result has.

    Returns
    -------
    pandas.DataFrame
        One row per column of ``df`` with the columns ``"MFV Code"``,
        ``"MFV Scenario"``, ``"Foundation"`` followed by ``"Factor 1"`` ...
        ``"Factor <n_factors>"``.

    Raises
    ------
    ValueError
        If any column of ``df`` has at most one distinct value.
    """
    # A column with a single distinct value has zero variance, so its
    # correlations with the other columns are undefined. Name the offending
    # columns up front rather than handing undefined correlations to the fit.
    distinct_counts = pd.DataFrame(df).nunique()
    constant_items = [str(item) for item in distinct_counts.index[distinct_counts <= 1]]
    if constant_items:
        raise ValueError(
            "Cannot run factor analysis on zero-variance columns: "
            + ", ".join(constant_items)
        )

    exploratory_fa = FactorAnalyzer(
        n_factors=n_factors,
        rotation="promax",
        # method="ml" is the original choice, but it doesn't converge.
        method="minres",
    )
    exploratory_fa.fit(df)

    # Single source of truth for the factor column names, so the labels can
    # never drift out of sync with the number of extracted factors.
    factor_columns = [f"Factor {i}" for i in range(1, n_factors + 1)]
    factor_df = pd.DataFrame(exploratory_fa.loadings_, columns=factor_columns)

    factor_df["MFV Code"] = validated_codes
    factor_df["MFV Scenario"] = vignettes_en.split("\n\n")
    factor_df["Foundation"] = foundations

    # Put the descriptive columns first, then the loadings.
    return factor_df[["MFV Code", "MFV Scenario", "Foundation"] + factor_columns]

# GPT-3.5

In [5]:
# Load the gpt-3.5-turbo results CSV.
df = pd.read_csv(
    "data/results_original_gpt-3.5-turbo_2023-08-13_22-43.csv",
)

# Preview the first three rows as a quick sanity check.
df.head(n=3)

Unnamed: 0,102,103,104,105,108,109,110,111,112,113,...,714,715,716,801,802,803,804,805,808,810
0,4,4,3,4,4,3,4,3,4,3,...,5,4,5,5,3,5,4,3,3,3
1,4,3,3,4,4,2,2,3,4,2,...,5,4,5,5,4,5,5,4,4,4
2,4,3,3,4,4,2,4,3,4,3,...,3,2,3,5,4,5,5,5,5,5


In [23]:
# Number of distinct values in each column -- a quick check of how much the
# responses vary per item.
df.nunique()

102    4
103    4
104    4
105    4
108    3
      ..
803    3
804    2
805    4
808    5
810    5
Length: 68, dtype: int64

In [7]:
# Exploratory loadings for the frame loaded from the gpt-3.5-turbo results,
# written to Excel without the row index.
factor_df_chat = run_factor_analysis(df)
factor_df_chat.to_excel(
    "data/exploratory_loadings_chatgpt.xlsx",
    index=False,
)

# GPT-4

In [18]:
# Combine both GPT-4 result files into one frame. ignore_index=True gives the
# combined rows a fresh 0..n-1 index instead of repeating each file's own row
# labels, which would make label-based lookups ambiguous.
df_gpt4 = pd.concat(
    [
        pd.read_csv("data/results_original_gpt-4_2023-08-13_22-15.csv"),
        pd.read_csv("data/results_original_gpt-4_2023-08-13_23-45.csv"),
    ],
    ignore_index=True,
)

# Print the value distribution of every column (missing values included).
for col in df_gpt4.columns:
    print(col, df_gpt4[col].value_counts(dropna=False))

102 5    86
4    50
Name: 102, dtype: int64
103 4    73
5    35
3    28
Name: 103, dtype: int64
104 5    81
4    53
3     2
Name: 104, dtype: int64
105 5    119
4     17
Name: 105, dtype: int64
108 5    136
Name: 108, dtype: int64
109 3    72
4    55
2     7
5     2
Name: 109, dtype: int64
110 5    135
4      1
Name: 110, dtype: int64
111 4    63
3    59
5     9
2     5
Name: 111, dtype: int64
112 5    104
4     32
Name: 112, dtype: int64
113 4    86
3    34
5    13
2     3
Name: 113, dtype: int64
114 4    75
3    42
5    18
2     1
Name: 114, dtype: int64
201 5    136
Name: 201, dtype: int64
202 5    125
4     11
Name: 202, dtype: int64
203 5    136
Name: 203, dtype: int64
204 5    135
4      1
Name: 204, dtype: int64
205 5    130
4      6
Name: 205, dtype: int64
206 5    113
4     23
Name: 206, dtype: int64
207 5    136
Name: 207, dtype: int64
208 4    69
5    61
3     6
Name: 208, dtype: int64
303 5    90
4    46
Name: 303, dtype: int64
402 5    106
4     29
3      1
Name: 402, dtyp

In [19]:
# Number of distinct values in each GPT-4 column; a count of 1 means every
# non-missing response to that item was identical.
df_gpt4.nunique()

102    2
103    3
104    3
105    2
108    1
      ..
803    1
804    2
805    4
808    2
810    4
Length: 68, dtype: int64

In [22]:
# Fit on the GPT-4 responses: use df_gpt4 here, not `df` (which holds the
# GPT-3.5 responses), otherwise the file below would contain GPT-3.5 loadings.
# NOTE(review): df_gpt4 contains items with a single distinct response (see the
# df_gpt4.nunique() output above). Their correlations are undefined, so this
# fit may fail until those items are handled -- confirm.
factor_gptT4 = run_factor_analysis(df_gpt4)

# NOTE(review): "fpt4" in the file name looks like a typo for "gpt4"; it is
# kept so anything reading this path keeps working -- confirm before renaming.
factor_gptT4.to_excel("data/exploratory_loadings_fpt4.xlsx", index=False)