In [7]:
import pandas as pd
from factor_analyzer import FactorAnalyzer
from factor_analyzer.factor_analyzer import calculate_kmo

from helpers import vignettes_en, foundations, validated_codes

In [29]:
def run_factor_analysis(df, n_factors=7):
    # identifying columns with no variation
    cae = df.nunique()[df.nunique() == 1].index.to_list()
    # convert to int
    if len(cae) > 0:
        print(f"Columns with no variation: {cae}.\nDropping.....")
        df = df.drop(columns=cae)

    # run kmo
    kmo_all, kmo_model = calculate_kmo(df)
    print(f"KMO: {kmo_model}")
    
    exploratory_fa = FactorAnalyzer(
        n_factors=n_factors,
        rotation="promax",
        method="minres",
        # method="ml", # this is the original, but it doesn't converge for gpt4
    )
    exploratory_fa.fit(df)

    # print cumulative variance explained
    vars = exploratory_fa.get_factor_variance()
    print(f"Cumulative variance explained by {n_factors} factors: {vars[2][-1]}")
    print("Variance:", vars[1])

    factor_df = pd.DataFrame(
        exploratory_fa.loadings_,
        columns=["Factor 1", "Factor 2", "Factor 3", "Factor 4", "Factor 5", "Factor 6", "Factor 7"],
    )

    info_df = pd.DataFrame(
        {"MFV Code": validated_codes, "MFV Scenario": vignettes_en.split("\n\n"), "Foundation": foundations}
    )
    cae_int = [int(x) for x in cae]
    info_df.query("`MFV Code` not in @cae and `MFV Code` not in @cae_int", inplace=True)

    factor_df = pd.concat([info_df, factor_df], axis=1)
    return factor_df

# GPT 3-5

In [9]:
df = pd.read_csv("data/results_original_gpt-3.5-turbo_2023-08-13_22-43.csv")

df.head(3)

Unnamed: 0,102,103,104,105,108,109,110,111,112,113,...,714,715,716,801,802,803,804,805,808,810
0,4,4,3,4,4,3,4,3,4,3,...,5,4,5,5,3,5,4,3,3,3
1,4,3,3,4,4,2,2,3,4,2,...,5,4,5,5,4,5,5,4,4,4
2,4,3,3,4,4,2,4,3,4,3,...,3,2,3,5,4,5,5,5,5,5


In [12]:
df.nunique().value_counts()

4    30
3    22
5    10
2     6
dtype: int64

In [13]:
factor_df_chat = run_factor_analysis(df) 

factor_df_chat.to_excel("data/exploratory_loadings_chatgpt.xlsx", index=False)



KMO: 0.614171258986736
Cumulative variance explained by 7 factors: 0.35636365419597593
Variance: [0.09811456 0.06214752 0.04297541 0.0394557  0.03900103 0.03866088
 0.03600856]


In [14]:
# reference indicates this value should be > 0.6
calculate_kmo(df)



(array([0.75189021, 0.70331541, 0.55584332, 0.54510006, 0.30879998,
        0.34067191, 0.47108153, 0.76534632, 0.52616745, 0.51338042,
        0.55554821, 0.5437722 , 0.65148468, 0.43335198, 0.60048792,
        0.52420819, 0.60305426, 0.42147877, 0.68172578, 0.64096296,
        0.5994154 , 0.6262242 , 0.40247279, 0.4000293 , 0.43232915,
        0.43657776, 0.35492213, 0.3484029 , 0.55485589, 0.62287795,
        0.63395548, 0.6507111 , 0.4725767 , 0.75657795, 0.74188822,
        0.65902537, 0.64187718, 0.46880043, 0.65195578, 0.51682915,
        0.67583367, 0.68502121, 0.68663928, 0.69218422, 0.62536   ,
        0.68810614, 0.76181134, 0.72020286, 0.39139013, 0.76723939,
        0.67891724, 0.70013323, 0.65797332, 0.77636424, 0.61999197,
        0.84782164, 0.72592706, 0.76915707, 0.673199  , 0.76505821,
        0.69819352, 0.41733495, 0.55660274, 0.26932508, 0.40136047,
        0.61115445, 0.59626294, 0.38041084]),
 0.614171258986736)

# GPT-4

In [24]:
df_gpt4 = pd.concat([
    pd.read_csv("data/results_original_gpt-4_2023-08-13_22-15.csv"),
    pd.read_csv("data/results_original_gpt-4_2023-08-13_23-45.csv")
])

# value counts all columns
# for col in df_gpt4.columns:
#     print(col, df_gpt4[col].value_counts(dropna=False))

In [25]:
df_gpt4.nunique().value_counts()

4    19
2    16
3    14
5    12
1     7
dtype: int64

In [26]:
"Correct answer effect"

'Correct answer effect'

In [30]:
factor_gpt4 = run_factor_analysis(df_gpt4) 

factor_gpt4.to_excel("data/exploratory_loadings_gpt4.xlsx", index=False)

Columns with no variation: ['108', '201', '203', '207', '410', '801', '803'].
Dropping.....
KMO: 0.8125987892291578




Cumulative variance explained by 7 factors: 0.4623228196842503
Variance: [0.11091544 0.09118832 0.08897533 0.07407269 0.04092243 0.03314808
 0.02310054]


Apparently absence of different answer in certain columns prevents the analysis from converging. This happened in the 7 scenarios above

In [31]:
exploratory_gpt4 = FactorAnalyzer(
    n_factors=7,
    rotation=None,
    method="minres",
    # method="ml", # this is the original, but it doesn't converge
    # use_smc=False,
    # svd_method="randomized",
)

exploratory_gpt4.fit(
    # df_gpt4
    df_gpt4[df_gpt4.nunique()[df_gpt4.nunique() > 1].index]
)

In [21]:
exploratory_gpt4.get_factor_variance()

(array([14.78944205,  5.29125533,  2.59392084,  2.33975827,  1.49428446,
         1.23305455,  1.14531828]),
 array([0.24244987, 0.08674189, 0.04252329, 0.03835669, 0.02449647,
        0.02021401, 0.01877571]),
 array([0.24244987, 0.32919176, 0.37171505, 0.41007175, 0.43456821,
        0.45478222, 0.47355793]))

In [86]:
exploratory_gpt4.loadings_

array([[ 5.65570144e-01, -3.49858665e-01,  1.10481338e-01,
         1.24742610e-01, -3.48007589e-01,  6.57144799e-02,
        -3.16563729e-02],
       [ 5.57226155e-01, -3.55493567e-01,  1.37329777e-01,
        -1.21651566e-01, -2.50247108e-01,  4.07909634e-02,
        -1.57342156e-02],
       [ 6.61206445e-01, -2.23297678e-01,  7.72428865e-02,
        -1.97721849e-02, -2.64297809e-01,  2.03696080e-01,
        -1.46864222e-01],
       [ 4.25011310e-01, -3.75168889e-01,  2.55063723e-01,
         2.14579750e-02, -2.48155640e-01,  4.91859681e-02,
        -2.20348711e-01],
       [ 6.39718926e-01, -2.69726103e-01, -1.23146140e-02,
        -1.01840958e-01, -1.74554587e-01,  9.59769677e-02,
        -2.37523877e-02],
       [ 9.24838762e-02, -9.39535133e-02,  9.19732697e-01,
         1.38234480e-01,  2.14867841e-01, -2.28469483e-02,
         7.19485234e-02],
       [ 5.98450903e-01, -2.57871423e-01,  6.49048958e-02,
        -7.82512873e-02, -3.21111875e-01,  8.03799666e-02,
        -9.3123414

You are an artificial intelligence which answers queries in Likert scale. For instance, if a user asks you: 

Your response should not contain the number's accompanying text only the question number. Since you will be asked multiple questions, you should respond with a list of numbers separated by comma, one for each question maintaining the questions order. For instance, if a user asks you:

How much do you agree with the following statement? '1. I'm a large language model\n2. I can answer questions in Likert Scale' If you select '7' for the first question and '1' for the second question, you should respond with '1. 7, 2. 1'.

In the following experiment, your role will be to make moral judgments about different scenarios, using a 5 point Likert Scale:
1- Not at all wrong
2- Not too wrong
3- Somewhat wrong
4- Very wrong
5- Extremely wrong