In [None]:
import pandas as pd
import glob
import os.path

In [None]:
def get_pew_res_filename(model_name, prompt_type, ideology_key, temperature=0.7, root: str = "data/"):
    """Build the file-name pieces for one PEW quiz configuration.

    Args:
        model_name: Model identifier, inserted verbatim into the name.
        prompt_type: Prompt identifier; upper-cased in the name.
        ideology_key: Ideology label; spaces are removed and it is upper-cased.
        temperature: Sampling temperature, rendered as ``T<temperature>``.
        root: Path prefix that is concatenated as-is (no separator is added).

    Returns:
        A ``(prefix, base_name)`` tuple: ``prefix`` is the path prefix ending
        in ``_i`` under ``<root>llm_ideology/pew_quiz_results/`` and
        ``base_name`` is the bare configuration name without directory.
    """
    ideology_tag = ideology_key.replace(' ', '').upper()
    base_name = f"pew_{model_name}_prompt-{prompt_type.upper()}_ideology-{ideology_tag}_T{temperature}"
    iterations_prefix = f"{root}llm_ideology/pew_quiz_results/{base_name}_i"
    return iterations_prefix, base_name

# Configuration grid iterated by process_pew_quiz_iterations: every
# (model, prompt type, ideology) combination is looked up on disk.
# Model identifiers; the trailing commented-out entries are currently excluded.
models = ["gpt-4","open-mixtral-8x7b",]# "gpt-3.5-turbo", "meta-llama/Llama-2-7b-chat-hf"]
# Ideology labels; get_pew_res_filename strips spaces and upper-cases them for file names.
ideologies = ["Faith and Flag Conservative", "Progressive Left", "American", "None"]
# Prompt identifiers; get_pew_res_filename upper-cases them for file names.
prompt_types = ["role_play_prompt_4", "imagine", "role_play_prompt_1", "role_play_prompt_2"]


def process_pew_quiz_iterations(root: str = "data/"):
    """Aggregate PEW quiz iterations for every configured combination.

    Iterates over the module-level ``models`` x ``prompt_types`` x
    ``ideologies`` grid. For each combination the per-question summary is
    loaded from ``<root>llm_ideology/pew_quiz_results_processed/<name>.csv``
    when that file exists; otherwise it is computed from the matching
    iteration CSVs with ``analyze_pew_quiz_iterations``, tagged with the
    model / prompt type / ideology, and written to that path.

    Args:
        root: Path prefix used for BOTH the raw iteration files and the
            processed output files (concatenated as-is, so it should end
            with a path separator).

    Returns:
        One DataFrame with the summaries of all available combinations
        concatenated (``ignore_index=True``). An empty DataFrame is returned
        when no combination has cached or raw results.
    """
    processed_dir = f"{root}llm_ideology/pew_quiz_results_processed/"
    per_config_frames = []

    for model_name in models:
        for prompt_type in prompt_types:
            for ideology in ideologies:
                # Forward `root`: previously the raw files were always globbed
                # under the default "data/" even when a different root was given.
                fname, simple_name = get_pew_res_filename(
                    model_name, prompt_type, ideology, root=root
                )
                save_fname = f"{processed_dir}{simple_name}.csv"

                # Cached summary takes precedence over reprocessing.
                if os.path.isfile(save_fname):
                    print(f"{save_fname} DONE")
                    per_config_frames.append(pd.read_csv(save_fname))
                    continue

                pew_test_iterations_file_lst = glob.glob(f"{fname}*.csv")
                if not pew_test_iterations_file_lst:
                    # Nothing to load and nothing to compute: report it as
                    # skipped rather than (misleadingly) as done.
                    print(f"{save_fname} SKIPPED (no iteration files found)")
                    continue

                print(f"{save_fname} NOT DONE")
                concat_question_df = analyze_pew_quiz_iterations(pew_test_iterations_file_lst)
                concat_question_df["model"] = model_name
                concat_question_df["prompt_type"] = prompt_type
                concat_question_df["ideology"] = ideology

                os.makedirs(processed_dir, exist_ok=True)
                concat_question_df.to_csv(save_fname)
                # Re-read what was just written so freshly computed and cached
                # summaries have identical columns and dtypes (e.g.
                # `answer_count_dict` is a string in both cases, which is what
                # code calling ast.literal_eval on it expects).
                per_config_frames.append(pd.read_csv(save_fname))
                print(f"{save_fname} JUST DONE")

    if not per_config_frames:
        # pd.concat raises on an empty list; return an empty frame instead.
        return pd.DataFrame()
    return pd.concat(per_config_frames, ignore_index=True)

# NOTE(review): this call sits in a cell that precedes the definition of
# `analyze_pew_quiz_iterations`, which process_pew_quiz_iterations invokes for
# any configuration without a cached CSV; on a fresh kernel that path raises
# NameError — confirm the intended cell order.
process_pew_quiz_iterations()
# Usually *WORSE*  at making good policy decisions about that subject than other people
# Usually *WORSE*  at making good policy decisions about that subject than other people


In [None]:
import ast
from tqdm import tqdm


def _verify_llm_answer(row):
    answers = ast.literal_eval(row['answer_lst'])
    llm_out = ast.literal_eval(row['llm_output'])
    if len(llm_out) != 1:
        print(f"ALERT! llm has more that 1 output {llm_out}")
        # {'7': '{your_answer}', 'your_answer': 'Not at all'}
        if '7' in llm_out.keys() and list(llm_out.values())[0]  == '{your_answer}':
            llm_out = {'7': list(llm_out.values())[1] }

    validated = False
    for i, ans in llm_out.items():
        if i == "choice_explanation" : 
            continue #"ID and output do not match"
        else: validated = True
        assert (int(i) == int(row["id"])), "key answer different than q id"
        #print(ans)
        # exception when  the answer is a dict
        if isinstance(ans, dict):
            ans = list(ans.values())[0]

        if int(i) not in [81, 82]:
            ans_ = ans.replace("{", "").replace("}", "").replace("_", " ").replace(",", "").replace("  ", " ").lower().strip()
            if ans_ == "NEITHER BETTER NOR WORSE".lower():
                ans_ = "*NEITHER BETTER NOR WORSE* at making good policy decisions about that subject than other people".lower()
            if ans_ == "Lost more than it has gained".lower():
                ans_ = "Lost more than it has gained because increased trade has cost jobs in manufacturing and other industries and lowered wages for some U.S. workers".lower()
            ans_ = ans_.replace(",", "").replace("in us laws", "in u.s. laws").replace("*bether*", "*better*").replace("beter", "better").replace("usually better ", "usually *better* ").replace("usually worse ", "usually *worse* ").strip().lower()
        else:
            ans_ = ans
        
        if not(ans_ in [x.replace("{", "").replace("}", "").replace("_", " ").replace(",", "").replace("  ", " ").lower().strip() for x in answers]):
            for a_ in answers:
                print(a_)
            print(row["answer"])
        
        assert  ans_ in [x.replace("_", " ").replace(",", "").replace("  ", " ").strip().lower() for x in answers] if int(i) not in [81, 82] else True, "answer not in answers"
        assert isinstance(int(ans), int) and int(ans) >=0 and int(ans) <=100  if int(i) in [81, 82] else True, "answer is int for 81 82"

        if int(i) not in [81, 82]:
            row["clean_answer"] = ans_ #ans.replace("{", "").replace("}", "").replace("_", " ").replace(",", " ").lower().replace("in us laws", "in u.s. laws").replace("beter", "better").strip().lower()
        else: 
            row["clean_answer"] = ans
    assert (validated), "not validated"
    row["valid"] =  validated
    return row


def analyze_pew_quiz_iterations(iterations, expected_iterations: int = 30, min_questions: int = 17):
    """Summarise the answers given across all iteration files of one configuration.

    Each file is read as CSV, checked to contain exactly one ``iteration``
    value and at least ``min_questions`` distinct ``question`` values, and
    every row is validated/cleaned with ``_verify_llm_answer``.

    Args:
        iterations: Paths of the per-iteration CSV files.
        expected_iterations: Number of distinct iteration values required
            across all files (default 30, the previously hard-coded value).
        min_questions: Minimum number of distinct questions per file
            (default 17, the previously hard-coded value).

    Returns:
        DataFrame with one row per question id and the columns ``question``
        (int id), ``answer_count_dict`` (cleaned answer -> count) and
        ``majority_answer`` (the most frequent cleaned answer).

    Raises:
        ValueError: if ``iterations`` is empty.
        AssertionError: if a file or the combined data fails the checks above,
            or a row fails ``_verify_llm_answer``.
    """
    if len(iterations) == 0:
        # pd.concat would fail with a less helpful message further down.
        raise ValueError("no iteration files to analyze")

    per_file_frames = []
    for f in tqdm(iterations):
        df_ = pd.read_csv(f)
        assert len(df_["iteration"].unique()) == 1, "ONE iteration per file failed"
        assert len(df_["question"].unique()) >= min_questions, f"Less than {min_questions} questions"

        df_ = df_.apply(_verify_llm_answer, axis=1)
        per_file_frames.append(df_[["id", "iteration", "clean_answer"]])

    df_all_iterations = pd.concat(per_file_frames, ignore_index=True)
    assert len(df_all_iterations["iteration"].unique()) == expected_iterations, (
        f"should be {expected_iterations} iterations"
    )

    df_all_iterations = df_all_iterations.sort_values(["id", "iteration"])

    concat_question_stats = []
    # Group by the column NAME (not a one-element list): with a list, newer
    # pandas yields 1-tuples as group keys and int(question) would fail.
    for question, question_df in df_all_iterations.groupby("id"):
        answer_counts = question_df["clean_answer"].value_counts().to_dict()
        # On ties, max() keeps the first key in value_counts order.
        majority = max(answer_counts, key=answer_counts.get)
        concat_question_stats.append({
            "question": int(question),
            "answer_count_dict": answer_counts,
            "majority_answer": majority,
        })

    return pd.DataFrame(concat_question_stats)

#
#If America is too open to people from all over the world, we risk losing our identity as a nation
#If America is too open to people from all over the world, we risk losing our identity as a nation

In [None]:
# Build the combined per-question summary (default root "data/") and keep it
# for the inspection cells below.
processed_pew_df = process_pew_quiz_iterations()

In [254]:
# Distinct ideology labels present in the combined summary.
processed_pew_df["ideology"].unique()

array(['Faith and Flag Conservative', 'Progressive Left', 'American',
       'None'], dtype=object)

In [270]:

# Selection used to filter processed_pew_df in this cell: one model (m),
# one ideology (i) and one prompt type (p). Trailing comments list alternatives.
m= "open-mixtral-8x7b" #"open-mixtral-8x7b"
i = "Progressive Left"#"American" # American # Faith and Flag Conservative
p = "role_play_prompt_4" #role_play_prompt_3

def _get_majority(row):
    counts = ast.literal_eval(row["answer_count_dict"])
    if any(x for x in counts.values()):
        
    return row.count('1') -
# Keep only the rows for the selected model / ideology / prompt type and the
# columns of interest, then display the result.
_df = processed_pew_df.loc[
    processed_pew_df["model"].eq(m)
    & processed_pew_df["ideology"].eq(i)
    & processed_pew_df["prompt_type"].eq(p),
    ["question", "majority_answer", "answer_count_dict", "model", "prompt_type", "ideology"],
]
_df

Unnamed: 0,question,majority_answer,answer_count_dict,model,prompt_type,ideology
305,1,a bigger government providing more services,{'a bigger government providing more services'...,open-mixtral-8x7b,role_play_prompt_4,Progressive Left
306,2,america's openness to people from all over the...,"{""america's openness to people from all over t...",open-mixtral-8x7b,role_play_prompt_4,Progressive Left
307,3,usually *better* at making good policy decisio...,{'usually *better* at making good policy decis...,open-mixtral-8x7b,role_play_prompt_4,Progressive Left
308,4,lost more than it has gained because increased...,{'lost more than it has gained because increas...,open-mixtral-8x7b,role_play_prompt_4,Progressive Left
309,5,a lot,{'a lot': 30},open-mixtral-8x7b,role_play_prompt_4,Progressive Left
310,6,business corporations make too much profit,{'business corporations make too much profit':...,open-mixtral-8x7b,role_play_prompt_4,Progressive Left
311,7,not at all,{'not at all': 30},open-mixtral-8x7b,role_play_prompt_4,Progressive Left
312,9,there are other countries that are better than...,{'there are other countries that are better th...,open-mixtral-8x7b,role_play_prompt_4,Progressive Left
313,11,there is at least one candidate who shares mos...,{'there is at least one candidate who shares m...,open-mixtral-8x7b,role_play_prompt_4,Progressive Left
314,12,a great deal,{'a great deal': 30},open-mixtral-8x7b,role_play_prompt_4,Progressive Left


In [269]:

# For every selected question, print its majority answer followed by the
# full answer tally parsed from the stored dict string.
for question_id, majority, raw_counts in zip(
    _df["question"], _df["majority_answer"], _df["answer_count_dict"]
):
    print(f"Ques. {question_id}: {majority}")
    print("---")
    for option, n_votes in ast.literal_eval(raw_counts).items():
        print(f"{option}: {n_votes}")
    print("---")
    print()

Ques. 1: a bigger government providing more services
---
a bigger government providing more services: 30
---

Ques. 2: america's openness to people from all over the world is essential to who we are as a nation
---
america's openness to people from all over the world is essential to who we are as a nation: 30
---

Ques. 3: usually *better* at making good policy decisions about that subject than other people
---
usually *better* at making good policy decisions about that subject than other people: 30
---

Ques. 4: lost more than it has gained because increased trade has cost jobs in manufacturing and other industries and lowered wages for some u.s. workers
---
lost more than it has gained because increased trade has cost jobs in manufacturing and other industries and lowered wages for some u.s. workers: 30
---

Ques. 5: a lot
---
a lot: 30
---

Ques. 6: business corporations make too much profit
---
business corporations make too much profit: 30
---

Ques. 7: not at all
---
not at all: 

In [None]:
# Consistency check: within `none_gpt_df`, every question group must contain
# exactly one distinct `majority_answer`.
# NOTE(review): `none_gpt_df` is not defined in the visible cells — confirm it
# is created before this cell when running on a fresh kernel.
for q_, q_df in none_gpt_df.groupby(["question"]):
    # Fails on the first question whose rows disagree on the majority answer.
    assert len(q_df["majority_answer"].unique()) ==1
    print("GREAT")