In [1]:
# Scratch cell: prints a single character. NOTE(review): presumably a kernel
# smoke test left over from development — confirm and delete if so.
print('h')

h


In [1]:
from generative_social_choice.objects.comparers import BasicComparer
from utils.gpt_wrapper import  prompt_gpt
from tqdm import tqdm
import pandas as pd
import random
import itertools

from datasets import load_dataset


ModuleNotFoundError: No module named 'generative_social_choice'

In [None]:
!pip install datasets

In [8]:

# Build the evaluation frame `df`: one row per 3-combination of long STS
# entries, holding a center sentence, two candidate sentences in random order,
# a distance for each candidate, and the index (0/1) of the closer candidate.

# Fixed seed so the random statement_0 / statement_1 ordering (and therefore
# `inx_closes`) is identical on every run.
SEED = 0
rng = random.Random(SEED)

# Load dataset
sts_dataset = load_dataset('stsb_multi_mt', 'en')

# Filter out short sentences: keep entries whose two sentences are both at
# least 100 characters long.
filtered_data = [
    entry for entry in sts_dataset['train']
    if len(entry['sentence1']) >= 100 and len(entry['sentence2']) >= 100
]

# Create combinations lazily. Materialising them in a list first would hold
# every triple in memory at once; the iterator yields the same triples in the
# same order. C(n, 3) is computed by hand so tqdm can still show a total.
n_entries = len(filtered_data)
n_combinations = n_entries * (n_entries - 1) * (n_entries - 2) // 6
combinations = itertools.combinations(filtered_data, 3)

# Prepare data for DataFrame
data_for_df = []

for comb in tqdm(combinations, total=n_combinations, desc="Processing combinations"):
    center = comb[0]['sentence1']

    # Randomly assign stmt_0 and stmt_1 so the position of the closer
    # candidate carries no information.
    if rng.choice([True, False]):
        first, second = comb[1], comb[2]
    else:
        first, second = comb[2], comb[1]

    stmt_0, stmt_1 = first['sentence1'], second['sentence1']

    # Distance = 5 - similarity score.
    # NOTE(review): `similarity_score` is read from the candidate entry itself,
    # so it presumably rates that entry's own sentence1/sentence2 pair rather
    # than the candidate against `center`. If so, `inx_closes` is not a real
    # "closest to center" label — confirm before trusting downstream accuracy.
    dist_0, dist_1 = 5 - first['similarity_score'], 5 - second['similarity_score']

    # Ties resolve to index 1.
    closest_index = 0 if dist_0 < dist_1 else 1

    data_for_df.append([center, stmt_0, stmt_1, dist_0, dist_1, closest_index])


# Convert to DataFrame
df = pd.DataFrame(data_for_df, columns=['statement_center', 'statement_0', 'statement_1', 'dist_center_0', 'dist_center_1', 'inx_closes'])


Processing combinations: 100%|██████████| 27427674/27427674 [00:30<00:00, 890591.62it/s] 


In [9]:
# Keep only triples where both candidate distances lie strictly between 0 and
# 0.2 and differ from each other, so exactly one candidate is strictly closer.
# NOTE(review): the recorded summary of this frame reports a max of 0.2 for
# both distance columns despite the strict upper bound — presumably float
# rounding in the scores; confirm whether 0.2 is meant to be included.
clean_data = df.loc[
    df['dist_center_0'].gt(0.0) & df['dist_center_0'].lt(0.2)
    & df['dist_center_1'].gt(0.0) & df['dist_center_1'].lt(0.2)
    & df['dist_center_0'].ne(df['dist_center_1'])
]

In [10]:
# Sanity check on the filtered triples: (rows, columns), then summary
# statistics of the numeric columns.
print(clean_data.shape)
print(clean_data.describe())

(1210, 6)
       dist_center_0  dist_center_1   inx_closes
count    1210.000000    1210.000000  1210.000000
mean        0.190866       0.191134     0.492562
std         0.009003       0.009003     0.500151
min         0.182000       0.182000     0.000000
25%         0.182000       0.182000     0.000000
50%         0.182000       0.200000     0.000000
75%         0.200000       0.200000     1.000000
max         0.200000       0.200000     1.000000


In [None]:
## Test: score each comparer prompt template against the STS-derived labels

In [21]:

# Evaluate every prompt template of the basic comparer on every filtered
# triple: ask the model which agent is closer to the center, and score the
# reply against `inx_closes`. Results are checkpointed to results.csv.
comparer = BasicComparer(id=0, prompt_type="basic")
prompt_templates = comparer.prompt_templates

# Column layout shared by the periodic checkpoints and the final results frame.
RESULT_COLUMNS = ['prompt_type', 'response', 'statement_center', 'statement_0', 'statement_1', 'dist_center_0', 'dist_center_1', 'inx_closes', 'correct']

# Write a checkpoint after this many processed rows.
CHECKPOINT_EVERY = 20

# Storing the results
results_data = []

row_iter = tqdm(clean_data.iterrows(), desc="Processing dataframe rows", total=clean_data.shape[0])

# Count processed rows explicitly. `index` is the row's label, and
# `clean_data` is a filtered slice that keeps the labels of its parent frame,
# so the label cannot be used to decide when to checkpoint.
for rows_done, (index, row) in enumerate(row_iter, start=1):
    center_agent = row['statement_center']
    agent0 = row['statement_0']
    agent1 = row['statement_1']

    for prompt_type, template in prompt_templates.items():
        prompt = template.format(center_agent_description=center_agent, other_agents_description=f"agent0: {agent0}  \n agent1: {agent1}")

        response = prompt_gpt(model='gpt-4', prompt=prompt)

        # A reply counts as correct only if the whole reply parses as an
        # integer equal to the labelled closest index. Anything that does not
        # parse (free text, or a non-string reply) is scored as incorrect.
        try:
            response_int = int(response)
            correct = 1 if response_int == row['inx_closes'] else 0
        except (ValueError, TypeError):
            correct = 0

        # Append to results
        results_data.append([prompt_type, response, center_agent, agent0, agent1, row['dist_center_0'], row['dist_center_1'], row['inx_closes'], correct])

    # Save results every CHECKPOINT_EVERY processed rows
    if rows_done % CHECKPOINT_EVERY == 0:
        # Create a temporary dataframe and save
        temp_df = pd.DataFrame(results_data, columns=RESULT_COLUMNS)
        temp_df.to_csv('results.csv', index=False)

# After the loop, save the complete results (in case the total number of rows
# is not a multiple of CHECKPOINT_EVERY)
results_df = pd.DataFrame(results_data, columns=RESULT_COLUMNS)
results_df.to_csv('results.csv', index=False)

Processing dataframe rows: 100%|██████████| 1210/1210 [8:28:44<00:00, 25.23s/it]     


prompt_type
CoT               0.099174
basic             0.236364
few_shot          0.357025
CoT_w_few_shot    0.383471
Name: correct, dtype: float64

In [96]:
def extract_response(row):
    """Pull an integer answer out of a model response, if one can be found.

    Args:
        row: Mapping-like object (e.g. a DataFrame row) with a 'response' entry.

    Returns:
        The whole response converted to ``int`` when that works. Otherwise the
        last three characters of the response, with '.', ' ' and '"' removed,
        converted to ``int``. If neither conversion succeeds, the original
        response is returned unchanged.
    """
    response = row['response']

    # Try to convert the entire response to an int
    try:
        return int(response)
    except (ValueError, TypeError):
        # Not a plain integer (or not a string/number at all): fall through.
        pass

    # Fall back to the tail of the response, so a reply that ends with the
    # answer after some reasoning text (e.g. '... "1".') is still recognised.
    try:
        cleaned_response = response[-3:].replace('.', '').replace(' ', '').replace('"', '')
        return int(cleaned_response)
    except (ValueError, TypeError, AttributeError):
        # Only conversion/slicing failures are absorbed here; anything else
        # (e.g. KeyboardInterrupt) propagates instead of being swallowed.
        return response

# Parse every response row-wise, then re-score: a row is correct when the
# parsed answer equals the labelled closest index (unparsed text never matches).
results_df['extracted_response'] = results_df.apply(extract_response, axis=1)
results_df['correct'] = (
    results_df['extracted_response']
    .eq(results_df['inx_closes'])
    .astype(int)
)

In [98]:
# Mean of `correct` per prompt type, lowest first.
results_df.groupby('prompt_type')['correct'].mean().sort_values()

prompt_type
CoT               0.285950
basic             0.347107
few_shot          0.403306
CoT_w_few_shot    0.440496
Name: correct, dtype: float64

In [100]:
# Persist the re-scored results (including `extracted_response`) without the index column.
results_df.to_csv('comparer_res.csv', index=False)

In [111]:
def has_integer(group):
    """Return whether any 'extracted_response' value in `group` is an int.

    Args:
        group: DataFrame (e.g. one groupby group) with an
            'extracted_response' column.

    Returns:
        Truthy when at least one value in the column is an ``int`` instance.
    """
    is_int = group['extracted_response'].map(lambda value: isinstance(value, int))
    return is_int.any()

# Group rows by triple (center, statement_0, statement_1) and keep all rows of
# a triple as long as at least one of its rows has an integer
# 'extracted_response'; triples with no parseable answer at all are dropped.
filtered_groups = (
    results_df
    .groupby(['statement_center', 'statement_0', 'statement_1'])
    .filter(has_integer)
)

In [114]:
# Mean of `correct` per prompt type over the filtered triples, lowest first.
sorted_means = filtered_groups.groupby('prompt_type')['correct'].mean().sort_values()
print(sorted_means)

prompt_type
CoT               0.296741
basic             0.360206
few_shot          0.418525
CoT_w_few_shot    0.457118
Name: correct, dtype: float64


In [124]:
# Print the full response text of the CoT row at position 6 (an arbitrary example).
print(filtered_groups.loc[filtered_groups['prompt_type'] == 'CoT', 'response'].iloc[6])

Neither of the provided opinions from agent0 and agent1 directly relate to the opinions of the Reference Person, which deal with a political situation in Myanmar. Both agents express opinions on different topics: agent0 on a workshop for anti-terrorism experts, and agent1 on contraction and expansion in the manufacturing sector.

Thus, neither of the agents' opinions are particularly closer to the Reference Person, but as per the instructions, a best guess must be made, so let's choose:

0


In [123]:
# Show every column of the same CoT row (position 6) for comparison with its label.
filtered_groups.loc[filtered_groups['prompt_type'] == 'CoT'].iloc[6]

prompt_type                                                         CoT
response              Neither of the provided opinions from agent0 a...
statement_center      Myanmar's pro-democracy leader Aung San Suu Ky...
statement_0           the riyadh-based naif arab academy for securit...
statement_1           A number below 50 suggests contraction in the ...
dist_center_0                                                       0.2
dist_center_1                                                     0.182
inx_closes                                                            1
correct                                                               0
extracted_response                                                    0
Name: 46, dtype: object