# Shared Task: Critical thinking generation
## Loading Data


In [None]:
pip install -q social_agents/

In [None]:
pip show social_agents

In [None]:
import tqdm
# Reading the data
import json
import pandas as pd
import social_agents

from social_agents.utils import get_st_data


# Peek at the first entry of the "sample" split: show its key and intervention text,
# keep the intervention in `input_text`, then stop after that single entry.
sample_items = get_st_data("sample").items()
for key, line in tqdm.tqdm(sample_items):
    print(key)

    intervention = line['intervention']
    print(intervention)
    input_text = line['intervention']
    break  # only the first entry is needed here


In [None]:
%%capture --no-stderr
# Install/upgrade the LangChain packages and the Tavily client used by this notebook.
# The cell's stdout is captured (hidden); with --no-stderr, stderr is not captured and stays visible.
# NOTE(review): versions are unpinned (-U pulls the latest) — consider pinning for reproducibility.
%pip install --quiet -U langchain_openai langchain_core langchain_community tavily-python

## Prerequisites
Environment variables (API keys, LangChain tracing settings), LLM access, ...

In [None]:
import os, getpass
def _set_env(var: str):
    if not os.environ.get(var):
        os.environ[var] = getpass.getpass(f"{var}: ")

# Ask for each API key only when it is not already present in the environment.
_set_env("OPENAI_API_KEY")
_set_env("LANGCHAIN_API_KEY")
_set_env("MISTRAL_API_KEY")

# LangChain tracing settings: switch tracing on and name the project the runs are filed under.
os.environ.update(
    LANGCHAIN_TRACING_V2="true",
    LANGCHAIN_PROJECT="shared_task_critical_questions",
)


## 1. Zero-shot LLM baseline, starting with o3-mini

In [None]:

# `Image` was previously only imported in a later cell, so this cell raised a
# NameError on a fresh kernel (Restart & Run All). Import it where it is used.
from IPython.display import Image, display

from social_agents.graph_tools import BasicCQModel

model_name = "o3-mini"

# temperature is left as None for this model.
# NOTE(review): presumably because o3-mini takes no sampling temperature — confirm in BasicCQModel.
# Previously tried extra arguments: interrupt_before=['human_feedback'], checkpointer=memory
basic_agent = BasicCQModel(llm_name=model_name, temperature=None)

# Render the agent's graph as a Mermaid PNG to inspect its structure.
display(Image(basic_agent.graph.get_graph(xray=1).draw_mermaid_png()))

# RUN
# basic_agent.run_experiment()



### Evaluate

- Run in a shell:

```shell

python3 eval_scripts/evaluation.py \
    --metric similarity \
    --input_path data_splits/validation.json \
    --submission_path output/elbaff_experiment/output_gpt-4o-mini-2024-07-18social_n2_Te_Sr.json \
    --threshold 0.6 

```

- OUTPUT : output/output_o3-mini_temperatureNA._eval_similarity_06.json



**Overall count**

| **Questions Labels** |  **#**  | **ratio** |
|:--------------------|-------:|---------:|
| useful               | **329** |  **0,59** |
| unhelpful            |      63 |      0,11 |
| Invalid              |       8 |      0,01 |
| Not able to evaluate |     158 |      0,28 |
| **Total**            | **558** |         1 |


**Overall count within each argument**

| **n/3 useful questions per arg** | **# of arguments** | **ratio** |
|:-------------------------------:|-------------------:|:---------:|
|                               0/3 |                 17 |      0,09 |
|                               1/3 |                 51 |      0,27 |
|                               2/3 |             **76** |      0,41 |
|                               3/3 |                 42 |      0,23 |
| **Total**                       |            **186** |         1 |

## Social Agents


In [None]:
import pandas as pd
import os

from social_agents.graph_tools import  CQSTAbstractAgent, SocialAgentBuilder

# Location of the CSV that lists the experiment settings to run.
EXPERIMENT_SETTINGS_FILE_PATH = "output/elbaff_experiment/experiment_settings.csv"
# Set to True to regenerate the settings file even when it already exists.
override = False
if override or not os.path.exists(EXPERIMENT_SETTINGS_FILE_PATH):
    print("generating exp settings file")
    exps_df = SocialAgentBuilder._generate_experiment_settings()
    # DataFrame.to_csv does not create missing parent folders; make sure the
    # output directory exists so the first run on a clean checkout does not fail.
    os.makedirs(os.path.dirname(EXPERIMENT_SETTINGS_FILE_PATH), exist_ok=True)
    exps_df.to_csv(EXPERIMENT_SETTINGS_FILE_PATH, index=False)

# Always read the settings back from disk so the later cells use the saved file.
experiment_settings = pd.read_csv(EXPERIMENT_SETTINGS_FILE_PATH)
experiment_settings.head()

# exp_row = experiment_settings.iloc[20].to_dict()
# exp_row

In [2]:
import ast

from IPython.display import Image, display
from tqdm import tqdm

from social_agents import objects

llm_name = "gpt-4o-mini-2024-07-18"  # alternative: "o3-mini-2025-01-31"
llm_name_short = "gpt-4o-mini_"
temperature = 0.8

# Run every experiment setting that includes the "easy_going" trait and has no saved output yet.
for idx, row in tqdm(experiment_settings.iterrows(), total=len(experiment_settings)):
    exp_name = row["experiment_name"].format(llm_name=llm_name_short)

    # Skip settings whose output file already exists, so an interrupted sweep
    # (e.g. after an API quota error) can be resumed by re-running this cell.
    if os.path.exists(f"{SocialAgentBuilder.ROOT_FOLDER}output_{exp_name}.json"):
        print("Experiment ", exp_name, "already done!")
        continue

    # The list-valued columns are stored as Python-literal strings in the CSV; parse once per row.
    traits = list(ast.literal_eval(row["traits"]))
    if "easy_going" in traits:
        print("="*20)
        # Call to_dict(): printing the bare attribute showed the bound method, not the settings.
        print(row.to_dict())
        print("EXPERIMENT NAME: ", exp_name)
        print("="*20)

        social_agent = SocialAgentBuilder(
            model_thread_id=row["thread_id"],
            llm_name=llm_name,
            llm_num=row["number_of_agents"],
            experiment_name=exp_name,
            temperature=temperature,
            collaborative_strategy=list(ast.literal_eval(row["strategies"])),
            agent_trait_lst=traits)
        # Render the agent graph for this configuration.
        display(Image(social_agent.graph.get_graph(xray=1).draw_mermaid_png()))

        social_agent.run_experiment(data_type="validation", save=True)
        print(f'finished {exp_name}')






Iteration TRUMP_93: 15.3562 seconds




Iteration TRUMP_95: 17.4069 seconds




Iteration Tuatho__98: 15.9765 seconds




Iteration Vec_164: 14.4269 seconds




Iteration Velshtein__565: 18.3240 seconds




Iteration Zewstain__641: 20.1814 seconds




Iteration aimwill_178: 17.6098 seconds




Iteration ambersky_145: 19.9614 seconds




Iteration annoyed_177: 22.7289 seconds




Iteration atraveller_82_1: 14.5996 seconds




Iteration atraveller_82_2: 21.8589 seconds




Iteration cd38_220_1: 16.3824 seconds




Iteration cd38_220_2: 14.8256 seconds




Iteration citizen-s_134: 20.0894 seconds




Iteration citizen-s_176: 14.4290 seconds




Iteration darawayne_121: 14.2469 seconds




Iteration darawayne_182: 22.2065 seconds




Iteration JDwyer_17: 18.1558 seconds




Iteration JJMurray_140: 14.9267 seconds




Iteration JJMurray_143: 14.6993 seconds




Iteration JJMurray_166: 21.9460 seconds




Iteration JJMurray_180: 16.0735 seconds




Iteration JL_15: 14.9590 seconds




Iteration JL_3: 16.9962 seconds




Iteration JL_5: 18.8353 seconds




Iteration JW_41: 12.8030 seconds




Iteration Javier_84: 17.2993 seconds




Iteration JetJock_156_1: 15.9740 seconds




Iteration JetJock_156_2: 18.2259 seconds




Iteration KHenrickson_172: 18.0273 seconds




Iteration MP_22: 17.9168 seconds




Iteration MP_48: 17.7088 seconds




Iteration MR_TELEVOID__178: 23.9564 seconds




Iteration MT_10: 15.6457 seconds




Iteration MT_40: 14.8651 seconds




Iteration Melanie_64: 14.6524 seconds




Iteration Mpogoda_184: 16.5774 seconds




Iteration MrFordization__82: 21.3873 seconds




Iteration Mulder_112: 15.4733 seconds




Iteration Mulder_226: 19.1580 seconds




Iteration Mulder_240: 16.5355 seconds


 60%|█████▉    | 111/186 [33:06<22:22, 17.90s/it]
  4%|▍         | 5/113 [1:52:28<40:29:26, 1349.69s/it]

Iteration ND_28: 1.9250 seconds





RateLimitError: Error code: 429 - {'error': {'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.', 'type': 'insufficient_quota', 'param': None, 'code': 'insufficient_quota'}}