# Causal Reasoning In Large Language Models: CLadder


#### 1. Data Preparation


In [1]:
import pandas as pd
import json

dataset_path = "../data/cladder/cladder-v1-q-commonsense.json"
with open(dataset_path, "r") as f:
    data = json.load(f)
    
df = pd.DataFrame(data)
df.rename(columns={'given_info': 'info'}, inplace=True)

df = df[df['meta'].apply(lambda x: x.get('query_type') != 'backadj')].reset_index(drop=True)


In [2]:
df.iloc[0]['question']


'Will alarm set by husband increase the chance of ringing alarm?'

In [3]:
df.head(2)

Unnamed: 0,question_id,desc_id,info,question,answer,meta,reasoning
0,0,alarm-mediation-ate-model0-spec0-q0,"For husbands that don't set the alarm, the pro...",Will alarm set by husband increase the chance ...,yes,"{'story_id': 'alarm', 'graph_id': 'mediation',...",{'step0': 'Let X = husband; V2 = wife; Y = ala...
1,1,alarm-mediation-ate-model0-spec0-q1,"For husbands that don't set the alarm, the pro...",Will alarm set by husband decrease the chance ...,no,"{'story_id': 'alarm', 'graph_id': 'mediation',...",{'step0': 'Let X = husband; V2 = wife; Y = ala...


In [4]:
df.iloc[100]['meta']

{'story_id': 'alarm',
 'graph_id': 'mediation',
 'mediators': ['V2'],
 'polarity': False,
 'groundtruth': 0.35250941023943905,
 'query_type': 'nde',
 'rung': 3,
 'formal_form': 'E[Y_{X=1, V2=0} - Y_{X=0, V2=0}]',
 'given_info': {'p(Y | X, V2)': [[0.07539329207269328, 0.5003865412696569],
   [0.3886361970461835, 0.8599843795718491]],
  'p(V2 | X)': [0.8470836315848196, 0.289175632189055]},
 'estimand': '\\sum_{V2=v} P(V2=v|X=0)*[P(Y=1|X=1,V2=v) - P(Y=1|X=0, V2=v)]',
 'treatment': 'X',
 'outcome': 'Y',
 'model_id': 22}

In [5]:
index = 6330

print('Info: ', df.iloc[index]['info'])
print('Question: ', df.iloc[index]['question'])
print('Answer: ', df.iloc[index]['answer'])
print('Graph ID: ', df.iloc[index]['meta']['graph_id'])
print('Query type: ', df.iloc[index]['meta']['query_type'])
print('Rung: ', df.iloc[index]['meta']['rung'])
print('Formal form: ', df.iloc[index]['meta']['formal_form'])
print('Reasoning: ' , df.iloc[index]['reasoning'])

Info:  The overall probability of intelligent parents is 5%. The probability of unintelligent parents and intelligent child is 68%. The probability of intelligent parents and intelligent child is 3%.
Question:  Is the chance of intelligent child smaller when observing intelligent parents?
Answer:  yes
Graph ID:  arrowhead
Query type:  correlation
Rung:  1
Formal form:  P(Y | X)
Reasoning:  {'step0': "Let V2 = other unobserved factors; X = parents' intelligence; V3 = parents' social status; Y = child's intelligence.", 'step1': 'X->V3,V2->V3,X->Y,V2->Y,V3->Y', 'step2': 'P(Y | X)', 'step3': 'P(X = 1, Y = 1)/P(X = 1) - P(X = 0, Y = 1)/P(X = 0)', 'step4': 'P(X=1=1) = 0.05\nP(Y=1, X=0=1) = 0.68\nP(Y=1, X=1=1) = 0.03', 'step5': '0.03/0.05 - 0.68/0.95 = -0.14', 'end': '-0.14 < 0'}


In [6]:
df['meta'][3]

{'story_id': 'alarm',
 'graph_id': 'mediation',
 'mediators': ['V2'],
 'polarity': False,
 'groundtruth': -0.2305349321780112,
 'query_type': 'nie',
 'rung': 3,
 'formal_form': 'E[Y_{X=0, V2=1} - Y_{X=0, V2=0}]',
 'given_info': {'p(Y | X, V2)': [[0.08430222457648505, 0.5394610521458689],
   [0.4061509701126924, 0.8620283206949241]],
  'p(V2 | X)': [0.7416866188819116, 0.23519324071521291]},
 'estimand': '\\sum_{V2 = v} P(Y=1|X =0,V2 = v)*[P(V2 = v | X = 1) − P(V2 = v | X = 0)]',
 'treatment': 'X',
 'outcome': 'Y',
 'model_id': 0}

In [7]:
df_new = df.copy()
meta_df = df_new['meta'].apply(pd.Series)
meta_df
df_new = pd.concat([df_new, meta_df], axis = 1)
df_new = df_new.drop('meta', axis = 1)
df_new.rename(columns={'given_info': 'given_info_meta', 'given_info': 'given_info'}, inplace=True)

In [8]:
df_new['query_type'].unique()

array(['ate', 'ett', 'nie', 'nde', 'marginal', 'correlation', 'exp_away',
       'collider_bias', 'det-counterfactual'], dtype=object)

In [9]:
df_new.columns

Index(['question_id', 'desc_id', 'info', 'question', 'answer', 'reasoning',
       'story_id', 'graph_id', 'treated', 'result', 'polarity', 'groundtruth',
       'query_type', 'rung', 'formal_form', 'given_info', 'estimand',
       'treatment', 'outcome', 'model_id', 'mediators', 'baseline', 'collider',
       'action'],
      dtype='object')

In [10]:
df_sampled = df_new.sample(n = 1000, random_state=25)
print(len(df_sampled))

1000


In [12]:
print(df_sampled.head(10))

      question_id                                            desc_id  \
297           915  firing_employee-diamondcut-marginal-model79-sp...   
4914        15784       vaccine_kills-diamond-nie-model4119-spec5-q0   
7222        23367  smoke_birthWeight-arrowhead-ett-model4780-spec...   
3318        10711  smoke_birthWeight-arrowhead-nde-model936-spec6-q1   
7091        22933  simpson_kidneystone-confounding-ett-model4739-...   
3787        12133   smoking_tar_cancer-chain-ate-model1046-spec26-q1   
3466        11200  smoking_frontdoor-frontdoor-nie-model973-spec1...   
2392         7802  nature_vs_nurture-arrowhead-nie-model677-spec1...   
2716         8852  orange_scurvy-chain-correlation-model752-spec2-q0   
8306        25842  orange_scurvy-chain-det-counterfactual-model33...   

                                                   info  \
297   The overall probability of manager signing the...   
4914  For unvaccinated individuals, the probability ...   
7222  For infants with nonsmok

In [11]:
column_names = ['answer', 'query_type', 'answer', 'graph_id', 'rung', 'query_type', 'story_id', 'polarity']

for column_name in column_names:
    print(df_sampled[column_name].value_counts())
    print(df_new[column_name]. value_counts())
    print('----------------------------------')

answer
no     504
yes    496
Name: count, dtype: int64
answer
yes    4345
no     4345
Name: count, dtype: int64
----------------------------------
query_type
marginal              209
ate                   174
correlation           174
ett                   138
det-counterfactual     95
nie                    92
nde                    73
collider_bias          23
exp_away               22
Name: count, dtype: int64
query_type
marginal              1702
ate                   1518
correlation           1518
ett                   1288
nie                    874
det-counterfactual     870
nde                    552
exp_away               184
collider_bias          184
Name: count, dtype: int64
----------------------------------
answer
no     504
yes    496
Name: count, dtype: int64
answer
yes    4345
no     4345
Name: count, dtype: int64
----------------------------------
graph_id
mediation      197
arrowhead      188
confounding    106
diamond        105
IV             102
chain           

#### 2. Add columns for each model to dataframe for storing results

In [13]:
df_cladder = df_sampled.copy()

In [14]:
from utils import add_columns_to_dataframe, generate_results, generate_results_per_rung


model_names = ['gpt-4o-zeroshot', 'gpt-4o-ccot']
df_cladder = add_columns_to_dataframe(df_cladder, model_names)

#### 3. Run models

In [15]:
from openai import OpenAI
from constants import DEEPINFRA_API_KEY, OPENAI_API_KEY
from model_inference import initialize_openai_client, run_model_on_cladder

client = initialize_openai_client(api_key=OPENAI_API_KEY, base_url='https://api.openai.com/v1/')


In [16]:
run_model_on_cladder(df=df_cladder, 
                     output_column='gpt-4o-zeroshot', 
                     model='gpt-4o', 
                     method_name='zero_shot', 
                     info_column='info', 
                     question_column='question', 
                     temperature=1.0, 
                     overwrite=True, 
                     min_range=0, 
                     max_range=1000, 
                     client=client)

(LOG) Prompt Question:  The overall probability of manager signing the termination letter is 39%. For managers who don't sign termination letters, the probability of employee being fired is 22%. For managers who sign termination letters, the probability of employee being fired is 60%. Is employee being fired less likely than employee not being fired overall? Let's think step by step. Answer with 'yes' or 'no' at the end.
(LOG) Correct answer:  yes
(LOG) Prompt Answer:  To determine whether the employee being fired is less likely than not being fired overall, we need to consider both scenarios: when a manager signs the termination letter and when they don't. Then we should calculate the combined probability of the employee being fired.

Let's break it down step by step:

1. **Probability of a manager signing the termination letter:**  
   \( P(\text{Sign}) = 0.39 \)

2. **Probability of a manager not signing the termination letter:**  
   \( P(\text{Not Sign}) = 1 - P(\text{Sign}) = 0.6

In [17]:
run_model_on_cladder(df=df_cladder, 
                     output_column='gpt-4o-ccot', 
                     model='gpt-4o', 
                     method_name='causal_chain_of_thought', 
                     info_column='info', 
                     question_column='question', 
                     temperature=1.0, 
                     overwrite=True, 
                     min_range=0, 
                     max_range=1000, 
                     client=client)

(LOG) Prompt Question:  The overall probability of manager signing the termination letter is 39%. For managers who don't sign termination letters, the probability of employee being fired is 22%. For managers who sign termination letters, the probability of employee being fired is 60%. Is employee being fired less likely than employee not being fired overall?
Guidance: Address the question by following the steps below:
Step 1) Extract the causal graph: Identify the causal graph that depicts the relationships in the scenario.
The diagram should simply consist of edges denoted in "var1 -> var2" format, separated by commas.
Step 2) Determine the query type: Identify the type of query implied by the main question. Choices
include "marginal probability", "conditional probability", "explaining away effect", "backdoor adjustment set", "average treatment effect", "collider bias", "normal counterfactual question", "average
treatment effect on treated", "natural direct effect" or "natural indirec

In [18]:
import datetime

# Save pickle
df_cladder.to_pickle('../data/log/cladder-openai-zeroshot-ccot' + '-' + datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") + '.pkl')

# Read pickle
#unpickled_df = pd.read_pickle('./cladder.pkl')
#df_cladder = unpickled_df

In [19]:
generate_results(df_cladder, 'answer', model_names)

Accuracy for gpt-4o-zeroshot : 85.30%
Accuracy for gpt-4o-ccot : 81.10%


In [None]:
generate_results_per_rung(df_cladder, 'answer', model_names)

### 4. Run the experiments with the perturbed datasets

In [20]:
import pickle
# Read pickle

df_cladder_nonsensical = pd.read_pickle('../data/cladder/nonsensical-data.pkl')
df_cladder_anticommonsensical = pd.read_pickle('../data/cladder/anticommonsensical-data.pkl')

In [21]:
df_cladder_nonsensical = add_columns_to_dataframe(df_cladder_nonsensical, model_names)
df_cladder_anticommonsensical = add_columns_to_dataframe(df_cladder_anticommonsensical, model_names)

In [None]:
run_model_on_cladder(df=df_cladder_nonsensical, 
                     output_column='gpt-4o-zeroshot', 
                     model='gpt-4o', 
                     method_name='zero_shot', 
                     info_column='nonsensical_info', 
                     question_column='nonsensical_question', 
                     temperature=1.0, 
                     overwrite=True, 
                     min_range=0, 
                     max_range=1000, 
                     client=client)

(LOG) Prompt Question:  the overall probability of asdf signing the termination letter is 39%. for asdf who don't sign termination letters, the probability of ghjk being fired is 22%. for asdf who sign termination letters, the probability of ghjk being fired is 60%. is ghjk being fired less likely than ghjk not being fired overall? Let's think step by step. Answer with 'yes' or 'no' at the end.
(LOG) Correct answer:  yes
(LOG) Prompt Answer:  To determine whether the probability of ghjk being fired is less than ghjk not being fired overall, we can break down the problem and calculate the probabilities step by step.

1. **Probability of asdf signing the termination letter (S):** 39% or 0.39.

2. **Probability of asdf not signing the termination letter (Not S):** 1 - 0.39 = 0.61.

3. **Probability of ghjk being fired given that asdf signs the termination letter (P(Fired | S)):** 60% or 0.60.

4. **Probability of ghjk being fired given that asdf does not sign the termination letter (P(Fir

In [None]:
run_model_on_cladder(df=df_cladder_nonsensical, 
                     output_column='gpt-4o-ccot', 
                     model='gpt-4o', 
                     method_name='causal_chain_of_thought', 
                     info_column='nonsensical_info', 
                     question_column='nonsensical_question', 
                     temperature=1.0, 
                     overwrite=True, 
                     min_range=0, 
                     max_range=1000, 
                     client=client)

In [None]:
generate_results(df_cladder_nonsensical, 'answer', model_names)
generate_results_per_rung(df_cladder_nonsensical, 'answer', model_names)

In [45]:
import datetime

df_cladder_nonsensical.to_pickle('../data/log/cladder-openai-o3-mini-nonsensical' + '-' + datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") + '.pkl')
#df_cladder_nonsensical.to_pickle('../data/log/cladder-openllms-nonsensical' + '-' + datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") + '.pkl')

In [None]:
run_model_on_cladder(df=df_cladder_anticommonsensical, 
                     output_column='o3-mini', 
                     model='o3-mini', 
                     method_name='input_output', 
                     info_column='anticommonsensical_info', 
                     question_column='anticommonsensical_question', 
                     temperature=1.0, 
                     overwrite=True, 
                     min_range=0, 
                     max_range=1000, 
                     client=client)

In [None]:
generate_results(df_cladder_nonsensical, 'answer', model_names)
generate_results_per_rung(df_cladder_nonsensical, 'answer', model_names)

In [46]:
df_cladder_anticommonsensical.to_pickle('../data/log/cladder-openai-o3-mini-anticommonsensical' + '-' + datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") + '.pkl')
