# Load description for each variable in each pair

This notebook recreates and extends the analysis from <https://github.com/amit-sharma/chatgpt-causality-pairs>.
It focuses on the analysis of the Tübingen dataset from <https://webdav.tuebingen.mpg.de/cause-effect/>.

In [None]:
# Mount Google Drive at /content/drive; later cells read files from
# /content/drive/MyDrive/.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
pip install '/content/drive/MyDrive/pywhy-llm'

In [None]:
pip install guidance

In [None]:
pip install python-dotenv

In [31]:
from dotenv import load_dotenv
from typing import Dict, List, Tuple
import guidance
import os

# Read settings (such as OPENAI_API_KEY) from a .env file into os.environ.
load_dotenv()

# Fall back to a blank placeholder only when no key is set yet. Assigning ''
# unconditionally would erase a key that the environment or the .env file
# has just provided.
os.environ.setdefault("OPENAI_API_KEY", '')

In [None]:
from pywhyllm.suggesters.tuebingen_model_suggester import TuebingenModelSuggester, Strategy
# Suggester used by the query loop further down (modeler.suggest_relationship).
# 'gpt-4' is presumably the name of the LLM to query — confirm against pywhyllm.
modeler = TuebingenModelSuggester('gpt-4')

In [28]:
import pandas as pd

In [29]:
# Table of variable pairs, read from the pywhy-llm copy on the mounted Drive.
# The query loop below reads the columns var1, var2, var1_desc and var2_desc.
df = pd.read_csv('/content/drive/MyDrive/pywhy-llm/pywhyllm/tuebingen_pairs.csv')

In [None]:
# Show the loaded table as the cell output.
df

# Get relationship of each variable pair

In [15]:
# Raw LLM answers. Keys are (pair id, temperature, run number) tuples; each
# value is a dict holding the 'llm_ab' and 'llm_ba' answers for that run.
llm_output: Dict[tuple, dict] = {}

####  Variables + Straight Strategy

In [None]:
# Ask the LLM about every variable pair in both orderings, several times per
# pair, and store each run under the key (pair_number, temperature, n).
#
# `temperature` and `n` must be defined before they are used in the key. The
# values chosen here (0.3, runs 1..5) are the ones the averaging cells look up.
# NOTE(review): `temperature` is only recorded in the key; it is not passed to
# the suggester here — confirm the model is actually sampled at this value.
# NOTE(review): `pair_number` is the DataFrame index label, while later cells
# look up ids such as 'pair0087' — confirm the index holds those ids.
temperature = 0.3
n_runs = 5

for pair_number, values in df.iterrows():
    for n in range(1, n_runs + 1):
        llm_output[(pair_number, temperature, n)] = {
            # var1 passed as variable_a, var2 as variable_b.
            'llm_ab': modeler.suggest_relationship(
                variable_a=values['var1'],
                variable_b=values['var2'],
                description_a=values['var1_desc'],
                description_b=values['var2_desc'],
                strategy=Strategy.Straight,
            ),
            # Same query with the two variables swapped.
            'llm_ba': modeler.suggest_relationship(
                variable_a=values['var2'],
                variable_b=values['var1'],
                description_a=values['var2_desc'],
                description_b=values['var1_desc'],
                strategy=Strategy.Straight,
            ),
        }

##### Average LLM Output

In [None]:
# Average the five stored answers (runs 1..5, temperature 0.3) for pair0087,
# printing each run's 'llm_ab' and 'llm_ba' answer along the way.
total_ab = total_ba = 0

for run in range(1, 6):
    answers = llm_output[('pair0087', 0.3, run)]

    total_ab += answers['llm_ab']
    total_ba += answers['llm_ba']

    print(answers['llm_ab'])
    print(answers['llm_ba'])

av_ab = total_ab / 5.0
av_ba = total_ba / 5.0

print(av_ab)
print(av_ba)

In [None]:
# Print the 'llm_ab' answer of every run (1..5, temperature 0.3) for each
# saved pair. The loop variable is `pair_id` so the builtin `id` stays usable.
for pair_id in saved_pairs_info:
    for run in range(1, 6):
        print(llm_output[(pair_id, 0.3, run)]['llm_ab'])

In [None]:
# Per-pair accuracy of the stored LLM answers (temperature 0.3, runs 1..5),
# measured against each pair's ground-truth label. One row per pair is kept
# in `results`, keyed by pair id, and printed as it is computed.
results: Dict = {}

n_runs = 5

for pair_id in saved_pairs_info:
    pair_info = saved_pairs_info[pair_id]

    # Compare the stripped label so that "R" and " R" (the padded form this
    # cell was written against) are scored the same way.
    truth = str(pair_info['ground_truth']).strip()
    a_causes_b = truth == "R"
    b_causes_a = truth == "L"

    correct_ab = 0
    correct_ba = 0

    for run in range(1, n_runs + 1):
        answers = llm_output[(pair_id, 0.3, run)]
        answer_ab = answers['llm_ab']
        answer_ba = answers['llm_ba']

        # 'llm_ab' is right when it is 1 for an "R" pair or 0 for an "L" pair.
        if (answer_ab == 1 and a_causes_b) or (answer_ab == 0 and b_causes_a):
            correct_ab += 1

        # 'llm_ba' is the swapped query, so the expected answers swap too.
        if (answer_ba == 1 and b_causes_a) or (answer_ba == 0 and a_causes_b):
            correct_ba += 1

    results[pair_id] = {
        'PairID': pair_id,
        # Fraction of the runs whose answer agreed with the ground truth.
        'CorrectACauseB': correct_ab / n_runs,
        'CorrectBCauseA': correct_ba / n_runs,
        'VarA': pair_info['var1'],
        'VarB': pair_info['var2'],
        # Stored unmodified, exactly as found in saved_pairs_info.
        'GroundTruth': pair_info['ground_truth'],
    }
    print(results[pair_id])




#### Save to CSV file

In [None]:
import csv
import copy

# Output path for the per-pair accuracy table.
csv_file = "gpt-4_results_straight_prompt_w_descriptions.csv"

# Column order of the output file; every row dict in `results` uses these keys.
header = ["CorrectACauseB", "CorrectBCauseA", "PairID", "VarA", "VarB", "GroundTruth"]

# Write the header line followed by one row per entry in `results`.
with open(csv_file, mode="w", newline="", encoding="utf-8") as out_file:
    writer = csv.DictWriter(out_file, fieldnames=header)
    writer.writeheader()
    writer.writerows(results.values())

print(f"CSV file '{csv_file}' has been created.")
