# Set Up

In [None]:
from dotenv import load_dotenv
import guidance
import os

# Load a local .env file (if any) before the lookups below read the environment.
load_dotenv()

# Settings for the MSAL-backed endpoint; each one is None when the variable
# is not set.
endpoint = os.environ.get("endpoint")
client_id = os.environ.get("client_id")
scopes = os.environ.get("scopes")
authority = os.environ.get("authority")

In [None]:
from typing import Dict, Tuple, List
import csv

In [None]:
# llm = guidance.llms.MSALOpenAI(
#     "dev-chat-completion-gpt-35-turbo", #"text-davinci-003",
#     endpoint=endpoint, 
#     client_id=client_id,
#     scopes=[scopes],
#     authority=authority
#     )

In [None]:
import guidance
from typing import Set, Dict, Tuple

# SECURITY: credentials must not be committed with the notebook, so they are
# read from the environment (the setup cell calls load_dotenv(), so a local
# .env file works too). The key that used to be hard-coded here should be
# treated as leaked and revoked.
api_key = os.getenv("OPENAI_API_KEY")
org = os.getenv("OPENAI_ORGANIZATION")

gpt4 = guidance.llms.OpenAI(api_key=api_key, model="gpt-4") #, organization=org)

### Modeler

In [None]:
from suggesters import ModelSuggester, ModelType, RelationshipStrategy
# Module-level suggester instance. ask_openai_and_save_to_csv reads this
# global `m` directly instead of receiving it as a parameter.
m = ModelSuggester()
# Notebook-wide defaults; both names are reassigned by later cells.
model_type = ModelType.Chat
relationship_strategy = RelationshipStrategy.Parent

# NOTE(review): called with no arguments, while every other call in this
# notebook passes factors, treatment, outcome and llm -- confirm this is not
# a leftover line that simply raises.
m.suggest_relationships()

In [None]:
from suggesters import ModelSuggester
import networkx as nx
import matplotlib.pyplot as plt

ms = ModelSuggester()

# NOTE(review): treatment, outcome and variables_list are not defined in this
# cell; they must already exist in the notebook session (variables_list is not
# assigned anywhere in the visible notebook) -- confirm.
# NOTE(review): elsewhere the return value of suggest_relationships is
# unpacked as (edges, edge_counter); confirm it is a plain edge collection for
# this call before feeding it to the graph.
edges = ms.suggest_relationships(
    treatment=treatment,
    outcome=outcome,
    factors_list=variables_list,
    llm=gpt4)

G = nx.Graph()
# networkx graphs have no `add` method; an iterable of (u, v) pairs is
# inserted with add_edges_from.
G.add_edges_from(edges)
nx.draw(
    G,
    with_labels=True,
    font_weight='bold')
plt.show()

### Helper Method: Add to CSV

In [None]:
import csv

def add_to_csv(file_path, row):
    """Append ``row`` as one CSV record to ``file_path``.

    The file is opened in append mode, so it is created when missing and
    existing content is kept.
    """
    with open(file_path, 'a', newline='') as csv_handle:
        csv.writer(csv_handle).writerow(row)

### Helper Method: Ask LLM and Save to File

In [None]:
def ask_openai_and_save_to_csv(factors_list: List[str], edges: List[Tuple[str, str]], analysis_context, experts, treatment, outcome, llm: guidance.llms, file_name:str, model_type, relationship_strategy, temperatures=(0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9)):
    """Query the suggester at each temperature and log per-pair accuracy rows.

    For every temperature the module-level suggester ``m`` is asked for
    relationships. Each ordered pair of distinct factors is then classified
    against ``edges`` and one row is appended to ``file_name`` through
    ``add_to_csv``:
    ``[temperature, var_a, var_b, correct, llm_sug, truth, accuracy]``.

    Args:
        factors_list: Variable names; every ordered pair (a, b) with a != b
            is evaluated.
        edges: Ground-truth ordered pairs.
        analysis_context, experts, treatment, outcome, model_type,
        relationship_strategy, llm: Forwarded unchanged to
            ``m.suggest_relationships``.
        file_name: CSV file the rows are appended to.
        temperatures: Temperatures to sweep. The default is an immutable
            tuple so the default object cannot be mutated between calls.

    Returns:
        None. The only output is the rows appended to ``file_name``.
    """
    for temperature in temperatures:
        # The second element of the result is not needed here.
        sug_edges, _ = m.suggest_relationships(
            analysis_context=analysis_context,
            factors_list=factors_list,
            experts=experts,
            treatment=treatment,
            outcome=outcome,
            model_type=model_type,
            relationship_strategy=relationship_strategy,
            llm=llm,
            temperature=temperature
        )

        for var_a in factors_list:
            for var_b in factors_list:
                if var_a == var_b:
                    continue

                pair = (var_a, var_b)
                suggested = pair in sug_edges
                actual = pair in edges

                # Confusion-matrix label for this ordered pair.
                if suggested:
                    accuracy = "TP" if actual else "FP"
                else:
                    accuracy = "FN" if actual else "TN"

                # The flags are written as "1"/"0" strings, as before.
                correct = "1" if suggested == actual else "0"
                llm_sug = "1" if suggested else "0"
                truth = "1" if actual else "0"

                new_row = [temperature, var_a, var_b, correct, llm_sug, truth, accuracy]
                add_to_csv(file_name, new_row)



# Tuebingen Dataset

### Read from a csv file helper method

In [None]:
def read_csv_file(file_path):
    """Return every record of the CSV file at ``file_path`` as a list of rows.

    Each row is the list of string fields produced by ``csv.reader``; no row
    is treated as a header.
    """
    with open(file_path, 'r', newline='') as source:
        return list(csv.reader(source))

### Read from a text file helper method

In [None]:
def read_context_to_dictionary(pairs_dict: Dict[str, Dict[str, str]]):
    """Attach the text of each pair's ``_des.txt`` file under key "context".

    For every key of ``pairs_dict`` the file
    ``notebooks/tuebingen_causality_pairs/pairs/<key>_des.txt`` is read and
    its full content stored in that entry. A missing file prints a warning,
    and any other failure prints an error; in both cases the entry is left
    without a "context" key. The same (mutated) dictionary is returned.
    """
    for pair_id, entry in pairs_dict.items():
        description_path = "".join(
            ('notebooks/tuebingen_causality_pairs/pairs/', pair_id, '_des.txt')
        )
        try:
            with open(description_path, 'r') as handle:
                entry["context"] = handle.read()
        except FileNotFoundError:
            print(f"Warning: File '{description_path}' not found. Skipping...")
        except Exception as err:
            print(f"Error occurred while processing file '{description_path}': {str(err)}")

    return pairs_dict

### Load data

In [None]:
file_path = './notebooks/tuebingen_causality_pairs/tuebingen_pairs.csv'
rows = read_csv_file(file_path)

pairs_dict : Dict[str, Dict[str, str]] = {}  

# Index the rows by pair id (column 0); columns 1-3 hold the two variable
# names and the ground-truth label.
for values in rows: 
    if len(values) < 4:
        # csv.reader yields [] for blank lines, and a truncated line would
        # raise IndexError below; skip both, but report the non-blank ones.
        if values:
            print(f"Warning: skipping malformed row {values!r}")
        continue
    pairs_dict[values[0]] = {"var1": values[1], "var2": values[2], "ground_truth": values[3]}

# Turn the ground-truth label into two binary flags (truth_ab / truth_ba).
for pair in pairs_dict:
    print(pair)
    # Cells may carry padding after the comma (e.g. " R"), so compare the
    # stripped label instead of depending on exact whitespace. The stored
    # "ground_truth" value itself is left untouched.
    direction = pairs_dict[pair]["ground_truth"].strip()
    if direction == "R":
        print(pairs_dict[pair]["ground_truth"])
        pairs_dict[pair]["truth_ab"] = 1
        pairs_dict[pair]["truth_ba"] = 0
    elif direction == "L":
        print(pairs_dict[pair]["ground_truth"])
        pairs_dict[pair]["truth_ab"] = 0
        pairs_dict[pair]["truth_ba"] = 1 

read_context_to_dictionary(pairs_dict)

for key, value in pairs_dict.items():
    print(key, value)

In [None]:
# Results file for the Tuebingen experiments; the row appended here is its
# column header.
# NOTE(review): add_to_csv opens the file in append mode, so re-running this
# cell adds another header row.
csv_file_path = "tuebingen_davinci3.csv"
new_row = ["temperature", "use_domain", "use_context", "pair_id", "var1", "var2", "llm_ab", "llm_ba", 'correct_ab_sug', 'correct_ba_sug']
add_to_csv(csv_file_path, new_row)

# Sea Ice Dataset

In [None]:
# Names of the twelve variables in the sea-ice study; passed as factors_list
# to the experiment runs below.
sea_ice_variables = [
    "geopotential_heights", 
    "relative_humidity", 
    "sea_level_pressure",  
    "zonal_wind_at_10_meters", 
    "meridional_wind_at_10_meters", 
    "sensible_plus_latent_heat_flux", 
    "total_precipitation", 
    "total_cloud_cover", 
    "total_cloud_water_path",  
    "surface_net_shortwave_flux", 
    "surface_net_longwave_flux", 
    "northern_hemisphere_sea_ice_extent",
]

# Treatment/outcome pair forwarded to the suggester in every run.
treatment = "surface_net_longwave_flux"
outcome = "northern_hemisphere_sea_ice_extent"

# ground truth confounders to the relationship between surface_net_longwave_flux and northern_hemisphere_sea_ice_extent
sea_ice_confounders = ["total_precipitation"]
   
# Ground-truth ordered pairs that the LLM suggestions are scored against.
# Presumably each tuple is (cause, effect) -- the first entry matches
# (treatment, outcome) -- TODO confirm. Some variable pairs appear in both
# directions. Entries are grouped by their first element.
sea_ice_relationships: List[Tuple[str, str]] = [
    ("surface_net_longwave_flux", "northern_hemisphere_sea_ice_extent"), 
    
    ("geopotential_heights", "surface_net_longwave_flux"), 
    ("geopotential_heights", "relative_humidity"), 
    ("geopotential_heights", "sea_level_pressure"), 

    ("relative_humidity", "total_cloud_cover"), 
    ("relative_humidity", "total_cloud_water_path"),
    ("relative_humidity", "total_precipitation"), 
    ("relative_humidity", "surface_net_longwave_flux"),

    ("sea_level_pressure", "relative_humidity"), 
    ("sea_level_pressure", "geopotential_heights"), 
    ("sea_level_pressure", "zonal_wind_at_10_meters"), 
    ("sea_level_pressure", "northern_hemisphere_sea_ice_extent"), 
    ("sea_level_pressure", "sensible_plus_latent_heat_flux"), 
    ("sea_level_pressure", "meridional_wind_at_10_meters"),

    ("zonal_wind_at_10_meters", "northern_hemisphere_sea_ice_extent"),
    ("zonal_wind_at_10_meters", "sensible_plus_latent_heat_flux"), 
    
    ("meridional_wind_at_10_meters", "northern_hemisphere_sea_ice_extent"),
    ("meridional_wind_at_10_meters", "sensible_plus_latent_heat_flux"), 
   
    ("sensible_plus_latent_heat_flux", "northern_hemisphere_sea_ice_extent"), 
    ("sensible_plus_latent_heat_flux", "sea_level_pressure"), 
    ("sensible_plus_latent_heat_flux", "zonal_wind_at_10_meters"), 
    ("sensible_plus_latent_heat_flux", "meridional_wind_at_10_meters"), 
    ("sensible_plus_latent_heat_flux", "total_precipitation"), 
    ("sensible_plus_latent_heat_flux", "total_cloud_cover"), 
    ("sensible_plus_latent_heat_flux", "total_cloud_water_path"), 
    
    ("total_precipitation", "northern_hemisphere_sea_ice_extent"),
    ("total_precipitation", "relative_humidity"),
    ("total_precipitation", "sensible_plus_latent_heat_flux"),
    ("total_precipitation", "surface_net_longwave_flux"),
    ("total_precipitation", "total_cloud_cover"),
    ("total_precipitation", "total_cloud_water_path"),
   
    ("total_cloud_water_path", "total_precipitation"), 
    ("total_cloud_water_path", "sensible_plus_latent_heat_flux"), 
    ("total_cloud_water_path", "relative_humidity"), 
    ("total_cloud_water_path", "surface_net_longwave_flux"), 
    ("total_cloud_water_path", "surface_net_shortwave_flux"), 
    
    ("total_cloud_cover", "total_precipitation"),
    ("total_cloud_cover", "sensible_plus_latent_heat_flux"),
    ("total_cloud_cover", "relative_humidity"),
    ("total_cloud_cover", "surface_net_longwave_flux"),
    ("total_cloud_cover", "surface_net_shortwave_flux"), 
    
    ("surface_net_shortwave_flux", "northern_hemisphere_sea_ice_extent"),
    
    ("northern_hemisphere_sea_ice_extent", "sea_level_pressure"),
    ("northern_hemisphere_sea_ice_extent", "zonal_wind_at_10_meters"),
    ("northern_hemisphere_sea_ice_extent", "meridional_wind_at_10_meters"),
    ("northern_hemisphere_sea_ice_extent", "sensible_plus_latent_heat_flux"),
    ("northern_hemisphere_sea_ice_extent", "surface_net_shortwave_flux"),
    ("northern_hemisphere_sea_ice_extent", "surface_net_longwave_flux"),
]

In [None]:
# One results file per Parent-strategy experiment: {domain, causality} experts
# crossed with {ycontext = with analysis context, ncontext = without}.
sea_ice_domain_experts_ycontext_file_path = "sea_ice_gpt4_askparents_domain_experts_ycontext.csv"
sea_ice_causality_experts_ycontext_file_path = "sea_ice_gpt4_askparents_causality_experts_ycontext.csv"
sea_ice_domain_experts_ncontext_file_path = "sea_ice_gpt4_askparents_domain_experts_ncontext.csv"
sea_ice_causality_experts_ncontext_file_path = "sea_ice_gpt4_askparents_causality_experts_ncontext.csv"

model_type = ModelType.Chat

# Passed as analysis_context in the "with context" runs.
analysis_context = "about the atmospheric, climate, and physical factors that influence and cause changes to the northern sea ice extent"
# Generic stand-in passed as analysis_context in the "without context" runs.
no_context = "causal mechanisms"

# Generic causality personas, used in place of domain experts.
causal_experts = ["answering questions about causality, you are a helpful causality assistant ", "causality, you are an intelligent AI with expertise in causality"]

In [None]:
# expertises = m.suggest_domain_expertises(analysis_context=analysis_context, factors=sea_ice_variables, n_experts=5,  model_type=ModelType.Chat, llm=gpt4)

# Hand-picked domain experts, passed as `experts` to the suggester runs. The
# spelling matters because these strings are handed to the LLM-backed
# suggester as-is.
expertises = ["climate scientist", "atmospheric scientist"]

### Suggest Parents

#### (1) Using domain experts and context

In [None]:
# Column header for the results file; same layout as the rows written by
# ask_openai_and_save_to_csv. Appended, so re-running adds another header row.
new_row = ["temperature", "var1", "var2", "correct_sug", "llm_sug", "truth", "accuracy"]
add_to_csv(sea_ice_domain_experts_ycontext_file_path, new_row)

In [None]:
# Experiment (1): domain experts, with analysis context, Parent strategy.
ask_openai_and_save_to_csv(
    factors_list=sea_ice_variables,
    edges=sea_ice_relationships,
    analysis_context=analysis_context,
    experts=expertises,
    treatment=treatment,
    outcome=outcome,
    llm=gpt4,
    file_name=sea_ice_domain_experts_ycontext_file_path,
    model_type=ModelType.Chat,
    relationship_strategy=RelationshipStrategy.Parent,
    temperatures=[0.3, 0.5, 0.7],
)

#### (2) Using causal experts and context

In [None]:
# Column header for the results file; same layout as the rows written by
# ask_openai_and_save_to_csv. Appended, so re-running adds another header row.
new_row = ["temperature", "var1", "var2", "correct_sug", "llm_sug", "truth", "accuracy"]
add_to_csv(sea_ice_causality_experts_ycontext_file_path, new_row)

In [None]:
# Experiment (2): causality personas, with analysis context, Parent strategy.
ask_openai_and_save_to_csv(
    factors_list=sea_ice_variables,
    edges=sea_ice_relationships,
    analysis_context=analysis_context,
    experts=causal_experts,
    treatment=treatment,
    outcome=outcome,
    llm=gpt4,
    file_name=sea_ice_causality_experts_ycontext_file_path,
    model_type=ModelType.Chat,
    relationship_strategy=RelationshipStrategy.Parent,
    temperatures=[0.3, 0.5, 0.7],
)

#### (3) Using domain experts without context

In [None]:
# Column header for the results file; same layout as the rows written by
# ask_openai_and_save_to_csv. Appended, so re-running adds another header row.
new_row = ["temperature", "var1", "var2", "correct_sug", "llm_sug", "truth", "accuracy"]
add_to_csv(sea_ice_domain_experts_ncontext_file_path, new_row)

In [None]:
# Experiment (3): domain experts, generic context, Parent strategy.
# Only temperature 0.7 is run for this configuration.
ask_openai_and_save_to_csv(
    factors_list=sea_ice_variables,
    edges=sea_ice_relationships,
    analysis_context=no_context,
    experts=expertises,
    treatment=treatment,
    outcome=outcome,
    llm=gpt4,
    file_name=sea_ice_domain_experts_ncontext_file_path,
    model_type=ModelType.Chat,
    relationship_strategy=RelationshipStrategy.Parent,
    temperatures=[0.7],
)

#### (4) Using causal experts without context

In [None]:
# Column header for the results file; same layout as the rows written by
# ask_openai_and_save_to_csv. Appended, so re-running adds another header row.
new_row = ["temperature", "var1", "var2", "correct_sug", "llm_sug", "truth", "accuracy"]
add_to_csv(sea_ice_causality_experts_ncontext_file_path, new_row)

In [None]:
# Experiment (4): causality personas, generic context, Parent strategy.
# Only temperature 0.7 is run for this configuration.
ask_openai_and_save_to_csv(
    factors_list=sea_ice_variables,
    edges=sea_ice_relationships,
    analysis_context=no_context,
    experts=causal_experts,
    treatment=treatment,
    outcome=outcome,
    llm=gpt4,
    file_name=sea_ice_causality_experts_ncontext_file_path,
    model_type=ModelType.Chat,
    relationship_strategy=RelationshipStrategy.Parent,
    temperatures=[0.7],
)

### Suggest Pairwise

In [None]:
# One results file per Pairwise-strategy experiment: {domain, causality}
# experts crossed with {ycontext = with analysis context, ncontext = without}.
sea_ice_pairwise_domain_experts_ycontext_file_path = "sea_ice_gpt4_pairwise_domain_experts_ycontext.csv"
sea_ice_pairwise_causality_experts_ycontext_file_path = "sea_ice_gpt4_pairwise_causality_experts_ycontext.csv"
sea_ice_pairwise_domain_experts_ncontext_file_path = "sea_ice_gpt4_pairwise_domain_experts_ncontext.csv"
sea_ice_pairwise_causality_experts_ncontext_file_path = "sea_ice_gpt4_pairwise_causality_experts_ncontext.csv"

# The pairwise calls below pass ModelType / RelationshipStrategy explicitly,
# so these two names are not read by them.
model_type = ModelType.Chat
relationship_strategy = RelationshipStrategy.Pairwise

# Prompt inputs, re-declared here -- presumably so this section can be run
# without the Parent-strategy cells.
analysis_context = "about the atmospheric, climate, and physical factors that influence and cause changes to the northern sea ice extent"
no_context = "causal mechanisms"

causal_experts = ["answering questions about causality, you are a helpful causality assistant ", "causality, you are an intelligent AI with expertise in causality"]

expertises = ["climate scientist", "atmospheric scientist"]

In [None]:
# Append the shared column header to each of the four pairwise results files.
new_row = ["temperature", "var1", "var2", "correct_sug", "llm_sug", "truth", "accuracy"]
for header_target in (
    sea_ice_pairwise_domain_experts_ycontext_file_path,
    sea_ice_pairwise_causality_experts_ycontext_file_path,
    sea_ice_pairwise_domain_experts_ncontext_file_path,
    sea_ice_pairwise_causality_experts_ncontext_file_path,
):
    add_to_csv(header_target, new_row)

In [None]:
# Pairwise strategy, domain experts, with analysis context.
# One call per temperature, each with a single-element temperature list.
for sweep_temperature in (0.3, 0.5, 0.7):
    ask_openai_and_save_to_csv(
        factors_list=sea_ice_variables,
        edges=sea_ice_relationships,
        experts=expertises,
        analysis_context=analysis_context,
        treatment=treatment,
        outcome=outcome,
        llm=gpt4,
        file_name=sea_ice_pairwise_domain_experts_ycontext_file_path,
        model_type=ModelType.Chat,
        relationship_strategy=RelationshipStrategy.Pairwise,
        temperatures=[sweep_temperature],
    )

In [None]:
# Pairwise strategy, domain experts, generic context. Only temperature 0.3
# is run here; the 0.5 and 0.7 runs that follow are commented out.
ask_openai_and_save_to_csv(
    factors_list=sea_ice_variables, 
    edges=sea_ice_relationships, 
    experts=expertises,                # yes expert
    analysis_context=no_context,       # no context
    treatment=treatment, 
    outcome=outcome, 
    llm=gpt4, 
    file_name=sea_ice_pairwise_domain_experts_ncontext_file_path, 
    model_type=ModelType.Chat, 
    relationship_strategy=RelationshipStrategy.Pairwise, 
    temperatures=[0.3])

# ask_openai_and_save_to_csv(
#     factors_list=sea_ice_variables, 
#     edges=sea_ice_relationships, 
#     experts=expertises,                # yes expert
#     analysis_context=no_context,       # no context
#     treatment=treatment, 
#     outcome=outcome, 
#     llm=gpt4, 
#     file_name=sea_ice_pairwise_domain_experts_ncontext_file_path, 
#     model_type=ModelType.Chat, 
#     relationship_strategy=RelationshipStrategy.Pairwise, 
#     temperatures=[0.5])

# ask_openai_and_save_to_csv(
#     factors_list=sea_ice_variables, 
#     edges=sea_ice_relationships, 
#     experts=expertises,                # yes expert
#     analysis_context=no_context,       # no context
#     treatment=treatment, 
#     outcome=outcome, 
#     llm=gpt4, 
#     file_name=sea_ice_pairwise_domain_experts_ncontext_file_path, 
#     model_type=ModelType.Chat, 
#     relationship_strategy=RelationshipStrategy.Pairwise, 
#     temperatures=[0.7])

In [None]:
# Pairwise strategy, generic causality personas (no domain expert), with
# analysis context. One call per temperature, each with a single-element
# temperature list.
for sweep_temperature in (0.3, 0.5, 0.7):
    ask_openai_and_save_to_csv(
        factors_list=sea_ice_variables,
        edges=sea_ice_relationships,
        experts=causal_experts,
        analysis_context=analysis_context,
        treatment=treatment,
        outcome=outcome,
        llm=gpt4,
        file_name=sea_ice_pairwise_causality_experts_ycontext_file_path,
        model_type=ModelType.Chat,
        relationship_strategy=RelationshipStrategy.Pairwise,
        temperatures=[sweep_temperature],
    )

In [None]:
# Pairwise strategy, generic causality personas (no domain expert), generic
# context. One call per temperature, each with a single-element temperature
# list.
for sweep_temperature in (0.3, 0.5, 0.7):
    ask_openai_and_save_to_csv(
        factors_list=sea_ice_variables,
        edges=sea_ice_relationships,
        experts=causal_experts,
        analysis_context=no_context,
        treatment=treatment,
        outcome=outcome,
        llm=gpt4,
        file_name=sea_ice_pairwise_causality_experts_ncontext_file_path,
        model_type=ModelType.Chat,
        relationship_strategy=RelationshipStrategy.Pairwise,
        temperatures=[sweep_temperature],
    )

### Counting

In [None]:
import csv

# The four Parent-strategy ("askparents") results files, under new names.
sea_ice_parents_domain_experts_ycontext_file_path = "sea_ice_gpt4_askparents_domain_experts_ycontext.csv"
sea_ice_parents_causality_experts_ycontext_file_path = "sea_ice_gpt4_askparents_causality_experts_ycontext.csv"
sea_ice_parents_domain_experts_ncontext_file_path = "sea_ice_gpt4_askparents_domain_experts_ncontext.csv"
sea_ice_parents_causality_experts_ncontext_file_path = "sea_ice_gpt4_askparents_causality_experts_ncontext.csv"


# NOTE(review): csv_file_path and the four dictionaries below are not read by
# any later line in the visible part of this notebook -- the counting cells
# pass file names as string literals. Confirm whether they are still needed.
csv_file_path = sea_ice_parents_domain_experts_ycontext_file_path
domain_yes_context = {}
domain_no_context = {}
causal_yes_context = {}
causal_no_context = {}

def csv_to_dictionary(file_path):
    """Load a results CSV into a dict keyed by its first three columns.

    The first row is taken as the header. Every following row becomes
    ``{tuple(row[:3]): {header[i]: row[i] for columns 3 onward}}``.
    Rows that share the same first three values overwrite each other, so
    the last one wins.

    Blank rows and repeats of the header row are skipped; the header cells
    in this notebook append a header on every run, so a repeat can sit in
    the middle of a file. An empty file yields an empty dict.

    Args:
        file_path: Path of the CSV file to read.

    Returns:
        dict mapping 3-tuples of strings to ``{column_name: value}`` dicts.

    Raises:
        IndexError: If a data row has fewer columns than the header.
    """
    data_dict = {}
    # newline='' is the mode the csv module documents for files handed to
    # csv.reader.
    with open(file_path, 'r', newline='') as csv_file:
        csv_reader = csv.reader(csv_file)
        header = next(csv_reader, None)  # None when the file is empty
        if header is None:
            return data_dict

        for row in csv_reader:
            if not row or row == header:
                continue
            key = tuple(row[:3])
            data_dict[key] = {header[i]: row[i] for i in range(3, len(header))}
    return data_dict


def count_each_accuracy(data_dict):
    """Count TP/TN/FP/FN outcomes per temperature.

    Args:
        data_dict: Mapping whose keys are tuples with the temperature (as
            written in the CSV, e.g. "0.3") in position 0, and whose values
            are dicts with "llm_sug" and "truth" entries of "0" or "1".

    Returns:
        dict mapping "<temperature> <label>" to a count. Counters for 0.3,
        0.5 and 0.7 are always present, even when zero. Any other
        temperature found in the data gets its four counters on first
        sight, so it no longer raises KeyError. Entries whose
        llm_sug/truth values are not "0"/"1" are ignored.
    """
    labels = ("TP", "TN", "FP", "FN")
    # (llm_sug, truth) -> confusion-matrix label
    outcome_label = {
        ("1", "1"): "TP",
        ("0", "0"): "TN",
        ("1", "0"): "FP",
        ("0", "1"): "FN",
    }

    count_dict = {}
    for i in [0.3, 0.5, 0.7]:
        for label in labels:
            count_dict[f"{i} {label}"] = 0

    for key, value in data_dict.items():
        label = outcome_label.get((value["llm_sug"], value["truth"]))
        if label is None:
            continue

        temperature = key[0]
        # Make sure all four counters exist for this temperature.
        for name in labels:
            count_dict.setdefault(f"{temperature} {name}", 0)
        count_dict[f"{temperature} {label}"] += 1

    return count_dict


In [None]:
# This cell reads the with-context (ycontext) domain-expert results, so the
# counts go under a matching name rather than `domain_no_context_count`,
# which the no-context cell assigns.
domain_yes_context_count = count_each_accuracy(csv_to_dictionary("sea_ice_gpt4_askparents_domain_experts_ycontext.csv"))
print(domain_yes_context_count)

In [None]:
# TP/TN/FP/FN counts per temperature: domain experts, no context, Parent strategy.
domain_no_context_count = count_each_accuracy(csv_to_dictionary("sea_ice_gpt4_askparents_domain_experts_ncontext.csv"))
print(domain_no_context_count)

In [None]:
# TP/TN/FP/FN counts per temperature: causality experts, with context, Parent strategy.
causal_yes_context_count = count_each_accuracy(csv_to_dictionary("sea_ice_gpt4_askparents_causality_experts_ycontext.csv"))
print(causal_yes_context_count)

In [None]:
# TP/TN/FP/FN counts per temperature: causality experts, no context, Parent strategy.
causal_no_context_count = count_each_accuracy(csv_to_dictionary("sea_ice_gpt4_askparents_causality_experts_ncontext.csv"))
print(causal_no_context_count)