# Model code for the 2nd and 3rd steps of LLM-augmented Statistical Causal Discovery (for Google Colaboratory)

### Preparation of Libraries

In [None]:
!pip install numpy==1.25.0 #For maintaining the consistency of the versions among the packages tentatively.

In [None]:
!pip install openai
!pip install lingam
!pip install factor_analyzer
!pip install igraph
!pip install pygam
!pip install causal-learn

In [None]:
import os
os.environ["OPENAI_API_KEY"]=""#API key for OpenAI is inserted here.
import numpy as np
import pandas as pd
import graphviz
import lingam
from sklearn.preprocessing import StandardScaler
from lingam.utils import print_causal_directions, print_dagc, make_dot, make_prior_knowledge
import hashlib
import matplotlib.pyplot as plt
import seaborn as sns
from causallearn.utils.GraphUtils import GraphUtils
import matplotlib.image as mpimg
import io
from scipy.stats import norm
from copy import deepcopy
from itertools import combinations

from causallearn.search.ConstraintBased.PC import pc
from causallearn.search.ScoreBased.GES import ges


# Report the versions of the key libraries so a run can be reproduced later.
print(f"NumPy ver: {np.__version__}")
print(f"Pandas ver: {pd.__version__}")
print(f"Graphviz ver: {graphviz.__version__}")
print(f"LiNGAM ver: {lingam.__version__}")

# Show arrays with 3 decimals and without scientific notation.
np.set_printoptions(suppress=True, precision=3)

# Fix NumPy's global random seed for reproducibility.
np.random.seed(203)

### importing basic information for LLM-KBCI and the results of SCD

terminological setting for LLM

In [None]:
# Example settings for health screening data.
# These three values are substituted into the prompt templates defined further down.

# blank 1: phrase completing "We want to carry out causal inference {}"
context_X = "on health screening results"

# blank 2: variable names; position k names row/column k of the result matrices
labels_X = [
    "body mass index",
    "waist circumference",
    "systolic blood pressure",
    "diastolic blood pressure",
    "hemoglobin A1c",
    "low density lipoprotein cholesterol",
    "age",
]

# blank 4: phrase completing "... using a fully standardized dataset on {}"
dataset_explanation_X = "health screening results among working-age(from 40 to 64 years old) population"

Importing the results of SCD

In [None]:
# for PC
# NOTE: every path below is an empty placeholder ('') and must be replaced with the
# real CSV path before this cell can run.
# Later cells read dag_est_pc entries equal to 1 as directed edges and entries equal
# to -1 as undirected edges.
dag_est_pc = np.loadtxt('', delimiter=',')#loading the csv file of adjacency matrix calculated with PC
prob0_pc_directed = np.loadtxt('', delimiter=',')#loading the csv file of bootstrap probability matrix for directed edges calculated with PC
prob0_pc_undirected = np.loadtxt('', delimiter=',')#loading the csv file of bootstrap probability matrix for undirected edges calculated with PC

In [None]:
# for Exact Search
# NOTE: both paths are empty placeholders ('') and must be replaced with the real
# CSV paths before this cell can run.
dag_est_es = np.loadtxt('', delimiter=',')#loading the csv file of adjacency matrix calculated with Exact Search
prob0_es = np.loadtxt('', delimiter=',') #loading the csv file of bootstrap probability matrix calculated with ExactSearch

In [None]:
#for DirectLiNGAM
# NOTE: both paths are empty placeholders ('') and must be replaced with the real
# CSV paths before this cell can run.
# The adjacency matrix holds causal coefficients; the prompt builders below read
# entry [i, j] as the effect of variable j on variable i.
lingam0_adjacency_matrix_ = np.loadtxt('', delimiter=',')#loading the csv file of adjacency matrix calculated with DirectLiNGAM with causal coefficients
prob0_lingam = np.loadtxt('', delimiter=',')#loading the csv file of bootstrap probability matrix calculated with DirectLiNGAM

## Preparing the 1st prompting (for the 2nd step)

## For LiNGAM

Preparation of functions for Pattern 1

In [None]:
def all_edges_pattern1(adjacency_matrix, labels):
    """Render every edge of the discovered graph as a block of prompt text.

    Entry ``adjacency_matrix[i, j]`` is read as the edge ``labels[j] → labels[i]``;
    every off-diagonal entry that is not equal to 0 is listed.

    Args:
        adjacency_matrix: 2-D array; its first dimension sizes the scan of both axes.
        labels: Variable names, indexed by row/column position.

    Returns:
        str: A header sentence, a ``-----`` rule, one line per edge and a closing
        ``-----`` rule, each terminated by a newline.
    """
    n_vars = adjacency_matrix.shape[0]
    edge_lines = [
        f"{labels[cause]} → {labels[effect]}\n"
        for effect in range(n_vars)
        for cause in range(n_vars)
        if cause != effect and adjacency_matrix[effect, cause] != 0
    ]
    return (
        "All of the edges suggested by the statistical causal discovery are below:\n"
        "-----\n"
        + "".join(edge_lines)
        + "-----\n"
    )

In [None]:
def create_causal_text_matrix1_pattern1(adjacency_matrix, labels):
    """Build one sentence per ordered variable pair stating whether an edge exists.

    Args:
        adjacency_matrix: 2-D array; ``[i, j] == 0`` means no edge from
            ``labels[j]`` to ``labels[i]``, anything else means an edge.
        labels: Variable names, indexed by row/column position.

    Returns:
        numpy.ndarray: Object array of the same shape as ``adjacency_matrix``.
        Off-diagonal cells hold the sentence; diagonal cells are never assigned.
    """
    n_vars = adjacency_matrix.shape[0]
    sentences = np.empty(adjacency_matrix.shape, dtype=object)

    for effect, cause in np.ndindex(n_vars, n_vars):
        if effect == cause:
            continue  # no self-relationship sentence
        article = "no" if adjacency_matrix[effect, cause] == 0 else "a"
        sentences[effect, cause] = (
            f"there may be {article} direct impact of a change in "
            f"{labels[cause]} on {labels[effect]}."
        )

    return sentences

Preparation of functions for Pattern 2

In [None]:
def all_edges_pattern2(boot_prob, labels):
    """Render every edge with a non-zero bootstrap probability as prompt text.

    Entry ``boot_prob[i, j]`` is read as the bootstrap probability of the edge
    ``labels[j] → labels[i]``; off-diagonal entries equal to 0 are omitted.

    Args:
        boot_prob: 2-D array of bootstrap probabilities.
        labels: Variable names, indexed by row/column position.

    Returns:
        str: Header, ``-----`` rule, one line per listed edge (with its
        probability), and a closing ``-----`` rule, each newline-terminated.
    """
    n_vars = boot_prob.shape[0]
    edge_lines = [
        f"{labels[cause]} → {labels[effect]} (bootstrap probability = {boot_prob[effect,cause]})\n"
        for effect in range(n_vars)
        for cause in range(n_vars)
        if cause != effect and boot_prob[effect, cause] != 0
    ]
    return (
        "All of the edges with non-zero bootstrap probabilities suggested by the statistical causal discovery are below:\n"
        "-----\n"
        + "".join(edge_lines)
        + "-----\n"
    )

In [None]:

def create_causal_text_matrix1_pattern2(boot_prob, labels):
    """Build one sentence per ordered variable pair quoting the bootstrap probability.

    Args:
        boot_prob: 2-D array; ``[i, j]`` is the bootstrap probability of the edge
            from ``labels[j]`` to ``labels[i]``. A value equal to 0 yields the
            "no direct impact" sentence.
        labels: Variable names, indexed by row/column position.

    Returns:
        numpy.ndarray: Object array of the same shape as ``boot_prob``.
        Diagonal cells are never assigned.
    """
    n_vars = boot_prob.shape[0]
    sentences = np.empty(boot_prob.shape, dtype=object)

    for effect, cause in np.ndindex(n_vars, n_vars):
        if effect == cause:
            continue
        probability = boot_prob[effect, cause]
        if probability == 0:
            sentence = f"there may be no direct impact of a change in {labels[cause]} on {labels[effect]}."
        else:
            sentence = f"there may be a direct impact of a change in {labels[cause]} on {labels[effect]} with a bootstrap probability of {probability}."
        sentences[effect, cause] = sentence

    return sentences

Preparation of functions for Pattern 3

In [None]:
def all_edges_pattern3(adjacency_matrix, labels):
    """Render every edge together with its structural coefficient as prompt text.

    Entry ``adjacency_matrix[i, j]`` is read as the coefficient of the edge
    ``labels[j] → labels[i]``; off-diagonal entries equal to 0 are omitted.

    Args:
        adjacency_matrix: 2-D array of coefficients.
        labels: Variable names, indexed by row/column position.

    Returns:
        str: Header, ``-----`` rule, one line per listed edge (with its
        coefficient), and a closing ``-----`` rule, each newline-terminated.
    """
    n_vars = adjacency_matrix.shape[0]
    edge_lines = [
        f"{labels[cause]} → {labels[effect]} (coefficient = {adjacency_matrix[effect,cause]})\n"
        for effect in range(n_vars)
        for cause in range(n_vars)
        if cause != effect and adjacency_matrix[effect, cause] != 0
    ]
    return (
        "All of the edges and their coefficients of the structural causal model suggested by the statistical causal discovery are below:\n"
        "-----\n"
        + "".join(edge_lines)
        + "-----\n"
    )

In [None]:
def create_causal_text_matrix1_pattern3(adjacency_matrix, labels):
    """Build one sentence per ordered variable pair quoting the causal coefficient.

    Args:
        adjacency_matrix: 2-D array; ``[i, j]`` is the coefficient of the edge
            from ``labels[j]`` to ``labels[i]``. A value equal to 0 yields the
            "no direct impact" sentence.
        labels: Variable names, indexed by row/column position.

    Returns:
        numpy.ndarray: Object array of the same shape as ``adjacency_matrix``.
        Diagonal cells are never assigned.
    """
    n_vars = adjacency_matrix.shape[0]
    sentences = np.empty(adjacency_matrix.shape, dtype=object)

    for effect, cause in np.ndindex(n_vars, n_vars):
        if effect == cause:
            continue
        coefficient = adjacency_matrix[effect, cause]
        if coefficient == 0:
            sentence = f"there may be no direct impact of a change in {labels[cause]} on {labels[effect]}."
        else:
            sentence = f"there may be a direct impact of a change in {labels[cause]} on {labels[effect]} with a causal coefficient of {coefficient}."
        sentences[effect, cause] = sentence

    return sentences

Preparation of functions for Pattern 4

In [None]:
def all_edges_pattern4(adjacency_matrix, boot_prob, labels):
    """Render every edge with a non-zero bootstrap probability, plus its coefficient.

    An ordered pair is listed when ``boot_prob[i, j]`` is not equal to 0, whatever
    the value of ``adjacency_matrix[i, j]``. The edge is written as
    ``labels[j] → labels[i]``.

    Args:
        adjacency_matrix: 2-D array of coefficients, indexed like ``boot_prob``.
        boot_prob: 2-D array of bootstrap probabilities; its first dimension
            sizes the scan of both axes.
        labels: Variable names, indexed by row/column position.

    Returns:
        str: Header, ``-----`` rule, one line per listed edge, and a closing
        ``-----`` rule, each newline-terminated.
    """
    n_vars = boot_prob.shape[0]
    edge_lines = [
        f"{labels[cause]} → {labels[effect]} (coefficient = {adjacency_matrix[effect, cause]}, bootstrap probability = {boot_prob[effect,cause]})\n"
        for effect in range(n_vars)
        for cause in range(n_vars)
        if cause != effect and boot_prob[effect, cause] != 0
    ]
    return (
        "All of the edges with non-zero bootstrap probabilities and their coefficients of the structural causal model suggested by the statistical causal discovery are below:\n"
        "-----\n"
        + "".join(edge_lines)
        + "-----\n"
    )

In [None]:
def create_causal_text_matrix1_pattern4(adjacency_matrix, boot_prob, labels):
    """Build one sentence per ordered pair quoting bootstrap probability and coefficient.

    Three cases per off-diagonal cell ``[i, j]``:
      * ``boot_prob`` equal to 0: "no direct impact";
      * ``boot_prob`` non-zero and coefficient equal to 0: probability is quoted,
        joined to the coefficient with "but";
      * both non-zero: probability is quoted, joined to the coefficient with "and".

    Args:
        adjacency_matrix: 2-D array of coefficients; its shape sizes the result.
        boot_prob: 2-D array of bootstrap probabilities, indexed the same way.
        labels: Variable names, indexed by row/column position.

    Returns:
        numpy.ndarray: Object array of the same shape as ``adjacency_matrix``.
        Diagonal cells are never assigned.
    """
    n_vars = adjacency_matrix.shape[0]
    sentences = np.empty(adjacency_matrix.shape, dtype=object)

    for effect, cause in np.ndindex(n_vars, n_vars):
        if effect == cause:
            continue
        probability = boot_prob[effect, cause]
        if probability == 0:
            sentences[effect, cause] = f"there may be no direct impact of a change in {labels[cause]} on {labels[effect]}."
            continue
        coefficient = adjacency_matrix[effect, cause]
        conjunction = "but" if coefficient == 0 else "and"
        sentences[effect, cause] = (
            f"there may be a direct impact of a change in {labels[cause]} on {labels[effect]} "
            f"with a bootstrap probability of {probability}, "
            f"{conjunction} the coefficient is likely to be {coefficient}."
        )

    return sentences

In [None]:
# Shared sentences for the first-step prompts (DirectLiNGAM wording).
# These module-level names are read by the prompt builders at call time, so the
# builders must be called before a later cell reassigns Q1_1 / Q1_2 / Q1_3.
template_Q1_1 = "We want to carry out causal inference {}, considering {} as variables."
template_Q1_2 = "First, we have conducted the statistical causal discovery with LiNGAM(Linear Non-Gaussian Acyclic Model) algorithm, using a fully standardized dataset on {}."

# "a, b, ..., and z" listing of all variable names.
variables_X = ', '.join(labels_X[:-1]) + ', and ' + labels_X[-1]


Q1_1 = template_Q1_1.format(context_X, variables_X)
Q1_2 = template_Q1_2.format(dataset_explanation_X)

# The pair-specific finding ("there may be ...") is appended directly after this
# string, so it must end with a space; without it the prompt reads "thatthere".
Q1_3 = "According to the results shown above, it has been determined that "

def create_1st_template_text_matrix(adjacency_matrix, labels):
    """Build the task-instruction paragraph for every ordered variable pair.

    Only the shape of ``adjacency_matrix`` is used; its values are not read.

    Args:
        adjacency_matrix: 2-D array whose shape sizes the result.
        labels: Variable names, indexed by row/column position.

    Returns:
        numpy.ndarray: Object array of the same shape as ``adjacency_matrix``.
        Cell ``[i, j]`` (``i != j``) holds a three-line instruction mentioning
        ``labels[j]`` and ``labels[i]``. Diagonal cells are never assigned.
    """
    n_vars = adjacency_matrix.shape[0]
    instructions = np.empty(adjacency_matrix.shape, dtype=object)

    opening = "Then, your task is to interpret this result from a domain knowledge perspective and determine whether this statistically suggested hypothesis is plausible in the context of the domain."
    closing = "Your response should consider the relevant factors and provide a reasoned explanation based on your understanding of the domain."

    for effect, cause in np.ndindex(n_vars, n_vars):
        if effect == cause:
            continue
        pair_line = f"Please provide an explanation that leverages your expert knowledge on the causal relationship between {labels[cause]} and {labels[effect]}, and assess the naturalness of this causal discovery result."
        instructions[effect, cause] = "\n".join((opening, pair_line, closing))

    return instructions

# Pattern 0: the prompt contains no statistical causal discovery result.
def create_1st_prompt_matrix_pattern0(adjacency_matrix, labels):
    """Build the first-step prompts that ask the LLM without showing any SCD result.

    Only the shape of ``adjacency_matrix`` is used. The module-level ``Q1_1``
    sentence is read at call time.

    Args:
        adjacency_matrix: 2-D array whose shape sizes the result.
        labels: Variable names, indexed by row/column position.

    Returns:
        numpy.ndarray: Object array; cell ``[i, j]`` (``i != j``) asks whether
        modifying ``labels[j]`` directly impacts ``labels[i]``. Diagonal cells
        are never assigned.
    """
    n_vars = adjacency_matrix.shape[0]
    prompts = np.empty(adjacency_matrix.shape, dtype=object)
    closing = "Your response should consider the relevant factors and provide a reasoned explanation based on your understanding of the domain."

    for effect, cause in np.ndindex(n_vars, n_vars):
        if effect == cause:
            continue
        question = f"If {labels[cause]} is modified, will it have a direct impact on {labels[effect]}?"
        request = f"Please provide an explanation that leverages your expert knowledge on the causal relationship between {labels[cause]} and {labels[effect]}."
        prompts[effect, cause] = Q1_1 + "\n" + question + "\n" + request + "\n" + closing

    return prompts

# Pattern 1: the prompt shows which edges exist (no probabilities, no coefficients).
def create_1st_prompt_matrix_pattern1(adjacency_matrix, labels):
    """Assemble the pattern-1 first-step prompt for every ordered variable pair.

    Each prompt is: ``Q1_1``, ``Q1_2``, the full edge list, ``Q1_3`` immediately
    followed by the pair-specific finding, then the task instruction. ``Q1_1``,
    ``Q1_2`` and ``Q1_3`` are module-level names read at call time.

    Args:
        adjacency_matrix: 2-D array; non-zero ``[i, j]`` means an edge from
            ``labels[j]`` to ``labels[i]``.
        labels: Variable names, indexed by row/column position.

    Returns:
        numpy.ndarray: Object array of prompts; diagonal cells are never assigned.
    """
    n_vars = adjacency_matrix.shape[0]
    prompts = np.empty(adjacency_matrix.shape, dtype=object)

    edge_summary = all_edges_pattern1(adjacency_matrix, labels)
    pair_findings = create_causal_text_matrix1_pattern1(adjacency_matrix, labels)
    task_texts = create_1st_template_text_matrix(adjacency_matrix, labels)

    # Everything up to the pair-specific finding is identical for all cells.
    preamble = Q1_1 + "\n" + Q1_2 + "\n" + edge_summary + "\n" + Q1_3

    for effect, cause in np.ndindex(n_vars, n_vars):
        if effect != cause:
            prompts[effect, cause] = preamble + pair_findings[effect, cause] + "\n" + task_texts[effect, cause]

    return prompts

# Pattern 2: the prompt shows edges together with their bootstrap probabilities.
def create_1st_prompt_matrix_pattern2(boot_prob, labels):
    """Assemble the pattern-2 first-step prompt for every ordered variable pair.

    The input is a bootstrap-probability matrix, so the edge list and the
    pair-specific sentences are built with the pattern-2 (bootstrap probability)
    helpers. The previous version called the pattern-3 helpers, which labelled
    the probabilities as "coefficient" / "causal coefficient" in the prompt.

    ``Q1_1``, ``Q1_2`` and ``Q1_3`` are module-level names read at call time.

    Args:
        boot_prob: 2-D array; ``[i, j]`` is the bootstrap probability of the
            edge from ``labels[j]`` to ``labels[i]``.
        labels: Variable names, indexed by row/column position.

    Returns:
        numpy.ndarray: Object array of prompts; diagonal cells are never assigned.
    """
    num_nodes = boot_prob.shape[0]
    first_prompt_matrix = np.empty(boot_prob.shape, dtype=object)

    all_edges = all_edges_pattern2(boot_prob, labels)
    causal_texts = create_causal_text_matrix1_pattern2(boot_prob, labels)
    causal_1st_template_texts = create_1st_template_text_matrix(boot_prob, labels)

    for i in range(num_nodes):
        for j in range(num_nodes):
            if j == i:
                continue
            first_prompt_matrix[i, j] = Q1_1 +"\n"+ Q1_2 +"\n"+ all_edges + "\n"+ Q1_3 + causal_texts[i, j] +"\n"+ causal_1st_template_texts[i, j]

    return first_prompt_matrix


# Pattern 3: the prompt shows edges together with their causal coefficients.
def create_1st_prompt_matrix_pattern3(adjacency_matrix, labels):
    """Assemble the pattern-3 first-step prompt for every ordered variable pair.

    The input is a coefficient matrix, so the edge list and the pair-specific
    sentences are built with the pattern-3 (coefficient) helpers. The previous
    version called the pattern-2 helpers, which labelled the coefficients as
    "bootstrap probability" in the prompt.

    ``Q1_1``, ``Q1_2`` and ``Q1_3`` are module-level names read at call time.

    Args:
        adjacency_matrix: 2-D array; ``[i, j]`` is the coefficient of the edge
            from ``labels[j]`` to ``labels[i]``.
        labels: Variable names, indexed by row/column position.

    Returns:
        numpy.ndarray: Object array of prompts; diagonal cells are never assigned.
    """
    num_nodes = adjacency_matrix.shape[0]
    first_prompt_matrix = np.empty(adjacency_matrix.shape, dtype=object)

    all_edges = all_edges_pattern3(adjacency_matrix, labels)
    causal_texts = create_causal_text_matrix1_pattern3(adjacency_matrix, labels)
    causal_1st_template_texts = create_1st_template_text_matrix(adjacency_matrix, labels)

    for i in range(num_nodes):
        for j in range(num_nodes):
            if j == i:
                continue
            first_prompt_matrix[i, j] = Q1_1 +"\n"+ Q1_2 +"\n"+ all_edges + "\n"+ Q1_3 + causal_texts[i, j] +"\n"+ causal_1st_template_texts[i, j]

    return first_prompt_matrix

# Pattern 4: the prompt shows edges with both coefficients and bootstrap probabilities.
def create_1st_prompt_matrix_pattern4(adjacency_matrix, boot_prob, labels):
    """Assemble the pattern-4 first-step prompt for every ordered variable pair.

    ``Q1_1``, ``Q1_2`` and ``Q1_3`` are module-level names read at call time.

    Args:
        adjacency_matrix: 2-D array of coefficients.
        boot_prob: 2-D array of bootstrap probabilities; its shape sizes the result.
        labels: Variable names, indexed by row/column position.

    Returns:
        numpy.ndarray: Object array of prompts; diagonal cells are never assigned.
    """
    n_vars = boot_prob.shape[0]
    prompts = np.empty(boot_prob.shape, dtype=object)

    edge_summary = all_edges_pattern4(adjacency_matrix, boot_prob, labels)
    pair_findings = create_causal_text_matrix1_pattern4(adjacency_matrix, boot_prob, labels)
    task_texts = create_1st_template_text_matrix(adjacency_matrix, labels)

    # Everything up to the pair-specific finding is identical for all cells.
    preamble = Q1_1 + "\n" + Q1_2 + "\n" + edge_summary + "\n" + Q1_3

    for effect, cause in np.ndindex(n_vars, n_vars):
        if effect != cause:
            prompts[effect, cause] = preamble + pair_findings[effect, cause] + "\n" + task_texts[effect, cause]

    return prompts


In [None]:
# Completion of the 1st prompting matrices for DirectLiNGAM.
# Patterns 0, 1 and 3 take the coefficient matrix, pattern 2 takes the bootstrap
# probability matrix, and pattern 4 takes both.
# The builders read the module-level Q1_1 / Q1_2 / Q1_3 at call time, so this cell
# must run while those still hold the LiNGAM wording.
first_prompt_matrix_LiNGAM1_X_pattern0 = create_1st_prompt_matrix_pattern0(lingam0_adjacency_matrix_, labels_X)
first_prompt_matrix_LiNGAM1_X_pattern1 = create_1st_prompt_matrix_pattern1(lingam0_adjacency_matrix_, labels_X)
first_prompt_matrix_LiNGAM1_X_pattern2 = create_1st_prompt_matrix_pattern2(prob0_lingam, labels_X)
first_prompt_matrix_LiNGAM1_X_pattern3 = create_1st_prompt_matrix_pattern3(lingam0_adjacency_matrix_, labels_X)
first_prompt_matrix_LiNGAM1_X_pattern4 = create_1st_prompt_matrix_pattern4(lingam0_adjacency_matrix_, prob0_lingam, labels_X)

## For Exact Search (slight modification of the patterns for DirectLiNGAM)

In [None]:
# Shared sentences for the first-step prompts (Exact Search wording).
# Reassigning Q1_1 / Q1_2 / Q1_3 here changes what every prompt builder emits
# from now on, because the builders read these module-level names at call time.
template_Q1_1 = "We want to carry out causal inference {}, considering {} as variables."
template_Q1_2 = "First, we have conducted the statistical causal discovery with Exact Search algorithm, using a fully standardized dataset on {}."

# "a, b, ..., and z" listing of all variable names.
variables_X = ', '.join(labels_X[:-1]) + ', and ' + labels_X[-1]

Q1_1 = template_Q1_1.format(context_X, variables_X)
Q1_2 = template_Q1_2.format(dataset_explanation_X)

# The pair-specific finding ("there may be ...") is appended directly after this
# string, so it must end with a space; without it the prompt reads "thatthere".
Q1_3 = "According to the results shown above, it has been determined that "

# Pattern 1 (Exact Search).
# NOTE: this reuses the name of the DirectLiNGAM pattern-1 builder defined in an
# earlier cell and therefore replaces it from this point on.
def create_1st_prompt_matrix_pattern1(adjacency_matrix, labels):
    """Assemble the pattern-1 first-step prompt for every ordered variable pair.

    Each prompt is: ``Q1_1``, ``Q1_2``, the full edge list, ``Q1_3`` immediately
    followed by the pair-specific finding, then the task instruction. ``Q1_1``,
    ``Q1_2`` and ``Q1_3`` are module-level names read at call time.

    Args:
        adjacency_matrix: 2-D array; non-zero ``[i, j]`` means an edge from
            ``labels[j]`` to ``labels[i]``.
        labels: Variable names, indexed by row/column position.

    Returns:
        numpy.ndarray: Object array of prompts; diagonal cells are never assigned.
    """
    n_vars = adjacency_matrix.shape[0]
    prompts = np.empty(adjacency_matrix.shape, dtype=object)

    edge_summary = all_edges_pattern1(adjacency_matrix, labels)
    pair_findings = create_causal_text_matrix1_pattern1(adjacency_matrix, labels)
    task_texts = create_1st_template_text_matrix(adjacency_matrix, labels)

    # Everything up to the pair-specific finding is identical for all cells.
    preamble = Q1_1 + "\n" + Q1_2 + "\n" + edge_summary + "\n" + Q1_3

    for effect, cause in np.ndindex(n_vars, n_vars):
        if effect != cause:
            prompts[effect, cause] = preamble + pair_findings[effect, cause] + "\n" + task_texts[effect, cause]

    return prompts

# Pattern 2 (Exact Search): edges together with their bootstrap probabilities.
# NOTE: this reuses the name of the DirectLiNGAM pattern-2 builder defined in an
# earlier cell and therefore replaces it from this point on.
def create_1st_prompt_matrix_pattern2(boot_prob, labels):
    """Assemble the pattern-2 first-step prompt for every ordered variable pair.

    The input is a bootstrap-probability matrix, so the edge list is rendered
    with ``all_edges_pattern2``. The previous version used ``all_edges_pattern3``,
    which printed the probabilities under the label "coefficient" while the
    pair-specific sentence called the same numbers bootstrap probabilities.

    ``Q1_1``, ``Q1_2`` and ``Q1_3`` are module-level names read at call time.

    Args:
        boot_prob: 2-D array; ``[i, j]`` is the bootstrap probability of the
            edge from ``labels[j]`` to ``labels[i]``.
        labels: Variable names, indexed by row/column position.

    Returns:
        numpy.ndarray: Object array of prompts; diagonal cells are never assigned.
    """
    num_nodes = boot_prob.shape[0]
    first_prompt_matrix = np.empty(boot_prob.shape, dtype=object)

    all_edges = all_edges_pattern2(boot_prob, labels)
    causal_texts = create_causal_text_matrix1_pattern2(boot_prob, labels)
    causal_1st_template_texts = create_1st_template_text_matrix(boot_prob, labels)

    for i in range(num_nodes):
        for j in range(num_nodes):
            if j == i:
                continue
            first_prompt_matrix[i, j] = Q1_1 +"\n"+ Q1_2 +"\n"+ all_edges + "\n"+ Q1_3 + causal_texts[i, j] +"\n"+ causal_1st_template_texts[i, j]

    return first_prompt_matrix

In [None]:
# First-step prompt matrices for Exact Search: pattern 1 from the adjacency matrix,
# pattern 2 from the bootstrap probability matrix.
# Must run while Q1_1 / Q1_2 / Q1_3 hold the Exact Search wording.
first_prompt_matrix_ES_X_pattern1 = create_1st_prompt_matrix_pattern1(dag_est_es, labels_X)
first_prompt_matrix_ES_X_pattern2 = create_1st_prompt_matrix_pattern2(prob0_es, labels_X)

## For PC

Preparation for Pattern 1

In [None]:
def all_edges_pattern1_PC(adjacency_matrix, labels):
    """Render the directed and undirected edges of a PC result as prompt text.

    An off-diagonal entry ``[i, j] == 1`` is listed as the directed edge
    ``labels[j] → labels[i]``. Undirected edges are taken from the upper
    triangle only (``i < j``): an entry ``[i, j] == -1`` is listed once as
    ``labels[j] － labels[i]``.

    Args:
        adjacency_matrix: 2-D array with entries compared against 1 and -1.
        labels: Variable names, indexed by row/column position.

    Returns:
        str: Directed-edge section followed by undirected-edge section, each
        framed by ``-----`` rules.
    """
    n_vars = adjacency_matrix.shape[0]

    directed_lines = [
        f"{labels[cause]} → {labels[effect]}\n"
        for effect in range(n_vars)
        for cause in range(n_vars)
        if cause != effect and adjacency_matrix[effect, cause] == 1
    ]
    undirected_lines = [
        f"{labels[second]} － {labels[first]}\n"
        for first in range(n_vars)
        for second in range(first + 1, n_vars)
        if adjacency_matrix[first, second] == -1
    ]

    directed_header = (
        "All of the directed edges suggested by the statistic causal discovery are below:\n"
        "-----\n"
    )
    # Wording and leading spaces are reproduced exactly as the prompt has always been sent.
    undirected_header = (
        "-----\n"
        "  In additon to the directed edges above, all of the undirected edges suggested by the statistic causal discovery are below:\n"
        "-----\n"
    )

    return (
        directed_header
        + "".join(directed_lines)
        + undirected_header
        + "".join(undirected_lines)
        + "-----\n"
    )

In [None]:
def create_causal_text_matrix1_pattern1_PC(adjacency_matrix, labels):
    """Build one sentence per ordered variable pair describing the PC result.

    Cases for an off-diagonal cell ``[i, j]``:
      * value equal to 0: no direct impact of ``labels[j]`` on ``labels[i]``;
      * value equal to 1: direct impact of ``labels[j]`` on ``labels[i]``;
      * any other value (-1 marks an undirected edge): a relationship exists
        but its direction is undetermined.

    The branches form a single ``if / elif / else`` chain. Previously the first
    test was a separate ``if``, so the trailing ``else`` overwrote the
    "no direct impact" sentence for every 0 entry.

    Args:
        adjacency_matrix: 2-D array with entries compared against 0 and 1.
        labels: Variable names, indexed by row/column position.

    Returns:
        numpy.ndarray: Object array of the same shape as ``adjacency_matrix``.
        Diagonal cells are never assigned.
    """
    num_nodes = adjacency_matrix.shape[0]
    causal_text_matrix = np.empty(adjacency_matrix.shape, dtype=object)

    for i in range(num_nodes):
        for j in range(num_nodes):
            if j == i:
                continue
            if adjacency_matrix[i, j] == 0:
                causal_text_matrix[i, j] = f"there may be no direct impact of a change in {labels[j]} on {labels[i]}."
            elif adjacency_matrix[i, j] == 1:
                causal_text_matrix[i, j] = f"there may be a direct impact of a change in {labels[j]} on {labels[i]}."
            else:
                causal_text_matrix[i, j] = f"there may be a direct causal relationship between {labels[j]} and {labels[i]}, although the direction has not been determined."
    return causal_text_matrix

Preparation for Pattern 2

In [None]:
def all_edges_pattern2_PC(boot_prob0_directed, boot_prob0_undirected, labels):
    """Render PC edges with non-zero bootstrap probabilities as prompt text.

    Directed section: every off-diagonal ``boot_prob0_directed[i, j]`` not equal
    to 0 is listed as ``labels[j] → labels[i]``. Undirected section: only the
    upper triangle (``i < j``) of ``boot_prob0_undirected`` is scanned, and each
    entry not equal to 0 is listed once as ``labels[j] ― labels[i]``.

    Args:
        boot_prob0_directed: 2-D array of bootstrap probabilities of directed
            edges; its first dimension sizes both scans.
        boot_prob0_undirected: 2-D array of bootstrap probabilities of
            undirected edges.
        labels: Variable names, indexed by row/column position.

    Returns:
        str: Directed-edge section followed by undirected-edge section, each
        framed by ``-----`` rules.
    """
    n_vars = boot_prob0_directed.shape[0]

    directed_lines = [
        f"{labels[cause]} → {labels[effect]} (bootstrap probability = {boot_prob0_directed[effect,cause]})\n"
        for effect in range(n_vars)
        for cause in range(n_vars)
        if cause != effect and boot_prob0_directed[effect, cause] != 0
    ]
    undirected_lines = [
        f"{labels[second]} ― {labels[first]} (bootstrap probability = {boot_prob0_undirected[first,second]})\n"
        for first in range(n_vars)
        for second in range(first + 1, n_vars)
        if boot_prob0_undirected[first, second] != 0
    ]

    directed_header = (
        "All of the directed edges with non-zero bootstrap probabilities suggested by the statistic causal discovery are below:\n"
        "-----\n"
    )
    # Wording and leading spaces are reproduced exactly as the prompt has always been sent.
    undirected_header = (
        "-----\n"
        "  In additon to the directed edges above, all of the undirected edges suggested by the statistic causal discovery are below:\n"
        "-----\n"
    )

    return (
        directed_header
        + "".join(directed_lines)
        + undirected_header
        + "".join(undirected_lines)
        + "-----\n"
    )

In [None]:
def create_causal_text_matrix1_pattern2_PC(boot_prob0_directed, boot_prob0_undirected, labels):
    """Build one sentence per ordered pair from PC bootstrap probabilities.

    Four mutually exclusive cases per off-diagonal cell ``[i, j]``:
      * both probabilities 0: no direct impact;
      * directed only: direct impact with the directed probability;
      * undirected only: relationship of undetermined direction with the
        undirected probability;
      * both non-zero: both statements, each with its own probability.

    The branches form a single ``if / elif / else`` chain. Previously they were
    independent ``if`` statements with the ``else`` bound only to the third one,
    so the "both zero" and "directed only" sentences were always overwritten by
    the "both non-zero" sentence.

    Args:
        boot_prob0_directed: 2-D array of bootstrap probabilities of directed
            edges; its shape sizes the result.
        boot_prob0_undirected: 2-D array of bootstrap probabilities of
            undirected edges, indexed the same way.
        labels: Variable names, indexed by row/column position.

    Returns:
        numpy.ndarray: Object array of the same shape as ``boot_prob0_directed``.
        Diagonal cells are never assigned.
    """
    num_nodes = boot_prob0_directed.shape[0]
    causal_text_matrix = np.empty(boot_prob0_directed.shape, dtype=object)

    for i in range(num_nodes):
        for j in range(num_nodes):
            if j == i:
                continue
            has_directed = boot_prob0_directed[i, j] != 0
            has_undirected = boot_prob0_undirected[i, j] != 0

            if not has_directed and not has_undirected:
                causal_text_matrix[i, j] = f"there may be no direct impact of a change in {labels[j]} on {labels[i]}."

            elif has_directed and not has_undirected:
                causal_text_matrix[i, j] = f"there may be a direct impact of a change in {labels[j]} on {labels[i]} with a bootstrap probability of {boot_prob0_directed[i, j]}."

            elif not has_directed and has_undirected:
                causal_text_matrix[i, j] = f"there may be a direct causal relationship between {labels[j]} and {labels[i]} with a bootstrap probability of {boot_prob0_undirected[i, j]}, although the direction has not been determined."

            else:
                causal_text_matrix[i, j] = f"there may be a direct impact of a change in {labels[j]} on {labels[i]} with a bootstrap probability of {boot_prob0_directed[i, j]}. In addition, it has also been shown above that there may be a direct causal relationship between {labels[j]} and {labels[i]} with a bootstrap probability of {boot_prob0_undirected[i, j]},although the direction has not completely been determined."

    return causal_text_matrix

In [None]:
# Shared sentences for the first-step prompts (PC wording).
# Reassigning Q1_1 / Q1_2 / Q1_3 here changes what every prompt builder emits
# from now on, because the builders read these module-level names at call time.
template_Q1_1 = "We want to carry out causal inference {}, considering {} as variables."
template_Q1_2 = "First, we have conducted the statistical causal discovery with PC(Peter-Clerk) algorithm, using a fully standardized dataset on {}."

# "a, b, ..., and z" listing of all variable names.
variables_X = ", and ".join([", ".join(labels_X[:-1]), labels_X[-1]])

Q1_1 = template_Q1_1.format(context_X, variables_X)
Q1_2 = template_Q1_2.format(dataset_explanation_X)

# Lead-in placed immediately before the pair-specific result sentence.
Q1_3 = "According to the results shown above, it has been determined that "


# Pattern 1 (PC): directed and undirected edges, without probabilities.
def create_1st_prompt_matrix_pattern1_PC(adjacency_matrix, labels):
    """Assemble the pattern-1 first-step prompt for PC for every ordered pair.

    Each prompt is: ``Q1_1``, ``Q1_2``, the full edge list, ``Q1_3`` immediately
    followed by the pair-specific finding, then the task instruction. ``Q1_1``,
    ``Q1_2`` and ``Q1_3`` are module-level names read at call time.

    Args:
        adjacency_matrix: 2-D PC result array (entries compared against 0, 1, -1
            by the helpers).
        labels: Variable names, indexed by row/column position.

    Returns:
        numpy.ndarray: Object array of prompts; diagonal cells are never assigned.
    """
    n_vars = adjacency_matrix.shape[0]
    prompts = np.empty(adjacency_matrix.shape, dtype=object)

    edge_summary = all_edges_pattern1_PC(adjacency_matrix, labels)
    pair_findings = create_causal_text_matrix1_pattern1_PC(adjacency_matrix, labels)
    task_texts = create_1st_template_text_matrix(adjacency_matrix, labels)

    # Everything up to the pair-specific finding is identical for all cells.
    preamble = Q1_1 + "\n" + Q1_2 + "\n" + edge_summary + "\n" + Q1_3

    for effect, cause in np.ndindex(n_vars, n_vars):
        if effect != cause:
            prompts[effect, cause] = preamble + pair_findings[effect, cause] + "\n" + task_texts[effect, cause]

    return prompts

# Pattern 2 (PC): directed and undirected edges with bootstrap probabilities.
def create_1st_prompt_matrix_pattern2_PC(boot_prob0_directed, boot_prob0_undirected, labels):
    """Assemble the pattern-2 first-step prompt for PC for every ordered pair.

    ``Q1_1``, ``Q1_2`` and ``Q1_3`` are module-level names read at call time.

    Args:
        boot_prob0_directed: 2-D array of bootstrap probabilities of directed
            edges; its shape sizes the result.
        boot_prob0_undirected: 2-D array of bootstrap probabilities of
            undirected edges.
        labels: Variable names, indexed by row/column position.

    Returns:
        numpy.ndarray: Object array of prompts; diagonal cells are never assigned.
    """
    n_vars = boot_prob0_directed.shape[0]
    prompts = np.empty(boot_prob0_directed.shape, dtype=object)

    edge_summary = all_edges_pattern2_PC(boot_prob0_directed, boot_prob0_undirected, labels)
    pair_findings = create_causal_text_matrix1_pattern2_PC(boot_prob0_directed, boot_prob0_undirected, labels)
    task_texts = create_1st_template_text_matrix(boot_prob0_directed, labels)

    # Everything up to the pair-specific finding is identical for all cells.
    preamble = Q1_1 + "\n" + Q1_2 + "\n" + edge_summary + "\n" + Q1_3

    for effect, cause in np.ndindex(n_vars, n_vars):
        # Skip the diagonal so that no prompt text is written for a variable paired with itself.
        if effect != cause:
            prompts[effect, cause] = preamble + pair_findings[effect, cause] + "\n" + task_texts[effect, cause]

    return prompts

In [None]:
# First-step prompt matrices for PC: pattern 1 from the adjacency matrix,
# pattern 2 from the directed and undirected bootstrap probability matrices.
# Must run while Q1_1 / Q1_2 / Q1_3 hold the PC wording.
first_prompt_matrix_PC_X_pattern1 = create_1st_prompt_matrix_pattern1_PC(dag_est_pc, labels_X)
first_prompt_matrix_PC_X_pattern2 = create_1st_prompt_matrix_pattern2_PC(prob0_pc_directed, prob0_pc_undirected, labels_X)

# Knowledge generation with 1st prompting

In [None]:
# System message sent with every chat-completion request below.
system_role = "You are a helpful assistant for causal inference."
from openai import OpenAI
# The client is created without arguments; OPENAI_API_KEY is set in os.environ in
# the import cell (presumably that is where the client takes its key from — confirm).
client = OpenAI()

In [None]:
# Object arrays that will hold the LLM reply text for each ordered variable pair.
# Suffix: _L = DirectLiNGAM, _E = Exact Search, _P = PC; the digit is the prompt pattern.
# Each array takes the shape of the corresponding method's adjacency matrix.
generated_knowledge_matrix_0_L = np.empty(lingam0_adjacency_matrix_.shape, dtype=object)
generated_knowledge_matrix_1_L = np.empty(lingam0_adjacency_matrix_.shape, dtype=object)
generated_knowledge_matrix_2_L = np.empty(lingam0_adjacency_matrix_.shape, dtype=object)
generated_knowledge_matrix_3_L = np.empty(lingam0_adjacency_matrix_.shape, dtype=object)
generated_knowledge_matrix_4_L = np.empty(lingam0_adjacency_matrix_.shape, dtype=object)
generated_knowledge_matrix_1_E = np.empty(dag_est_es.shape, dtype=object)
generated_knowledge_matrix_2_E = np.empty(dag_est_es.shape, dtype=object)
generated_knowledge_matrix_1_P = np.empty(dag_est_pc.shape, dtype=object)
generated_knowledge_matrix_2_P = np.empty(dag_est_pc.shape, dtype=object)

For DirectLiNGAM

In [None]:
# pattern 0 (DirectLiNGAM)
# One chat-completion request per ordered pair (i, j) of distinct variables;
# the reply text is stored in the matching cell of the result matrix.
for i in range(lingam0_adjacency_matrix_.shape[0]):
    for j in range(lingam0_adjacency_matrix_.shape[0]):
        if i != j:
            response1 = client.chat.completions.create(
                model="gpt-4-1106-preview",
                temperature=0.7,
                max_tokens=3000,
                messages=[
                    {"role": "system", "content": system_role},
                    {"role": "user", "content": first_prompt_matrix_LiNGAM1_X_pattern0[i, j]},
                ],
            )
            generated_knowledge_matrix_0_L[i, j] = response1.choices[0].message.content

            print(f"{i},{j}")  # progress log

# Save the replies twice: plain UTF-8, and UTF-8 with BOM for the "_for_excel" copy.
generated_knowledge_matrix_0_L_df = pd.DataFrame(generated_knowledge_matrix_0_L)
generated_knowledge_matrix_0_L_df.to_csv("generated_knowledge_matrix_0_L.csv", encoding="utf-8")
generated_knowledge_matrix_0_L_df.to_csv("generated_knowledge_matrix_0_L_for_excel.csv", encoding="utf-8-sig")

In [None]:
# pattern 1 (DirectLiNGAM)
# One chat-completion request per ordered pair (i, j) of distinct variables;
# the reply text is stored in the matching cell of the result matrix.
for i in range(lingam0_adjacency_matrix_.shape[0]):
    for j in range(lingam0_adjacency_matrix_.shape[0]):
        if i != j:
            response1 = client.chat.completions.create(
                model="gpt-4-1106-preview",
                temperature=0.7,
                max_tokens=3000,
                messages=[
                    {"role": "system", "content": system_role},
                    {"role": "user", "content": first_prompt_matrix_LiNGAM1_X_pattern1[i, j]},
                ],
            )
            generated_knowledge_matrix_1_L[i, j] = response1.choices[0].message.content

            print(f"{i},{j}")  # progress log

# Save the replies twice: plain UTF-8, and UTF-8 with BOM for the "_for_excel" copy.
generated_knowledge_matrix_1_L_df = pd.DataFrame(generated_knowledge_matrix_1_L)
generated_knowledge_matrix_1_L_df.to_csv("generated_knowledge_matrix_1_L.csv", encoding="utf-8")
generated_knowledge_matrix_1_L_df.to_csv("generated_knowledge_matrix_1_L_for_excel.csv", encoding="utf-8-sig")

In [None]:
# pattern 2 (DirectLiNGAM)
# One chat-completion request per ordered pair (i, j) of distinct variables;
# the reply text is stored in the matching cell of the result matrix.
for i in range(lingam0_adjacency_matrix_.shape[0]):
    for j in range(lingam0_adjacency_matrix_.shape[0]):
        if i != j:
            response1 = client.chat.completions.create(
                model="gpt-4-1106-preview",
                temperature=0.7,
                max_tokens=3000,
                messages=[
                    {"role": "system", "content": system_role},
                    {"role": "user", "content": first_prompt_matrix_LiNGAM1_X_pattern2[i, j]},
                ],
            )
            generated_knowledge_matrix_2_L[i, j] = response1.choices[0].message.content

            print(f"{i},{j}")  # progress log

# Save the replies twice: plain UTF-8, and UTF-8 with BOM for the "_for_excel" copy.
generated_knowledge_matrix_2_L_df = pd.DataFrame(generated_knowledge_matrix_2_L)
generated_knowledge_matrix_2_L_df.to_csv("generated_knowledge_matrix_2_L.csv", encoding="utf-8")
generated_knowledge_matrix_2_L_df.to_csv("generated_knowledge_matrix_2_L_for_excel.csv", encoding="utf-8-sig")

In [None]:
# pattern 3 (DirectLiNGAM)
# One chat-completion request per ordered pair (i, j) of distinct variables;
# the reply text is stored in the matching cell of the result matrix.
for i in range(lingam0_adjacency_matrix_.shape[0]):
    for j in range(lingam0_adjacency_matrix_.shape[0]):
        if i != j:
            response1 = client.chat.completions.create(
                model="gpt-4-1106-preview",
                temperature=0.7,
                max_tokens=3000,
                messages=[
                    {"role": "system", "content": system_role},
                    {"role": "user", "content": first_prompt_matrix_LiNGAM1_X_pattern3[i, j]},
                ],
            )
            generated_knowledge_matrix_3_L[i, j] = response1.choices[0].message.content

            print(f"{i},{j}")  # progress log

# Save the replies twice: plain UTF-8, and UTF-8 with BOM for the "_for_excel" copy.
generated_knowledge_matrix_3_L_df = pd.DataFrame(generated_knowledge_matrix_3_L)
generated_knowledge_matrix_3_L_df.to_csv("generated_knowledge_matrix_3_L.csv", encoding="utf-8")
generated_knowledge_matrix_3_L_df.to_csv("generated_knowledge_matrix_3_L_for_excel.csv", encoding="utf-8-sig")

In [None]:
# pattern 4 (DirectLiNGAM)
# One chat-completion request per ordered pair (i, j) of distinct variables;
# the reply text is stored in the matching cell of the result matrix.
for i in range(lingam0_adjacency_matrix_.shape[0]):
    for j in range(lingam0_adjacency_matrix_.shape[0]):
        if i != j:
            response1 = client.chat.completions.create(
                model="gpt-4-1106-preview",
                temperature=0.7,
                max_tokens=3000,
                messages=[
                    {"role": "system", "content": system_role},
                    {"role": "user", "content": first_prompt_matrix_LiNGAM1_X_pattern4[i, j]},
                ],
            )
            generated_knowledge_matrix_4_L[i, j] = response1.choices[0].message.content

            print(f"{i},{j}")  # progress log

# Save the replies twice: plain UTF-8, and UTF-8 with BOM for the "_for_excel" copy.
generated_knowledge_matrix_4_L_df = pd.DataFrame(generated_knowledge_matrix_4_L)
generated_knowledge_matrix_4_L_df.to_csv("generated_knowledge_matrix_4_L.csv", encoding="utf-8")
generated_knowledge_matrix_4_L_df.to_csv("generated_knowledge_matrix_4_L_for_excel.csv", encoding="utf-8-sig")

For Exact Search

In [None]:
# pattern 1 (Exact Search)
# One chat-completion request per ordered pair (i, j) of distinct variables;
# the reply text is stored in the matching cell of the result matrix.
for i in range(dag_est_es.shape[0]):
    for j in range(dag_est_es.shape[0]):
        if i != j:
            response1 = client.chat.completions.create(
                model="gpt-4-1106-preview",
                temperature=0.7,
                max_tokens=3000,
                messages=[
                    {"role": "system", "content": system_role},
                    {"role": "user", "content": first_prompt_matrix_ES_X_pattern1[i, j]},
                ],
            )
            generated_knowledge_matrix_1_E[i, j] = response1.choices[0].message.content

            print(f"{i},{j}")  # progress log

# Save the replies twice: plain UTF-8, and UTF-8 with BOM for the "_for_excel" copy.
generated_knowledge_matrix_1_E_df = pd.DataFrame(generated_knowledge_matrix_1_E)
generated_knowledge_matrix_1_E_df.to_csv("generated_knowledge_matrix_1_E.csv", encoding="utf-8")
generated_knowledge_matrix_1_E_df.to_csv("generated_knowledge_matrix_1_E_for_excel.csv", encoding="utf-8-sig")

In [None]:
# pattern 2 (Exact Search)
# One chat-completion request per ordered pair (i, j) of distinct variables;
# the reply text is stored in the matching cell of the result matrix.
for i in range(dag_est_es.shape[0]):
    for j in range(dag_est_es.shape[0]):
        if i != j:
            response1 = client.chat.completions.create(
                model="gpt-4-1106-preview",
                temperature=0.7,
                max_tokens=3000,
                messages=[
                    {"role": "system", "content": system_role},
                    {"role": "user", "content": first_prompt_matrix_ES_X_pattern2[i, j]},
                ],
            )
            generated_knowledge_matrix_2_E[i, j] = response1.choices[0].message.content

            print(f"{i},{j}")  # progress log

# Save the replies twice: plain UTF-8, and UTF-8 with BOM for the "_for_excel" copy.
generated_knowledge_matrix_2_E_df = pd.DataFrame(generated_knowledge_matrix_2_E)
generated_knowledge_matrix_2_E_df.to_csv("generated_knowledge_matrix_2_E.csv", encoding="utf-8")
generated_knowledge_matrix_2_E_df.to_csv("generated_knowledge_matrix_2_E_for_excel.csv", encoding="utf-8-sig")

For PC

In [None]:
# pattern 1 (PC)
# One chat-completion request per ordered pair (i, j) of distinct variables;
# the reply text is stored in the matching cell of the result matrix.
for i in range(dag_est_pc.shape[0]):
    for j in range(dag_est_pc.shape[0]):
        if i != j:
            response1 = client.chat.completions.create(
                model="gpt-4-1106-preview",
                temperature=0.7,
                max_tokens=3000,
                messages=[
                    {"role": "system", "content": system_role},
                    {"role": "user", "content": first_prompt_matrix_PC_X_pattern1[i, j]},
                ],
            )
            generated_knowledge_matrix_1_P[i, j] = response1.choices[0].message.content

            print(f"{i},{j}")  # progress log

# Save the replies twice: plain UTF-8, and UTF-8 with BOM for the "_for_excel" copy.
generated_knowledge_matrix_1_P_df = pd.DataFrame(generated_knowledge_matrix_1_P)
generated_knowledge_matrix_1_P_df.to_csv("generated_knowledge_matrix_1_P.csv", encoding="utf-8")
generated_knowledge_matrix_1_P_df.to_csv("generated_knowledge_matrix_1_P_for_excel.csv", encoding="utf-8-sig")

In [None]:
# pattern 2 (PC)
# One chat-completion request per ordered pair (i, j) of distinct variables;
# the reply text is stored in the matching cell of the result matrix.
for i in range(dag_est_pc.shape[0]):
    for j in range(dag_est_pc.shape[0]):
        if i != j:
            response1 = client.chat.completions.create(
                model="gpt-4-1106-preview",
                temperature=0.7,
                max_tokens=3000,
                messages=[
                    {"role": "system", "content": system_role},
                    {"role": "user", "content": first_prompt_matrix_PC_X_pattern2[i, j]},
                ],
            )
            generated_knowledge_matrix_2_P[i, j] = response1.choices[0].message.content

            print(f"{i},{j}")  # progress log

# Save the replies twice: plain UTF-8, and UTF-8 with BOM for the "_for_excel" copy.
generated_knowledge_matrix_2_P_df = pd.DataFrame(generated_knowledge_matrix_2_P)
generated_knowledge_matrix_2_P_df.to_csv("generated_knowledge_matrix_2_P.csv", encoding="utf-8")
generated_knowledge_matrix_2_P_df.to_csv("generated_knowledge_matrix_2_P_for_excel.csv", encoding="utf-8-sig")

# Constructing the 2nd prompt (preparing for the 3rd step)

In [None]:
# Fixed fragments of the 2nd prompt, concatenated in numeric order around the
# first prompt, the LLM's first answer and the yes/no question.
sen_eng_1 = "An expert was asked the question below:\n"
sen_eng_2 = "Then, the expert replied with its domain knowledge:\n"
sen_eng_3 = "\nConsidering objectively this discussion above,"

sen_eng_4 = (
    "\n"
    "Please answer this question with <yes> or <no>.\n"
    "No answers except these two responses are needed."
)

In [None]:
def create_2nd_Question_matrix(adjacency_matrix, labels):
    """Build the yes/no question for every ordered pair of distinct variables.

    Args:
        adjacency_matrix: array whose ``shape`` fixes the size of the result;
            its values are not read.
        labels: sequence of variable names, indexed by node number.

    Returns:
        An object array of ``adjacency_matrix.shape``. Cell ``[i, j]`` (i != j)
        asks whether modifying ``labels[j]`` impacts ``labels[i]``; diagonal
        cells are left as allocated by ``np.empty``.
    """
    size = adjacency_matrix.shape[0]
    questions = np.empty(adjacency_matrix.shape, dtype=object)

    for row in range(size):
        for col in range(size):
            if row != col:
                questions[row, col] = f"if {labels[col]} is modified, will it have a direct or indirect impact on {labels[row]}?"

    return questions

In [None]:
def create_2nd_prompt_matrix(adjacency_matrix, labels, first_prompt_matrix, first_answer_matrix):
    """Assemble the 2nd-step prompt for every ordered pair of distinct variables.

    Each off-diagonal cell ``[i, j]`` is the plain concatenation, in order, of
    ``sen_eng_1``, the first prompt, ``sen_eng_2``, the first answer,
    ``sen_eng_3``, the yes/no question from ``create_2nd_Question_matrix`` and
    ``sen_eng_4``.

    Args:
        adjacency_matrix: array whose ``shape`` fixes the size of the result.
        labels: variable names passed on to ``create_2nd_Question_matrix``.
        first_prompt_matrix: first-step prompts, indexed ``[i, j]``.
        first_answer_matrix: first-step answers, indexed ``[i, j]``.

    Returns:
        An object array of ``adjacency_matrix.shape``; diagonal cells are left
        as allocated by ``np.empty``.
    """
    size = adjacency_matrix.shape[0]
    prompts = np.empty(adjacency_matrix.shape, dtype=object)
    questions = create_2nd_Question_matrix(adjacency_matrix, labels)

    for row in range(size):
        for col in range(size):
            if row != col:
                # NOTE(review): the pieces are joined without any separator, so the
                # question follows sen_eng_3 directly -- confirm this is intended.
                quoted_exchange = sen_eng_1 + first_prompt_matrix[row, col] + sen_eng_2 + first_answer_matrix[row, col]
                prompts[row, col] = quoted_exchange + sen_eng_3 + questions[row, col] + sen_eng_4
    return prompts

In [None]:
# 2nd prompt matrix generation: one matrix per (SCD method, prompting pattern),
# each pairing the first prompts with the answers generated for them.

# LiNGAM, patterns 0-4
causal_2nd_prompt_LiNGAM1_X_pattern0 = create_2nd_prompt_matrix(
    lingam0_adjacency_matrix_,
    labels_X,
    first_prompt_matrix_LiNGAM1_X_pattern0,
    generated_knowledge_matrix_0_L,
)
causal_2nd_prompt_LiNGAM1_X_pattern1 = create_2nd_prompt_matrix(
    lingam0_adjacency_matrix_,
    labels_X,
    first_prompt_matrix_LiNGAM1_X_pattern1,
    generated_knowledge_matrix_1_L,
)
causal_2nd_prompt_LiNGAM1_X_pattern2 = create_2nd_prompt_matrix(
    lingam0_adjacency_matrix_,
    labels_X,
    first_prompt_matrix_LiNGAM1_X_pattern2,
    generated_knowledge_matrix_2_L,
)
causal_2nd_prompt_LiNGAM1_X_pattern3 = create_2nd_prompt_matrix(
    lingam0_adjacency_matrix_,
    labels_X,
    first_prompt_matrix_LiNGAM1_X_pattern3,
    generated_knowledge_matrix_3_L,
)
causal_2nd_prompt_LiNGAM1_X_pattern4 = create_2nd_prompt_matrix(
    lingam0_adjacency_matrix_,
    labels_X,
    first_prompt_matrix_LiNGAM1_X_pattern4,
    generated_knowledge_matrix_4_L,
)

# Exact Search, patterns 1-2
causal_2nd_prompt_ES_X_pattern1 = create_2nd_prompt_matrix(
    dag_est_es,
    labels_X,
    first_prompt_matrix_ES_X_pattern1,
    generated_knowledge_matrix_1_E,
)
causal_2nd_prompt_ES_X_pattern2 = create_2nd_prompt_matrix(
    dag_est_es,
    labels_X,
    first_prompt_matrix_ES_X_pattern2,
    generated_knowledge_matrix_2_E,
)

# PC, patterns 1-2
causal_2nd_prompt_PC_X_pattern1 = create_2nd_prompt_matrix(
    dag_est_pc,
    labels_X,
    first_prompt_matrix_PC_X_pattern1,
    generated_knowledge_matrix_1_P,
)
causal_2nd_prompt_PC_X_pattern2 = create_2nd_prompt_matrix(
    dag_est_pc,
    labels_X,
    first_prompt_matrix_PC_X_pattern2,
    generated_knowledge_matrix_2_P,
)


# Probability calculation from the responses to the 2nd prompts

In [None]:
# functions for means and standard deviations for probabilities
def calculate_mean_std_matrices(measured_prob_table, adjacency_matrix):
    """Aggregate per-trial probabilities into mean and standard-deviation matrices.

    Args:
        measured_prob_table: DataFrame with columns ``'i'``, ``'j'`` and
            ``'probability'`` (one row per trial). The ``'i'``/``'j'`` labels may
            be strings (as built in memory by this notebook) or integers (as
            obtained when the saved CSV is read back); both are matched.
        adjacency_matrix: array whose ``shape`` fixes the size of the outputs;
            its values are not read.

    Returns:
        ``(mean_matrix, std_dev_matrix)``: cell ``[i, j]`` holds ``np.mean`` and
        ``np.std`` of the ``'probability'`` values recorded for that pair. Pairs
        with no rows (e.g. the diagonal, which is never queried) come out as NaN.
    """
    mean_matrix = np.empty(adjacency_matrix.shape)
    std_dev_matrix = np.empty(adjacency_matrix.shape)

    # Normalise the index labels to text once, so that integer-typed columns
    # are matched instead of silently producing empty selections.
    row_keys = measured_prob_table['i'].astype(str)
    col_keys = measured_prob_table['j'].astype(str)

    for i in range(adjacency_matrix.shape[0]):
        # The row mask does not depend on j, so it is computed once per row.
        row_mask = row_keys == f"{i}"
        for j in range(adjacency_matrix.shape[0]):

            values = measured_prob_table.loc[row_mask & (col_keys == f"{j}"), 'probability']

            mean_matrix[i, j] = np.mean(values)
            std_dev_matrix[i, j] = np.std(values)

    return mean_matrix, std_dev_matrix

In [None]:
# System message sent as the first chat message of every request in the cells below.
system_role = "You are a helpful assistant for causal inference."
from openai import OpenAI
# Client is built without explicit arguments; presumably it picks up the
# OPENAI_API_KEY environment variable set at the top of the notebook -- verify.
client = OpenAI()

In [None]:
import math

For LiNGAM

In [None]:
# pattern0 (LiNGAM)
# Every off-diagonal 2nd prompt is sent `sample_number` times; for each reply the
# probability of "yes" / "no" is summed from the top log-probabilities of the
# first generated token.
temp_pattern_yes = []
temp_pattern_no = []

sample_number = 5
top_logprobs = 5

for i in range(lingam0_adjacency_matrix_.shape[0]):
    for j in range(lingam0_adjacency_matrix_.shape[0]):
        if i == j:
            continue

        for t in range(sample_number):
            prob_yes = 0
            prob_no = 0
            response = client.chat.completions.create(
                model="gpt-4-1106-preview",
                messages=[
                    {"role": "system", "content": system_role},
                    {"role": "user", "content": causal_2nd_prompt_LiNGAM1_X_pattern0[i, j]},
                ],
                temperature=0.7,
                max_tokens=1500,
                logprobs=True,
                # Use the setting above so the request and the loop below cannot diverge.
                top_logprobs=top_logprobs,
            )
            # Iterate over the candidates actually returned instead of indexing a
            # fixed count, so a shorter list cannot raise IndexError.
            for candidate in response.choices[0].logprobs.content[0].top_logprobs:
                if candidate.token in ('Yes', 'yes'):
                    prob_yes += math.exp(candidate.logprob)
                elif candidate.token in ('No', 'no'):
                    prob_no += math.exp(candidate.logprob)
            temp_pattern_yes.append([f"{i}", f"{j}", f"Trial {t+1}", prob_yes])
            temp_pattern_no.append([f"{i}", f"{j}", f"Trial {t+1}", prob_no])
            print("i="+str(i)+", j="+str(j)+",Trial:"+str(t+1), "p_yes:", prob_yes, "p_no:", prob_no)

# Raw per-trial tables.
columns = ['i', 'j', 'Trial', 'probability']
pattern0_yes_L_df = pd.DataFrame(temp_pattern_yes, columns=columns)
pattern0_no_L_df = pd.DataFrame(temp_pattern_no, columns=columns)
pattern0_yes_L_df.to_csv("pattern0_yes_L.csv", encoding='utf-8')
pattern0_no_L_df.to_csv("pattern0_no_L.csv", encoding='utf-8')

# Per-pair mean and standard deviation of the "yes" probability.
probability_X0_pattern0_L, stdev_X0_pattern0_L = calculate_mean_std_matrices(pattern0_yes_L_df, lingam0_adjacency_matrix_)
probability_X0_pattern0_L_df = pd.DataFrame(probability_X0_pattern0_L)
stdev_X0_pattern0_L_df = pd.DataFrame(stdev_X0_pattern0_L)
probability_X0_pattern0_L_df.to_csv("probability_X0_pattern0_L.csv", encoding='utf-8')
stdev_X0_pattern0_L_df.to_csv("stdev_X0_pattern0_L.csv", encoding='utf-8')

In [None]:
# pattern1 (LiNGAM)
# Every off-diagonal 2nd prompt is sent `sample_number` times; for each reply the
# probability of "yes" / "no" is summed from the top log-probabilities of the
# first generated token.
temp_pattern_yes = []
temp_pattern_no = []

sample_number = 5
top_logprobs = 5

for i in range(lingam0_adjacency_matrix_.shape[0]):
    for j in range(lingam0_adjacency_matrix_.shape[0]):
        if i == j:
            continue

        for t in range(sample_number):
            prob_yes = 0
            prob_no = 0
            response = client.chat.completions.create(
                model="gpt-4-1106-preview",
                messages=[
                    {"role": "system", "content": system_role},
                    {"role": "user", "content": causal_2nd_prompt_LiNGAM1_X_pattern1[i, j]},
                ],
                temperature=0.7,
                max_tokens=1500,
                logprobs=True,
                # Use the setting above so the request and the loop below cannot diverge.
                top_logprobs=top_logprobs,
            )
            # Iterate over the candidates actually returned instead of indexing a
            # fixed count, so a shorter list cannot raise IndexError.
            for candidate in response.choices[0].logprobs.content[0].top_logprobs:
                if candidate.token in ('Yes', 'yes'):
                    prob_yes += math.exp(candidate.logprob)
                elif candidate.token in ('No', 'no'):
                    prob_no += math.exp(candidate.logprob)
            temp_pattern_yes.append([f"{i}", f"{j}", f"Trial {t+1}", prob_yes])
            temp_pattern_no.append([f"{i}", f"{j}", f"Trial {t+1}", prob_no])
            print("i="+str(i)+", j="+str(j)+",Trial:"+str(t+1), "p_yes:", prob_yes, "p_no:", prob_no)

# Raw per-trial tables.
columns = ['i', 'j', 'Trial', 'probability']
pattern1_yes_L_df = pd.DataFrame(temp_pattern_yes, columns=columns)
pattern1_no_L_df = pd.DataFrame(temp_pattern_no, columns=columns)
pattern1_yes_L_df.to_csv("pattern1_yes_L.csv", encoding='utf-8')
pattern1_no_L_df.to_csv("pattern1_no_L.csv", encoding='utf-8')

# Per-pair mean and standard deviation of the "yes" probability.
probability_X0_pattern1_L, stdev_X0_pattern1_L = calculate_mean_std_matrices(pattern1_yes_L_df, lingam0_adjacency_matrix_)
probability_X0_pattern1_L_df = pd.DataFrame(probability_X0_pattern1_L)
stdev_X0_pattern1_L_df = pd.DataFrame(stdev_X0_pattern1_L)
probability_X0_pattern1_L_df.to_csv("probability_X0_pattern1_L.csv", encoding='utf-8')
stdev_X0_pattern1_L_df.to_csv("stdev_X0_pattern1_L.csv", encoding='utf-8')

In [None]:
# pattern2 (LiNGAM)
# Every off-diagonal 2nd prompt is sent `sample_number` times; for each reply the
# probability of "yes" / "no" is summed from the top log-probabilities of the
# first generated token.
temp_pattern_yes = []
temp_pattern_no = []

sample_number = 5
top_logprobs = 5

for i in range(lingam0_adjacency_matrix_.shape[0]):
    for j in range(lingam0_adjacency_matrix_.shape[0]):
        if i == j:
            continue

        for t in range(sample_number):
            prob_yes = 0
            prob_no = 0
            response = client.chat.completions.create(
                model="gpt-4-1106-preview",
                messages=[
                    {"role": "system", "content": system_role},
                    {"role": "user", "content": causal_2nd_prompt_LiNGAM1_X_pattern2[i, j]},
                ],
                temperature=0.7,
                max_tokens=1500,
                logprobs=True,
                # Use the setting above so the request and the loop below cannot diverge.
                top_logprobs=top_logprobs,
            )
            # Iterate over the candidates actually returned instead of indexing a
            # fixed count, so a shorter list cannot raise IndexError.
            for candidate in response.choices[0].logprobs.content[0].top_logprobs:
                if candidate.token in ('Yes', 'yes'):
                    prob_yes += math.exp(candidate.logprob)
                elif candidate.token in ('No', 'no'):
                    prob_no += math.exp(candidate.logprob)
            temp_pattern_yes.append([f"{i}", f"{j}", f"Trial {t+1}", prob_yes])
            temp_pattern_no.append([f"{i}", f"{j}", f"Trial {t+1}", prob_no])
            print("i="+str(i)+", j="+str(j)+",Trial:"+str(t+1), "p_yes:", prob_yes, "p_no:", prob_no)

# Raw per-trial tables.
columns = ['i', 'j', 'Trial', 'probability']
pattern2_yes_L_df = pd.DataFrame(temp_pattern_yes, columns=columns)
pattern2_no_L_df = pd.DataFrame(temp_pattern_no, columns=columns)
pattern2_yes_L_df.to_csv("pattern2_yes_L.csv", encoding='utf-8')
pattern2_no_L_df.to_csv("pattern2_no_L.csv", encoding='utf-8')

# Per-pair mean and standard deviation of the "yes" probability.
probability_X0_pattern2_L, stdev_X0_pattern2_L = calculate_mean_std_matrices(pattern2_yes_L_df, lingam0_adjacency_matrix_)
probability_X0_pattern2_L_df = pd.DataFrame(probability_X0_pattern2_L)
stdev_X0_pattern2_L_df = pd.DataFrame(stdev_X0_pattern2_L)
probability_X0_pattern2_L_df.to_csv("probability_X0_pattern2_L.csv", encoding='utf-8')
stdev_X0_pattern2_L_df.to_csv("stdev_X0_pattern2_L.csv", encoding='utf-8')

In [None]:
# pattern3 (LiNGAM)
# Every off-diagonal 2nd prompt is sent `sample_number` times; for each reply the
# probability of "yes" / "no" is summed from the top log-probabilities of the
# first generated token.
temp_pattern_yes = []
temp_pattern_no = []

sample_number = 5
top_logprobs = 5

for i in range(lingam0_adjacency_matrix_.shape[0]):
    for j in range(lingam0_adjacency_matrix_.shape[0]):
        if i == j:
            continue

        for t in range(sample_number):
            prob_yes = 0
            prob_no = 0
            response = client.chat.completions.create(
                model="gpt-4-1106-preview",
                messages=[
                    {"role": "system", "content": system_role},
                    {"role": "user", "content": causal_2nd_prompt_LiNGAM1_X_pattern3[i, j]},
                ],
                temperature=0.7,
                max_tokens=1500,
                logprobs=True,
                # Use the setting above so the request and the loop below cannot diverge.
                top_logprobs=top_logprobs,
            )
            # Iterate over the candidates actually returned instead of indexing a
            # fixed count, so a shorter list cannot raise IndexError.
            for candidate in response.choices[0].logprobs.content[0].top_logprobs:
                if candidate.token in ('Yes', 'yes'):
                    prob_yes += math.exp(candidate.logprob)
                elif candidate.token in ('No', 'no'):
                    prob_no += math.exp(candidate.logprob)
            temp_pattern_yes.append([f"{i}", f"{j}", f"Trial {t+1}", prob_yes])
            temp_pattern_no.append([f"{i}", f"{j}", f"Trial {t+1}", prob_no])
            print("i="+str(i)+", j="+str(j)+",Trial:"+str(t+1), "p_yes:", prob_yes, "p_no:", prob_no)

# Raw per-trial tables.
columns = ['i', 'j', 'Trial', 'probability']
pattern3_yes_L_df = pd.DataFrame(temp_pattern_yes, columns=columns)
pattern3_no_L_df = pd.DataFrame(temp_pattern_no, columns=columns)
pattern3_yes_L_df.to_csv("pattern3_yes_L.csv", encoding='utf-8')
pattern3_no_L_df.to_csv("pattern3_no_L.csv", encoding='utf-8')

# Per-pair mean and standard deviation of the "yes" probability.
probability_X0_pattern3_L, stdev_X0_pattern3_L = calculate_mean_std_matrices(pattern3_yes_L_df, lingam0_adjacency_matrix_)
probability_X0_pattern3_L_df = pd.DataFrame(probability_X0_pattern3_L)
stdev_X0_pattern3_L_df = pd.DataFrame(stdev_X0_pattern3_L)
probability_X0_pattern3_L_df.to_csv("probability_X0_pattern3_L.csv", encoding='utf-8')
stdev_X0_pattern3_L_df.to_csv("stdev_X0_pattern3_L.csv", encoding='utf-8')

In [None]:
# pattern4 (LiNGAM)
# Every off-diagonal 2nd prompt is sent `sample_number` times; for each reply the
# probability of "yes" / "no" is summed from the top log-probabilities of the
# first generated token.
temp_pattern_yes = []
temp_pattern_no = []

sample_number = 5
top_logprobs = 5

for i in range(lingam0_adjacency_matrix_.shape[0]):
    for j in range(lingam0_adjacency_matrix_.shape[0]):
        if i == j:
            continue

        for t in range(sample_number):
            prob_yes = 0
            prob_no = 0
            response = client.chat.completions.create(
                model="gpt-4-1106-preview",
                messages=[
                    {"role": "system", "content": system_role},
                    {"role": "user", "content": causal_2nd_prompt_LiNGAM1_X_pattern4[i, j]},
                ],
                temperature=0.7,
                max_tokens=1500,
                logprobs=True,
                # Use the setting above so the request and the loop below cannot diverge.
                top_logprobs=top_logprobs,
            )
            # Iterate over the candidates actually returned instead of indexing a
            # fixed count, so a shorter list cannot raise IndexError.
            for candidate in response.choices[0].logprobs.content[0].top_logprobs:
                if candidate.token in ('Yes', 'yes'):
                    prob_yes += math.exp(candidate.logprob)
                elif candidate.token in ('No', 'no'):
                    prob_no += math.exp(candidate.logprob)
            temp_pattern_yes.append([f"{i}", f"{j}", f"Trial {t+1}", prob_yes])
            temp_pattern_no.append([f"{i}", f"{j}", f"Trial {t+1}", prob_no])
            print("i="+str(i)+", j="+str(j)+",Trial:"+str(t+1), "p_yes:", prob_yes, "p_no:", prob_no)

# Raw per-trial tables.
columns = ['i', 'j', 'Trial', 'probability']
pattern4_yes_L_df = pd.DataFrame(temp_pattern_yes, columns=columns)
pattern4_no_L_df = pd.DataFrame(temp_pattern_no, columns=columns)
pattern4_yes_L_df.to_csv("pattern4_yes_L.csv", encoding='utf-8')
pattern4_no_L_df.to_csv("pattern4_no_L.csv", encoding='utf-8')

# Per-pair mean and standard deviation of the "yes" probability.
probability_X0_pattern4_L, stdev_X0_pattern4_L = calculate_mean_std_matrices(pattern4_yes_L_df, lingam0_adjacency_matrix_)
probability_X0_pattern4_L_df = pd.DataFrame(probability_X0_pattern4_L)
stdev_X0_pattern4_L_df = pd.DataFrame(stdev_X0_pattern4_L)
probability_X0_pattern4_L_df.to_csv("probability_X0_pattern4_L.csv", encoding='utf-8')
stdev_X0_pattern4_L_df.to_csv("stdev_X0_pattern4_L.csv", encoding='utf-8')

For Exact Search

In [None]:
# pattern1 (Exact Search)
# Every off-diagonal 2nd prompt is sent `sample_number` times; for each reply the
# probability of "yes" / "no" is summed from the top log-probabilities of the
# first generated token.
temp_pattern_yes = []
temp_pattern_no = []

sample_number = 5
top_logprobs = 5

for i in range(dag_est_es.shape[0]):
    for j in range(dag_est_es.shape[0]):
        if i == j:
            continue

        for t in range(sample_number):
            prob_yes = 0
            prob_no = 0
            response = client.chat.completions.create(
                model="gpt-4-1106-preview",
                messages=[
                    {"role": "system", "content": system_role},
                    {"role": "user", "content": causal_2nd_prompt_ES_X_pattern1[i, j]},
                ],
                temperature=0.7,
                max_tokens=1500,
                logprobs=True,
                # Use the setting above so the request and the loop below cannot diverge.
                top_logprobs=top_logprobs,
            )
            # Iterate over the candidates actually returned instead of indexing a
            # fixed count, so a shorter list cannot raise IndexError.
            for candidate in response.choices[0].logprobs.content[0].top_logprobs:
                if candidate.token in ('Yes', 'yes'):
                    prob_yes += math.exp(candidate.logprob)
                elif candidate.token in ('No', 'no'):
                    prob_no += math.exp(candidate.logprob)
            temp_pattern_yes.append([f"{i}", f"{j}", f"Trial {t+1}", prob_yes])
            temp_pattern_no.append([f"{i}", f"{j}", f"Trial {t+1}", prob_no])
            print("i="+str(i)+", j="+str(j)+",Trial:"+str(t+1), "p_yes:", prob_yes, "p_no:", prob_no)

# Raw per-trial tables.
columns = ['i', 'j', 'Trial', 'probability']
pattern1_yes_E_df = pd.DataFrame(temp_pattern_yes, columns=columns)
pattern1_no_E_df = pd.DataFrame(temp_pattern_no, columns=columns)
pattern1_yes_E_df.to_csv("pattern1_yes_E.csv", encoding='utf-8')
pattern1_no_E_df.to_csv("pattern1_no_E.csv", encoding='utf-8')

# Per-pair mean and standard deviation of the "yes" probability.
probability_X0_pattern1_E, stdev_X0_pattern1_E = calculate_mean_std_matrices(pattern1_yes_E_df, dag_est_es)
probability_X0_pattern1_E_df = pd.DataFrame(probability_X0_pattern1_E)
stdev_X0_pattern1_E_df = pd.DataFrame(stdev_X0_pattern1_E)
probability_X0_pattern1_E_df.to_csv("probability_X0_pattern1_E.csv", encoding='utf-8')
stdev_X0_pattern1_E_df.to_csv("stdev_X0_pattern1_E.csv", encoding='utf-8')

In [None]:
# pattern2 (Exact Search)
# Every off-diagonal 2nd prompt is sent `sample_number` times; for each reply the
# probability of "yes" / "no" is summed from the top log-probabilities of the
# first generated token.
temp_pattern_yes = []
temp_pattern_no = []

sample_number = 5
top_logprobs = 5

for i in range(dag_est_es.shape[0]):
    for j in range(dag_est_es.shape[0]):
        if i == j:
            continue

        for t in range(sample_number):
            prob_yes = 0
            prob_no = 0
            response = client.chat.completions.create(
                model="gpt-4-1106-preview",
                messages=[
                    {"role": "system", "content": system_role},
                    {"role": "user", "content": causal_2nd_prompt_ES_X_pattern2[i, j]},
                ],
                temperature=0.7,
                max_tokens=1500,
                logprobs=True,
                # Use the setting above so the request and the loop below cannot diverge.
                top_logprobs=top_logprobs,
            )
            # Iterate over the candidates actually returned instead of indexing a
            # fixed count, so a shorter list cannot raise IndexError.
            for candidate in response.choices[0].logprobs.content[0].top_logprobs:
                if candidate.token in ('Yes', 'yes'):
                    prob_yes += math.exp(candidate.logprob)
                elif candidate.token in ('No', 'no'):
                    prob_no += math.exp(candidate.logprob)
            temp_pattern_yes.append([f"{i}", f"{j}", f"Trial {t+1}", prob_yes])
            temp_pattern_no.append([f"{i}", f"{j}", f"Trial {t+1}", prob_no])
            print("i="+str(i)+", j="+str(j)+",Trial:"+str(t+1), "p_yes:", prob_yes, "p_no:", prob_no)

# Raw per-trial tables.
columns = ['i', 'j', 'Trial', 'probability']
pattern2_yes_E_df = pd.DataFrame(temp_pattern_yes, columns=columns)
pattern2_no_E_df = pd.DataFrame(temp_pattern_no, columns=columns)
pattern2_yes_E_df.to_csv("pattern2_yes_E.csv", encoding='utf-8')
pattern2_no_E_df.to_csv("pattern2_no_E.csv", encoding='utf-8')

# Per-pair mean and standard deviation of the "yes" probability.
probability_X0_pattern2_E, stdev_X0_pattern2_E = calculate_mean_std_matrices(pattern2_yes_E_df, dag_est_es)
probability_X0_pattern2_E_df = pd.DataFrame(probability_X0_pattern2_E)
stdev_X0_pattern2_E_df = pd.DataFrame(stdev_X0_pattern2_E)
probability_X0_pattern2_E_df.to_csv("probability_X0_pattern2_E.csv", encoding='utf-8')
stdev_X0_pattern2_E_df.to_csv("stdev_X0_pattern2_E.csv", encoding='utf-8')

For PC

In [None]:
# pattern1 (PC)
# Every off-diagonal 2nd prompt is sent `sample_number` times; for each reply the
# probability of "yes" / "no" is summed from the top log-probabilities of the
# first generated token.
temp_pattern_yes = []
temp_pattern_no = []

sample_number = 5
top_logprobs = 5

for i in range(dag_est_pc.shape[0]):
    for j in range(dag_est_pc.shape[0]):
        if i == j:
            continue

        for t in range(sample_number):
            prob_yes = 0
            prob_no = 0
            response = client.chat.completions.create(
                model="gpt-4-1106-preview",
                messages=[
                    {"role": "system", "content": system_role},
                    {"role": "user", "content": causal_2nd_prompt_PC_X_pattern1[i, j]},
                ],
                temperature=0.7,
                max_tokens=1500,
                logprobs=True,
                # Use the setting above so the request and the loop below cannot diverge.
                top_logprobs=top_logprobs,
            )
            # Iterate over the candidates actually returned instead of indexing a
            # fixed count, so a shorter list cannot raise IndexError.
            for candidate in response.choices[0].logprobs.content[0].top_logprobs:
                if candidate.token in ('Yes', 'yes'):
                    prob_yes += math.exp(candidate.logprob)
                elif candidate.token in ('No', 'no'):
                    prob_no += math.exp(candidate.logprob)
            temp_pattern_yes.append([f"{i}", f"{j}", f"Trial {t+1}", prob_yes])
            temp_pattern_no.append([f"{i}", f"{j}", f"Trial {t+1}", prob_no])
            print("i="+str(i)+", j="+str(j)+",Trial:"+str(t+1), "p_yes:", prob_yes, "p_no:", prob_no)

# Raw per-trial tables.
columns = ['i', 'j', 'Trial', 'probability']
pattern1_yes_P_df = pd.DataFrame(temp_pattern_yes, columns=columns)
pattern1_no_P_df = pd.DataFrame(temp_pattern_no, columns=columns)
pattern1_yes_P_df.to_csv("pattern1_yes_P.csv", encoding='utf-8')
pattern1_no_P_df.to_csv("pattern1_no_P.csv", encoding='utf-8')

# Per-pair mean and standard deviation of the "yes" probability.
probability_X0_pattern1_P, stdev_X0_pattern1_P = calculate_mean_std_matrices(pattern1_yes_P_df, dag_est_pc)
probability_X0_pattern1_P_df = pd.DataFrame(probability_X0_pattern1_P)
stdev_X0_pattern1_P_df = pd.DataFrame(stdev_X0_pattern1_P)
probability_X0_pattern1_P_df.to_csv("probability_X0_pattern1_P.csv", encoding='utf-8')
stdev_X0_pattern1_P_df.to_csv("stdev_X0_pattern1_P.csv", encoding='utf-8')

In [None]:
# pattern2 (PC)
# Every off-diagonal 2nd prompt is sent `sample_number` times; for each reply the
# probability of "yes" / "no" is summed from the top log-probabilities of the
# first generated token.
temp_pattern_yes = []
temp_pattern_no = []

sample_number = 5
top_logprobs = 5

for i in range(dag_est_pc.shape[0]):
    for j in range(dag_est_pc.shape[0]):
        if i == j:
            continue

        for t in range(sample_number):
            prob_yes = 0
            prob_no = 0
            response = client.chat.completions.create(
                model="gpt-4-1106-preview",
                messages=[
                    {"role": "system", "content": system_role},
                    {"role": "user", "content": causal_2nd_prompt_PC_X_pattern2[i, j]},
                ],
                temperature=0.7,
                max_tokens=1500,
                logprobs=True,
                # Use the setting above so the request and the loop below cannot diverge.
                top_logprobs=top_logprobs,
            )
            # Iterate over the candidates actually returned instead of indexing a
            # fixed count, so a shorter list cannot raise IndexError.
            for candidate in response.choices[0].logprobs.content[0].top_logprobs:
                if candidate.token in ('Yes', 'yes'):
                    prob_yes += math.exp(candidate.logprob)
                elif candidate.token in ('No', 'no'):
                    prob_no += math.exp(candidate.logprob)
            temp_pattern_yes.append([f"{i}", f"{j}", f"Trial {t+1}", prob_yes])
            temp_pattern_no.append([f"{i}", f"{j}", f"Trial {t+1}", prob_no])
            print("i="+str(i)+", j="+str(j)+",Trial:"+str(t+1), "p_yes:", prob_yes, "p_no:", prob_no)

# Raw per-trial tables.
columns = ['i', 'j', 'Trial', 'probability']
pattern2_yes_P_df = pd.DataFrame(temp_pattern_yes, columns=columns)
pattern2_no_P_df = pd.DataFrame(temp_pattern_no, columns=columns)
pattern2_yes_P_df.to_csv("pattern2_yes_P.csv", encoding='utf-8')
pattern2_no_P_df.to_csv("pattern2_no_P.csv", encoding='utf-8')

# Per-pair mean and standard deviation of the "yes" probability.
probability_X0_pattern2_P, stdev_X0_pattern2_P = calculate_mean_std_matrices(pattern2_yes_P_df, dag_est_pc)
probability_X0_pattern2_P_df = pd.DataFrame(probability_X0_pattern2_P)
stdev_X0_pattern2_P_df = pd.DataFrame(stdev_X0_pattern2_P)
probability_X0_pattern2_P_df.to_csv("probability_X0_pattern2_P.csv", encoding='utf-8')
# Fixed: the original wrote from the undefined name `stdev_X0_pattern12_P_df` (NameError).
stdev_X0_pattern2_P_df.to_csv("stdev_X0_pattern2_P.csv", encoding='utf-8')