In [None]:
import openai
import pandas as pd
import re
import random
from llm_response import (
    OpenAILLMInvoker,
    LLMResponse,
    OpenAILLMConfig,
    AnthropicLLMConfig,
    AnthropicLLMInvoker,
)
import matplotlib.pyplot as plt
import random
import numpy as np
import anthropic
import scipy.stats as stats



In [None]:
import time
import openai

def safe_api_call(openai, model, messages, stream=True, max_retries=20, initial_delay=2, **kwargs):
    """Call ``openai.chat.completions.create`` and retry when it raises.

    Args:
        openai: Object exposing ``chat.completions.create`` (the ``openai``
            module or a client instance). Note that the parameter name
            shadows the imported module inside this function.
        model: Model name forwarded to the API.
        messages: Chat messages forwarded to the API.
        stream: Forwarded to the API as ``stream``.
        max_retries: Maximum number of attempts before giving up.
        initial_delay: Seconds to wait after the first failure. The wait is
            doubled after every further failure, capped at 60 seconds.
        **kwargs: Extra keyword arguments forwarded unchanged to the API call
            (for example ``temperature``).

    Returns:
        Whatever ``openai.chat.completions.create`` returns.

    Raises:
        Exception: When every attempt failed. The last underlying error is
            attached as ``__cause__``.
    """
    max_delay = 60  # upper bound for a single wait, in seconds
    delay = initial_delay
    last_error = None
    for attempt in range(max_retries):
        try:
            return openai.chat.completions.create(
                model=model,
                messages=messages,
                stream=stream,
                **kwargs  # e.g. temperature
            )
        except Exception as e:
            last_error = e
            if attempt + 1 >= max_retries:
                # Nothing left to retry, so do not waste time sleeping.
                print(f"API Connection Error on attempt {attempt + 1}: {e}. No retries left.")
                break
            print(f"API Connection Error on attempt {attempt + 1}: {e}. Retrying after {delay} seconds...")
            time.sleep(delay)
            delay = min(delay * 2, max_delay)  # capped exponential backoff
    raise Exception("All API call attempts failed.") from last_error

In [None]:
# Structural intervention distance (SID): compares how an intervention at one node changes the nodes downstream of it, adapted here for chain graphs. For a 10-node chain the maximum is 45 (9 + 8 + 7 + ... + 1).

def compute_extended_sid(graph1, graph2):
    """Compute a structural-intervention-style distance between two chain graphs.

    Each graph is an ordered chain. In a chain, every node is treated as
    affecting all nodes that come after it. For every node in the union of
    both graphs, the distance counts the downstream nodes it has in
    ``graph1`` that it does not have in ``graph2``. The measure is therefore
    not symmetric in its arguments.

    Args:
        graph1: Either a string whose nodes are separated by ``" -> "`` or
            ``" - "``, or an already split sequence of node names.
        graph2: Same format as ``graph1``.

    Returns:
        A tuple ``(sid, normalized_sid, extra_nodes)`` where

        * ``sid`` is the raw count described above,
        * ``normalized_sid`` is ``sid`` divided by ``n * (n - 1) / 2`` with
          ``n`` the number of distinct nodes across both graphs (``0.0`` when
          ``n`` is below 2, where that denominator would be zero),
        * ``extra_nodes`` is the number of distinct nodes that appear in the
          union but not in ``graph1``.
    """
    def _as_node_list(graph):
        # Only strings need splitting; sequences are used as given.
        if isinstance(graph, str):
            return re.split(r' \-> | - ', graph)
        return graph

    def generate_reachability_dict(graph):
        # Map each node to the set of nodes that follow it in the chain.
        # If a node occurs twice, its last occurrence wins.
        return {node: set(graph[pos + 1:]) for pos, node in enumerate(graph)}

    graph1 = _as_node_list(graph1)
    graph2 = _as_node_list(graph2)

    reach1 = generate_reachability_dict(graph1)
    reach2 = generate_reachability_dict(graph2)

    all_nodes = set(graph1) | set(graph2)

    # Sum, over every node, the downstream nodes present in graph1 but
    # missing in graph2 after intervening at that node.
    sid = sum(
        len(reach1.get(node, set()) - reach2.get(node, set()))
        for node in all_nodes
    )

    # Largest possible value for a chain over all_nodes: (n-1) + (n-2) + ... + 1.
    max_sid = sum(range(1, len(all_nodes)))
    normalized_sid = sid / max_sid if max_sid else 0.0

    return (sid, normalized_sid, len(all_nodes) - len(set(graph1)))

# Compute the extended SID considering the impact on all subsequent nodes
#extended_sid_value = compute_extended_sid(listSameLLM2, listTenE)
#extended_sid_value

#Contribution of picking a wrong node

In [None]:

def confidence_interval(data, confidence=0.95):
    """Return the sample mean together with a two-sided Student-t interval.

    Args:
        data: Sequence of numeric observations.
        confidence: Confidence level of the interval (default 0.95).

    Returns:
        A tuple ``(mean, lower, upper)`` with
        ``lower = mean - margin`` and ``upper = mean + margin``, where
        ``margin`` is the standard error of the mean multiplied by the
        t-distribution critical value for ``len(data) - 1`` degrees of freedom.
    """
    dof = len(data) - 1
    center = np.mean(data)
    # Standard error scaled by the two-sided t critical value.
    half_width = stats.sem(data) * stats.t.ppf((1 + confidence) / 2., dof)
    return center, center - half_width, center + half_width

In [1]:
#This is a test of parametric knowledge (causal v anticausal)
def parametric_test(causal=False,chain = False, context = False, narr_graph = False):
    """Test parametric knowledge (causal vs. anticausal) on model-generated chains.

    For every chain length ``numItems`` in 4, 8, 12, 16, 20 the experiment is
    repeated 10 times. One repetition:

    1. asks GPT-4 for 30 seed events (each causing the next when ``causal``
       is true, otherwise deliberately "anticausal"),
    2. selects ``numItems`` of them and asks for a ground-truth chain graph,
    3. asks for a narrative that follows that chain,
    4. asks for a chain graph recovered from the narrative,
    5. asks 10 "Did X cause Y?" questions about node pairs that sit
       ``numItems // 2`` positions apart in the ground-truth chain.

    Answers are encoded as Yes=2 / No=1. They are compared with the
    ground-truth order (accuracy) and with the order of the same two nodes in
    the recovered chain (consistency; a node missing from the recovered chain
    is given position -1).

    A repetition is skipped (with a printed notice) when no query could be
    built or when any answer lacks a parsable ``<answer>Yes/No</answer>`` tag.

    Args:
        causal: Generate causal seed events when true, anticausal otherwise.
        chain: Question prompt asks for step-by-step reasoning. Takes
            precedence over ``context`` and ``narr_graph``.
        context: Question prompt includes three worked examples. Takes
            precedence over ``narr_graph``.
        narr_graph: Question prompt also includes the recovered chain.

    Returns:
        A 7-tuple ``(events, ground_truth_chains, accuracies, index_gaps,
        consistencies, narratives, recovered_chains)``. Every element is a
        list with one entry per chain length; each entry is a list with one
        item per scored repetition.
    """
    num_iters = 10
    num_queries = 10
    answer_codes = {"Yes": 2, "No": 1}

    def _ask(prompt, **kwargs):
        # Stream one single-turn GPT-4 completion and return the joined text.
        stream = safe_api_call(
            openai=openai,
            model="gpt-4",
            messages=[{"role": "user", "content": prompt}],
            stream=True,
            **kwargs
        )
        text = ""
        for chunk in stream:
            piece = chunk.choices[0].delta.content
            if piece is not None:
                text += piece
        return text

    def _extract_answer(text):
        # Return "Yes"/"No" from an <answer>...</answer> tag, or None if absent.
        match = re.search(r'<answer>(Yes|No)</answer>', text, re.IGNORECASE)
        return match.group(1).capitalize() if match else None

    def _position(nodes, node):
        # Index of node in nodes, or -1 when it does not occur.
        return nodes.index(node) if node in nodes else -1

    def _build_question(item, narrative, recovered_chain):
        # Assemble the question prompt for one (cause, effect) pair.
        if chain:
            return "Use this narrative "+ narrative + " as context Did "+ item[0]+ " cause " + item[1] +\
                " ? Do step by step reasoning. Then output your answer with <answer>Yes/No</answer>"
        if context:
            return "Use this narrative "+ narrative +\
                " as context. Did "+ item[0]+ " cause " + item[1] +\
                " ? Output your answer with <answer>Yes/No</answer>. The cause can be direct or indirect." +\
                "An example narrative would be: Rains leads to plants growing. This then causes increased oxygen in the atmosphere. A potential question would be does Rain cause increased oxygen in the atmosphere? The answer would be Yes." +\
                "Another example narrative would be: Increased oxygen in the atmosphere is because of plants growing. Plants grow because rain provides them essential nutrients. A potential question would be does Rain cause increased oxygen in the atmosphere? The answer would be Yes." +\
                "Another example narrative would be: Rain leads plants to grow. Plants growing causes less oxygen in the atmosphere. A potential question would be does Rain cause less oxygen in the atmosphere? The answer would be Yes."
        if narr_graph:
            return "Use this narrative "+ narrative + " and this causal ordering " +  str(recovered_chain) +\
                "(such that each item is a cause of every item after it, for example the first list item is a cause of the third, fourth, fifth items etc)" +\
                " as context. Did "+ item[0]+ " cause " + item[1] +\
                " ? Output only <answer>Yes/No</answer>. The cause can be direct or indirect"
        return "Use this narrative "+ narrative + " as context. Did "+ item[0]+ " cause " + item[1] +\
            "? Output only <answer>Yes/No</answer>. The cause can be indirect or direct"

    OeventList = []
    OtenEList = []
    OsameLLM1List = []
    Oacc_list = []
    Ocon_list = []
    Oint_diff = []
    Onarr_list = []

    # Outer loop: chain length.
    for j in range(4,22,4):
        numItems = j
        half = numItems // 2
        print("_______________________________________________:",j)

        # Per-chain-length trackers.
        eventList = []
        tenEList = []
        sameLLM1List = []
        acc_list = []
        con_list = []
        int_diff = []
        narr_list = []

        for counter in range(num_iters):
            # Seed events.
            if causal:
                seed_prompt = "generate 30 events that cause each other (for example the first event causes the second and the second causes the third)"
            else:
                seed_prompt = "generate 30 events that are anticausal, for example the first event could be cancer and the second event could be a longer life " +\
                    "because in reality, cancer causes a shorter life. Make it so each of the 30 events are anticausal in this way such that the next event is actually the opposite of what it should be " +\
                    "For 3 events we might have 1.Rain 2.Plants Die 3.Increased Oxygen"
            events = _ask(seed_prompt)

            # Strip "N. " numbering and drop blank lines so that an empty
            # string is never selected as an event.
            result_list = [re.sub(r'^\d+\.\s*', '', line.strip()) for line in events.split('\n')]
            result_list = [entry for entry in result_list if entry]
            list1 = result_list[::2]   # 1st, 3rd, 5th, ... event
            list2 = result_list[1::2]  # 2nd, 4th, 6th, ... event
            new_list = list1[:half] + list2[:half]

            # Ground-truth chain graph (deterministic sampling).
            tenE = _ask(
                "context is " +str(new_list) + " generate a causal chain graph connecting them (dont include numbers with the items)."+
                "The output should be in the form of a chain graph with only items from " +str(new_list) +" , the output should have "+str(numItems)+ " nodes in it that sequentially connected with edges, seperate nodes with a  ->",
                temperature=0
            )
            list_tenE = re.split(r' \-> | - ', tenE)

            # Narrative that follows the chain order.
            originalNarrative = _ask(
                "context is" +tenE +"generate a hypothetical narrative from this causal chain graph and make causal relations explicit, even when the causal relations " +
                "do not make sense, keep the causal relations as they were in the context. The events in the story should occur in the same order as in the chain graph (eg first item in chain graph should appear in narrative before the second item)"
            )
            print(list_tenE)
            print(originalNarrative)

            # Chain graph recovered from the narrative.
            sameLLM1 = _ask(
                "generate a causal chain graph with "+str(numItems)+ " nodes from this narrative " +originalNarrative +"only using items from this list:"
                +events+"(dont include numbers with the items), the output should have "+str(numItems)+ " nodes in it that sequentially connected with edges, seperate nodes with a  -> "
            )
            list_SameLLM1 = re.split(r' \-> | - ', sameLLM1)

            truth_list = []
            consistency_list = []
            query_list = []
            for _ in range(num_queries):
                # random.randint is inclusive on both ends, so the first
                # index must stay below `half` to keep int2 inside a chain of
                # numItems nodes.
                int1 = random.randint(0, half - 1)
                int2 = int1 + half
                if int2 >= len(list_tenE):
                    # The model returned fewer nodes than requested.
                    continue
                uniNodesEvent1 = list_tenE[int1]
                uniNodesEvent2 = list_tenE[int2]
                query_list.append((uniNodesEvent1,uniNodesEvent2 ))

                truth_list.append(2 if int1 < int2 else 1)

                index1 = _position(list_SameLLM1, uniNodesEvent1)
                index2 = _position(list_SameLLM1, uniNodesEvent2)
                consistency_list.append(2 if index1 < index2 else 1)

            print(query_list)

            comp = []
            for item in query_list:
                answer = _ask(_build_question(item, originalNarrative, sameLLM1))
                print(answer)
                comp.append(_extract_answer(answer))

            print(comp)
            if not comp:
                print("Skipping repetition", counter, "- no query could be built from the ground-truth chain")
                continue
            if any(ans is None for ans in comp):
                print("Skipping repetition", counter, "- at least one answer had no <answer>Yes/No</answer> tag")
                continue

            qList = [answer_codes[ans] for ans in comp]
            print(qList)
            print(truth_list)
            acc = sum(1 for x, y in zip(truth_list, qList) if x == y)/len(qList)
            con_score = sum(1 for x, y in zip(consistency_list, qList) if x == y)/len(qList)
            con_list.append(con_score)
            acc_list.append(acc)
            eventList.append(events)
            tenEList.append(tenE)
            print("counter", counter)
            int_diff.append(half)  # every queried pair is exactly `half` positions apart
            narr_list.append(originalNarrative)
            sameLLM1List.append(list_SameLLM1)

        OeventList.append(eventList)
        OtenEList.append(tenEList)
        Oint_diff.append(int_diff)
        Ocon_list.append(con_list)
        Oacc_list.append(acc_list)
        Onarr_list.append(narr_list)
        OsameLLM1List.append(sameLLM1List)

    return (OeventList, OtenEList, Oacc_list, Oint_diff,Ocon_list,Onarr_list, OsameLLM1List )

In [None]:
#This tests topological ordering
def topological(reverse= False, forward = False, chain = False, context = False, narr_graph = False):
    """Test whether narrative order affects cause/effect answers on chain graphs.

    For every chain length ``numItems`` in 4, 8, 12, 16, 20 the experiment is
    repeated 10 times. One repetition:

    1. asks GPT-4 for 100 random events,
    2. asks gpt-3.5-turbo to pick ``numItems`` of them and output a
       ground-truth chain graph,
    3. asks GPT-4 for a narrative that presents the chain in forward or
       reverse order,
    4. asks GPT-4 for a chain graph recovered from the narrative,
    5. asks 10 "Did X cause Y?" questions about randomly sampled pairs of
       distinct positions in the ground-truth chain.

    Answers are encoded as Yes=2 / No=1. They are compared with the
    ground-truth order (accuracy) and with the order of the same two nodes in
    the recovered chain (consistency; a node missing from the recovered chain
    is given position -1).

    A repetition is skipped (with a printed notice) when no query could be
    built or when any answer lacks a parsable ``<answer>Yes/No</answer>`` tag.

    Args:
        reverse: Narrative lists the events in the opposite order of the chain.
        forward: Narrative lists the events in chain order. Wins over
            ``reverse`` when both are true.
        chain: Question prompt asks for step-by-step reasoning. Takes
            precedence over ``context`` and ``narr_graph``.
        context: Question prompt includes three worked examples. Takes
            precedence over ``narr_graph``.
        narr_graph: Question prompt also includes the recovered chain.

    Returns:
        A 7-tuple ``(events, ground_truth_chains, accuracies, index_gaps,
        consistencies, narratives, recovered_chains)``. Every element is a
        list with one entry per chain length; each entry is a list with one
        item per scored repetition. ``index_gaps`` holds ``int2 - int1`` of
        the last sampled pair of the repetition.

    Raises:
        ValueError: If neither ``reverse`` nor ``forward`` is true, because no
            narrative would be generated.
    """
    if not (reverse or forward):
        # Fail before spending any API calls.
        raise ValueError("topological() needs reverse=True or forward=True to generate a narrative")

    num_iters = 10
    num_queries = 10
    answer_codes = {"Yes": 2, "No": 1}

    def _ask(prompt, model="gpt-4", **kwargs):
        # Stream one single-turn completion and return the joined text.
        stream = safe_api_call(
            openai=openai,
            model=model,
            messages=[{"role": "user", "content": prompt}],
            stream=True,
            **kwargs
        )
        text = ""
        for chunk in stream:
            piece = chunk.choices[0].delta.content
            if piece is not None:
                text += piece
        return text

    def _extract_answer(text):
        # Return "Yes"/"No" from an <answer>...</answer> tag, or None if absent.
        match = re.search(r'<answer>(Yes|No)</answer>', text, re.IGNORECASE)
        return match.group(1).capitalize() if match else None

    def _position(nodes, node):
        # Index of node in nodes, or -1 when it does not occur.
        return nodes.index(node) if node in nodes else -1

    def _build_question(item, narrative, recovered_chain):
        # Assemble the question prompt for one (cause, effect) pair.
        if chain:
            return "Use this narrative "+ narrative + " as context Did "+ item[0]+ " cause " + item[1] +\
                " ? Do step by step reasoning. Then output your answer with <answer>Yes/No</answer>. The cause can be direct or indirect."
        if context:
            return "Use this narrative "+ narrative +\
                " as context. Did "+ item[0]+ " cause " + item[1] +\
                " ? Output your answer with <answer>Yes/No</answer>. The cause can be direct or indirect." +\
                "An example narrative would be: Rains leads to plants growing. This then causes increased oxygen in the atmosphere. A potential question would be does Rain cause increased oxygen in the atmosphere? The answer would be Yes." +\
                "Another example narrative would be: Increased oxygen in the atmosphere is because of plants growing. Plants grow because rain provides them essential nutrients. A potential question would be does Rain cause increased oxygen in the atmosphere? The answer would be Yes." +\
                "Another example narrative would be: Rain leads plants to grow. Plants growing causes less oxygen in the atmosphere. A potential question would be does Rain cause less oxygen in the atmosphere? The answer would be Yes."
        if narr_graph:
            return "Use this narrative "+ narrative + " and this causal ordering " +  str(recovered_chain) +\
                "(such that each item is a cause of every item after it, for example the first list item is a cause of the third, fourth, fifth items etc)" +\
                " as context. Did "+ item[0]+ " cause " + item[1] +\
                " ? Output only <answer>Yes/No</answer>. The cause can be direct or indirect"
        return "Use this narrative "+ narrative + " as context. Did "+ item[0]+ " cause " + item[1] +\
            "? Output only <answer>Yes/No</answer>. The cause can be indirect or direct"

    OeventList = []
    OtenEList = []
    OsameLLM1List = []
    Oacc_list = []
    Ocon_list = []
    Oint_diff = []
    Onarr_list = []

    # Outer loop: chain length.
    for j in range(4,22,4):
        numItems = j
        print("_______________________________________________:",j)

        # Per-chain-length trackers.
        eventList = []
        tenEList = []
        sameLLM1List = []
        acc_list = []
        con_list = []
        int_diff = []
        narr_list = []

        for counter in range(num_iters):
            events = _ask("generate 100 random distinct events")

            # Ground-truth chain graph (deterministic sampling).
            tenE = _ask(
                "context is" +events +"randomly pick"+str(numItems)+ "items from this list and generate a causal chain graph connecting them (dont include numbers with the items)."+
                "The output should be in the form of a chain graph with only items from the 100 items, the output should have "+str(numItems)+ " nodes in it that sequentially connected with edges, seperate nodes with a  ->",
                model="gpt-3.5-turbo",
                temperature=0
            )

            # Narrative in the requested order. When both flags are set the
            # forward narrative is the one used, so only that one is requested.
            if forward:
                originalNarrative = _ask(
                    "context is" +tenE +"generate a hypothetical narrative from this causal chain graph and make causal relations explicit, even when the causal relations " +
                    "do not make sense, keep the causal relations as they were in the context. The events in the story should occur in the same as in the chain graph (eg first item in chain graph should appear in narrative before the second item)"
                )
            else:
                originalNarrative = _ask(
                    "context is" +tenE +"generate a hypothetical narrative from this causal chain graph and make causal relations explicit, even when the causal relations " +
                    "do not make sense, keep the causal relations as they were in the context. The events in the story should occur in the opposite order as in the chain graph (eg last item in chain graph should appear first in the  narrative etc)"
                )

            # Chain graph recovered from the narrative.
            sameLLM1 = _ask(
                "generate a causal chain graph with "+str(numItems)+ " nodes from this narrative " +originalNarrative +"only using items from this list:"
                +events+"(dont include numbers with the items), the output should have "+str(numItems)+ " nodes in it that sequentially connected with edges, seperate nodes with a  -> "
            )

            list_tenE = re.split(r' \-> | - ', tenE)
            list_SameLLM1 = re.split(r' \-> | - ', sameLLM1)

            truth_list = []
            consistency_list = []
            query_list = []
            last_gap = None

            # random.sample needs at least two positions to draw a pair.
            if len(list_tenE) >= 2:
                for _ in range(num_queries):
                    int1, int2 = random.sample(range(0, len(list_tenE)), 2)
                    last_gap = int2 - int1
                    uniNodesEvent1 = list_tenE[int1]
                    uniNodesEvent2 = list_tenE[int2]
                    query_list.append((uniNodesEvent1,uniNodesEvent2 ))

                    truth_list.append(2 if int1 < int2 else 1)

                    index1 = _position(list_SameLLM1, uniNodesEvent1)
                    index2 = _position(list_SameLLM1, uniNodesEvent2)
                    consistency_list.append(2 if index1 < index2 else 1)

            print(query_list)

            comp = []
            for item in query_list:
                answer = _ask(_build_question(item, originalNarrative, sameLLM1))
                print(answer)
                comp.append(_extract_answer(answer))

            print(comp)
            if not comp:
                print("Skipping repetition", counter, "- ground-truth chain has fewer than two nodes")
                continue
            if any(ans is None for ans in comp):
                print("Skipping repetition", counter, "- at least one answer had no <answer>Yes/No</answer> tag")
                continue

            qList = [answer_codes[ans] for ans in comp]
            print(qList)
            print(truth_list)
            acc = sum(1 for x, y in zip(truth_list, qList) if x == y)/len(qList)
            con_score = sum(1 for x, y in zip(consistency_list, qList) if x == y)/len(qList)
            con_list.append(con_score)
            acc_list.append(acc)
            eventList.append(events)
            tenEList.append(tenE)
            print("counter", counter)
            int_diff.append(last_gap)
            narr_list.append(originalNarrative)
            sameLLM1List.append(list_SameLLM1)

        OeventList.append(eventList)
        OtenEList.append(tenEList)
        Oint_diff.append(int_diff)
        Ocon_list.append(con_list)
        Oacc_list.append(acc_list)
        Onarr_list.append(narr_list)
        OsameLLM1List.append(sameLLM1List)

    return (OeventList, OtenEList, Oacc_list, Oint_diff,Ocon_list,Onarr_list, OsameLLM1List )