In [1]:
import nltk
from nltk.tokenize import word_tokenize
from nltk.stem import PorterStemmer, WordNetLemmatizer


def findOccurrence(sentence, word): #find the occurrence of a given word by stemming and lemmatizing the word and the given text sequence 
    tokens = word_tokenize(sentence.lower()) #tokenize the text sequence
    stemmer = PorterStemmer() 
    lemmatizer = WordNetLemmatizer()
    stemmed = [stemmer.stem(token) for token in tokens] #stem each token 
    lemmatized = [lemmatizer.lemmatize(token) for token in tokens]   #lemmatie each token
    word = stemmer.stem(word) #stem the word to search for in the text

    if word in tokens or word in stemmed or word in lemmatized: #if the stemmed word is found with any method
        return True
    else:
        return False

In [2]:
from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig, LlamaTokenizer, LlamaForCausalLM
import transformers
import torch

def generate_prompt(instruction: str, input_ctxt: str = None) -> str: # boilerplate provided by alpaca models
    if input_ctxt:
        return f"""Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
        ### Instruction:
        {instruction}
        ### Input:
        {input_ctxt}
      	### Response:"""
    else:
      return f"""Below is an instruction that describes a task. Write a response that appropriately completes the request.
    	 ### Instruction:
      {instruction}
       ### Response:"""


model = "/bigwork/nhwpshaa/Fine-tuned AlpacaNative/Few Epochs/3 Epochs/checkpoint-760" #load the model
tokenizer = LlamaTokenizer.from_pretrained("/bigwork/nhwpshaa/alpaca-native/") #load the tokenizer
pipeline = transformers.pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    torch_dtype=torch.bfloat16,
    trust_remote_code=True,
    device_map="auto",
)

You are using the default legacy behaviour of the <class 'transformers.models.llama.tokenization_llama.LlamaTokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565


Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

In [3]:
import csv


class effect:
    def __init__(self, name):
        self.name = name
        self.recall = False


class causalrelation:
    def __init__(self, cause):
        self.cause = cause
        self.effect= []
        self.effectRecallCount = 0
        self.recall = False

causalrelations = []

with open('Experiments/Section 5.1: Causal Relationships Extraction/Extracted Causal Relationship/cause.csv', 'r') as file:
    csvfile = csv.DictReader(file)
    for row in csvfile:
        flag = False
        for i in causalrelations:
            if row['effect'] == i.cause: #collect all causes
                newEffect = effect(row['cause']) #initialize as effect
                i.effect.append(newEffect) 
                flag = True
        if flag == False: #if not yet initialized
            newCR = causalrelation(row['effect']) #initialize a new causal relationship
            newEffect = effect(row['cause']) #initialize a new effect
            newCR.effect.append(newEffect)
            causalrelations.append(newCR)


In [4]:
input_ctxt = "What are the effects of <mask>?" #check for every question type


instruction ="Answer the following question to the best of your knowledge"
k = 0
for i in causalrelations:
    i.effectRecallCount = 0 # reset the count for each new effect
    input_ctxt = "What are the effects of <mask>?"
    input_ctxt = input_ctxt.replace("<mask>", i.cause) #replace the mask with the actual cause
    sequences = pipeline(
        generate_prompt(instruction, input_ctxt),
        max_length=160,
        do_sample=True,
        top_k=50,
        num_return_sequences=4,
        eos_token_id=tokenizer.eos_token_id,
    )
    for singleEffect in i.effect:
        singleEffect.recall = False #reset the flag for each effect
        for seq in sequences:
            if findOccurrence(seq['generated_text'], singleEffect.name):
                i.recall = True #set the flag when the effect is found
                singleEffect.recall = True # set the flag when the effect is found
                i.effectRecallCount += 1 #increment the number of effects found
                break #move on to the next effect

k=0
for relations in causalrelations:
    if relations.recall == True: #calculate the total recalled topics
        k += 1

print(f"Total recalled topics: {k}")
print(f"Total topics: {len(causalrelations)}")
with open("/bigwork/nhwpshaa/Fine-tuned AlpacaNative/Few Epochs/3 Epochs/AlpacaStatsWhat.csv", 'w', newline='') as writefile:        #adjust the path to write the file
    fieldnames = ['cause', 'total effects', 'recalled effects']
    writer = csv.DictWriter(writefile, fieldnames=fieldnames)
    writer.writeheader()
    for printvalues in causalrelations:
        recallcount= 0
        print(f"Total no. of effects: {len(printvalues.effect)}")
        for oneEffect in printvalues.effect:
            if oneEffect.recall == True: #calculate the total recalled effects
                recallcount += 1
        print(f"Total recalled: {recallcount}")
        writer.writerow({'cause': printvalues.cause, 'total effects': len(printvalues.effect), 'recalled effects': recallcount})
        for values in printvalues.effect:
            print(f"Cause: {printvalues.cause} Effect: {values.name} Recalled(Effect): {values.recall}")

Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.
You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset


Total recalled topics: 7
Total topics: 38
Total no. of effects: 5
Total recalled: 0
Cause: super bowl xxxviii Effect: delhomme Recalled(Effect): False
Cause: super bowl xxxviii Effect: morgan was also an integral part of the panthers defense Recalled(Effect): False
Cause: super bowl xxxviii Effect: fox Recalled(Effect): False
Cause: super bowl xxxviii Effect: 11 – 5 record Recalled(Effect): False
Cause: super bowl xxxviii Effect: super bowl — fox Recalled(Effect): False
Total no. of effects: 1
Total recalled: 0
Cause: olympics Effect: romney donated to charity the $ 1.4 million in salary and severance payments Recalled(Effect): False
Total no. of effects: 1
Total recalled: 0
Cause: golf Effect: well-struck shot Recalled(Effect): False
Total no. of effects: 28
Total recalled: 1
Cause: slavery Effect: poverty Recalled(Effect): True
Cause: slavery Effect: economic history of the guinea coast Recalled(Effect): False
Cause: slavery Effect: adverse impact Recalled(Effect): False
Cause: slave

In [5]:
instruction ="Answer the following question to the best of your knowledge"
k = 0
for i in causalrelations:
    i.effectRecallCount = 0
    input_ctxt = "What impacts does <mask> have?"
    input_ctxt = input_ctxt.replace("<mask>", i.cause)
    sequences = pipeline(
        generate_prompt(instruction, input_ctxt),
        max_length=160,
        do_sample=True,
        top_k=50,
        num_return_sequences=4,
        eos_token_id=tokenizer.eos_token_id,
    )
    for singleEffect in i.effect:
        singleEffect.recall = False
        for seq in sequences:
            if findOccurrence(seq['generated_text'], singleEffect.name):
                i.recall = True
                singleEffect.recall = True
                i.effectRecallCount += 1
                #k += 1
                break

k=0
for relations in causalrelations:
    if relations.recall == True:
        k += 1

print(f"Total recalled topics: {k}")
print(f"Total topics: {len(causalrelations)}")
with open("/bigwork/nhwpshaa/Fine-tuned AlpacaNative/Few Epochs/3 Epochs/AlpacaStatsPrecursorWhat.csv", 'a', newline='') as writefile:
    fieldnames = ['cause', 'total effects', 'recalled effects']
    writer = csv.DictWriter(writefile, fieldnames=fieldnames)
    writer.writeheader()
    for printvalues in causalrelations:
        recallcount= 0
        print(f"Total no. of effects: {len(printvalues.effect)}")
        for oneEffect in printvalues.effect:
            if oneEffect.recall == True:
                recallcount += 1
        print(f"Total recalled: {recallcount}")
        writer.writerow({'cause': printvalues.cause, 'total effects': len(printvalues.effect), 'recalled effects': recallcount})
        for values in printvalues.effect:
            print(f"Cause: {printvalues.cause} Effect: {values.name} Recalled(Effect): {values.recall}")

Total recalled topics: 10
Total topics: 38
Total no. of effects: 5
Total recalled: 0
Cause: super bowl xxxviii Effect: delhomme Recalled(Effect): False
Cause: super bowl xxxviii Effect: morgan was also an integral part of the panthers defense Recalled(Effect): False
Cause: super bowl xxxviii Effect: fox Recalled(Effect): False
Cause: super bowl xxxviii Effect: 11 – 5 record Recalled(Effect): False
Cause: super bowl xxxviii Effect: super bowl — fox Recalled(Effect): False
Total no. of effects: 1
Total recalled: 0
Cause: olympics Effect: romney donated to charity the $ 1.4 million in salary and severance payments Recalled(Effect): False
Total no. of effects: 1
Total recalled: 0
Cause: golf Effect: well-struck shot Recalled(Effect): False
Total no. of effects: 28
Total recalled: 0
Cause: slavery Effect: poverty Recalled(Effect): False
Cause: slavery Effect: economic history of the guinea coast Recalled(Effect): False
Cause: slavery Effect: adverse impact Recalled(Effect): False
Cause: sla

In [None]:
instruction ="Answer the following question to the best of your knowledge"
k = 0
for i in causalrelations:
    i.effectRecallCount = 0
    input_ctxt = "What effect does <mask> have?"
    input_ctxt = input_ctxt.replace("<mask>", i.cause)
    sequences = pipeline(
        generate_prompt(instruction, input_ctxt),
        max_length=160,
        do_sample=True,
        top_k=50,
        num_return_sequences=4,
        eos_token_id=tokenizer.eos_token_id,
    )
    for singleEffect in i.effect:
        singleEffect.recall = False
        for seq in sequences:
            if findOccurrence(seq['generated_text'], singleEffect.name):
                i.recall = True
                singleEffect.recall = True
                i.effectRecallCount += 1
                #k += 1
                break
                
                
k=0
for relations in causalrelations:
    if relations.recall == True:
        k += 1

print(f"Total recalled topics: {k}")
print(f"Total topics: {len(causalrelations)}")
with open("/bigwork/nhwpshaa/Fine-tuned AlpacaNative/Few Epochs/3 Epochs/AlpacaStatsPrecursorWhat.csv", 'a', newline='') as writefile:
    fieldnames = ['cause', 'total effects', 'recalled effects']
    writer = csv.DictWriter(writefile, fieldnames=fieldnames)
    for printvalues in causalrelations:
        recallcount= 0
        print(f"Total no. of effects: {len(printvalues.effect)}")
        for oneEffect in printvalues.effect:
            if oneEffect.recall == True:
                recallcount += 1
        print(f"Total recalled: {recallcount}")
        writer.writerow({'cause': printvalues.cause, 'total effects': len(printvalues.effect), 'recalled effects': recallcount})
        for values in printvalues.effect:
            print(f"Cause: {printvalues.cause} Effect: {values.name} Recalled(Effect): {values.recall}")

In [7]:
instruction ="Answer the following question to the best of your knowledge"
k = 0
for i in causalrelations:
    i.effectRecallCount = 0
    input_ctxt = "What does <mask> lead to?"
    input_ctxt = input_ctxt.replace("<mask>", i.cause)
    sequences = pipeline(
        generate_prompt(instruction, input_ctxt),
        max_length=160,
        do_sample=True,
        top_k=50,
        num_return_sequences=4,
        eos_token_id=tokenizer.eos_token_id,
    )
    for singleEffect in i.effect:
        singleEffect.recall = False
        for seq in sequences:
            if findOccurrence(seq['generated_text'], singleEffect.name):
                i.recall = True
                singleEffect.recall = True
                i.effectRecallCount += 1
                break

                
k=0
for relations in causalrelations:
    if relations.recall == True:
        k += 1

print(f"Total recalled topics: {k}")
print(f"Total topics: {len(causalrelations)}")
with open("/bigwork/nhwpshaa/Fine-tuned AlpacaNative/Few Epochs/3 Epochs/AlpacaStatsPrecursorWhat.csv", 'a', newline='') as writefile:
    fieldnames = ['cause', 'total effects', 'recalled effects']
    writer = csv.DictWriter(writefile, fieldnames=fieldnames)
    for printvalues in causalrelations:
        recallcount= 0
        print(f"Total no. of effects: {len(printvalues.effect)}")
        for oneEffect in printvalues.effect:
            if oneEffect.recall == True:
                recallcount += 1
        print(f"Total recalled: {recallcount}")
        writer.writerow({'cause': printvalues.cause, 'total effects': len(printvalues.effect), 'recalled effects': recallcount})
        for values in printvalues.effect:
            print(f"Cause: {printvalues.cause} Effect: {values.name} Recalled(Effect): {values.recall}")

Total recalled topics: 11
Total topics: 38
Total no. of effects: 5
Total recalled: 0
Cause: super bowl xxxviii Effect: delhomme Recalled(Effect): False
Cause: super bowl xxxviii Effect: morgan was also an integral part of the panthers defense Recalled(Effect): False
Cause: super bowl xxxviii Effect: fox Recalled(Effect): False
Cause: super bowl xxxviii Effect: 11 – 5 record Recalled(Effect): False
Cause: super bowl xxxviii Effect: super bowl — fox Recalled(Effect): False
Total no. of effects: 1
Total recalled: 0
Cause: olympics Effect: romney donated to charity the $ 1.4 million in salary and severance payments Recalled(Effect): False
Total no. of effects: 1
Total recalled: 0
Cause: golf Effect: well-struck shot Recalled(Effect): False
Total no. of effects: 28
Total recalled: 0
Cause: slavery Effect: poverty Recalled(Effect): False
Cause: slavery Effect: economic history of the guinea coast Recalled(Effect): False
Cause: slavery Effect: adverse impact Recalled(Effect): False
Cause: sla

In [8]:
instruction ="Answer the following question to the best of your knowledge"
k = 0
for i in causalrelations:
    i.effectRecallCount = 0
    for singleEffect in i.effect:
        singleEffect.recall = False
        input_ctxt = "What factors are associated with <mask>?"
        input_ctxt = input_ctxt.replace("<mask>", singleEffect.name)
        sequences = pipeline(
            generate_prompt(instruction, input_ctxt),
            max_length=160,
            do_sample=True,
            top_k=50,
            num_return_sequences=4,
            eos_token_id=tokenizer.eos_token_id,
        )
        for seq in sequences:
            if findOccurrence(seq['generated_text'], i.cause):
                    i.recall = True
                    singleEffect.recall = True
                    i.effectRecallCount += 1
                    #k += 1
                    break
                   
                
k=0
for relations in causalrelations:
    if relations.recall == True:
        k += 1

print(f"Total recalled topics: {k}")
print(f"Total topics: {len(causalrelations)}")
with open("/bigwork/nhwpshaa/Fine-tuned AlpacaNative/Few Epochs/3 Epochs/AlpacaStatsPrecursorWhat.csv", 'a', newline='') as writefile:
    fieldnames = ['cause', 'total effects', 'recalled effects']
    writer = csv.DictWriter(writefile, fieldnames=fieldnames)
    for printvalues in causalrelations:
        recallcount= 0
        print(f"Total no. of effects: {len(printvalues.effect)}")
        for oneEffect in printvalues.effect:
            if oneEffect.recall == True:
                recallcount += 1
        print(f"Total recalled: {recallcount}")
        writer.writerow({'cause': printvalues.cause, 'total effects': len(printvalues.effect), 'recalled effects': recallcount})
        for values in printvalues.effect:
            print(f"Cause: {printvalues.cause} Effect: {values.name} Recalled(Effect): {values.recall}")

Total recalled topics: 13
Total topics: 38
Total no. of effects: 5
Total recalled: 0
Cause: super bowl xxxviii Effect: delhomme Recalled(Effect): False
Cause: super bowl xxxviii Effect: morgan was also an integral part of the panthers defense Recalled(Effect): False
Cause: super bowl xxxviii Effect: fox Recalled(Effect): False
Cause: super bowl xxxviii Effect: 11 – 5 record Recalled(Effect): False
Cause: super bowl xxxviii Effect: super bowl — fox Recalled(Effect): False
Total no. of effects: 1
Total recalled: 0
Cause: olympics Effect: romney donated to charity the $ 1.4 million in salary and severance payments Recalled(Effect): False
Total no. of effects: 1
Total recalled: 1
Cause: golf Effect: well-struck shot Recalled(Effect): True
Total no. of effects: 28
Total recalled: 4
Cause: slavery Effect: poverty Recalled(Effect): False
Cause: slavery Effect: economic history of the guinea coast Recalled(Effect): False
Cause: slavery Effect: adverse impact Recalled(Effect): False
Cause: slav

In [9]:
instruction ="Answer the following question to the best of your knowledge"
k = 0
for i in causalrelations:
    i.effectRecallCount = 0
    input_ctxt = "What happens due to <mask>?"
    input_ctxt = input_ctxt.replace("<mask>", i.cause)
    sequences = pipeline(
        generate_prompt(instruction, input_ctxt),
        max_length=160,
        do_sample=True,
        top_k=50,
        num_return_sequences=4,
        eos_token_id=tokenizer.eos_token_id,
    )
    for singleEffect in i.effect:
        singleEffect.recall = False
        for seq in sequences:
            if findOccurrence(seq['generated_text'], singleEffect.name):
                i.recall = True
                singleEffect.recall = True
                i.effectRecallCount += 1
                break

                
k=0
for relations in causalrelations:
    if relations.recall == True:
        k += 1

print(f"Total recalled topics: {k}")
print(f"Total topics: {len(causalrelations)}")
with open("/bigwork/nhwpshaa/Fine-tuned AlpacaNative/Few Epochs/3 Epochs/AlpacaStatsPrecursorWhat.csv", 'a', newline='') as writefile:
    fieldnames = ['cause', 'total effects', 'recalled effects']
    writer = csv.DictWriter(writefile, fieldnames=fieldnames)
    for printvalues in causalrelations:
        recallcount= 0
        print(f"Total no. of effects: {len(printvalues.effect)}")
        for oneEffect in printvalues.effect:
            if oneEffect.recall == True:
                recallcount += 1
        print(f"Total recalled: {recallcount}")
        writer.writerow({'cause': printvalues.cause, 'total effects': len(printvalues.effect), 'recalled effects': recallcount})
        for values in printvalues.effect:
            print(f"Cause: {printvalues.cause} Effect: {values.name} Recalled(Effect): {values.recall}")

Total recalled topics: 13
Total topics: 38
Total no. of effects: 5
Total recalled: 0
Cause: super bowl xxxviii Effect: delhomme Recalled(Effect): False
Cause: super bowl xxxviii Effect: morgan was also an integral part of the panthers defense Recalled(Effect): False
Cause: super bowl xxxviii Effect: fox Recalled(Effect): False
Cause: super bowl xxxviii Effect: 11 – 5 record Recalled(Effect): False
Cause: super bowl xxxviii Effect: super bowl — fox Recalled(Effect): False
Total no. of effects: 1
Total recalled: 0
Cause: olympics Effect: romney donated to charity the $ 1.4 million in salary and severance payments Recalled(Effect): False
Total no. of effects: 1
Total recalled: 0
Cause: golf Effect: well-struck shot Recalled(Effect): False
Total no. of effects: 28
Total recalled: 1
Cause: slavery Effect: poverty Recalled(Effect): True
Cause: slavery Effect: economic history of the guinea coast Recalled(Effect): False
Cause: slavery Effect: adverse impact Recalled(Effect): False
Cause: slav

In [None]:
instruction ="Answer the following question to the best of your knowledge"
k = 0
for i in causalrelations:
    i.effectRecallCount = 0
    for singleEffect in i.effect:
        singleEffect.recall = False
        input_ctxt = "What causes <mask>?"
        input_ctxt = input_ctxt.replace("<mask>", singleEffect.name)
        sequences = pipeline(
            generate_prompt(instruction, input_ctxt),
            max_length=160,
            do_sample=True,
            top_k=50,
            num_return_sequences=4,
            eos_token_id=tokenizer.eos_token_id,
        )
        for seq in sequences:
            if findOccurrence(seq['generated_text'], i.cause):
                    i.recall = True
                    singleEffect.recall = True
                    i.effectRecallCount += 1
                    #k += 1
                    break
                  
                
k=0
for relations in causalrelations:
    if relations.recall == True:
        k += 1

print(f"Total recalled topics: {k}")
print(f"Total topics: {len(causalrelations)}")
with open("/bigwork/nhwpshaa/Fine-tuned AlpacaNative/Few Epochs/3 Epochs/AlpacaStatsPrecursorWhat.csv", 'a', newline='') as writefile:
    fieldnames = ['cause', 'total effects', 'recalled effects']
    writer = csv.DictWriter(writefile, fieldnames=fieldnames)
    for printvalues in causalrelations:
        recallcount= 0
        print(f"Total no. of effects: {len(printvalues.effect)}")
        for oneEffect in printvalues.effect:
            if oneEffect.recall == True:
                recallcount += 1
        print(f"Total recalled: {recallcount}")
        writer.writerow({'cause': printvalues.cause, 'total effects': len(printvalues.effect), 'recalled effects': recallcount})
        for values in printvalues.effect:
            print(f"Cause: {printvalues.cause} Effect: {values.name} Recalled(Effect): {values.recall}")

Total recalled topics: 14
Total topics: 38
Total no. of effects: 5
Total recalled: 0
Cause: super bowl xxxviii Effect: delhomme Recalled(Effect): False
Cause: super bowl xxxviii Effect: morgan was also an integral part of the panthers defense Recalled(Effect): False
Cause: super bowl xxxviii Effect: fox Recalled(Effect): False
Cause: super bowl xxxviii Effect: 11 – 5 record Recalled(Effect): False
Cause: super bowl xxxviii Effect: super bowl — fox Recalled(Effect): False
Total no. of effects: 1
Total recalled: 0
Cause: olympics Effect: romney donated to charity the $ 1.4 million in salary and severance payments Recalled(Effect): False
Total no. of effects: 1
Total recalled: 1
Cause: golf Effect: well-struck shot Recalled(Effect): True
Total no. of effects: 28
Total recalled: 3
Cause: slavery Effect: poverty Recalled(Effect): False
Cause: slavery Effect: economic history of the guinea coast Recalled(Effect): False
Cause: slavery Effect: adverse impact Recalled(Effect): False
Cause: slav

In [None]:
import random

nouns = [
    "science", "technology", "politics", "geography", "knowledge",
    "chemistry", "biology", "physics", "astronomy", "engineering",
    "computer", "internet", "software", "hardware", "algorithm",
    "government", "democracy", "election", "policy", "diplomacy",
    "continent", "country", "city", "ocean", "mountain",
    "planet", "star", "galaxy", "earthquake", "volcano",
    "invention", "discovery", "innovation", "experiment", "research",
    "energy", "environment", "climate", "weather", "ecosystem",
    "history", "culture", "civilization", "tradition", "heritage",
    "language", "literature", "education", "society", "population",
    "cancer", "Albert Einstein", "DNA", "vaccine", "robotics",
    "nanotechnology", "climate change", "sustainability", "globalization",
    "terrorism", "human rights", "economy", "finance", "trade",
    "migration", "demography", "geology", "meteorology", "biodiversity",
    "psychology", "philosophy", "artificial intelligence", "space exploration",
    "genetics", "biotechnology", "medicine", "healthcare", "nanoscience",
    "particle physics", "quantum mechanics", "cosmology", "astrophysics",
    "fossil fuels", "renewable energy", "deforestation", "urbanization",
    "archaeology", "anthropology", "linguistics", "geopolitics", "cartography",
    "sociology", "ethics", "paleontology", "pandemic", "virus"
]



input_ctxt = f"What is a result of <cause>? A){random.choice(nouns)} B){random.choice(nouns)} C) <effect>" ###redo


instruction ="Answer the following question to the best of your knowledge"
k = 0
count = 0
for i in causalrelations:
    i.effectRecallCount = 0
    for n in i.effect:
        n.recall = False
        input_ctxt = f"What is a result of <cause>? A){random.choice(nouns)} B){random.choice(nouns)} C) <effect>"
        input_ctxt = input_ctxt.replace("<effect>", n.name)
        input_ctxt = input_ctxt.replace("<cause>", i.cause)
        #print(input_ctxt)
        sequences = pipeline(
            generate_prompt(instruction, input_ctxt),
            max_length=160,
            do_sample=True,
            top_k=50,
            num_return_sequences=4,
            eos_token_id=tokenizer.eos_token_id,
        )
        for seq in sequences:
            if findOccurrence(seq['generated_text'], n.name):
                #print(f"#################################{j}##############################")
                #print(f"Result: {seq['generated_text']}")
                i.recall = True
                i.effectRecallCount += 1
                n.recall = True
                break
               
            
k=0
for relations in causalrelations:
    if relations.recall == True:
        k += 1
print(f"Total recalled topics: {k}")
print(f"Total topics: {len(causalrelations)}")
with open("/bigwork/nhwpshaa/Fine-tuned AlpacaNative/Few Epochs/3 Epochs/AlpacaStatsPreCursorMultipleChoice.csv", 'w', newline='') as writefile:
    fieldnames = ['cause', 'total effects', 'recalled effects']
    writer = csv.DictWriter(writefile, fieldnames=fieldnames)
    writer.writeheader()
    for printvalues in causalrelations:
        recallcount= 0
        print(f"Total no. of effects: {len(printvalues.effect)}")
        for oneEffect in printvalues.effect:
            if oneEffect.recall == True:
                recallcount += 1
        print(f"Total recalled: {recallcount}")
        writer.writerow({'cause': printvalues.cause, 'total effects': len(printvalues.effect), 'recalled effects': recallcount})
        for values in printvalues.effect:
            print(f"Cause: {printvalues.cause} Effect: {values.name} Recalled(Effect): {values.recall}")

Total recalled topics: 24
Total topics: 38
Total no. of effects: 5
Total recalled: 2
Cause: super bowl xxxviii Effect: delhomme Recalled(Effect): True
Cause: super bowl xxxviii Effect: morgan was also an integral part of the panthers defense Recalled(Effect): False
Cause: super bowl xxxviii Effect: fox Recalled(Effect): True
Cause: super bowl xxxviii Effect: 11 – 5 record Recalled(Effect): False
Cause: super bowl xxxviii Effect: super bowl — fox Recalled(Effect): False
Total no. of effects: 1
Total recalled: 0
Cause: olympics Effect: romney donated to charity the $ 1.4 million in salary and severance payments Recalled(Effect): False
Total no. of effects: 1
Total recalled: 0
Cause: golf Effect: well-struck shot Recalled(Effect): False
Total no. of effects: 28
Total recalled: 6
Cause: slavery Effect: poverty Recalled(Effect): True
Cause: slavery Effect: economic history of the guinea coast Recalled(Effect): False
Cause: slavery Effect: adverse impact Recalled(Effect): False
Cause: slaver

In [None]:
##Below this line

In [None]:
input_ctxt = "Can <cause> cause <effect>?"


instruction ="Answer the following question to the best of your knowledge"
k = 0
count = 0
for i in causalrelations:
    i.effectRecallCount = 0
    for singleEffect in i.effect: 
        singleEffect.recall = False
        input_ctxt = "Can <cause> cause <effect>?"
        input_ctxt = input_ctxt.replace("<effect>", singleEffect.name)
        input_ctxt = input_ctxt.replace("<cause>", i.cause)
        sequences = pipeline(
            generate_prompt(instruction, input_ctxt),
            max_length=160,
            do_sample=True,
            top_k=50,
            num_return_sequences=4,
            eos_token_id=tokenizer.eos_token_id,
        )
        for seq in sequences:
            if findOccurrence(seq['generated_text'], "yes"):
                i.recall = True
                i.effectRecallCount += 1
                singleEffect.recall = True
                #k += 1
                #print("We should break out of the loop now!")
                break

            
k=0
for relations in causalrelations:
    if relations.recall == True:
        k += 1
print(f"Total recalled topics: {k}")
print(f"Total topics: {len(causalrelations)}")
with open("/bigwork/nhwpshaa/Fine-tuned AlpacaNative/Few Epochs/3 Epochs/AlpacaStatsPreCursorYesNo.csv", 'w', newline='') as writefile:
    fieldnames = ['cause', 'total effects', 'recalled effects']
    writer = csv.DictWriter(writefile, fieldnames=fieldnames)
    writer.writeheader()
    for printvalues in causalrelations:
        recallcount= 0
        print(f"Total no. of effects: {len(printvalues.effect)}")
        for oneEffect in printvalues.effect:
            if oneEffect.recall == True:
                recallcount += 1
        print(f"Total recalled: {recallcount}")
        writer.writerow({'cause': printvalues.cause, 'total effects': len(printvalues.effect), 'recalled effects': recallcount})
        for values in printvalues.effect:
            print(f"Cause: {printvalues.cause} Effect: {values.name} Recalled(Effect): {values.recall}")

In [None]:
instruction ="Answer the following question to the best of your knowledge"
k = 0
count = 0
for i in causalrelations:
    i.effectRecallCount = 0
    for singleEffect in i.effect: 
        singleEffect.recall = False
        input_ctxt = "Does <cause> contribute to <effect>?"
        input_ctxt = input_ctxt.replace("<effect>", singleEffect.name)
        input_ctxt = input_ctxt.replace("<cause>", i.cause)
        sequences = pipeline(
            generate_prompt(instruction, input_ctxt),
            max_length=160,
            do_sample=True,
            top_k=4,
            num_return_sequences=1,
            eos_token_id=tokenizer.eos_token_id,
        )
        for seq in sequences:
            if findOccurrence(seq['generated_text'], "yes"):
                i.recall = True
                singleEffect.recall = True
                i.effectRecallCount += 1
                break
                
k=0
for relations in causalrelations:
    if relations.recall == True:
        k += 1

                
print(f"Total recalled topics: {k}")
print(f"Total topics: {len(causalrelations)}")
with open("/bigwork/nhwpshaa/Fine-tuned AlpacaNative/Few Epochs/3 Epochs/AlpacaStatsPreCursorYesNo.csv", 'a', newline='') as writefile:
    fieldnames = ['cause', 'total effects', 'recalled effects']
    writer = csv.DictWriter(writefile, fieldnames=fieldnames)
    for printvalues in causalrelations:
        recallcount= 0
        print(f"Total no. of effects: {len(printvalues.effect)}")
        for oneEffect in printvalues.effect:
            if oneEffect.recall == True:
                recallcount += 1
        print(f"Total recalled: {recallcount}")
        writer.writerow({'cause': printvalues.cause, 'total effects': len(printvalues.effect), 'recalled effects': recallcount})
        for values in printvalues.effect:
            print(f"Cause: {printvalues.cause} Effect: {values.name} Recalled(Effect): {values.recall}")

In [None]:
input_ctxt = "Does <cause> lead to <effect>?"


instruction ="Answer the following question to the best of your knowledge"
k = 0
count = 0
for i in causalrelations:
    i.effectRecallCount = 0
    for singleEffect in i.effect: 
        singleEffect.recall = False
        input_ctxt = "Does <cause> lead to <effect>?"
        input_ctxt = input_ctxt.replace("<effect>", singleEffect.name)
        input_ctxt = input_ctxt.replace("<cause>", i.cause)
        #print(input_ctxt)
        sequences = pipeline(
            generate_prompt(instruction, input_ctxt),
            max_length=160,
            do_sample=True,
            top_k=4,
            num_return_sequences=1,
            eos_token_id=tokenizer.eos_token_id,
        )
        for seq in sequences:
            if findOccurrence(seq['generated_text'], "yes"):
                i.recall = True
                singleEffect.recall = True
                i.effectRecallCount += 1
                break

k=0
for relations in causalrelations:
    if relations.recall == True:
        k += 1


print(f"Total recalled topics: {k}")
print(f"Total topics: {len(causalrelations)}")
with open("/bigwork/nhwpshaa/Fine-tuned AlpacaNative/Few Epochs/3 Epochs/AlpacaStatsPreCursorYesNo.csv", 'a', newline='') as writefile:
    fieldnames = ['cause', 'total effects', 'recalled effects']
    writer = csv.DictWriter(writefile, fieldnames=fieldnames)
    for printvalues in causalrelations:
        recallcount= 0
        print(f"Total no. of effects: {len(printvalues.effect)}")
        for oneEffect in printvalues.effect:
            if oneEffect.recall == True:
                recallcount += 1
        print(f"Total recalled: {recallcount}")
        writer.writerow({'cause': printvalues.cause, 'total effects': len(printvalues.effect), 'recalled effects': recallcount})
        for values in printvalues.effect:
            print(f"Cause: {printvalues.cause} Effect: {values.name} Recalled(Effect): {values.recall}")

In [None]:
instruction ="Answer the following question to the best of your knowledge"
k = 0
count = 0
for i in causalrelations:
    i.effectRecallCount = 0
    for singleEffect in i.effect:
        singleEffect.recall = False
        input_ctxt = "How is <effect> caused?"
        input_ctxt = input_ctxt.replace("<effect>", singleEffect.name)
        input_ctxt = input_ctxt.replace("<cause>", i.cause)
        #print(input_ctxt)
        sequences = pipeline(
            generate_prompt(instruction, input_ctxt),
            max_length=160,
            do_sample=True,
            top_k=4,
            num_return_sequences=1,
            eos_token_id=tokenizer.eos_token_id,
        )
        for seq in sequences:
            #print(f"Result: {seq['generated_text']}")
            if findOccurrence(seq['generated_text'], "yes"):
                i.recall = True
                singleEffect.recall = True
                i.effectRecallCount += 1
                break
                

k=0
for relations in causalrelations:
    if relations.recall == True:
        k += 1
                
print(f"Total recalled topics: {k}")
print(f"Total topics: {len(causalrelations)}")
with open("/bigwork/nhwpshaa/Fine-tuned AlpacaNative/Few Epochs/3 Epochs/AlpacaStatsPreCursorHow.csv", 'w', newline='') as writefile:
    fieldnames = ['cause', 'total effects', 'recalled effects']
    writer = csv.DictWriter(writefile, fieldnames=fieldnames)
    writer.writeheader()
    for printvalues in causalrelations:
        recallcount= 0
        print(f"Total no. of effects: {len(printvalues.effect)}")
        for oneEffect in printvalues.effect:
            if oneEffect.recall == True:
                recallcount += 1
        print(f"Total recalled: {recallcount}")
        writer.writerow({'cause': printvalues.cause, 'total effects': len(printvalues.effect), 'recalled effects': recallcount})
        for values in printvalues.effect:
            print(f"Cause: {printvalues.cause} Effect: {values.name} Recalled(Effect): {values.recall}")

In [None]:
import csv
from collections import defaultdict

class ThirdEffect:
    def __init__(self, name):
        self.name = name
        self.recall = False

class SecondEffect:
    def __init__(self, name):
        self.name = name
        self.recall = False
        self.third_effects = []

class Effect:
    def __init__(self, name):
        self.name = name
        self.recall = False

class CausalRelation:
    def __init__(self, cause):
        self.cause = cause
        self.effects = []
        self.second_effects = []
        self.effect_recall_count = 0
        self.third_recall_count = 0
        self.recall = False

causal_relations = defaultdict(CausalRelation)

with open('/bigwork/nhwpshaa/Initial Dataset/firstSecondThirdOrderSpace.csv', 'r') as file:
    csvfile = csv.DictReader(file)
    for row in csvfile:
        cause = row['cause']
        effect = Effect(row['effect'])
        second_effect = SecondEffect(row['2nd order effect'])
        third_effect = ThirdEffect(row['3rd order effect'])

        if cause not in causal_relations:
            causal_relations[cause] = CausalRelation(cause)

        causal_relation = causal_relations[cause]
        causal_relation.effects.append(effect)
        causal_relation.second_effects.append(second_effect)
        second_effect.third_effects.append(third_effect)
        causal_relation.recall = False

instruction = "Answer the following question to the best of your knowledge"

for cause, causal_relation in causal_relations.items():
    for second_effect in causal_relation.second_effects:
        for third_effect in second_effect.third_effects:
            input_ctxt = "Why does <cause> lead to <third-order effect>?"
            input_ctxt = input_ctxt.replace("<third-order effect>", third_effect.name)
            input_ctxt = input_ctxt.replace("<cause>", cause)

            sequences = pipeline(
                generate_prompt(instruction, input_ctxt),
                max_length=160,
                do_sample=True,
                top_k=50,
                num_return_sequences=4,
                eos_token_id=tokenizer.eos_token_id,
            )

            for seq in sequences:
                if findOccurrence(seq['generated_text'], third_effect.name):
                    causal_relation.recall = True
                    second_effect.recall = True
                    causal_relation.third_recall_count += 1
                    break

total_recalled = sum(1 for cr in causal_relations.values() if cr.recall)

print(f"Total recalled topics: {total_recalled}")
print(f"Total topics: {len(causal_relations)}")

with open("/bigwork/nhwpshaa/Knowledge Probing/AlpacaStatsThirdOrder.csv", 'w', newline='') as writefile:
    fieldnames = ['cause', 'total_effects', 'recalled_effects']
    writer = csv.DictWriter(writefile, fieldnames=fieldnames)
    writer.writeheader()
    for cause, causal_relation in causal_relations.items():
        total_effects = len(causal_relation.second_effects)
        recalled_effects = sum(1 for se in causal_relation.second_effects if se.recall)
        writer.writerow({'cause': cause, 'total_effects': total_effects, 'recalled_effects': recalled_effects})
        for se in causal_relation.second_effects:
            print(f"Cause: {cause} Effect: {se.name} Recalled(Effect): {se.recall}")


In [None]:
print("Done")