In [103]:
import pandas as pd

# Generation of Syllogism Task

## Evaluating Created Tasks

In [77]:
def get_code_form_text(text):
    """
    Classify a categorical statement by its leading quantifier.

    Parameters:
        text: a statement such as "All humans are mortal", optionally
            prefixed with the "Conclusion:" label.

    Returns:
        "A" for "All ...", "E" for "No ...", "I" for "Some ... are ...",
        "O" for "Some ... are not ...", or None when the statement does not
        start with one of these quantifiers.
    """
    label = "Conclusion:"

    sentence = text.strip()
    # Only a label at the very start is treated as a label; the same text
    # appearing later in the sentence must not affect the classification.
    if sentence.startswith(label):
        sentence = sentence[len(label):].lstrip()

    words = sentence.split(None, 1)
    if not words:
        return None

    # Compare the whole first word so that e.g. "Nobody" or "Allies" are not
    # mistaken for the quantifiers "No" / "All".
    quantifier = words[0]

    if quantifier == "All":
        return "A"
    if quantifier == "No":
        return "E"
    if quantifier == "Some":
        return "O" if "are not" in sentence else "I"

    return None

In [79]:
import random

class Syllogism:
    '''
    The basic structure of the inference task: two premises and a conclusion.

    Every statement (premise or conclusion) is a sequence of two
    (word, term) pairs, e.g. [("All", "humans"), ("are", "mortal")].

    The conclusion can be obtained in two ways:
        - passed in explicitly through `conclusion`
        - generated from a form code ("A", "E", "I" or "O"); when no code is
          given, one of "A", "E", "I" is picked at random

    Statement forms:
        Code    quantifier  subject copula  predicate   type                    example
        A       All         S       are     P           universal affirmatives  All humans are mortal.
        E       No          S       are     P           universal negatives     No humans are perfect.
        I       Some        S       are     P           particular affirmatives Some humans are healthy.
        O       Some        S       are not P           particular negatives    Some humans are not clever
    '''

    def __init__(self, premise1, premise2, conclusion=None, code=None) -> None:
        '''
        Build a syllogism from two premises.

        Parameters:
            premise1: first premise; its first pair carries the term that
                becomes the subject of a generated conclusion.
            premise2: second premise; its second pair carries the term that
                becomes the predicate of a generated conclusion.
            conclusion: explicit conclusion; when given, `code` is ignored.
            code: form code of the conclusion to generate. When both
                `conclusion` and `code` are None a code is drawn at random.

        Raises:
            ValueError: if `code` is not one of "A", "E", "I", "O".
        '''
        self.premise1 = premise1
        self.premise2 = premise2
        self.premises = [self.premise1, self.premise2]

        if conclusion is not None:
            self.conclusion = conclusion
        else:
            if code is None:
                # choosing code "O" is discouraged by the authors of the paper,
                # so it is never picked at random
                code = random.choice(["A", "E", "I"])
            # An explicitly requested code must be honoured and not replaced
            # by a random draw.
            self.conclusion = self.generate_conlcusion_from_code(code=code)

        self.premises_and_conclusion = [*self.premises, self.conclusion]

    @staticmethod
    def _join_words(statement):
        '''Flatten a sequence of (word, term) pairs into one space-separated string.'''
        return ' '.join(word for pair in statement for word in pair)

    def __str__(self):
        '''Multi-line rendering: both premises, a "---" separator, then the conclusion.'''
        text = f"""
        {self._join_words(self.premise1)}
        {self._join_words(self.premise2)}
        ---
        {self._join_words(self.conclusion)}
        """

        return text

    def __getitem__(self, item):
        '''Index into [premise1, premise2, conclusion].'''
        return self.premises_and_conclusion[item]

    def str_with_perioids(self):
        '''
        Single-line rendering: "<premise1>. <premise2>. Conclusion: <conclusion>".

        No period is added after the conclusion.
        '''
        text = (
            f"{self._join_words(self.premise1)}. "
            f"{self._join_words(self.premise2)}. "
            f"Conclusion: {self._join_words(self.conclusion)}"
        )

        return text

    def generate_conlcusion_from_code(self, code, reverse_terms=False):
        '''
        Build a conclusion of the requested form from the premises' terms.

        Code    quantifier  subject copula  predicate   type                    example
        A       All         S       are     P           universal affirmatives  All humans are mortal.
        E       No          S       are     P           universal negatives     No humans are perfect.
        I       Some        S       are     P           particular affirmatives Some humans are healthy.
        O       Some        S       are not P           particular negatives    Some humans are not clever

        'O' code was omitted by the authors of "Language models show human-like content effects on reasoning tasks"
        to avoid substantial negation, which complicates behavior both for language models and humans.

        Parameters:
            code: one of "A", "E", "I", "O".
            reverse_terms: currently unused; kept so existing calls keep working.

        Returns:
            The conclusion as [(quantifier, subject), (copula, predicate)].

        Raises:
            ValueError: if `code` is not a known form code.
        '''
        forms = {
            "A": ("All", "are"),
            "E": ("No", "are"),
            "I": ("Some", "are"),
            "O": ("Some", "are not"),
        }

        if code not in forms:
            raise ValueError(f"Unknown conclusion code: {code!r}")

        # The terms are the second element of a pair. They must be plain
        # strings because the string renderers pass them to str.join.
        lower_term = self.premise1[0][1]
        higher_term = self.premise2[1][1]

        quantifier, copula = forms[code]
        return [(quantifier, lower_term), (copula, higher_term)]









## Simple test to check that the code works correctly

In [119]:
# Smoke test: build one syllogism and classify each of its three statements.
premise1 = [("All", "Big Rhinos"), ("are", "mammals")]
premise2 = [("All", "mammals"), ("are", "animals")]
sylogizm1 = Syllogism(premise1, premise2)

# Neither a conclusion nor a code is passed, so the conclusion form is random.
statements = sylogizm1.str_with_perioids().split(". ")
for sentence in statements:
    print(sentence)

codes = [get_code_form_text(sentence) for sentence in statements]
print(codes)


All Big Rhinos are mammals
All mammals are animals
Conclusion: Some Big Rhinos are animals
['A', 'A', 'I']


# Combinatorial creation of Syllogisms

In [114]:
generation_structure = {
    # Term hierarchy used to build the syllogisms (Figure 1 layout):
    #   Predicate   - "Animals" (the root key)
    #   Middle term - an animal group (each entry of the "Animals" list)
    #   Subject     - a species (each entry of that group's list)

    # Every group named under "Animals" is also a key of this dict.
    # Some species ("Humans", "Bears") are listed under more than one group.
    
    # ANIMALS

    "Animals": ["Mammals", "Reptiles", "Birds", "Fish", "Insects", "Herbivores", "Carnivores", "Omnivores", "Rodents", "Primates", "Worms"],
    "Mammals": ["Lions", "Elephants", "Whales", "Rhinos", "Humans", "Camels"], 
    "Reptiles": ["Snakes", "Turtles", "Crocodiles"],
    "Fish": ["Salmon", "Sharks", "Clownfish"],
    "Insects": ["Butterflies", "Ants", "Beetles"],
    "Primates": ["Monkeys", "Apes", "Lemurs", "Bonobos"],
    "Rodents": ["Mice", "Rats", "Squirrels"],
    "Carnivores": ["Tigers", "Wolves", "Bears"],
    "Omnivores": ["Humans", "Pigs", "Bears"],
    "Herbivores": ["Deer", "Cows", "Giraffes"],
    "Worms": ["Earthworms", "Leeches", "Planarians"],

    "Birds": ["Eagles", "Penguins", "Sparrows", "Vultures"]
}

## Syllogism Validator

In [115]:
def is_valid_syllogism(mood, figure):
    """
    Tell whether a mood/figure pair is in this notebook's table of valid forms.

    Parameters:
        mood: iterable of single-letter form codes (or an already joined
            string), e.g. ['A', 'A', 'A'] or "AAA".
        figure: figure number, as an int or a string ("1" to "4").

    Returns:
        True when the joined mood is listed for the figure, False otherwise
        (including when the figure itself is not in the table).

    NOTE(review): the entries look like the traditional major-minor-conclusion
    mood names; 'AEO' for figure 4 is usually considered only conditionally
    valid - confirm the table against the intended reference.
    """
    moods_by_figure = {
        '1': ('AAA', 'EAE', 'AII', 'EIO'),
        '2': ('EAE', 'AEE', 'EIO', 'AOO'),
        '3': ('IAI', 'AII', 'OAO', 'EIO'),
        '4': ('AEE', 'IAI', 'EIO', 'AEO'),
    }

    # Join first so that a malformed mood fails the same way for every figure.
    mood_key = ''.join(mood)
    accepted_moods = moods_by_figure.get(str(figure), ())

    return mood_key in accepted_moods



## Creating a Dataset Matrix

In [116]:
# Empty frame that the generation cells below append their rows to.
dataset_columns = [
    "Syllogism",
    "Content Theme",
    "Content Validity",
    "Mood",
    "Figure",
    "Syllogism Result",
]
dataset = pd.DataFrame({column: [] for column in dataset_columns})

In [None]:
# Term roles in this generation structure:
#   Predicate (P)   - "Animals"
#   Middle term (M) - animal group
#   Subject (S)     - species
#
# This yields Figure 1: major premise M - P, minor premise S - M.

rows = []

for animal_group in generation_structure["Animals"]:

    for major_quantifier in ["All", "No", "Some"]:
        # major premise (M - P), e.g. "All Mammals are Animals"
        higher_premise = (major_quantifier, animal_group), ("are", "Animals")

        for animal in generation_structure[animal_group]:
            for minor_quantifier in ["All", "No", "Some"]:
                # minor premise (S - M), e.g. "All Lions are Mammals"
                lower_premise = (minor_quantifier, animal), ("are", animal_group)

                for letter in ["A", "E", "I"]:
                    sylogizm = Syllogism(lower_premise, higher_premise, code=letter)
                    syllogism_text = sylogizm.str_with_perioids()

                    # The text lists the minor premise first, then the major
                    # premise, then the conclusion.
                    minor_code, major_code, conclusion_code = [
                        get_code_form_text(sentence)
                        for sentence in syllogism_text.split(". ")
                    ]

                    # The validity table is keyed major-minor-conclusion, so
                    # the codes must be put in that order before the lookup.
                    codes = [major_code, minor_code, conclusion_code]
                    is_valid = is_valid_syllogism(codes, 1)

                    rows.append({
                        "Syllogism": syllogism_text,
                        "Content Theme": "Animals",
                        # Content validity is set equal to the logical result
                        # for this figure, as in the original cell.
                        "Content Validity": is_valid,
                        "Mood": ''.join(codes),
                        "Figure": 1,
                        "Syllogism Result": is_valid
                    })

# Append all generated rows at once instead of re-copying the frame per row.
dataset = pd.concat([dataset, pd.DataFrame(rows)], ignore_index=True)

dataset = dataset.sample(frac=1)
dataset.to_excel("Próba_Dla_Maszyny.xlsx")



# Nonsense Dataset

In [120]:
generation_structure = {
    # Same layout as the animal hierarchy, but every term is a nonsense token
    # (Figure 1 layout):
    #   Predicate   - "Dododqghjy" (the root key)
    #   Middle term - a group token (each entry of the root list)
    #   Subject     - a member token (each entry of that group's list)

    # Every group token listed under the root is also a key of this dict.
    
    # NONSENSE

    "Dododqghjy": ["LwpSUcgkmz", "IUpDvJSHiZ", "jIzWPDwUUk", "BBaAHDWjNs", "hnBobEVDfY", "XedLccewmp", "PFlRHAaJIU", "mRTPduzuwG", "FSETVOrEem", "GPTMCAbocG", "XmnCyroGRq"],
    "LwpSUcgkmz": ["DtQctcCOEa", "TWYvAmdvcW", "HpCbPWmdWP", "TQiYVQlHDD", "oNjiHnAfpq", "ssVZCGyCJw"], 
    "IUpDvJSHiZ": ["YMwQDFGgGC", "VWQLzhrsio", "mBGIBpatRm"],
    "jIzWPDwUUk": ["EDsDkpEdrH", "NFMUWQPBBh", "lScwArKxAT"],
    "BBaAHDWjNs": ["xwDELNqzrh", "CChikBrFuS", "rQVHGWIWIM"],
    "PFlRHAaJIU": ["xtnZQfbPdB", "LavAmHgbNA", "FVAtfjIGne", "HATdmaWUvU"],
    "FSETVOrEem": ["cejJumDUJe", "WbbqtjhsDv", "zFKxXzwRKa"],
    "GPTMCAbocG": ["QToEwwUOTf", "sxyhnRmWYn", "rdQjBnrbke"],
    "XmnCyroGRq": ["RVjlyaFzKp", "aTScRSXcJu", "BfPJOWfTTo"],
    "mRTPduzuwG": ["ssTqfTqcul", "iUyshCysyl", "pKHmJlKhpk"],
    "hnBobEVDfY": ["ZCbUhSEPRR", "LFCnDYsmfZJ", "FYpTJcNLRK"],

    "XedLccewmp": ["QwUnjcuWmF", "PNgfBpfQvd", "oiaiLXyEPj", "ltaYUnUpxA"]
}

In [None]:
# Term roles in this generation structure (nonsense tokens):
#   Predicate (P)   - "Dododqghjy"
#   Middle term (M) - group token
#   Subject (S)     - member token
#
# This yields Figure 1: major premise M - P, minor premise S - M.

rows = []

for animal_group in generation_structure["Dododqghjy"]:

    for major_quantifier in ["All", "No", "Some"]:
        # major premise (M - P)
        higher_premise = (major_quantifier, animal_group), ("are", "Dododqghjy")

        for animal in generation_structure[animal_group]:
            for minor_quantifier in ["All", "No", "Some"]:
                # minor premise (S - M)
                lower_premise = (minor_quantifier, animal), ("are", animal_group)

                for letter in ["A", "E", "I"]:
                    sylogizm = Syllogism(lower_premise, higher_premise, code=letter)
                    syllogism_text = sylogizm.str_with_perioids()

                    # The text lists the minor premise first, then the major
                    # premise, then the conclusion.
                    minor_code, major_code, conclusion_code = [
                        get_code_form_text(sentence)
                        for sentence in syllogism_text.split(". ")
                    ]

                    # The validity table is keyed major-minor-conclusion, so
                    # the codes must be put in that order before the lookup.
                    codes = [major_code, minor_code, conclusion_code]
                    is_valid = is_valid_syllogism(codes, 1)

                    rows.append({
                        "Syllogism": syllogism_text,
                        "Content Theme": "Nonsense",
                        # Content validity is set equal to the logical result
                        # for this figure, as in the original cell.
                        "Content Validity": is_valid,
                        "Mood": ''.join(codes),
                        "Figure": 1,
                        "Syllogism Result": is_valid
                    })

# NOTE(review): rows are appended to the existing `dataset`, so the exported
# file also contains any rows added by earlier cells - confirm this is intended.
dataset = pd.concat([dataset, pd.DataFrame(rows)], ignore_index=True)

dataset = dataset.sample(frac=1)
dataset.to_excel("Próba_Dla_Maszyny_Abstrakcja.xlsx")
