In [60]:
import re
import nltk
from nltk.sem import logic
from nltk.sem import Expression

In [61]:
#load in folio data
import pandas as pd

# Login using e.g. `huggingface-cli login` to access this dataset
splits = {'train': 'folio_v2_train.jsonl', 'validation': 'folio_v2_validation.jsonl'}

In [62]:

train = pd.read_json("hf://datasets/yale-nlp/FOLIO/" + splits["train"], lines=True)

In [63]:
print(train['premises'])

0       All people who regularly drink coffee are depe...
1       All people who regularly drink coffee are depe...
2       All people who regularly drink coffee are depe...
3       All people who regularly drink coffee are depe...
4       Miroslav Venhoda was a Czech choral conductor ...
                              ...                        
996     Any convicted criminal that is innocent is not...
997     Any convicted criminal that is innocent is not...
998     Any convicted criminal that is innocent is not...
999     Phoneix's music is classified under the indie ...
1000    Phoneix's music is classified under the indie ...
Name: premises, Length: 1001, dtype: object


In [66]:
# convert premise FOL to nltk representation so that we can convert to expression to ultimately pass to prover

######
# function citation: 
# from https://github.com/benlipkin/linc/blob/main/eval/tasks/utils.py
######
def convert_to_nltk_rep(logic_formula):
    translation_map = {
        "∀": "all ",
        "∃": "exists ",
        "→": "->",
        "¬": "-",
        "∧": "&",
        "∨": "|",
        "⟷": "<->",
        "↔": "<->",
        "0": "Zero",
        "1": "One",
        "2": "Two",
        "3": "Three",
        "4": "Four",
        "5": "Five",
        "6": "Six",
        "7": "Seven",
        "8": "Eight",
        "9": "Nine",
        ".": "Dot",
        "Ś": "S",
        "ą": "a",
        "’": "",
    }

    constant_pattern = r'\b([a-z]{2,})(?!\()'
    logic_formula = re.sub(constant_pattern, lambda match: match.group(1).capitalize(), logic_formula)

    for key, value in translation_map.items():
        logic_formula = logic_formula.replace(key, value)

    quant_pattern = r"(all\s|exists\s)([a-z])"
    def replace_quant(match):
        return match.group(1) + match.group(2) + "."
    logic_formula = re.sub(quant_pattern, replace_quant, logic_formula)

    dotted_param_pattern = r"([a-z])\.(?=[a-z])"
    def replace_dotted_param(match):
        return match.group(1)
    logic_formula = re.sub(dotted_param_pattern, replace_dotted_param, logic_formula)

    simple_xor_pattern = r"(\w+\([^()]*\)) ⊕ (\w+\([^()]*\))"
    def replace_simple_xor(match):
        return ("((" + match.group(1) + " & -" + match.group(2) + ") | (-" + match.group(1) + " & " + match.group(2) + "))")
    logic_formula = re.sub(simple_xor_pattern, replace_simple_xor, logic_formula)

    complex_xor_pattern = r"\((.*?)\)\) ⊕ \((.*?)\)\)"
    def replace_complex_xor(match):
        return ("(((" + match.group(1) + ")) & -(" + match.group(2) + "))) | (-(" + match.group(1) + ")) & (" + match.group(2) + "))))")
    logic_formula = re.sub(complex_xor_pattern, replace_complex_xor, logic_formula)

    special_xor_pattern = r"\(\(\((.*?)\)\)\) ⊕ (\w+\([^()]*\))"
    def replace_special_xor(match):
        return ("(((" + match.group(1) + ")) & -" + match.group(2) + ") | (-(" + match.group(1) + ")) & " + match.group(2) + ")")
    logic_formula = re.sub(special_xor_pattern, replace_special_xor, logic_formula)
    
    return logic_formula



In [67]:
train['premises-FOL'] = [ convert_to_nltk_rep(p) for p in train['premises-FOL']]
train['conclusion-FOL'] = train['conclusion-FOL'].apply(convert_to_nltk_rep)


In [92]:
n = 23
n = 60
n = 148
n= 261

# test
n = 850

print(train['label'][n])

Uncertain


In [93]:
print(train['premises'][n])

Surprises are either fun or dreadful.
All scares are surprises.


In [97]:
print(train['conclusion'][n])

All scares are fun.


In [94]:
print(train['premises-FOL'][n])

All xDot (Surprise(x) -> (((Fun(x) & -Dreadful(x)) | (-Fun(x) & Dreadful(x)))))
All xDot (Scare(x) -> Surprise(x))


In [95]:
print(train['conclusion-FOL'][n])

all x. (Scare(x) -> Fun(x))


In [96]:
for p, f in zip(train['premises'][n].split('\n'), train['premises-FOL'][n].split('\n')):
    print(f"TEXT: {p.strip()}\nFOL: {f.strip()}\n")
print(f"TEXT: {train['conclusion'][n].strip()}\nFOL: {train['conclusion-FOL'][n].strip()}\n")

TEXT: Surprises are either fun or dreadful.
FOL: All xDot (Surprise(x) -> (((Fun(x) & -Dreadful(x)) | (-Fun(x) & Dreadful(x)))))

TEXT: All scares are surprises.
FOL: All xDot (Scare(x) -> Surprise(x))

TEXT: All scares are fun.
FOL: all x. (Scare(x) -> Fun(x))



In [None]:
# build prompt

normal = """The following is a first-order logic (FOL) problem.
The problem is to determine whether the conclusion follows from the premises.
The premises are given in the form of a set of first-order logic sentences.
The conclusion is given in the form of a single first-order logic sentence.
The task is to translate each of the premises and conclusions into FOL expressions, so that the expressions can be evaluated by a theorem solver to determine whether the conclusion follows from the premises.
Expressions should adhere to the format of the Python NLTK package logic module.

Here are four examples of premises and corresponding FOL, preceded by whether the FOL evaluates to true or false.

TEXT: A La Liga soccer team ranks higher than another La Liga soccer team if it receives more points.
FOL: All xDot All yDot (LaLigaSoccerTeam(x) & LaLigaSoccerTeam(y) & MorePoints(x, y) -> RankHigherThan(x, y))

TEXT: If there are two La Liga soccer teams and neither has more points than the other, then the team which receives more points from the games between the two teams ranks higher.
FOL: All xDot All yDot (LaLigaSoccerTeam(x) & LaLigaSoccerTeam(y) & -MorePoints(x, y) & -MorePoints(y, x) & MorePointsInGameBetween(x, y) ->  RankHigherThan(x, y))

TEXT: Real Madrid and Barcelona are both La Liga soccer teams.
FOL: LaLigaSoccerTeam(RealMadrid) & LaLigaSoccerTeam(Barcelona)

TEXT: Real Madrid received more points than Barcelona.
FOL: MorePoints(RealMadrid, Barcelona)

TEXT: Neither Real Madrid nor Barcelona received more points from the games between them.
FOL: -MorePointsInGameBetween(RealMadrid, Barcelona) & -MorePointsInGameBetween(Barcelona, RealMadrid)

TEXT: Real Madrid ranks higher than Barcelona.
FOL: RankHigherThan(RealMadrid, Barcelona)
<EVALUATE>

TEXT: All professional athletes spend most of their time on sports.
FOL: All xDot (ProfessionalAthlete(x) -> SpendOn(x, MostOfTheirTime, Sports))

TEXT: All Olympic gold medal winners are professional athletes.
FOL: All xDot (OlympicGoldMedalWinner(x) -> ProfessionalAthlete(x))

TEXT: No full-time scientists spend the majority of their time on sports.
FOL: All xDot (FullTimeScientist(x) -> -SpendOn(x, MostOfTheirTime, Sports))

TEXT: All Nobel physics laureates are full-time scientists.
FOL: All xDot (NobelPhysicsLaureate(x) -> FullTimeScientist(x))

TEXT: Amy spends the most time on sports, or Amy is an Olympic gold medal winner.
FOL: SpendOn(Amy, MostOfTheirTime, Sports) | OlympicGoldMedalWinner(Amy)

TEXT: If Amy is not a Nobel physics laureate, then Amy is not an Olympic gold medal winner.
FOL: -NobelPhysicsLaureate(Amy) -> -OlympicGoldMedalWinner(Amy)

TEXT: If Amy is not an Olympic gold medal winner, then Amy is a Nobel physics laureate.
FOL: -OlympicGoldMedalWinner(Amy) -> NobelPhysicsLaureate(Amy)
<EVALUATE>

TEXT: No songs are visuals.
FOL: All xDot (Song(x) -> -Visual(x))

TEXT: All folk songs are songs.
FOL: All xDot (FolkSong(x) -> Song(x))

TEXT: All videos are visuals.
FOL: All xDot (Video(x) -> Visual(x))

TEXT: All movies are videos.
FOL: All xDot (Movie(x) -> Video(x))

TEXT: All sci-fi movies are movies.
FOL: All xDot (ScifiMovie(x) -> Movie(x))

TEXT: Inception is a sci-fi movie.
FOL: ScifiMovie(Inception)

TEXT: Mac is neither a folk song nor a sci-fi movie.
FOL: -FolkSong(Mac) & -ScifiMovie(Mac)

TEXT: Inception is a folk song.
FOL: FolkSong(Inception)
<EVALUATE>

TEXT: Every chef can cook.
FOL: All xDot (Chef(x) -> Can(x, Cook))

TEXT: Some people who aren’t chefs can cook.
FOL: Exists xDot (-Chef(x) & Can(x, Cook))

TEXT: People who cook can make scrambled eggs and pasta.
FOL: All xDot (Can(x, Cook) -> (CanMake(x, ScrambledEggs) & CanMake(x, Pasta)))

TEXT: If someone can make cookies and muffins, they are a baker.
FOL: All xDot (CanMake(x, Cookies) & CanMake(x, Muffins) -> Baker(x))

TEXT: Bakers who can also make scrambled eggs can make a good breakfast.
FOL: All xDot ((Baker(x) & CanMake(x, ScrambledEggs)) -> CanMake(x, GoodBreakfast))

TEXT: Luke can make cookies, scrambled eggs, and muffins, but not pasta.
FOL: CanMake(Luke, Cookies) & (CanMake(Luke, ScrambledEggs) & CanMake(Luke, Muffins) & -CanMake(Luke, Pasta)

TEXT: Luke is a chef.
FOL: Chef(Luke)
<EVALUATE>

<PREMISES>
Surprises are either fun or dreadful.
All scares are surprises.
</PREMISES>
<CONCLUSION>
All scares are fun.
</CONCLUSION>
<EVALUATE>
"""

bnf = """The following is a first-order logic (FOL) problem.
The problem is to determine whether the conclusion follows from the premises.
The premises are given in the form of a set of first-order logic sentences.
The conclusion is given in the form of a single first-order logic sentence.
The task is to translate each of the premises and conclusions into FOL expressions, so that the expressions can be evaluated by a theorem solver to determine whether the conclusion follows from the premises.
Expressions should adhere to the format of the Python NLTK package logic module.

Follow the following Backus-Naur Form grammar to construct your expression.
```
<term> ::= <constant> | <var> | <func>(<term>)
<atomic_formula> ::= <predicate>(<term>, <term>, ..., <term>)
<formula> ::= <atomic_formula> | -<formula> | (<formula> & <formula>) | (<formula> | <formula>) | (<formula> -> <formula>) | (<formula> <-> <formula>) | all <var>. <formula> | exists <var>. <formula> 
```

Here are four examples of premises and corresponding FOL, preceded by whether the FOL evaluates to true or false.

TEXT: A La Liga soccer team ranks higher than another La Liga soccer team if it receives more points.
FOL: All xDot All yDot (LaLigaSoccerTeam(x) & LaLigaSoccerTeam(y) & MorePoints(x, y) -> RankHigherThan(x, y))

TEXT: If there are two La Liga soccer teams and neither has more points than the other, then the team which receives more points from the games between the two teams ranks higher.
FOL: All xDot All yDot (LaLigaSoccerTeam(x) & LaLigaSoccerTeam(y) & -MorePoints(x, y) & -MorePoints(y, x) & MorePointsInGameBetween(x, y) ->  RankHigherThan(x, y))

TEXT: Real Madrid and Barcelona are both La Liga soccer teams.
FOL: LaLigaSoccerTeam(RealMadrid) & LaLigaSoccerTeam(Barcelona)

TEXT: Real Madrid received more points than Barcelona.
FOL: MorePoints(RealMadrid, Barcelona)

TEXT: Neither Real Madrid nor Barcelona received more points from the games between them.
FOL: -MorePointsInGameBetween(RealMadrid, Barcelona) & -MorePointsInGameBetween(Barcelona, RealMadrid)

TEXT: Real Madrid ranks higher than Barcelona.
FOL: RankHigherThan(RealMadrid, Barcelona)
<EVALUATE>

TEXT: All professional athletes spend most of their time on sports.
FOL: All xDot (ProfessionalAthlete(x) -> SpendOn(x, MostOfTheirTime, Sports))

TEXT: All Olympic gold medal winners are professional athletes.
FOL: All xDot (OlympicGoldMedalWinner(x) -> ProfessionalAthlete(x))

TEXT: No full-time scientists spend the majority of their time on sports.
FOL: All xDot (FullTimeScientist(x) -> -SpendOn(x, MostOfTheirTime, Sports))

TEXT: All Nobel physics laureates are full-time scientists.
FOL: All xDot (NobelPhysicsLaureate(x) -> FullTimeScientist(x))

TEXT: Amy spends the most time on sports, or Amy is an Olympic gold medal winner.
FOL: SpendOn(Amy, MostOfTheirTime, Sports) | OlympicGoldMedalWinner(Amy)

TEXT: If Amy is not a Nobel physics laureate, then Amy is not an Olympic gold medal winner.
FOL: -NobelPhysicsLaureate(Amy) -> -OlympicGoldMedalWinner(Amy)

TEXT: If Amy is not an Olympic gold medal winner, then Amy is a Nobel physics laureate.
FOL: -OlympicGoldMedalWinner(Amy) -> NobelPhysicsLaureate(Amy)
<EVALUATE>

TEXT: No songs are visuals.
FOL: All xDot (Song(x) -> -Visual(x))

TEXT: All folk songs are songs.
FOL: All xDot (FolkSong(x) -> Song(x))

TEXT: All videos are visuals.
FOL: All xDot (Video(x) -> Visual(x))

TEXT: All movies are videos.
FOL: All xDot (Movie(x) -> Video(x))

TEXT: All sci-fi movies are movies.
FOL: All xDot (ScifiMovie(x) -> Movie(x))

TEXT: Inception is a sci-fi movie.
FOL: ScifiMovie(Inception)

TEXT: Mac is neither a folk song nor a sci-fi movie.
FOL: -FolkSong(Mac) & -ScifiMovie(Mac)

TEXT: Inception is a folk song.
FOL: FolkSong(Inception)
<EVALUATE>

TEXT: Every chef can cook.
FOL: All xDot (Chef(x) -> Can(x, Cook))

TEXT: Some people who aren’t chefs can cook.
FOL: Exists xDot (-Chef(x) & Can(x, Cook))

TEXT: People who cook can make scrambled eggs and pasta.
FOL: All xDot (Can(x, Cook) -> (CanMake(x, ScrambledEggs) & CanMake(x, Pasta)))

TEXT: If someone can make cookies and muffins, they are a baker.
FOL: All xDot (CanMake(x, Cookies) & CanMake(x, Muffins) -> Baker(x))

TEXT: Bakers who can also make scrambled eggs can make a good breakfast.
FOL: All xDot ((Baker(x) & CanMake(x, ScrambledEggs)) -> CanMake(x, GoodBreakfast))

TEXT: Luke can make cookies, scrambled eggs, and muffins, but not pasta.
FOL: CanMake(Luke, Cookies) & (CanMake(Luke, ScrambledEggs) & CanMake(Luke, Muffins) & -CanMake(Luke, Pasta)

TEXT: Luke is a chef.
FOL: Chef(Luke)
<EVALUATE>

<PREMISES>
Surprises are either fun or dreadful.
All scares are surprises.
</PREMISES>
<CONCLUSION>
All scares are fun.
</CONCLUSION>
<EVALUATE>
"""

In [None]:
# prompt the model



In [None]:
# for prelim results 1: convert to  nltk expression + apply grammar (check for correctness of expression l8r)

