In [1]:
import json
# The FOL formulas in this file contain non-ASCII logic symbols (e.g. ∀, ∃, ∧), so decode
# explicitly as UTF-8 instead of relying on the platform's default text encoding.
with open("sample_data.json", "r", encoding="utf-8") as inputfile:
    sample_data = json.load(inputfile)

In [2]:
# Print each dataset name as a header, followed by one "sentence || formula" line per entry
for dataset, entries in sample_data.items():
    print(f"***{dataset}***")
    for sentence, formula in entries.items():
        print(sentence, "||", formula)

***sick***
A tan dog is playing in the water on the bank of a pond || ∀x∀y∀z (TanDog(x) ∧ Water(y) ∧ Pond(z) → PlaysIn(x, y, z))
Some water is being drunk by a cat || ∃x∃y (Water(x) ∧ Cat(y) ∧ Drinking(y, x))
A person is playing a keyboard || ∀x (Person(x) ∧ PlayingKeyboard(x))
The man is sitting outdoors under a purple umbrella || ∃x (Man(x) ∧ Sitting(x) ∧ Outdoors(x) ∧ Under(x, purple_umbrella))
A man with no shirt is holding a football || ∀x (Man(x) ∧ ¬HasShirt(x) → HoldingFootball(x))
There is no person in bike gear standing steadily in front of the mountains || ¬∃x (Person(x) ∧ InBikeGear(x) ∧ StandingSteadilyInFrontOf(x, mountains))
A woman is slicing a tomato || ∃x (Woman(x) ∧ Slicing(x, tomato))
A few people are singing || ∃x∃y∃z (Person(x) ∧ Person(y) ∧ Person(z) ∧ Singing(x) ∧ Singing(y) ∧ Singing(z))
The surfer is riding a big wave || ∃x ∃y (Surfer(x) ∧ Wave(y) ∧ Riding(x, y))
Two dogs and two people are walking in a wood || ∃x ∃y ∃z ∃w (Dog(x) ∧ Dog(y) ∧ Person(z) ∧ Person(

In [3]:
# Imports 
import re
from nltk.sem.logic import Expression, Variable 

In [4]:
# Convert FOL with classic symbols to NLTK readable FOL
def fol2nltk(s: str) -> str:
    """Translate a formula written with Unicode logic symbols into NLTK's ASCII syntax.

    Replacements performed:
        ``∧`` -> ``&``, ``∨`` -> ``|``, ``→`` -> ``->``, ``↔`` -> ``<->``,
        ``¬`` -> ``not ``, ``∃v`` -> ``exists v.``, ``∀v`` -> ``all v.``

    The quantified variable ``v`` is the single non-whitespace character that directly
    follows the quantifier symbol, plus any digits right after it (``x``, ``y1``).

    :param s: formula using the Unicode symbols listed above
    :return: the formula with those symbols replaced; all other text is left untouched
    """
    s = s.replace('∧', '&')
    s = s.replace('∨', '|')
    # Raw strings keep `\S`, `\d` and the `\1` back-reference out of Python's own
    # string-escape processing (a non-raw '\S' is an invalid escape sequence).
    s = re.sub(r'∃(\S\d*)', r'exists \1.', s)
    s = re.sub(r'∀(\S\d*)', r'all \1.', s)
    s = s.replace('↔', '<->')
    s = s.replace('→', '->')
    s = s.replace('¬', 'not ')
    return s

In [5]:
# Pick one formula from the 'sick' sample data and translate it to NLTK syntax in one step
test_sent = fol2nltk(sample_data['sick']['A person is playing a keyboard'])
print(test_sent)

all x. (Person(x) & PlayingKeyboard(x))


In [6]:
# Parse a formula string into an NLTK Expression object.
# Author's note: Expression.fromstring errors when
#   - brackets are unmatched,
#   - a variable is not of the form "letter (+ any number)".

# Option A (disabled): parse the converted sentence from the sample data set
# test_expr = Expression.fromstring(test_sent)

# Option B: parse a hand-written expression
custom_fol = 'exists x. all y. (Red(c) & (Blue(x)) | House(y))'
test_expr = Expression.fromstring(custom_fol)


In [7]:
# Display the parsed expression; as the last expression of the cell it becomes the cell output
test_expr

<ExistsExpression exists x.all y.((Red(c) & Blue(x)) | House(y))>

In [8]:
# Inspect the expression object: first its concrete class, then every attribute it exposes
print(type(test_expr), dir(test_expr), sep="\n")

<class 'nltk.sem.logic.ExistsExpression'>
['__and__', '__call__', '__class__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__getstate__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__le__', '__lt__', '__module__', '__ne__', '__neg__', '__new__', '__or__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', '_logic_parser', '_set_type', '_type_checking_logic_parser', 'alpha_convert', 'applyto', 'constants', 'equiv', 'findtype', 'free', 'fromstring', 'getQuantifier', 'make_VariableExpression', 'negate', 'normalize', 'predicates', 'replace', 'simplify', 'substitute_bindings', 'term', 'type', 'typecheck', 'variable', 'variables', 'visit', 'visit_structured']


In [9]:
# Print the built-in help text for every public (non-underscore) attribute of Expression
public_names = [name for name in dir(Expression) if not name.startswith('_')]
for func in public_names:
    print(f"************ {func} ************")
    help(f'nltk.sem.logic.Expression.{func}')
        

************ applyto ************
Help on function applyto in nltk.sem.logic.Expression:

nltk.sem.logic.Expression.applyto = applyto(self, other)

************ constants ************
Help on function constants in nltk.sem.logic.Expression:

nltk.sem.logic.Expression.constants = constants(self)
    Return a set of individual constants (non-predicates).
    :return: set of ``Variable`` objects

************ equiv ************
Help on function equiv in nltk.sem.logic.Expression:

nltk.sem.logic.Expression.equiv = equiv(self, other, prover=None)
    Check for logical equivalence.
    Pass the expression (self <-> other) to the theorem prover.
    If the prover says it is valid, then the self and other are equal.
    
    :param other: an ``Expression`` to check equality against
    :param prover: a ``nltk.inference.api.Prover``

************ findtype ************
Help on function findtype in nltk.sem.logic.Expression:

nltk.sem.logic.Expression.findtype = findtype(self, variable)
    Find

In [10]:
# Query the parsed expression for its building blocks, then report each result

free_vars = test_expr.free()               # free variables, as nltk.sem.logic.Variable objects
quantifiers = test_expr.getQuantifier()    # main(?) quantifier of the expression
constants = test_expr.constants()          # individual constants (non-predicates)
predicates = test_expr.predicates()        # predicate symbols, as Variable objects

# Every report but the last is followed by one blank line
print(f'free variables in "{test_expr}": {free_vars}', end="\n\n")
print(f'quantifiers in "{test_expr}": {quantifiers}', end="\n\n")
print(f'Constants in "{test_expr}": {constants}', end="\n\n")
print(f'Predicates in "{test_expr}": {predicates}')

free variables in "exists x.all y.((Red(c) & Blue(x)) | House(y))": {Variable('c')}

quantifiers in "exists x.all y.((Red(c) & Blue(x)) | House(y))": exists

Constants in "exists x.all y.((Red(c) & Blue(x)) | House(y))": set()

Predicates in "exists x.all y.((Red(c) & Blue(x)) | House(y))": {Variable('Red'), Variable('House'), Variable('Blue')}


In [11]:
# Scratch check: prints 4 only when the expression has at least one free variable
if len(free_vars) > 0:
    print(4)

4


In [12]:
# Swap one predicate symbol for another throughout the expression:
# replace() is given the Variable to look for and the Expression to put in its place
old_pred, new_pred = Variable('Red'), Expression.fromstring('Blue')
new_test_expr = test_expr.replace(old_pred, new_pred)
print(f'Changed "{test_expr}" to "{new_test_expr}"')

Changed "exists x.all y.((Red(c) & Blue(x)) | House(y))" to "exists x.all y.((Blue(c) & Blue(x)) | House(y))"


In [13]:
# Rename FREE variables: substitute_bindings takes a {Variable: replacement Expression} mapping.
# NOTE(review): in the recorded run variables() is {Variable('c')} and the expression comes
# back unchanged -- the bindings below target x, y and z, none of which is the free
# variable c. Confirm whether a binding for c was intended.
bindings = {
    Variable(old_name): Expression.fromstring(new_name)
    for old_name, new_name in (('x', 'a'), ('y', 'b'), ('z', 'c'))
}
print(test_expr.variables())
new_test_expr2 = test_expr.substitute_bindings(bindings)
print(f'Changed "{test_expr}" to "{new_test_expr2}"')

{Variable('c')}
Changed "exists x.all y.((Red(c) & Blue(x)) | House(y))" to "exists x.all y.((Red(c) & Blue(x)) | House(y))"


In [14]:
# Look up the "et types" (type signatures) of the symbols used in the expression

# typecheck() returns all of them at once as a mapping {symbol name: et-type};
# findtype() returns the et-type of one specific variable
et_types = test_expr.typecheck()
var = Variable('Red')
et_type_var = test_expr.findtype(var)

print(f"The et types of the expression {test_expr} are: {et_types}")
print(f"The et type of {var} in the expression {test_expr} is: {et_type_var}")

The et types of the expression exists x.all y.((Red(c) & Blue(x)) | House(y)) are: {'c': e, 'Red': <e,t>, 'x': e, 'Blue': <e,t>, 'y': e, 'House': <e,t>}
The et type of Red in the expression exists x.all y.((Red(c) & Blue(x)) | House(y)) is: <e,t>


In [15]:
# Get all predicates in a specific data set

# Accumulate straight into a set: duplicates collapse on insertion, the name keeps one
# type for the whole cell, and no growing list is re-copied on every iteration.
all_predicates = set()
for item in sample_data['sick'].values():
    # Each value is a FOL formula string; convert it to NLTK syntax before parsing
    expression = Expression.fromstring(fol2nltk(item))
    # predicates() yields Variable objects; keep only their names
    predicates = [str(p) for p in expression.predicates()]
    all_predicates.update(predicates)
print(all_predicates)


{'TanDog', 'Man', 'Kickboxing', 'HoldingFootball', 'Play', 'Under', 'SingingInto', 'Singing', 'InBikeGear', 'Cat', 'Sitting', 'PlaysPiano', 'Surfer', 'StandingSteadilyInFrontOf', 'HasShirt', 'WalkingInWood', 'PlaysIn', 'Outdoors', 'HappyBoy', 'CuttingOnion', 'Dog', 'Pond', 'PlayingKeyboard', 'Water', 'Slicing', 'Microphone', 'Person', 'Riding', 'Wave', 'Drinking', 'Woman', 'Spectator'}


In [16]:
# Split each predicate name into its first word + the rest
# (e.g. 'HoldingFootball' -> 'Holding', 'Football'; 'Man' -> 'Man')
new_predicates = []
for p in all_predicates:
    # Group 1: the first character plus everything up to the next capital letter.
    # Group 2: whatever remains (empty for single-word names).
    # Matching from the start of the string, rather than collecting only capital-led
    # chunks, keeps a leading lowercase word (the 'is' in 'isRed') and cannot raise an
    # IndexError for a name that contains no capital letter at all.
    first_word, remainder = re.match(r'(.?[^A-Z]*)(.*)', p, flags=re.DOTALL).groups()
    new_predicates += [word for word in (first_word, remainder) if word]
print(new_predicates)

# test_expr = Expression.fromstring('exist x. HoldingFootball(x)')
# old_pred = Variable('HoldingFootball')
# new_pred = Expression.fromstring('Holding(x, y) & Football(y)')
# new_test_expr = test_expr.replace(old_pred, new_pred)
# print(f'Changed "{test_expr}" to "{new_test_expr}"')

['Tan', 'Dog', 'Man', 'Kickboxing', 'Holding', 'Football', 'Play', 'Under', 'Singing', 'Into', 'Singing', 'In', 'BikeGear', 'Cat', 'Sitting', 'Plays', 'Piano', 'Surfer', 'Standing', 'SteadilyInFrontOf', 'Has', 'Shirt', 'Walking', 'InWood', 'Plays', 'In', 'Outdoors', 'Happy', 'Boy', 'Cutting', 'Onion', 'Dog', 'Pond', 'Playing', 'Keyboard', 'Water', 'Slicing', 'Microphone', 'Person', 'Riding', 'Wave', 'Drinking', 'Woman', 'Spectator']


In [17]:
# Demo: cut a CamelCase name into capital-led words, then pair the first word
# with the list of all remaining words
first_word, *remaining_words = re.findall('[A-Z][^A-Z]*', 'HoldingFootballBlue')
result = [first_word, remaining_words]
print(result)

['Holding', ['Football', 'Blue']]


**Evaluate wrongly classified items**

In [18]:
import pandas as pd
from evaluation import get_wrong_items, get_fol_expressions, get_free_vars

In [19]:
# Read the evaluation results CSV into a DataFrame and preview its first rows
SICK_TRIAL_RESULTS_CSV = 'evaluations/sick_trial_evaluation.csv'
res_SICK_trial = pd.read_csv(SICK_TRIAL_RESULTS_CSV)
res_SICK_trial.head()

Unnamed: 0.1,Unnamed: 0,p_nl,h_nl,p_fol,h_fol,label,e_pred,c_pred
0,0,The young boys are playing outdoors and the ma...,There is no boy playing outdoors and there is ...,all x.all y. (YoungBoy(x) & Outdoors(y) -> Pla...,not exists x. (Boy(x) & PlayingOutdoors(x)) & ...,c,False,False
1,1,A person in a black jacket is doing tricks on ...,A skilled person is riding a bicycle on one wheel,all x. (Person(x) & WearsBlackJacket(x) -> Tri...,all x. all y. (SkilledPerson(x) & Bicycle(y) &...,n,False,False
2,2,A player is throwing the ball,Two teams are competing in a football match,exists x. (Player(x) & ThrowingBall(x)),exists x. exists y. (Team(x) & Team(y) & Footb...,n,False,False
3,3,Five children are standing in front of a woode...,Five children are standing in a wooden hut,exists x. exists y. (Child(x) & Child(y) & Woo...,exists x. exists y. exists z. exists w. exists...,n,False,False
4,4,Few people are eating at red tables in a resta...,A large group of Asian people is eating at a r...,exists x. (Person(x) & EatingAtRedTables(x) & ...,exists x. exists y. (AsianPerson(x) & Group(x)...,n,False,False


In [20]:
# get_wrong_items (project helper) returns three objects; presumably one group of wrongly
# classified rows per gold label (e / c / n), judging by the variable names -- TODO confirm
st_wrong_e, st_wrong_c, st_wrong_n = get_wrong_items(res_SICK_trial)    #st = sick trial
# Stack the three groups into one object; pd.concat keeps the original row index labels
st_wrong = pd.concat([st_wrong_e, st_wrong_c, st_wrong_n])

In [21]:
# Display the combined frame of wrongly classified items
st_wrong

Unnamed: 0.1,Unnamed: 0,p_nl,h_nl,p_fol,h_fol,label,e_pred,c_pred
6,6,Two dogs are playing by a tree,Two dogs are playing by a plant,exists x.exists y.exists z. (Dog(x) & Dog(y) &...,exists x. exists y. exists z. (Dog(x) & Dog(y)...,e,False,False
7,7,A girl in white is dancing,A girl is wearing white clothes and is dancing,all x. (Girl(x) & WearingWhite(x) -> Dancing(x)),exists x. (Girl(x) & WearingWhiteClothes(x) & ...,e,False,False
12,12,A hiker is on top of the mountain and is doing...,A hiker is on top of the mountain and is dancing,all x. (Hiker(x) -> (OnTopOfMountain(x) & Joyf...,exists x. (Hiker(x) & OnTopOfMountain(x) & Dan...,e,False,False
18,18,A white and tan dog is running through the tal...,A white and tan dog is running through a field,all x. (Dog(x) & White(x) & Tan(x) -> exists y...,all x. (Dog(x) & White(x) & Tan(x) -> exists y...,e,False,False
37,37,A group of people is equipped with protective ...,A group of people is equipped with gear used f...,all x. (GroupOfPeople(x) -> EquippedWithProtec...,all x.all y. (Group(x) & Person(y) & UsesGear(...,e,False,False
...,...,...,...,...,...,...,...,...
424,424,A blond child is going down a slide and throwi...,A child with dark hair is going down a slide a...,all x. (BlondChild(x) -> (GoingDownSlide(x) & ...,all x. (Child(x) & DarkHair(x) -> (GoingDownSl...,c,False,False
450,450,No dog is in the sand that is blowing in the wind,A dog is in a sandy area with the sand that is...,all x. (Dog(x) & InSand(x) -> not BlowingInWin...,all x. all y. all z. (Dog(x) & SandyArea(y) & ...,c,False,False
454,454,A cluster of four brown dogs are playing in a ...,Four dogs are not playing in a grassy area,exists x. exists y. exists z. (Cluster(x) & Do...,exists x. (Dog(x) & not PlayingInGrassyArea(x)),c,False,False
462,462,A black and white dog with a large branch is s...,A black and white dog with a large branch is r...,all x. (Dog(x) & Black(x) & White(x) & LargeBr...,all x.all y.all z. (Dog(x) & BlackAndWhite(x) ...,c,False,False


In [22]:
# Project helper from evaluation.py; presumably parses the FOL columns of the wrong items
# into expression objects -- TODO confirm against the helper's implementation
st_wrong_fol_expr = get_fol_expressions(st_wrong)

In [23]:
# Number of entries returned by get_fol_expressions
len(st_wrong_fol_expr)

358

In [24]:
# Project helper from evaluation.py; its result is displayed in the next cell.
# NOTE(review): the recorded run also printed a list of numbers that match the keys of the
# returned mapping (365, 369, 460, ...), presumably row indices printed by the helper
# itself -- confirm whether that output is intended
st_wrong_free_vars = get_free_vars(st_wrong)

365
369
460
460
0
28
41
68
75
153


In [25]:
# Display the result of get_free_vars (recorded output: a mapping from a number to a set of Variables)
st_wrong_free_vars

{365: {Variable('f')},
 369: {Variable('z')},
 460: {Variable('f')},
 0: {Variable('z')},
 28: {Variable('y')},
 41: {Variable('y')},
 68: {Variable('z')},
 75: {Variable('y'), Variable('z')},
 153: {Variable('z')}}

In [26]:
# Collect the name of every predicate used in the 'sick' sample formulas.
# Accumulate straight into a set: duplicates collapse on insertion, the name keeps one
# type for the whole cell, and no growing list is re-copied on every iteration.
all_predicates = set()
for item in sample_data['sick'].values():
    # Each value is a FOL formula string; convert it to NLTK syntax before parsing
    expression = Expression.fromstring(fol2nltk(item))
    # predicates() yields Variable objects; keep only their names
    predicates = [str(p) for p in expression.predicates()]
    all_predicates.update(predicates)
print(all_predicates)

{'TanDog', 'Man', 'Kickboxing', 'HoldingFootball', 'Play', 'Under', 'SingingInto', 'Singing', 'InBikeGear', 'Cat', 'Sitting', 'PlaysPiano', 'Surfer', 'StandingSteadilyInFrontOf', 'HasShirt', 'WalkingInWood', 'PlaysIn', 'Outdoors', 'HappyBoy', 'CuttingOnion', 'Dog', 'Pond', 'PlayingKeyboard', 'Water', 'Slicing', 'Microphone', 'Person', 'Riding', 'Wave', 'Drinking', 'Woman', 'Spectator'}
