In [3]:
import json
with open("sample_data.json","r") as inputfile:
    sample_data = json.load(inputfile)

In [4]:
for dataset in sample_data.keys():
    print(f"***{dataset}***")
    for k, v in sample_data[dataset].items():
        print(k,"||", v)

***sick***
A tan dog is playing in the water on the bank of a pond || ∀x∀y∀z (TanDog(x) ∧ Water(y) ∧ Pond(z) → PlaysIn(x, y, z))
Some water is being drunk by a cat || ∃x∃y (Water(x) ∧ Cat(y) ∧ Drinking(y, x))
A person is playing a keyboard || ∀x (Person(x) ∧ PlayingKeyboard(x))
The man is sitting outdoors under a purple umbrella || ∃x (Man(x) ∧ Sitting(x) ∧ Outdoors(x) ∧ Under(x, purple_umbrella))
A man with no shirt is holding a football || ∀x (Man(x) ∧ ¬HasShirt(x) → HoldingFootball(x))
There is no person in bike gear standing steadily in front of the mountains || ¬∃x (Person(x) ∧ InBikeGear(x) ∧ StandingSteadilyInFrontOf(x, mountains))
A woman is slicing a tomato || ∃x (Woman(x) ∧ Slicing(x, tomato))
A few people are singing || ∃x∃y∃z (Person(x) ∧ Person(y) ∧ Person(z) ∧ Singing(x) ∧ Singing(y) ∧ Singing(z))
The surfer is riding a big wave || ∃x ∃y (Surfer(x) ∧ Wave(y) ∧ Riding(x, y))
Two dogs and two people are walking in a wood || ∃x ∃y ∃z ∃w (Dog(x) ∧ Dog(y) ∧ Person(z) ∧ Person(

In [9]:
# Imports 
import re
from nltk.sem.logic import Expression, Variable
from prepocessing import preprocessing

In [10]:
# Get a test sentence from the sample data set 
test_sent = sample_data['sick']['A person is playing a keyboard']
test_sent = preprocessing.fol2nltk(test_sent)
print(test_sent)

all x. (Person(x) & PlayingKeyboard(x))


In [11]:
# read string of expression into NLTK 
#! Expression.fromstring errors when:
    # unmatched brackets, 
    # var is not of form "letter(+any number)"
 
# test_expr = Expression.fromstring(test_sent) #option with expression from sample data set 
test_expr = Expression.fromstring('exists x. all y. (Red(c) & (Blue(x)) | House(y))') #option with custom expression 


In [12]:
test_expr

<ExistsExpression exists x.all y.((Red(c) & Blue(x)) | House(y))>

In [13]:
# info about expression object 
print(type(test_expr))
print(dir(test_expr))

<class 'nltk.sem.logic.ExistsExpression'>
['__and__', '__call__', '__class__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__getstate__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__le__', '__lt__', '__module__', '__ne__', '__neg__', '__new__', '__or__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', '_logic_parser', '_set_type', '_type_checking_logic_parser', 'alpha_convert', 'applyto', 'constants', 'equiv', 'findtype', 'free', 'fromstring', 'getQuantifier', 'make_VariableExpression', 'negate', 'normalize', 'predicates', 'replace', 'simplify', 'substitute_bindings', 'term', 'type', 'typecheck', 'variable', 'variables', 'visit', 'visit_structured']


In [14]:
#Get some documentation of available functions of an expressions object 
for func in dir(Expression):
    if not func.startswith('_'):
        print(f"************ {func} ************")
        help('nltk.sem.logic.Expression.' + func)
        

************ applyto ************
Help on function applyto in nltk.sem.logic.Expression:

nltk.sem.logic.Expression.applyto = applyto(self, other)

************ constants ************
Help on function constants in nltk.sem.logic.Expression:

nltk.sem.logic.Expression.constants = constants(self)
    Return a set of individual constants (non-predicates).
    :return: set of ``Variable`` objects

************ equiv ************
Help on function equiv in nltk.sem.logic.Expression:

nltk.sem.logic.Expression.equiv = equiv(self, other, prover=None)
    Check for logical equivalence.
    Pass the expression (self <-> other) to the theorem prover.
    If the prover says it is valid, then the self and other are equal.
    
    :param other: an ``Expression`` to check equality against
    :param prover: a ``nltk.inference.api.Prover``

************ findtype ************
Help on function findtype in nltk.sem.logic.Expression:

nltk.sem.logic.Expression.findtype = findtype(self, variable)
    Find

In [15]:
# Get info on an expression 

#get all free variables as set of nltk.sem.logic.Variable objects 
free_vars = test_expr.free()
print(f'free variables in "{test_expr}": {free_vars}')
print()

# Get main(?) quantifier
quantifiers = test_expr.getQuantifier()
print(f'quantifiers in "{test_expr}": {quantifiers}')
print()

# Get all constants as set of nltk.sem.logic.Variable objects 
constants = test_expr.constants()
print(f'Constants in "{test_expr}": {constants}')
print()

# Get all predicates as set of nltk.sem.logic.Variable objects 
predicates = test_expr.predicates()
print(f'Predicates in "{test_expr}": {predicates}')

free variables in "exists x.all y.((Red(c) & Blue(x)) | House(y))": {Variable('c')}

quantifiers in "exists x.all y.((Red(c) & Blue(x)) | House(y))": exists

Constants in "exists x.all y.((Red(c) & Blue(x)) | House(y))": set()

Predicates in "exists x.all y.((Red(c) & Blue(x)) | House(y))": {Variable('House'), Variable('Blue'), Variable('Red')}


In [16]:
if free_vars:
    print(4)

4


In [17]:
# Replace predicates (and more?) in an expression 
old_pred = Variable('Red')
new_pred = Expression.fromstring('Blue')
new_test_expr = test_expr.replace(old_pred, new_pred)
print(f'Changed "{test_expr}" to "{new_test_expr}"')

Changed "exists x.all y.((Red(c) & Blue(x)) | House(y))" to "exists x.all y.((Blue(c) & Blue(x)) | House(y))"


In [18]:
#Replace names of FREE variables 
bindings = {
    Variable('x'): Expression.fromstring('a'),
    Variable('y'): Expression.fromstring('b'),
    Variable('z'): Expression.fromstring('c')
}
print(test_expr.variables())
new_test_expr2 = test_expr.substitute_bindings(bindings)
print(f'Changed "{test_expr}" to "{new_test_expr2}"')

{Variable('c')}
Changed "exists x.all y.((Red(c) & Blue(x)) | House(y))" to "exists x.all y.((Red(c) & Blue(x)) | House(y))"


In [19]:
# Get et types of the sentence 

#all et types returnd as: dict{'word': et-type}
et_types = test_expr.typecheck()
print(f"The et types of the expression {test_expr} are: {et_types}")

# get et-type of specific variable 
var = Variable('Red')
et_type_var = test_expr.findtype(var)
print(f"The et type of {var} in the expression {test_expr} is: {et_type_var}")

The et types of the expression exists x.all y.((Red(c) & Blue(x)) | House(y)) are: {'c': e, 'Red': <e,t>, 'x': e, 'Blue': <e,t>, 'y': e, 'House': <e,t>}
The et type of Red in the expression exists x.all y.((Red(c) & Blue(x)) | House(y)) is: <e,t>


In [22]:
#Get all predicates in a specific data set

all_predicates = []
for item in sample_data['sick'].values():
    expression = Expression.fromstring(preprocessing.fol2nltk(item))
    predicates = [str(p) for p in list(expression.predicates())]
    # print(predicates)
    all_predicates = all_predicates + predicates
    # print(all_predicates)
all_predicates = set(all_predicates) 
print(all_predicates)


{'Singing', 'Water', 'CuttingOnion', 'Woman', 'Kickboxing', 'HasShirt', 'Slicing', 'WalkingInWood', 'Person', 'Man', 'TanDog', 'PlayingKeyboard', 'StandingSteadilyInFrontOf', 'Wave', 'PlaysIn', 'Surfer', 'Outdoors', 'PlaysPiano', 'InBikeGear', 'Spectator', 'Sitting', 'Pond', 'Cat', 'Microphone', 'Drinking', 'SingingInto', 'Play', 'Under', 'HoldingFootball', 'HappyBoy', 'Riding', 'Dog'}


In [21]:
#split predicates into first word + the rest 
new_predicates = []
for p in all_predicates:
    parts = re.findall('[A-Z][^A-Z]*', p)
    split_p = [parts[0]]
    if len(parts) > 1:
        second_pred =""
        for part in parts[1:]:
            second_pred = second_pred + part 
        split_p.append(second_pred)
    new_predicates += split_p
print(new_predicates)

# test_expr = Expression.fromstring('exist x. HoldingFootball(x)')
# old_pred = Variable('HoldingFootball')
# new_pred = Expression.fromstring('Holding(x, y) & Football(y)')
# new_test_expr = test_expr.replace(old_pred, new_pred)
# print(f'Changed "{test_expr}" to "{new_test_expr}"')

[]


In [23]:
import re
# split predicates 
result = re.findall('[A-Z][^A-Z]*', 'HoldingFootballBlue')
result = [result[0], result[1:]]
print(result)

['Holding', ['Football', 'Blue']]


In [25]:
all_predicates = []
for item in sample_data['sick'].values():
    expression = Expression.fromstring(preprocessing.fol2nltk(item))
    predicates = [str(p) for p in list(expression.predicates())]
    # print(predicates)
    all_predicates = all_predicates + predicates
    # print(all_predicates)
all_predicates = set(all_predicates) 
print(all_predicates)

{'Singing', 'Water', 'CuttingOnion', 'Woman', 'Kickboxing', 'HasShirt', 'Slicing', 'WalkingInWood', 'Person', 'Man', 'TanDog', 'PlayingKeyboard', 'StandingSteadilyInFrontOf', 'Wave', 'PlaysIn', 'Surfer', 'Outdoors', 'PlaysPiano', 'InBikeGear', 'Spectator', 'Sitting', 'Pond', 'Cat', 'Microphone', 'Drinking', 'SingingInto', 'Play', 'Under', 'HoldingFootball', 'HappyBoy', 'Riding', 'Dog'}


## LK testing 

In [26]:
premise = list(set("alle eenhoorns hebben blauw haar".split()))
hypo = list(set("koeien hebben staarten".split()))
print(premise)
print(hypo)
import itertools
for i in itertools.product(premise, hypo):
    print(i)

['alle', 'blauw', 'eenhoorns', 'hebben', 'haar']
['hebben', 'staarten', 'koeien']
('alle', 'hebben')
('alle', 'staarten')
('alle', 'koeien')
('blauw', 'hebben')
('blauw', 'staarten')
('blauw', 'koeien')
('eenhoorns', 'hebben')
('eenhoorns', 'staarten')
('eenhoorns', 'koeien')
('hebben', 'hebben')
('hebben', 'staarten')
('hebben', 'koeien')
('haar', 'hebben')
('haar', 'staarten')
('haar', 'koeien')


In [27]:
import itertools
import nltk

## Note: use this block if you get "error loading wordnet ... SSL:CERTIFICATE_VERIFY_FAILED ...""
import ssl
try:
    _create_unverified_https_context = ssl._create_unverified_context
except AttributeError:
    pass
else:
    ssl._create_default_https_context = _create_unverified_https_context
##########
    
nltk.download('wordnet')
from nltk.corpus import wordnet as wn

[nltk_data] Downloading package wordnet to /Users/jip/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


In [28]:
print(wn.synsets("couch")[0])
print(wn.synsets("sofa")[0])

Synset('sofa.n.01')
Synset('sofa.n.01')


In [29]:
wn.synsets("animal")[0].lowest_common_hypernyms(wn.synsets("cat")[0])


[Synset('animal.n.01')]

In [30]:
hyper_cat = []
for hyp in wn.synsets("cat")[0].hypernyms():
    hyper_cat.append(hyp)
    for hyp2 in hyp.hypernyms():
        hyper_cat.append(hyp2)
        for hyp3 in hyp2.hypernyms():
            hyper_cat.append(hyp3)

print(hyper_cat)

[Synset('feline.n.01'), Synset('carnivore.n.01'), Synset('placental.n.01')]


In [31]:
hypo_animal = []
for hyp in wn.synsets("animal")[0].hyponyms():
    hypo_animal.append(hyp)
    # for hyp2 in hyp.hyponyms():
    #     hypo_animal.append(hyp2)
    #     for hyp3 in hyp2.hyponyms():
    #         hypo_animal.append(hyp3)

print(hypo_animal)

[Synset('acrodont.n.01'), Synset('adult.n.02'), Synset('biped.n.01'), Synset('captive.n.02'), Synset('chordate.n.01'), Synset('creepy-crawly.n.01'), Synset('critter.n.01'), Synset('darter.n.02'), Synset('domestic_animal.n.01'), Synset('embryo.n.02'), Synset('feeder.n.06'), Synset('female.n.01'), Synset('fictional_animal.n.01'), Synset('game.n.04'), Synset('giant.n.01'), Synset('herbivore.n.01'), Synset('hexapod.n.01'), Synset('homeotherm.n.01'), Synset('insectivore.n.02'), Synset('invertebrate.n.01'), Synset('larva.n.01'), Synset('male.n.01'), Synset('marine_animal.n.01'), Synset('mate.n.03'), Synset('metazoan.n.01'), Synset('migrator.n.02'), Synset('molter.n.01'), Synset('mutant.n.02'), Synset('omnivore.n.02'), Synset('peeper.n.03'), Synset('pest.n.04'), Synset('pet.n.01'), Synset('pleurodont.n.01'), Synset('poikilotherm.n.01'), Synset('predator.n.02'), Synset('prey.n.02'), Synset('racer.n.03'), Synset('range_animal.n.01'), Synset('scavenger.n.03'), Synset('stayer.n.01'), Synset('stun

In [32]:
# wn.synsets("cat")[0].hypernyms()
wn.synsets("placental")[0].hypernyms()

[Synset('mammal.n.01')]

In [33]:
hypercat = set([i for i in wn.synsets("cat")[0].closure(lambda s:s.hypernyms())])
if wn.synsets("animal")[0] in hypercat:
    print("cat is an animal")

cat is an animal


In [34]:
wn.synsets("animal")[0].hyponyms()

[Synset('acrodont.n.01'),
 Synset('adult.n.02'),
 Synset('biped.n.01'),
 Synset('captive.n.02'),
 Synset('chordate.n.01'),
 Synset('creepy-crawly.n.01'),
 Synset('critter.n.01'),
 Synset('darter.n.02'),
 Synset('domestic_animal.n.01'),
 Synset('embryo.n.02'),
 Synset('feeder.n.06'),
 Synset('female.n.01'),
 Synset('fictional_animal.n.01'),
 Synset('game.n.04'),
 Synset('giant.n.01'),
 Synset('herbivore.n.01'),
 Synset('hexapod.n.01'),
 Synset('homeotherm.n.01'),
 Synset('insectivore.n.02'),
 Synset('invertebrate.n.01'),
 Synset('larva.n.01'),
 Synset('male.n.01'),
 Synset('marine_animal.n.01'),
 Synset('mate.n.03'),
 Synset('metazoan.n.01'),
 Synset('migrator.n.02'),
 Synset('molter.n.01'),
 Synset('mutant.n.02'),
 Synset('omnivore.n.02'),
 Synset('peeper.n.03'),
 Synset('pest.n.04'),
 Synset('pet.n.01'),
 Synset('pleurodont.n.01'),
 Synset('poikilotherm.n.01'),
 Synset('predator.n.02'),
 Synset('prey.n.02'),
 Synset('racer.n.03'),
 Synset('range_animal.n.01'),
 Synset('scavenger.n.03'

In [35]:
wn.synsets("couch")[0].lemmas()

[Lemma('sofa.n.01.sofa'), Lemma('sofa.n.01.couch'), Lemma('sofa.n.01.lounge')]

In [1]:
from lexical_knowledge import get_hypo_syn_lk

get_hypo_syn_lk("all x.Sofas(x, y) & Blue(x, y)", "all x. Couches(x, y) & Color(x, y)")

[nltk_data] Downloading package wordnet to /Users/jip/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


['all x. all y. (Blue(x, y) -> Color(x, y))',
 'all x. all y. (Sofas(x, y) -> Couches(x, y))',
 'all x. all y. (Couches(x, y) -> Sofas(x, y))']

In [80]:
args = (re.findall(f"Sofas\((.*?)\)", "all x.Sofas(x, y, z) & Blue(x)"))[0].split(", ")
print(args)

['x', 'y', 'z']


In [81]:
", ".join(args)

'x, y, z'

In [74]:
Expression.fromstring("all x. all y. all z. Blue(x) & Blue(z) & Blue(y)")

<AndExpression (all x y z.Blue(x) & Blue(z) & Blue(y))>