In [1]:
import nltk
import hashlib
import random
from random import randint
from random import seed
from nltk.corpus import framenet as fn
from nltk.corpus import stopwords
from nltk.tokenize import WhitespaceTokenizer
from nltk.corpus import wordnet
from nltk.corpus.reader.wordnet import Synset

nltk.download('framenet_v17')
nltk.download('stopwords')
nltk.download('wordnet')
nltk.download('averaged_perceptron_tagger')

[nltk_data] Downloading package framenet_v17 to /root/nltk_data...
[nltk_data]   Package framenet_v17 is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!


True

In [2]:
def print_frames_with_IDs():
    for x in fn.frames():
        print('{}\t{}'.format(x.ID, x.name))

def get_frams_IDs():
    return [f.ID for f in fn.frames()]

def cool_string_hashcode(string): 
    return abs(int(hashlib.sha512(string.encode('utf-8')).hexdigest(), 16))

def get_frame_set_for_student(surname, list_len=5):
    nof_frames = len(fn.frames())
    base_idx = (cool_string_hashcode(surname) % nof_frames)
    print('Student: ' + surname)
    framenet_IDs = get_frams_IDs()
    i = 0
    offset = 0 
    seed(1)
    frames = []
    while i < list_len:
        fID = framenet_IDs[(base_idx+offset)%nof_frames]
        f = fn.frame(fID)
        frames.append(f)
        fNAME = f.name
        print('\tID: {a:4d}\tframe: {framename}'.format(a=fID, framename=fNAME))
        offset = randint(0, nof_frames)
        i += 1 
    return frames

In [3]:
def flat_map_strings(list, tokenizer = WhitespaceTokenizer()):
    ret = []
    for s in list:
        for w in tokenizer.tokenize(s):
            ret.append(w)
    return ret

In [4]:
def get_context_from_frame(frame):
    raw_data = []
    raw_data.append(frame["name"].replace("_", " "))
    raw_data.append(frame["definition"])
    for _, le in frame["lexUnit"].items():
        raw_data.append(le["name"].split('.')[0])
        raw_data.append(le["definition"])
        for example in le["exemplars"]:
            raw_data.append(example["text"])
    for _, fe in frame["FE"].items():
        raw_data.append(fe["definition"])
        raw_data.append(fe["name"])
    return set(flat_map_strings(raw_data))

In [5]:
def get_context_from_synset(synset):
    raw_data = []
    raw_data.append(synset.definition())
    for e in synset.examples():
        raw_data.append(e)
    for l in synset.lemma_names():
        raw_data.append(l)
    return set(flat_map_strings(raw_data))

In [6]:
def compare_and_score_contexts(ctx1, ctx2):
     return len(ctx1.intersection(ctx2)) + 1

In [7]:
def find_best_meaning(lemma, lemma_ctx):
    best_score = 0
    best_synset = None
    for synset in wordnet.synsets(lemma):
        synset_ctx = get_context_from_synset(synset)
        score = compare_and_score_contexts(synset_ctx, lemma_ctx)
        if (score > best_score):
            best_synset = synset
            best_score = score
    return best_synset if best_synset is not None else Synset(None)

In [8]:
def select_most_important_word(words):
    best = 0
    tag = nltk.tag.pos_tag(words)
    for i in range(0, len(words)):
        if tag[i] == 'VERB':
            return words[i]
        elif tag[i] == 'NOUN' and tag[best] != 'NOUN':
           best = i
    return words[best]

def smiw(words):
    return select_most_important_word(words)

In [24]:
def disambiguate_frame_data(frame):
    frame_ctx = get_context_from_frame(frame)
    best_frame_name_word = smiw(frame["name"].split("_"))
    title_best_meaning = find_best_meaning(best_frame_name_word, frame_ctx)
    print(f'\nTitle disambiguation: {frame["name"]} | best synset: {title_best_meaning}')
    print(f'Synset definition: {title_best_meaning.definition()}')
    print("Frame elements disambiguations:")
    count = 0

    for _, fe in frame["FE"].items():
        best_fe_name_word = smiw(fe["name"].split("_"))
        fe_best_meaning = find_best_meaning(best_fe_name_word, frame_ctx)
        print(f' - {fe["name"]} | best synset: {fe_best_meaning}')
        print(f'   Synset definition: {fe_best_meaning.definition()}')

    print("Lexical units disambiguations:")
    count = 0
    for _, le in frame["lexUnit"].items():
        le_best_meaning = find_best_meaning(le["name"].split(".")[0], frame_ctx)
        print(f' - {le["name"]} | best synset: {le_best_meaning}')
        print(f'   Synset definition: {le_best_meaning.definition()}')
    return 

In [25]:
for f in get_frame_set_for_student("Basti"):
    disambiguate_frame_data(f)
    print("________________________________________________")

Student: Basti
	ID:  810	frame: Measure_by_action
	ID:  347	frame: Revenge
	ID: 1041	frame: Left_to_do
	ID: 1182	frame: Post_receiving
	ID: 1322	frame: Active_substance

Title disambiguation: Measure_by_action | best synset: Synset('measure.v.04')
Synset definition: evaluate or estimate the nature, quality, ability, extent, or significance of
Frame elements disambiguations:
 - Entity | best synset: Synset('entity.n.01')
   Synset definition: that which is perceived or known or inferred to have its own distinct existence (living or nonliving)
 - Event | best synset: Synset('consequence.n.01')
   Synset definition: a phenomenon that follows and is caused by some previous phenomenon
 - Count | best synset: Synset('count.v.08')
   Synset definition: have faith or confidence in
 - Particular_iteration | best synset: Synset('detail.n.02')
   Synset definition: a small part that can be considered separately from the whole
Lexical units disambiguations:
 - splash.n | best synset: Synset('dab.n

In [26]:
for f in get_frame_set_for_student("Pregno"):
    disambiguate_frame_data(f)
    print("________________________________________________")

Student: Pregno
	ID:    5	frame: Causation
	ID:  384	frame: Experience_bodily_harm
	ID:  228	frame: Being_named
	ID: 2654	frame: Controller_object
	ID:  156	frame: Measure_area

Title disambiguation: Causation | best synset: Synset('causing.n.01')
Synset definition: the act of causing something to happen
Frame elements disambiguations:
 - Cause | best synset: Synset('campaign.n.02')
   Synset definition: a series of actions advancing a principle or tending toward a particular end
 - Affected | best synset: Synset('feign.v.01')
   Synset definition: make believe with the intent to deceive
 - Effect | best synset: Synset('consequence.n.01')
   Synset definition: a phenomenon that follows and is caused by some previous phenomenon
 - Place | best synset: Synset('put.v.01')
   Synset definition: put into a certain place or abstract location
 - Time | best synset: Synset('time.n.02')
   Synset definition: a period of time considered as a resource under your control and sufficient to accompli