-
Notifications
You must be signed in to change notification settings - Fork 0
/
lab2.py
48 lines (36 loc) · 1.45 KB
/
lab2.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
import nltk
from nltk.stem.wordnet import WordNetLemmatizer

# Fetch the WordNet corpus that the lemmatizer and the wordnet reader rely on.
nltk.download('wordnet')

# Lemmatize one sample word without an explicit part-of-speech argument
# and print what comes back.
lmtzr = WordNetLemmatizer()
cause_lemma = lmtzr.lemmatize('cause')
print(cause_lemma)

# Additional lemmatization examples, currently disabled:
# cars_lemma = lmtzr.lemmatize('cars')
# feet_lemma = lmtzr.lemmatize('feet')
# pple_lemma = lmtzr.lemmatize('people')
# fant_lemma = lmtzr.lemmatize('fantasized','v')
# print(cars_lemma)
from nltk.corpus import wordnet as wn
def morphify(word, org_pos, target_pos):
    """Convert *word* from one part of speech to another using WordNet.

    The synsets of ``word`` tagged ``org_pos`` are looked up, every lemma of
    those synsets is expanded to its derivationally related forms, and only
    the related lemmas whose synset is tagged ``target_pos`` are kept.

    Args:
        word: The word to convert.
        org_pos: WordNet part-of-speech tag of ``word`` (e.g. ``'n'``).
        target_pos: WordNet part-of-speech tag wanted for the results
            (e.g. ``'v'``).

    Returns:
        A list of ``(word, probability)`` tuples sorted by decreasing
        probability, where probability is the share of all matching related
        lemmas that carry that name. Candidates with equal probability keep
        the order in which they were first encountered. The list is empty
        when ``word`` has no synsets for ``org_pos`` or no related form in
        ``target_pos``.
    """
    # Function-scope import so the block does not depend on a file-level one.
    from collections import Counter

    synsets = wn.synsets(word, pos=org_pos)
    # Word not found
    if not synsets:
        return []

    # Ask each synset for its POS instead of parsing it out of the synset
    # name: when the lemma part of the name itself contains a dot,
    # name().split('.')[1] is no longer the POS tag.
    lemmas = [
        lemma
        for synset in synsets
        if synset.pos() == org_pos
        for lemma in synset.lemmas()
    ]

    # Names of all derivationally related lemmas in the targeted POS.
    # Duplicates are kept on purpose: they drive the probabilities below.
    related_names = [
        related.name()
        for lemma in lemmas
        for related in lemma.derivationally_related_forms()
        if related.synset().pos() == target_pos
    ]

    # Single counting pass; most_common() sorts by count (descending) and
    # resolves ties by first occurrence, so the output order is reproducible.
    # An empty candidate list yields an empty result without dividing by zero.
    total = len(related_names)
    return [
        (name, float(count) / total)
        for name, count in Counter(related_names).most_common()
    ]
# Demo: print the 'v' (verb) forms that WordNet relates to the 'n' (noun) 'cause'.
print(morphify(word='cause', org_pos='n', target_pos='v'))