Skip to content

Commit

Permalink
small fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
Gabor Recski committed May 30, 2017
1 parent 9aeed0c commit f4a6aea
Show file tree
Hide file tree
Showing 4 changed files with 30 additions and 6 deletions.
14 changes: 11 additions & 3 deletions scripts/get_defs.py
@@ -1,8 +1,16 @@
import json import json
import sys import sys
import traceback


data = json.load(sys.stdin) data = json.load(sys.stdin)
for e in data.itervalues(): for e in data.itervalues():
if e['senses'] and e['senses'][0]['definition']: if not e['senses']:
print u"{0}\t{1}".format( continue
e['hw'], e['senses'][0]['definition']['sen']).encode('utf-8') defs = [sense.get('definition') for sense in e['senses']]
for definition in defs:
if not definition:
sys.stderr.write(e['hw'].encode('utf-8')+'\n')
continue
if isinstance(definition, unicode):
continue
print u"{0}\t{1}".format(e['hw'], definition['sen']).encode('utf-8')
3 changes: 2 additions & 1 deletion scripts/get_graph.py
Expand Up @@ -7,7 +7,8 @@
def main(): def main():
lex_fn, word = sys.argv[1:3] lex_fn, word = sys.argv[1:3]
lex = Lexicon.load_from_binary(lex_fn) lex = Lexicon.load_from_binary(lex_fn)
machines = lex.lexicon.get(word, lex.ext_lexicon.get(word)) # machines = lex.lexicon.get(word, lex.ext_lexicon.get(word))
machines = [lex.get_machine(word)]
if machines is None: if machines is None:
print '404 :(' print '404 :('
else: else:
Expand Down
6 changes: 5 additions & 1 deletion src/fourlang/corenlp_wrapper.py
Expand Up @@ -102,9 +102,13 @@ def parse_entries(self, entries):
for entry in entries: for entry in entries:
for sense in entry['senses']: for sense in entry['senses']:
sentence = sense['definition'] sentence = sense['definition']
if sentence is None: if not sentence:
continue continue
deps, corefs, parse_trees = self.parse_text(sentence) deps, corefs, parse_trees = self.parse_text(sentence)
if not deps:
logging.warning(
'no deps: {0}'.format(sentence.encode('utf-8')))
continue
sense['definition'] = { sense['definition'] = {
"sen": sentence, "sen": sentence,
"deps": deps[0], "deps": deps[0],
Expand Down
13 changes: 12 additions & 1 deletion src/fourlang/wiktionary_parser.py
Expand Up @@ -110,5 +110,16 @@ def test():
for entry in WiktParser.parse_xml(xml): for entry in WiktParser.parse_xml(xml):
print entry print entry


def print_defs():
    """Print every non-empty sense definition read from standard input.

    The whole of stdin is read and handed to ``WiktParser.parse_xml``.
    For each entry it yields, the ``'hw'`` and ``'senses'`` items are
    looked up; entries whose senses are empty are skipped.  Each sense
    with a truthy ``'definition'`` produces one output line of the form
    ``<hw><TAB><definition>``.

    NOTE(review): presumably stdin carries a Wiktionary XML dump and
    ``hw`` is the headword -- confirm against ``WiktParser``.
    """
    raw_xml = sys.stdin.read()
    for entry in WiktParser.parse_xml(raw_xml):
        headword = entry['hw']
        sense_list = entry['senses']
        # A falsy sense list (e.g. empty or None) contributes no output.
        for sense in sense_list or ():
            text = sense['definition']
            if not text:
                continue
            # Single parenthesised argument: prints the same text whether
            # ``print`` is the Python 2 statement or a function.
            print("{0}\t{1}".format(headword, text))

if __name__ == "__main__": if __name__ == "__main__":
test() print_defs()

0 comments on commit f4a6aea

Please sign in to comment.