Skip to content

Commit

Permalink
Fixed issue with spaces between punctuation, and stop parser crashing…
Browse files Browse the repository at this point in the history
… for invalid xml, now reports issue with file
  • Loading branch information
keiffster committed Mar 24, 2017
1 parent 5c25761 commit 134eb73
Show file tree
Hide file tree
Showing 8 changed files with 84 additions and 30 deletions.
31 changes: 14 additions & 17 deletions src/programy/parser/aiml_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,10 @@ def __init__(self, aiml_parser):
self.aiml_parser = aiml_parser

def load_file_contents(self, filename):
    """Parse one AIML file, reporting a failure in the log instead of raising.

    Args:
        filename: Path of the AIML file passed to ``self.aiml_parser``.

    Returns:
        Whatever ``self.aiml_parser.parse_from_file`` returns, or ``None``
        when that call raised.
    """
    try:
        return self.aiml_parser.parse_from_file(filename)
    except Exception:
        # Deliberately swallowed: the failure is reported against the file
        # rather than propagated to the caller.
        # logging.exception() appends the active traceback by itself, so the
        # exception object is not passed along; an extra positional argument
        # without a matching %s placeholder makes the logging module emit a
        # formatting error in place of this message.
        logging.exception("Failed to load contents of file from [%s]", filename)
        return None

class AIMLParser(object):
def __init__(self, supress_warnings=False, stop_on_invalid=False):
Expand Down Expand Up @@ -75,20 +76,16 @@ def parse_from_file(self, filename):

logging.info("Loading aiml file file: " + self._filename)

tree = ET.parse(filename, parser=LineNumberingParser())
aiml = tree.getroot()

if aiml is None or aiml.tag != 'aiml':
raise ParserException("Error, root tag is not <aiml>", filename=filename)
else:
try:
try:
tree = ET.parse(filename, parser=LineNumberingParser())
aiml = tree.getroot()
if aiml is None or aiml.tag != 'aiml':
raise ParserException("Error, root tag is not <aiml>", filename=filename)
else:
self.parse_aiml(aiml, filename)
except ParserException as parser_excep:
parser_excep.filename = filename
raise parser_excep
except ET.ParseError as xmlpe:
xmlpe.filename = filename
xmlpe.xml_exception = xmlpe
except Exception as e:
logging.error("Failed to load contents of AIML file from [%s] - [%s]"%(filename, e))


def parse_from_text(self, text):
"""
Expand All @@ -100,7 +97,7 @@ def parse_from_text(self, text):
aiml = ET.fromstring(text)

if aiml is None or aiml.tag != 'aiml':
ParserException("Error, root tag is not <aiml>", filename="text")
raise ParserException("Error, root tag is not <aiml>", filename="text")
else:
self.parse_aiml(aiml, "text")

Expand Down
18 changes: 8 additions & 10 deletions src/programy/processors/post/cleanup.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@


import logging
import re

from programy.processors.processing import PostProcessor

class CleanUpPostProcessor(PostProcessor):
Expand All @@ -24,13 +26,9 @@ def __init__(self):

def process(self, bot, clientid, string):
    """Tidy the spacing of a response around quotes and punctuation.

    Padding just inside double quotes is removed, whitespace in front of
    ``, : ; ? . !`` is removed when that mark is followed by whitespace or
    the end of the text, and leading/trailing whitespace is dropped.

    Args:
        bot: Not used by this processor.
        clientid: Not used by this processor.
        string: The response text to clean.

    Returns:
        The cleaned text.
    """
    logging.debug("Cleaning up output...")

    # Separate quoted runs from the text around them. An apostrophe only
    # counts as a quote delimiter when it is not glued to a word character,
    # so contractions and possessives ("don't", "dog's") are left intact.
    segments = re.split(r"""("[^"]*"|(?<!\w)'[^']*'(?!\w))""", string)

    # re.split yields empty strings when the text starts or ends with a
    # quoted run; keeping them would add stray spaces once the parts are
    # joined back together.
    parts = [part for part in (segment.strip() for segment in segments) if part]

    # Trim padding inside double quotes, whether it is on one side or both.
    rejoined = " ".join(re.sub(r'"\s*(.*?)\s*"', r'"\1"', part) for part in parts)

    # Pull punctuation back against the preceding word or closing quote.
    return re.sub(r'\s+([,:;?.!](?:\s|$))', r'\1', rejoined)
8 changes: 7 additions & 1 deletion src/programy/utils/files/filefinder.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,9 @@
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
"""

import logging
import os

from abc import ABCMeta, abstractmethod

class FileFinder(object):
Expand Down Expand Up @@ -54,6 +56,10 @@ def load_dir_contents(self, path_to_sets, subdir=False, extension=".txt"):
else:
filename = file[0]
filename = filename.upper()
collection[filename] = self.load_file_contents(file[1])

try:
collection[filename] = self.load_file_contents(file[1])
except Exception as e:
logging.error ("Failed to load file contents for file [%s]"%file[1])

return collection
2 changes: 1 addition & 1 deletion src/test/aiml_tests/train_tests/test_now_ask_me.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,4 +23,4 @@ def test_now_ask_me(self):
response = TrainAIMLTests.test_client.bot.ask_question("test", "daddy is great")
self.assertIsNotNone(response)
#TODO Sort out space in questions
self.assertEqual("Now you can ask me: \" Who IS GREAT \"? and \" What does my DADDY BE \"?", response)
self.assertEqual('Now you can ask me: "Who IS GREAT"? and "What does my DADDY BE"?', response)
2 changes: 1 addition & 1 deletion src/test/aiml_tests/train_tests/test_train_aiml.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ def test_train_pronoun(self):
self.assertIsNotNone(response)

#TODO Fix the spacing in quotes
self.assertEqual("Now you can ask me: \" Who LIKES TO SMOKE CIGARS \"? and \" What does my MOMMY LIKE \"?", response)
self.assertEqual('Now you can ask me: "Who LIKES TO SMOKE CIGARS"? and "What does my MOMMY LIKE"?', response)

response = TrainAIMLTests.test_client.bot.ask_question("test", "who likes to smoke cigars")
self.assertIsNotNone(response)
Expand Down
9 changes: 9 additions & 0 deletions src/test/parser/invalid.aiml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
<?xml version="1.0" encoding="UTF-8"?>
<aiml>
<topic name="">
<category>
<pattern>*
<template>RESPONSE</template>
</category>
</topic>
</aiml>
9 changes: 9 additions & 0 deletions src/test/parser/test_aiml_parser.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import unittest
import os
from xml.etree.ElementTree import ParseError

from programy.parser.aiml_parser import AIMLParser
Expand All @@ -18,6 +19,14 @@ def setUp(self):
self.parser = AIMLParser(supress_warnings=True, stop_on_invalid=True)
self.assertIsNotNone(self.parser)

def test_parse_from_file_valid(self):
    """A well-formed AIML file next to this test module parses without raising."""
    # Resolve against the module's absolute location: appending '/valid.aiml'
    # to os.path.dirname(__file__) yields '/valid.aiml' whenever __file__
    # carries no directory part.
    here = os.path.dirname(os.path.abspath(__file__))
    filename = os.path.join(here, 'valid.aiml')
    self.parser.parse_from_file(filename)

def test_parse_from_file_invalid(self):
    """A malformed AIML file must not make parse_from_file raise."""
    # Resolve against the module's absolute location: appending
    # '/invalid.aiml' to os.path.dirname(__file__) yields '/invalid.aiml'
    # whenever __file__ carries no directory part.
    here = os.path.dirname(os.path.abspath(__file__))
    filename = os.path.join(here, 'invalid.aiml')
    # No assertion is needed: the test fails if the call raises.
    self.parser.parse_from_file(filename)

def test_crud(self):
with self.assertRaises(ParseError) as raised:
self.parser.parse_from_text(
Expand Down
35 changes: 35 additions & 0 deletions src/test/parser/valid.aiml
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
<?xml version="1.0" encoding="UTF-8"?>
<aiml>
<!-- File: balance.aiml -->
<!-- -->
<!-- This AIML file is part of the Y-Bot knowledge base. -->
<!-- -->
<!-- Y-Bot is Copyright &copy; 2017 by Keith Sterling. -->
<!--
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
documentation files (the "Software"), to deal in the Software without restriction, including without limitation
the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-->

<category>
<pattern>
HELLO
</pattern>
<template>
<random>
<li>Hello!</li>
<li>Hi there!</li>
<li>Greetings!</li>
</random>
</template>
</category>

</aiml>

0 comments on commit 134eb73

Please sign in to comment.