Merge pull request #60 from jsvine/pr-55

Incorporate PR #55 and related testing
jsvine · Mar 24, 2017 · c11601b · c11601b
2 parents d3db848 + c1e27cc
commit c11601b
Show file tree

Hide file tree

Showing 4 changed files with 54 additions and 30 deletions.
diff --git a/README.md b/README.md
@@ -61,7 +61,7 @@ Notes:
 
 - By default, the `make_sentence` method tries, a maximum of 10 times per invocation, to make a sentence that doesn't overlap too much with the original text. If it is successful, the method returns the sentence as a string. If not, it returns `None`. To increase or decrease the number of attempts, use the `tries` keyword argument, e.g., call `.make_sentence(tries=100)`.
 
-- By default, `markovify.Text` tries to generate sentences that don't simply regurgitate chunks of the original text. The default rule is to suppress any generated sentences that exactly overlaps the original text by 15 words or 70% of the sentence's word count. You can change this rule by passing `max_overlap_ratio` and/or `max_overlap_total` to the `make_sentence` method.
+- By default, `markovify.Text` tries to generate sentences that don't simply regurgitate chunks of the original text. The default rule is to suppress any generated sentence that exactly overlaps the original text by 15 words or 70% of the sentence's word count. You can change this rule by passing `max_overlap_ratio` and/or `max_overlap_total` to the `make_sentence` method. Alternatively, you can disable this check entirely by passing `test_output=False`.
 
 ## Advanced Usage
 
@@ -197,5 +197,6 @@ Many thanks to the following GitHub users for contributing code and/or ideas:
 - [@wodim](https://github.com/wodim)
 - [@eh11fx](https://github.com/eh11fx)
 - [@ammgws](https://github.com/ammgws)
+- [@OtakuMegane](https://github.com/OtakuMegane)
 
 Developed at [BuzzFeed](https://www.buzzfeed.com).
diff --git a/markovify/text.py b/markovify/text.py
@@ -93,9 +93,9 @@ def test_sentence_input(self, sentence):
         """
         reject_pat = re.compile(r"(^')|('$)|\s'|'\s|[\"(\(\)\[\])]")
         # Decode unicode, mainly to normalize fancy quotation marks
-        if sentence.__class__.__name__ == "str":
+        if sentence.__class__.__name__ == "str": # pragma: no cover
             decoded = sentence
-        else:
+        else: # pragma: no cover
             decoded = unidecode(sentence)
         # Sentence shouldn't contain problematic characters
         if re.search(reject_pat, decoded): return False
@@ -143,10 +143,18 @@ def make_sentence(self, init_state=None, **kwargs):
         If `init_state` (a tuple of `self.chain.state_size` words) is not specified,
         this method chooses a sentence-start at random, in accordance with
         the model.
+        
+        If `test_output` is set as False then the `test_sentence_output` check
+        will be skipped.
+        
+        If `max_words` is specified, the word count for the sentence will be
+        evaluated against the provided limit.
         """
         tries = kwargs.get('tries', DEFAULT_TRIES)
         mor = kwargs.get('max_overlap_ratio', DEFAULT_MAX_OVERLAP_RATIO)
         mot = kwargs.get('max_overlap_total', DEFAULT_MAX_OVERLAP_TOTAL)
+        test_output = kwargs.get('test_output', True)
+        max_words = kwargs.get('max_words', None)
 
         for _ in range(tries):
             if init_state != None:
@@ -157,7 +165,12 @@ def make_sentence(self, init_state=None, **kwargs):
             else:
                 prefix = []
             words = prefix + self.chain.walk(init_state)
-            if self.test_sentence_output(words, mor, mot):
+            if max_words != None and len(words) > max_words:
+                continue
+            if test_output:
+                if self.test_sentence_output(words, mor, mot):
+                    return self.word_join(words)
+            else:
                 return self.word_join(words)
         return None
 

diff --git a/test/test_basic.py b/test/test_basic.py
@@ -6,31 +6,31 @@
 def get_sorted(chain_json):
     return sorted(chain_json, key=operator.itemgetter(0))
 
-class MarkovifyTest(unittest.TestCase):
+with open(os.path.join(os.path.dirname(__file__), "texts/sherlock.txt")) as f:
+    sherlock = f.read()
+    sherlock_model = markovify.Text(sherlock)
 
-    def setUp(self):
-        with open(os.path.join(os.path.dirname(__file__), "texts/sherlock.txt")) as f:
-            self.sherlock = f.read()
+class MarkovifyTest(unittest.TestCase):
 
     def test_text_too_small(self):
         text = u"Example phrase. This is another example sentence."
         text_model = markovify.Text(text)
         assert(text_model.make_sentence() == None)
 
     def test_sherlock(self):
-        text_model = markovify.Text(self.sherlock)
+        text_model = sherlock_model
         sent = text_model.make_sentence()
         assert(len(sent) != 0)
 
     def test_json(self):
-        text_model = markovify.Text(self.sherlock)
+        text_model = sherlock_model
         json_model = text_model.to_json()
         new_text_model = markovify.Text.from_json(json_model)
         sent = text_model.make_sentence()
         assert(len(sent) != 0)
 
     def test_chain(self):
-        text_model = markovify.Text(self.sherlock)
+        text_model = sherlock_model
         chain_json = text_model.chain.to_json()
 
         stored_chain = markovify.Chain.from_json(chain_json)
@@ -43,38 +43,48 @@ def test_chain(self):
         assert(len(sent) != 0)
 
     def test_make_sentence_with_start(self):
-        text_model = markovify.Text(self.sherlock)
+        text_model = sherlock_model
         start_str = "Sherlock Holmes"
         sent = text_model.make_sentence_with_start(start_str)
         assert(sent != None)
         assert(start_str == sent[:len(start_str)])
 
     def test_make_sentence_with_start_one_word(self):
-        text_model = markovify.Text(self.sherlock)
+        text_model = sherlock_model
         start_str = "Sherlock"
         sent = text_model.make_sentence_with_start(start_str)
         assert(sent != None)
         assert(start_str == sent[:len(start_str)])
 
     def test_make_sentence_with_start_three_words(self):
         start_str = "Sherlock Holmes was"
-        text_model = markovify.Text(self.sherlock)
+        text_model = sherlock_model
         try:
             text_model.make_sentence_with_start(start_str)
             assert(False)
         except markovify.text.ParamError:
             assert(True)
-        text_model = markovify.Text(self.sherlock, state_size=3)
+        text_model = markovify.Text(sherlock, state_size=3)
         text_model.make_sentence_with_start(start_str)
         text_model.make_sentence_with_start("Sherlock")
 
     def test_short_sentence(self):
-        text_model = markovify.Text(self.sherlock)
+        text_model = sherlock_model
         sent = None
         while sent == None:
             sent = text_model.make_short_sentence(45)
         assert len(sent) < 45
 
+    def test_dont_test_output(self):
+        text_model = sherlock_model
+        sent = text_model.make_sentence(test_output=False)
+        assert sent is not None 
+
+    def test_max_words(self):
+        text_model = sherlock_model
+        sent = text_model.make_sentence(max_words=0)
+        assert sent is None 
+
     def test_newline_text(self):
         with open(os.path.join(os.path.dirname(__file__), "texts/senate-bills.txt")) as f:
             model = markovify.NewlineText(f.read())

diff --git a/test/test_combine.py b/test/test_combine.py
@@ -6,32 +6,32 @@
 def get_sorted(chain_json):
     return sorted(chain_json, key=operator.itemgetter(0))
 
-class MarkovifyTest(unittest.TestCase):
+with open(os.path.join(os.path.dirname(__file__), "texts/sherlock.txt")) as f:
+    sherlock = f.read()
+    sherlock_model = markovify.Text(sherlock)
 
-    def setUp(self):
-        with open(os.path.join(os.path.dirname(__file__), "texts/sherlock.txt")) as f:
-            self.sherlock = f.read()
+class MarkovifyTest(unittest.TestCase):
 
     def test_simple(self):
-        text_model = markovify.Text(self.sherlock)
+        text_model = sherlock_model
         combo = markovify.combine([ text_model, text_model ], [ 0.5, 0.5 ])
         assert(combo.chain.model == text_model.chain.model)
 
     def test_double_weighted(self):
-        text_model = markovify.Text(self.sherlock)
+        text_model = sherlock_model
         combo = markovify.combine([ text_model, text_model ])
         assert(combo.chain.model != text_model.chain.model)
 
     def test_combine_chains(self):
-        chain = markovify.Text(self.sherlock).chain
+        chain = sherlock_model.chain
         combo = markovify.combine([ chain, chain ])
 
     def test_combine_dicts(self):
-        _dict = markovify.Text(self.sherlock).chain.model
+        _dict = sherlock_model.chain.model
         combo = markovify.combine([ _dict, _dict ])
 
     def test_combine_lists(self):
-        _list = list(markovify.Text(self.sherlock).chain.model.items())
+        _list = list(sherlock_model.chain.model.items())
         combo = markovify.combine([ _list, _list ])
 
     def test_bad_types(self):
@@ -40,19 +40,19 @@ def test_bad_types(self):
 
     def test_bad_weights(self):
         with self.assertRaises(Exception) as context:
-            text_model = markovify.Text(self.sherlock)
+            text_model = sherlock_model
             combo = markovify.combine([ text_model, text_model ], [ 0.5  ])
 
     def test_mismatched_state_sizes(self):
         with self.assertRaises(Exception) as context:
-            text_model_a = markovify.Text(self.sherlock, state_size=2)
-            text_model_b = markovify.Text(self.sherlock, state_size=3)
+            text_model_a = markovify.Text(sherlock, state_size=2)
+            text_model_b = markovify.Text(sherlock, state_size=3)
             combo = markovify.combine([ text_model_a, text_model_b ])
 
     def test_mismatched_model_types(self):
         with self.assertRaises(Exception) as context:
-            text_model_a = markovify.Text(self.sherlock)
-            text_model_b = markovify.NewlineText(self.sherlock)
+            text_model_a = sherlock_model
+            text_model_b = markovify.NewlineText(sherlock)
             combo = markovify.combine([ text_model_a, text_model_b ])
 
 if __name__ == '__main__':