piskvorky · tmylk · Sep 25, 2016 · Sep 24, 2016 · Sep 25, 2016
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -2,8 +2,10 @@ Changes
 =======
 
 0.13.2, 2016-08-19
-
-* wordtopics has changed to word_topics in ldamallet, and fixed issue #764. (@bhargavvader, [#771](https://github.com/RaRe-Technologies/gensim/pull/771)) 
+* export_phrases in Phrases model changed. Fixed issue #794 and added test cases in test/test_phrases.py (@AadityaJ,
+[#879](https://github.com/RaRe-Technologies/gensim/pull/879))
+  - bigram construction can now support multiple bigrams within one sentence
+* wordtopics has changed to word_topics in ldamallet, and fixed issue #764. (@bhargavvader, [#771](https://github.com/RaRe-Technologies/gensim/pull/771))
   - assigning wordtopics value of word_topics to keep backward compatibility, for now
 * topics, topn parameters changed to num_topics and num_words in show_topics() and print_topics()(@droudy, [#755](https://github.com/RaRe-Technologies/gensim/pull/755))
   - In hdpmodel and dtmmodel
@@ -45,7 +47,7 @@ Changes
 * Control whether to use lowercase for computing word2vec accuracy. (@alantian, #607)
 * Easy import of GloVe vectors using Gensim (Manas Ranjan Kar, #625)
   - Allow easy port of GloVe vectors into Gensim
-  - Standalone script with command line arguments, compatible with Python>=2.6 
+  - Standalone script with command line arguments, compatible with Python>=2.6
   - Usage: python -m gensim.scripts.glove2word2vec -i glove_vectors.txt -o output_word2vec_compatible.txt
 * Add `similar_by_word()` and `similar_by_vector()` to word2vec (@isohyt, #381)
 * Convenience method for similarity of two out of training sentences to doc2vec (@ellolo, #707)

diff --git a/gensim/models/phrases.py b/gensim/models/phrases.py
@@ -214,6 +214,8 @@ def export_phrases(self, sentences):
                         if score > threshold:
                             yield (b' '.join((word_a, word_b)), score)
                             last_bigram = True
+                            continue
+                        last_bigram = False
 
     def __getitem__(self, sentence):
         """

diff --git a/gensim/test/test_phrases.py b/gensim/test/test_phrases.py
@@ -32,7 +32,8 @@
     ['trees'],
     ['graph', 'trees'],
     ['graph', 'minors', 'trees'],
-    ['graph', 'minors', 'survey']
+    ['graph', 'minors', 'survey'],
+    ['graph', 'minors', 'survey','human','interface'] #test bigrams within same sentence
 ]
 
 
@@ -58,34 +59,37 @@ def testBigramConstruction(self):
                 bigram2_seen = True
             if bigram1_seen and bigram2_seen:
                 break
-                
+
         self.assertTrue(bigram1_seen and bigram2_seen)
 
         # check the same thing, this time using single doc transformation
+        # last sentence should contain both graph_minors and human_interface
         self.assertTrue(u'response_time' in bigram[sentences[1]])
         self.assertTrue(u'response_time' in bigram[sentences[4]])
+        self.assertTrue(u'graph_minors' in bigram[sentences[-3]])
         self.assertTrue(u'graph_minors' in bigram[sentences[-2]])
         self.assertTrue(u'graph_minors' in bigram[sentences[-1]])
+        self.assertTrue(u'human_interface' in bigram[sentences[-1]])
 
     def testExportPhrases(self):
         """Test Phrases bigram export_phrases functionality."""
         bigram = Phrases(sentences, min_count=1, threshold=1)
-        
+
         # with this setting we should get response_time and graph_minors
         bigram1_seen = False
         bigram2_seen = False
-        
+
         for phrase, score in bigram.export_phrases(sentences):
             if not bigram1_seen and b'response time' == phrase:
                 bigram1_seen = True
             elif not bigram2_seen and b'graph minors' == phrase:
                 bigram2_seen = True
             if bigram1_seen and bigram2_seen:
                 break
-        
+
         self.assertTrue(bigram1_seen)
         self.assertTrue(bigram2_seen)
-        
+
     def testBadParameters(self):
         """Test the phrases module with bad parameters."""
         # should fail with something less or equal than 0