Skip to content

Commit

Permalink
Merge pull request #1 from piskvorky/develop
Browse files (browse the repository at this point in the history)
catchup develop
  • Loading branch information
gojomo committed Jun 3, 2014
2 parents 0436558 + 668d701 commit 4c92610
Show file tree
Hide file tree
Showing 4 changed files with 13 additions and 11 deletions.
2 changes: 1 addition & 1 deletion docs/src/gensim_theme/layout.html
Original file line number | Diff line number | Diff line change
Expand Up @@ -80,7 +80,7 @@ <h1 class="h1gensim">
<div class="consulting-banner">
<h3><a href="http://radimrehurek.com/">Get Expert Help</a></h3>
<p>• machine learning, NLP, data mining</p>
<p>• custom system design, development, optimizations</p>
<p>• custom SW design, development, optimizations</p>
<p>• tech trainings &amp; IT consulting</p>
</div>
</div>
Expand Down
6 changes: 3 additions & 3 deletions gensim/models/ldamodel.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -391,8 +391,8 @@ def inference(self, chunk, collect_sstats=False):
sstats[:, ids] += numpy.outer(expElogthetad.T, cts / phinorm)

if len(chunk) > 1:
logger.info("%i/%i documents converged within %i iterations" %
(converged, len(chunk), self.iterations))
logger.debug("%i/%i documents converged within %i iterations" %
(converged, len(chunk), self.iterations))

if collect_sstats:
# This step finishes computing the sufficient statistics for the
Expand Down Expand Up @@ -518,7 +518,7 @@ def update(self, corpus, chunksize=None, decay=None, passes=None, update_every=N
logger.info("running %s LDA training, %s topics, %i passes over "
"the supplied corpus of %i documents, updating model once "
"every %i documents, evaluating perplexity every %i documents, "
"iterating %i with a convergence threshold of %i" %
"iterating %ix with a convergence threshold of %f" %
(updatetype, self.num_topics, passes, lencorpus,
updateafter, evalafter, iterations,
gamma_threshold))
Expand Down
14 changes: 8 additions & 6 deletions gensim/models/word2vec.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -805,18 +805,20 @@ def __iter__(self):


class LineSentence(object):
"""Simple format: one sentence = one line; words already preprocessed and separated by whitespace."""
def __init__(self, source):
"""Simple format: one sentence = one line; words already preprocessed and separated by whitespace.
source can be either a string or a file object
"""
`source` can be either a string or a file object.
Thus, one can use this for just plain files:
Example::
sentences = LineSentence('myfile.txt')
Or for compressed files:
Or for compressed files::
sentences = LineSentence('compressed_text.txt.bz2')
sentences = LineSentence('compressed_text.txt.gz')
sentences = LineSentence(bz2.BZ2File('compressed_text.bz2'))
"""
self.source = source

Expand Down
2 changes: 1 addition & 1 deletion gensim/models/word2vec_inner.pyx
Original file line number | Diff line number | Diff line change
Expand Up @@ -22,7 +22,7 @@ from scipy.linalg.blas import fblas
REAL = np.float32
ctypedef np.float32_t REAL_t

DEF MAX_SENTENCE_LEN = 1000
DEF MAX_SENTENCE_LEN = 10000

ctypedef void (*scopy_ptr) (const int *N, const float *X, const int *incX, float *Y, const int *incY) nogil
ctypedef void (*saxpy_ptr) (const int *N, const float *alpha, const float *X, const int *incX, float *Y, const int *incY) nogil
Expand Down

0 comments on commit 4c92610

Please sign in to comment.