Skip to content

Commit

Permalink
correct comments and imports
Browse files Browse the repository at this point in the history
  • Loading branch information
JenZhao committed May 8, 2017
1 parent ffaa324 commit b77bceb
Showing 1 changed file with 6 additions and 3 deletions.
9 changes: 6 additions & 3 deletions hidi/factorization.py
@@ -1,5 +1,6 @@
import warnings
import numpy as np
import pandas as pd
import collections
from numpy.random import permutation
from sklearn.decomposition import TruncatedSVD
Expand All @@ -14,7 +15,9 @@
class W2VStringTransform(Transform):
"""
Takes a pandas Dataframe and transforms it into a
string
list of sentences. Each sentence is a list
of words/items (unicode strings) that will be used for
training.
:param n_shuffles: The number of shuffles for the
`item_id`.
Expand Down Expand Up @@ -108,7 +111,7 @@ def transform(self, **kwargs):

class W2VBuildDatasetTransform(Transform):
"""
Takes a string of list of items(words) and tokenize it.
Takes a whitespace-separated string of items (words) and tokenizes it.
:param vocabulary_size: top n most frequent items(words)
:type vocabulary_size: int
"""
Expand All @@ -130,7 +133,7 @@ def transform(self, words, **kwargs):
words = words.split()
count = [['UNK', -1]]
count_words = collections.Counter(words)
count.extend(count_words.most_common((self.vocabulary_size-1)))
count.extend(count_words.most_common((self.vocabulary_size - 1)))
dictionary = dict()
for word, _ in count:
dictionary[word] = len(dictionary)
Expand Down

0 comments on commit b77bceb

Please sign in to comment.