Skip to content

Commit

Permalink
Copied csr_matrix_to_dict() from ops.py
Browse files Browse the repository at this point in the history
  • Loading branch information
mikeqfu committed Nov 26, 2019
1 parent 78b446f commit ad191dd
Showing 1 changed file with 16 additions and 0 deletions.
16 changes: 16 additions & 0 deletions pyhelpers/text.py
Expand Up @@ -45,3 +45,19 @@ def find_matched_str(x, lookup_list):
for y in lookup_list:
if re.match(x, y, re.IGNORECASE):
return y


def csr_matrix_to_dict(csr_matrix, vectorizer):
    """Convert a compressed sparse row matrix to a frame of per-row dicts.

    Each row of ``csr_matrix`` becomes one dictionary that maps a feature
    name (looked up by column index in the vectorizer's feature names) to
    the value stored for that column in that row. Only entries explicitly
    stored in the sparse structure appear in the dictionaries.

    :param csr_matrix: sparse matrix exposing ``indptr``, ``indices`` and
        ``data`` attributes (CSR layout)
    :param vectorizer: object exposing ``get_feature_names_out()`` or the
        legacy ``get_feature_names()``; presumably the fitted scikit-learn
        vectorizer that produced ``csr_matrix`` -- verify against callers
    :return: single-column data frame named ``'word_count'`` with one
        dictionary per matrix row
    :rtype: pandas.DataFrame
    """
    # Kept as a local import, as in the original, so that the block does not
    # depend on the module's top-level imports.
    import pandas as pd

    # ``get_feature_names()`` was removed from scikit-learn vectorizers in
    # favour of ``get_feature_names_out()``; prefer the new name and fall
    # back to the old one for older vectorizer objects.
    get_names = getattr(vectorizer, 'get_feature_names_out', None)
    if get_names is None:
        get_names = vectorizer.get_feature_names
    names = get_names()
    # An ndarray of names is turned into a plain list so that the dictionary
    # keys are built-in ``str`` objects, as with the legacy method.
    features = names.tolist() if hasattr(names, 'tolist') else list(names)

    indptr = csr_matrix.indptr
    indices = csr_matrix.indices
    data = csr_matrix.data

    # Consecutive ``indptr`` entries delimit the slice of ``indices``/``data``
    # that belongs to each row.
    dict_data = [
        dict(zip((features[col] for col in indices[start:end]),
                 data[start:end]))
        for start, end in zip(indptr[:-1], indptr[1:])
    ]

    # An explicit object dtype keeps the result well-defined even when the
    # matrix has no rows.
    mat_dict = pd.Series(dict_data, dtype=object).to_frame('word_count')

    return mat_dict

0 comments on commit ad191dd

Please sign in to comment.