Skip to content

Commit

Permalink
Merge pull request #1 from ziky90/summarization_fixes
Browse files Browse the repository at this point in the history
Consistency with gensim and PEP 8
  • Loading branch information
fbarrios committed Apr 27, 2015
2 parents 68dcc08 + 2425874 commit fad4a10
Show file tree
Hide file tree
Showing 8 changed files with 36 additions and 9 deletions.
5 changes: 5 additions & 0 deletions gensim/parsing/preprocessing.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html

import re
import string
import glob
Expand Down
4 changes: 4 additions & 0 deletions gensim/summarization/commons.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html

from gensim.summarization.graph import Graph

Expand Down
13 changes: 5 additions & 8 deletions gensim/summarization/graph.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,12 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html

from abc import ABCMeta, abstractmethod


class IGraph:
class IGraph(object):
""" Represents the interface or contract that the graph for TextRank
should implement.
"""
Expand All @@ -18,7 +22,6 @@ def nodes(self):
"""
pass


@abstractmethod
def edges(self):
"""
Expand All @@ -42,7 +45,6 @@ def neighbors(self, node):
"""
pass


@abstractmethod
def has_node(self, node):
"""
Expand All @@ -56,7 +58,6 @@ def has_node(self, node):
"""
pass


@abstractmethod
def add_node(self, node, attrs=None):
"""
Expand All @@ -75,7 +76,6 @@ def add_node(self, node, attrs=None):
"""
pass


@abstractmethod
def add_edge(self, edge, wt=1, label='', attrs=[]):
"""
Expand All @@ -98,7 +98,6 @@ def add_edge(self, edge, wt=1, label='', attrs=[]):
"""
pass


@abstractmethod
def has_edge(self, edge):
"""
Expand All @@ -112,7 +111,6 @@ def has_edge(self, edge):
"""
pass


@abstractmethod
def edge_weight(self, edge):
"""
Expand All @@ -126,7 +124,6 @@ def edge_weight(self, edge):
"""
pass


@abstractmethod
def del_node(self, node):
"""
Expand Down
4 changes: 4 additions & 0 deletions gensim/summarization/keywords.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html

from gensim.summarization.pagerank_weighted import pagerank_weighted_scipy as _pagerank
from gensim.summarization.textcleaner import clean_text_by_word as _clean_text_by_word
Expand Down
4 changes: 4 additions & 0 deletions gensim/summarization/pagerank_weighted.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html

from numpy import empty as empty_matrix
from scipy.sparse import csr_matrix
Expand Down
4 changes: 4 additions & 0 deletions gensim/summarization/summarizer.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html

from gensim.summarization.pagerank_weighted import pagerank_weighted_scipy as _pagerank
from gensim.summarization.textcleaner import clean_text_by_sentences as _clean_text_by_sentences
Expand Down
5 changes: 5 additions & 0 deletions gensim/summarization/syntactic_unit.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html


class SyntacticUnit(object):

Expand Down
6 changes: 5 additions & 1 deletion gensim/summarization/textcleaner.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html

from gensim.summarization.syntactic_unit import SyntacticUnit
from gensim.parsing.preprocessing import preprocess_documents
Expand Down Expand Up @@ -98,7 +102,7 @@ def clean_text_by_word(text):
original_words = list(tokenize(text_without_acronyms, to_lower=True, deacc=True))
filtered_words = [join_words(word_list, "") for word_list in preprocess_documents(original_words)]
if HAS_PATTERN:
tags = tag(join_words(original_words)) # tag needs the context of the words in the text
tags = tag(join_words(original_words)) # tag needs the context of the words in the text
else:
tags = None
units = merge_syntactic_units(original_words, filtered_words, tags)
Expand Down

0 comments on commit fad4a10

Please sign in to comment.