In [1]:
tokens = [(3, 'new', 'JJ'), (5, 'ambitious', 'JJ'), (6, 'framework', 'NN'), (8, 'abstractive', 'JJ'), (9, 'summarization', 'NN'), (16, 'content', 'NN'), (19, 'summary', 'JJ'), (22, 'sentences', 'NNS'), (27, 'abstract', 'JJ'), (28, 'representation', 'NN'), (31, 
'source', 'NN'), (32, 'documents', 'NNS'), (35, 'abstract', 'JJ'), (36, 'representation', 'NN'), (37, 'relies', 'NNS'), (40, 'concept', 'NN'), (42, 'information', 'NN'), (43, 'items', 'NNS'), (45, 'init', 'NN'), (53, 'smallest', 'JJS'), (54, 'element', 'NN'), (56, 'coherent', 'JJ'), (57, 'information', 'NN'), (60, 'text', 'NN'), (63, 
'sentence', 'NN'), (66, 'framework', 'NN'), (67, 'differs', 'NNS'), (69, 'previous', 'JJ'), (70, 'abstractive', 'JJ'), (71, 'summarization', 'NN'), (72, 'models', 'NNS'), (76, 'semantic', 'JJ'), (77, 'analysis', 'NN'), (80, 'text', 'NN'), (85, 'first', 'JJ'), (86, 'attempt', 'NN'), (91, 'system', 'NN'), (94, 'framework', 'NN'), (98, 'evaluation', 'NN'), (99, 'results', 'NNS'), (103, 'tac', 'JJ'), (108, 'related', 'JJ'), (109, 'work', 'NN'), (118, 'automatic', 'JJ'), (119, 'summarization', 'NN'), (120, 'domain', 'NN')]

In [2]:
from collections import Counter

In [3]:
NOUN_GROUP = ['NN', 'NNS', 'NNP', 'NNPS'] # 4
PRONOUN_GROUP = ['PRP', 'PRP$', 'WP', 'WP$'] # 4
VERB_GROUP = ['VB', 'VBD', 'VBG', 'VBN', 'VBP', 'VBZ'] # 6
ADJECTIVE_GROUP = ['JJ', 'JJR', 'JJS'] # 3
ADVERB_GROUP = ['RB', 'RBR', 'RBS', 'WRB']
PREPOSITION_GROUP = ['IN']
CONJUNCTION_GROUP = ['CC', 'IN']
INTERJECTION_GROUP = ['UH']

In [4]:
class LexicalGraph(object):
    def __init__(self, filtered_tokens, N):
        self.total_cnt = len(filtered_tokens)
        words = [t[1] for t in filtered_tokens]
        unique_words = list(Counter(words))
        self.unique_cnt = len(unique_words)
        self.T = self.unique_cnt // 3
        self.conversion = {unique_words[i] : i for i in range(len(unique_words))}
        self.V = {self.conversion[v] : 1 for v in unique_words}
        self.E = {self.conversion[v] : [] for v in unique_words}
        self.jump_factor = 0.85
        self.threshold = 0.0001

        for i in range(self.total_cnt):
            token = filtered_tokens[i]
            for j in range(N):
                if i + j + 1 >= self.total_cnt:
                    break
                else:
                    next_token = filtered_tokens[i + j + 1]
                    if token[0] + N < next_token[0]:
                        break
                    else:
                        idx = self.conversion[token[1]]
                        next_idx = self.conversion[next_token[1]]
                        if next_idx not in self.E[idx]:
                            self.E[idx].append(next_idx)
                            self.E[next_idx].append(idx)
        
    def score_of(self, word):
        neighbor_list = self.E[word]
        temp = 0
        for neighbor in neighbor_list:
            temp += self.V[neighbor] / len(self.E[neighbor])
        return (1 - self.jump_factor) + self.jump_factor * temp

    def calculate_textrank(self):
        flags = [False for i in range(self.unique_cnt)]
        i = 0
        iter_cnt = 0
        while not all(flags):
            prev_score = self.V[i]
            curr_score = self.score_of(i)
            self.V[i] = curr_score
            if abs(prev_score - curr_score) < self.threshold:
                flags[i] = True
            i = (i + 1) % self.unique_cnt
            if i == 0:
                iter_cnt += 1
        return iter_cnt

In [5]:
graph = LexicalGraph(tokens, 10)

In [6]:
print(graph.total_cnt, graph.unique_cnt, graph.T)

46 37 12


In [7]:
iter_cnt = graph.calculate_textrank()
iter_cnt

22

In [8]:
graph.V

{0: 0.6266127337812242,
 1: 0.8653573861899885,
 2: 2.1779736607070377,
 3: 1.5362016751640941,
 4: 1.6074497949094422,
 5: 0.7702889716369276,
 6: 1.4594912517827767,
 7: 0.7702837820076801,
 8: 1.108444684835927,
 9: 1.1084386701789537,
 10: 0.807068410207731,
 11: 0.8070634017084651,
 12: 1.0636092043042897,
 13: 1.0075872735130404,
 14: 1.9129548977010566,
 15: 1.0066333208944118,
 16: 0.7062147180943997,
 17: 0.7273142180847985,
 18: 1.0187665258066863,
 19: 1.0187610021722502,
 20: 1.110987395054123,
 21: 0.696198165616723,
 22: 0.8182557048551224,
 23: 1.2894061839668525,
 24: 0.8145496762014492,
 25: 0.5440923014134097,
 26: 0.9272582181333997,
 27: 1.0,
 28: 1.0,
 29: 0.38141840368472146,
 30: 1.0,
 31: 1.0,
 32: 0.15000000000000002,
 33: 1.0,
 34: 1.0,
 35: 0.6569183164684528,
 36: 0.6569123387779301}