In [1]:
offset_dict = {'deep learning methods': [2], 'layers': [6], 'stratified representation': [11], 'data': [13], 'state-of-art results': [18], 'several domains': [21], 'deep learning model designs': [28], 'architectures': [30], 'context': [35], 'natural language processing': [39], 'survey': [45, 143], 'brief description': [49], 'advances': [52], 'area': [58], 'deep generative modeling': [62], 'work': [65], 'papers': [70], 'onwards': [73], 'paper': [77], 'many deep learning models': [84], 'generation': [91], 'text': [93], 'various models': [100], 'detailed understanding': [107], 'future': [114], 'text generation models': [118], 'deep learning': [121], 'dl approaches': [126], 'different application domains': [136], 'nlp': [138]}

In [2]:
topic_assign_dict = {'deep learning methods': 0, 'layers': 1, 'stratified representation': 2, 'data': 3, 'state-of-art results': 4, 'several domains': 5, 'deep learning model designs': 0, 'architectures': 6, 'context': 7, 'natural language processing': 8, 'survey': 9, 'brief description': 10, 'advances': 11, 'area': 12, 'deep generative modeling': 0, 'work': 13, 'papers': 14, 'onwards': 15, 'paper': 14, 'many deep learning models': 0, 'generation': 16, 'text': 17, 'various models': 18, 'detailed understanding': 19, 'future': 20, 'text generation models': 16, 'deep learning': 0, 'dl approaches': 0, 'different application domains': 5, 'nlp': 8}

In [3]:
first_occurence = {0: 'deep learning methods', 1: 'layers', 2: 'stratified representation', 3: 'data', 4: 'state-of-art results', 5: 'several domains', 6: 'architectures', 7: 'context', 8: 'natural language processing', 9: 'survey', 10: 'brief description', 11: 'advances', 12: 'area', 13: 'work', 14: 'papers', 15: 'onwards', 16: 'generation', 17: 'text', 18: 'various models', 19: 'detailed understanding', 20: 'future'}

In [4]:
from collections import Counter
import math

In [5]:
class MPGraph(object):
    def __init__(self, offset_dict, topic_assign_dict, first_occurence):
        self.unique_cnt = len(offset_dict)
        unique_words = list(offset_dict.keys())
        self.conversion = {unique_words[i] : i for i in range(self.unique_cnt)}
        self.V = {self.conversion[v] : 1 for v in unique_words}
        self.M = [[0 for _ in range(self.unique_cnt)] for _ in range(self.unique_cnt)]
        self.alpha = 1.1
        self.damping_factor = 0.85
        self.threshold = 0.0001

        for i in range(self.unique_cnt):
            word_i = unique_words[i]
            for j in range(i + 1, self.unique_cnt):
                word_j = unique_words[j]
                if topic_assign_dict[word_i] != topic_assign_dict[word_j]:
                    weight = 0
                    for p_i in offset_dict[word_i]:
                        for p_j in offset_dict[word_j]:
                            # print('c_i: {}, c_j: {}, p_i: {}, p_j: {}'.format(word_i, word_j, p_i, p_j))
                            weight += 1 / abs(p_i - p_j)
                    # print('weight: ', weight, '\n')
                    self.M[i][j] = weight
                    self.M[j][i] = weight

        for i in range(len(first_occurence)):
            word_i = first_occurence[i]
            i_idx = self.conversion[word_i]
            p_i = offset_dict[word_i][0]
            for j in range(len(first_occurence)):
                if i == j:
                    continue
                word_j = first_occurence[j]
                j_idx = self.conversion[word_j]
                temp = 0
                for k in range(self.unique_cnt):
                    word_k = unique_words[k]
                    if topic_assign_dict[word_j] == topic_assign_dict[word_k] and word_j != word_k:
                        temp += self.M[k][i_idx]
                        print('here!', temp)
                    print('i: {}, j: {}, k: {}, temp: {}'.format(word_i, word_j, word_k, temp))
                self.M[i_idx][j_idx] += self.alpha * math.exp(1 / p_i) * temp

        self.conversion = {val : key for key, val in self.conversion.items()}

    def score_of(self, i):
        temp = 0
        for j in range(self.unique_cnt):
            if self.M[j][i] != 0:
                temp += self.M[i][j] * self.V[j] / sum(self.M[j])
        return (1 - self.damping_factor) + self.damping_factor * temp

    def calculate_textrank(self):
        flags = [False for _ in range(self.unique_cnt)]
        i = 0
        iter_cnt = 0
        while not all(flags):
            prev_score = self.V[i]
            curr_score = self.score_of(i)
            self.V[i] = curr_score
            if abs(prev_score - curr_score) < self.threshold:
                flags[i] = True
            i = (i + 1) % self.unique_cnt
            if i == 0:
                iter_cnt += 1
        return iter_cnt

    def get_keyphrases(self, N):
        kw = list(sorted(self.V.items(), key=lambda x : x[1], reverse=True)[:N])
        keywords = []
        keywords_score = []
        for t in kw:
            keywords.append(self.conversion[t[0]])
            keywords_score.append(round(t[1], 4))
        return keywords, keywords_score

In [6]:
graph = MPGraph(offset_dict, topic_assign_dict, first_occurence)

i: deep learning methods, j: layers, k: deep learning methods, temp: 0
i: deep learning methods, j: layers, k: layers, temp: 0
i: deep learning methods, j: layers, k: stratified representation, temp: 0
i: deep learning methods, j: layers, k: data, temp: 0
i: deep learning methods, j: layers, k: state-of-art results, temp: 0
i: deep learning methods, j: layers, k: several domains, temp: 0
i: deep learning methods, j: layers, k: deep learning model designs, temp: 0
i: deep learning methods, j: layers, k: architectures, temp: 0
i: deep learning methods, j: layers, k: context, temp: 0
i: deep learning methods, j: layers, k: natural language processing, temp: 0
i: deep learning methods, j: layers, k: survey, temp: 0
i: deep learning methods, j: layers, k: brief description, temp: 0
i: deep learning methods, j: layers, k: advances, temp: 0
i: deep learning methods, j: layers, k: area, temp: 0
i: deep learning methods, j: layers, k: deep generative modeling, temp: 0
i: deep learning methods, 

In [7]:
# graph.M

In [8]:
graph.calculate_textrank()
graph.V

{0: 0.6724202483953781,
 1: 0.9068562624901729,
 2: 1.1123216424179656,
 3: 1.136587426447237,
 4: 1.0901468126508929,
 5: 1.0935406566788792,
 6: 0.9751455210065306,
 7: 1.4860940737819077,
 8: 1.128538492455347,
 9: 1.0683461022604674,
 10: 1.8988095118364878,
 11: 1.1802607892883192,
 12: 1.1747499099342509,
 13: 1.2563963476700684,
 14: 0.9641153392656773,
 15: 1.4084980610922093,
 16: 1.1398988463509747,
 17: 1.37123628840477,
 18: 0.8312199632199917,
 19: 0.7480033088092131,
 20: 1.2082857446192174,
 21: 1.2246325930061746,
 22: 0.9976856104063598,
 23: 1.016571822447009,
 24: 1.2611127791355243,
 25: 0.9155663176671401,
 26: 0.7429828393324167,
 27: 0.6003374446952003,
 28: 0.857240544239227,
 29: 0.8605976638566009}

In [9]:
kw, kw_s = graph.get_keyphrases(5)
print(kw)
print(kw_s)

['survey', 'architectures', 'work', 'onwards', 'future']
[1.8988, 1.4861, 1.4085, 1.3712, 1.2611]
