In [1]:
import networkx as nx
import matplotlib.pyplot as plt
from pyvis.network import Network

## Data Preprocessing

In [2]:
'''
#index ---- index id of this author
#n ---- name  (separated by semicolons)
#a ---- affiliations  (separated by semicolons)
#pc ---- the count of published papers of this author
#cn ---- the total number of citations of this author
#hi ---- the H-index of this author
#pi ---- the P-index with equal A-index of this author
#upi ---- the P-index with unequal A-index of this author
#t ---- research interests of this author  (separated by semicolons)
'''

with open('data/AMiner-Author.txt', 'r') as file:
    data = file.read().split('\n\n')
    
author_dict={}    
detail = ['index','n', 'a', 'pc', 'cn', 'hi', 'pi', 'upi', 't']
for auth in data:
    info = auth.split('\n')
    if info[0] is '':
        break
    index = info[0].split(' ')[1]
    author_dict[index]={}
    for num,each in enumerate(detail):
        author_dict[index][each] = info[num].replace('#'+each+' ', '')
    author_dict[index]['a'] = author_dict[index]['a'].split(', ')
    author_dict[index]['t'] = author_dict[index]['t'].split(';')
    author_dict[index]['field'] = []
    author_dict[index]['field']+=([item for field in author_dict[index]['t'] for item in field.split(' ')])

In [3]:
coauthors = []
with open('data/AMiner-Coauthor.txt', 'r') as coauthorfile:
    for line in coauthorfile:
        author1, author2, count = line[1:].split('\t')
        coauthors.append((author1, author2, int(count)))

In [24]:
'''
#index ---- index id of this paper
#* ---- paper title
#@ ---- authors (separated by semicolons)
#o ---- affiliations (separated by semicolons, and each affiliaiton corresponds to an author in order)
#t ---- year
#c ---- publication venue
#% ---- the id of references of this paper (there are multiple lines, with each indicating a reference)
#! ---- abstract
'''

with open('data/Aminer-Paper.txt', 'r') as file:
    data = file.read().split('\n\n')
    
paper_dict={}    
detail = ['index','*', '@', 'o', 't', 'c']
for paper in data:
    info = paper.split('\n')
    if info[0] is '':
        break
    index = info[0].split(' ')[1]
    paper_dict[index]={}
    for num,each in enumerate(detail):
        paper_dict[index][each] = info[num].replace('#'+each+' ', '')
    paper_dict[index]['@'] = paper_dict[index]['@'].split(';')
    paper_dict[index]['%']=[]
    paper_dict[index]['!']=[]
    for each in info[6:]:
        if each[0:2] == '#!':
            paper_dict[index]['!'].append(each.split(' ')[1])
        else:
            paper_dict[index]['%'].append(each.split(' ')[1])



In [5]:
'''Check dictionary size correctness'''
print('length of author:',len(author_dict))
print('length of paper:',len(paper_dict))

length of author: 1712433
length of paper: 2092356


## Visualization

In [6]:
# Credit: https://gist.github.com/quadrismegistus/92a7fba479fc1e7d2661909d19d4ae7e

def visualize(networkx_graph, name):
    pyvis_graph = Network(height=800, width=800, notebook=True)
    for node,node_attrs in networkx_graph.nodes(data=True):
        pyvis_graph.add_node(node,**node_attrs)

    # for each edge and its attributes in the networkx graph
    for source,target,edge_attrs in networkx_graph.edges(data=True):
        # if value/width not specified directly, and weight is specified, set 'value' to 'weight'
        if not 'value' in edge_attrs and not 'width' in edge_attrs and 'weight' in edge_attrs:
            # place at key 'value' the weight of the edge
            edge_attrs['value']=edge_attrs['weight']
        # add the edge
        pyvis_graph.add_edge(source,target,**edge_attrs)

    return pyvis_graph.show('graphs/' + name)

In [7]:
'''for weighted graph'''
def make_graph(coauth_tuples):
    G = nx.Graph()
    for coauthor in coauth_tuples:
        G.add_edge(coauthor[0], coauthor[1], weight=coauthor[2])
    return G

In [45]:
'''build edge_list for specific distance for paper'''

def paper_add_edge(paper_id, distance, distance_count):
    edge_list = []
    if distance == distance_count:
        return
    distance_count += 1
    for node in paper_dict[paper_id]['%']:
        edge_list.append((paper_id, node))
        if add_edge(node, distance, distance_count) is not None:
            edge_list += paper_add_edge(node, distance, distance_count)
    return edge_list

def paper_citation_edge_list(paper_id, distance):
    edge_list = []
    distance_count = 0
    edge_list+=paper_add_edge(paper_id, distance, distance_count)
    return (edge_list)

In [40]:
def author_add_edge(author, distance, distance_count):
    edge_list = []
    if distance == distance_count:
        return
    distance_count += 1
    paper_list = filter(paper_dict, '@', author)
    cite_author_list = []
    for paper in paper_list:
        if paper_dict[paper]['%'] is not None:
            for cite in paper_dict[paper]['%']:
                cite_author_list+=paper_dict[cite]['@']
    for cite_auth in cite_author_list:
        edge_list.append((author, cite_auth))
        if author_add_edge(cite_auth, distance, distance_count) is not None:
            edge_list += author_add_edge(cite_auth, distance, distance_count)      
    return edge_list  

def author_citation(author, distance):
    edge_list = []
    distance_count = 0
    edge_list+=author_add_edge(author, distance, distance_count)
    return (edge_list)

## Get Kuo-Chen Chou's Data

In [9]:
def filter(dataset, by_type, keyword):
    filtered_list = []
    for index in dataset:
        if keyword in dataset[index][by_type]:
            filtered_list.append(index)
    return filtered_list

In [30]:
'''Filter Kuo-Chen Chou's data '''
chou = {}
chou['author_id'] = filter(author_dict, 'n', 'Kuo-Chen Chou')
chou['paper'] = filter(paper_dict, '@', 'Kuo-Chen Chou')
chou['personal_info'] = author_dict[chou['author_id'][0]]
chou['citation'] = []
for paper in chou['paper']:
    if paper_dict[paper]['%'] is not None:
        for cite in paper_dict[paper]['%']:
            chou['citation']+=paper_dict[cite]['@']


In [32]:
print(chou['paper'])

['96529', '263321', '282893', '830830', '830854', '830896', '833077', '833623', '906249', '1021352', '1291867', '1291936', '1497118']


In [12]:
edge = citation_edge_list('1497118', 2)

In [15]:
graph = nx.Graph(edge)

In [16]:
visualize(graph, 'paper_1497118.html')

In [25]:
print(paper_dict['1083280'])

{'index': '1083280', '*': "Using Chou's pseudo amino acid composition to predict subcellular localization of apoptosis proteins: An approach with immune genetic algorithm-based ensemble classifier", '@': ['Yong-Sheng Ding', 'Tong-Liang Zhang'], 'o': 'College of Information Sciences and Technology, Donghua University, 2999 Renmin Road (N), Songjiang Campus, Shanghai 201620, China and Engineering Research Center of Digitized Textile and Fashion  ...;College of Information Sciences and Technology, Donghua University, 2999 Renmin Road (N), Songjiang Campus, Shanghai 201620, China', 't': '2008', 'c': 'Pattern Recognition Letters', '%': ['830380', '833077', '833623', '906249', '988170', '1291867'], '!': ['It']}


In [44]:
cite_edge = author_citation('Kuo-Chen Chou', 2)

['David E. Goldberg', 'Bernhard E. Boser', 'Isabelle M. Guyon', 'Vladimir N. Vapnik', 'Christopher J. C. Burges', 'Gunnar Rätsch', 'Ayhan Demiriz', 'Kristin P. Bennett', 'Yong-Sheng Ding', 'Tong-Liang Zhang', 'K. -R. Muller', 'S. Mika', 'G. Ratsch', 'K. Tsuda', 'B. Scholkopf']
['David J. Powell', 'Siu Shing Tong', 'Michael M. Skolnick', 'John H. Holland', 'John R. Koza', 'David E. Goldberg', 'John H. Holland', 'John R. Koza', 'David E. Goldberg', 'Thomas M. Mitchell', 'Heinz M&#252/hlenbein', 'Thilo Mahnig', 'Alberto Ochoa Rodriguez', 'Rafal Salustowicz', 'Jürgen Schmidhuber', 'Shumeet Baluja', 'Scott Davies', 'Michèle Sebag', 'Antoine Ducoulombier', 'Heinz Mühlenbein', 'Gerhard Paass', 'Isabelle Servet', 'Louise Travé-Massuyès', 'Daniel Stern', 'Hans-Michael Voigt', 'Heinz Mühlenbein', 'Dragan Cvetkovic', 'Shummet Baluja', 'Martin Pelikan', 'David E. Goldberg', 'Erick E. Cantú-paz', 'Heinz Mühlenbein', 'Heinz Mühlenbein', 'Thilo Mahnig', 'Richard W. Hamming', 'David H. Ackley', 'David

['David J. Powell', 'Siu Shing Tong', 'Michael M. Skolnick', 'John H. Holland', 'John R. Koza', 'David E. Goldberg', 'John H. Holland', 'John R. Koza', 'David E. Goldberg', 'Thomas M. Mitchell', 'Heinz M&#252/hlenbein', 'Thilo Mahnig', 'Alberto Ochoa Rodriguez', 'Rafal Salustowicz', 'Jürgen Schmidhuber', 'Shumeet Baluja', 'Scott Davies', 'Michèle Sebag', 'Antoine Ducoulombier', 'Heinz Mühlenbein', 'Gerhard Paass', 'Isabelle Servet', 'Louise Travé-Massuyès', 'Daniel Stern', 'Hans-Michael Voigt', 'Heinz Mühlenbein', 'Dragan Cvetkovic', 'Shummet Baluja', 'Martin Pelikan', 'David E. Goldberg', 'Erick E. Cantú-paz', 'Heinz Mühlenbein', 'Heinz Mühlenbein', 'Thilo Mahnig', 'Richard W. Hamming', 'David H. Ackley', 'David E. Goldberg', 'Charles Campbell Palmer', 'Dimitri Knjazew', 'David E. Goldberg', 'Lawrence Davis', 'David Orvosh', 'Anthony Cox', 'Yuping Qiu', 'David Orvosh', 'Lawrence Davis', 'David E. Goldberg', 'Kalyanmoy Deb', 'Hillol Kargupta', 'Georges Harik', 'Bryan A. Norman', 'Alice

['Eric B. Baum', 'David Haussler', 'Y. Le Cun', 'B. Boser', 'J. S. Denker', 'R. E. Howard', 'W. Habbard', 'L. D. Jackel', 'D. Henderson', 'Stephen M. Omohundro', 'Stuart Geman', 'Elie Bienenstock', 'René Doursat', 'Vladimir Vapnik']
['Eric B. Baum', 'David Haussler', 'Y. Le Cun', 'B. Boser', 'J. S. Denker', 'R. E. Howard', 'W. Habbard', 'L. D. Jackel', 'D. Henderson', 'Stephen M. Omohundro', 'Stuart Geman', 'Elie Bienenstock', 'René Doursat', 'Vladimir Vapnik']
['Eric B. Baum', 'David Haussler', 'Y. Le Cun', 'B. Boser', 'J. S. Denker', 'R. E. Howard', 'W. Habbard', 'L. D. Jackel', 'D. Henderson', 'Stephen M. Omohundro', 'Stuart Geman', 'Elie Bienenstock', 'René Doursat', 'Vladimir Vapnik']
['Eric B. Baum', 'David Haussler', 'Y. Le Cun', 'B. Boser', 'J. S. Denker', 'R. E. Howard', 'W. Habbard', 'L. D. Jackel', 'D. Henderson', 'Stephen M. Omohundro', 'Stuart Geman', 'Elie Bienenstock', 'René Doursat', 'Vladimir Vapnik']
['Eric B. Baum', 'David Haussler', 'Y. Le Cun', 'B. Boser', 'J. S. D

['R. Hettich', 'K. O. Kortanek', 'R. Cominetti', 'J. P. Dussault', 'Vladimir N. Vapnik', 'Yoav Freund', 'Robert E. Schapire', 'Adam J. Grove', 'Dale Schuurmans', 'Bernhard Schölkopf', 'Christopher J. C. Burges', 'Alexander J. Smola', 'P. S. Bradley', 'O. L. Mangasarian', 'J. B. Rosen', 'Jyrki Kivinen', 'Manfred K. Warmuth', 'Leo Breiman', 'Thomas G. Dietterich', 'G. R&#228/tsch', 'T. Onoda', 'K.-R. M&#252/ller', 'Yair Al Censor', 'Stavros A. Zenios', 'Eric Bauer', 'Ron Kohavi', 'Robert E. Schapire', 'Yoav Freund', 'Peter Barlett', 'Wee Sun Lee', 'Harris Drucker', 'Klaus-Robert Müller', 'Alex J. Smola', 'Gunnar Rätsch', 'Bernhard Schölkopf', 'Jens Kohlmorgen', 'Vladimir Vapnik', 'Holger Schwenk', 'Yoshua Bengio', 'Alberto Bertoni', 'Paola Campadelli', 'M. Parodi', 'Yoav Freund', 'Robert E. Schapire', 'Nigel Duffy', 'David P. Helmbold', 'Gunnar Rätsch', 'Manfred K. Warmuth', 'Sebastian Mika', 'Takashi Onoda', 'Steven Lemm', 'Klaus-Robert Müller', 'Kristin P. Bennet', 'Ayhan Demiriz', 'Jo

['R. Hettich', 'K. O. Kortanek', 'R. Cominetti', 'J. P. Dussault', 'Vladimir N. Vapnik', 'Yoav Freund', 'Robert E. Schapire', 'Adam J. Grove', 'Dale Schuurmans', 'Bernhard Schölkopf', 'Christopher J. C. Burges', 'Alexander J. Smola', 'P. S. Bradley', 'O. L. Mangasarian', 'J. B. Rosen', 'Jyrki Kivinen', 'Manfred K. Warmuth', 'Leo Breiman', 'Thomas G. Dietterich', 'G. R&#228/tsch', 'T. Onoda', 'K.-R. M&#252/ller', 'Yair Al Censor', 'Stavros A. Zenios', 'Eric Bauer', 'Ron Kohavi', 'Robert E. Schapire', 'Yoav Freund', 'Peter Barlett', 'Wee Sun Lee', 'Harris Drucker', 'Klaus-Robert Müller', 'Alex J. Smola', 'Gunnar Rätsch', 'Bernhard Schölkopf', 'Jens Kohlmorgen', 'Vladimir Vapnik', 'Holger Schwenk', 'Yoshua Bengio', 'Alberto Bertoni', 'Paola Campadelli', 'M. Parodi', 'Yoav Freund', 'Robert E. Schapire', 'Nigel Duffy', 'David P. Helmbold', 'Gunnar Rätsch', 'Manfred K. Warmuth', 'Sebastian Mika', 'Takashi Onoda', 'Steven Lemm', 'Klaus-Robert Müller', 'Kristin P. Bennet', 'Ayhan Demiriz', 'Jo

['Bernhard E. Boser', 'Isabelle M. Guyon', 'Vladimir N. Vapnik', 'Corinna Cortes', 'Vladimir Vapnik', 'Adam J. Grove', 'Dale Schuurmans', 'Kristin P. Bennett', 'Leo Breiman', 'Robert E. Schapire', 'Yoram Singer', 'Kristin P. Bennett', 'Ayhan Demiriz', 'Nello Cristianini', 'John Shawe-Taylor', 'Martin M. Anthony', 'Peter Bartlett', 'Eric Bauer', 'Ron Kohavi', 'Kristin P. Bennett', 'Erin J. Bredensteiner', 'Kristin P. Bennet', 'Ayhan Demiriz', 'John Shawe-Taylor', 'J. R. Quinlan', 'R. Hettich', 'K. O. Kortanek', 'R. Cominetti', 'J. P. Dussault', 'Vladimir N. Vapnik', 'Yoav Freund', 'Robert E. Schapire', 'Adam J. Grove', 'Dale Schuurmans', 'Bernhard Schölkopf', 'Christopher J. C. Burges', 'Alexander J. Smola', 'P. S. Bradley', 'O. L. Mangasarian', 'J. B. Rosen', 'Jyrki Kivinen', 'Manfred K. Warmuth', 'Leo Breiman', 'Thomas G. Dietterich', 'G. R&#228/tsch', 'T. Onoda', 'K.-R. M&#252/ller', 'Yair Al Censor', 'Stavros A. Zenios', 'Eric Bauer', 'Ron Kohavi', 'Robert E. Schapire', 'Yoav Freund

['Norbert Beckmann', 'Hans-Peter Kriegel', 'Ralf Schneider', 'Bernhard Seeger', 'Christos Faloutsos', 'King-Ip Lin', 'Norio Katayama', "Shin'ichi Satoh", 'Stefan Berchtold', 'Christian Böhm', 'Hans-Peter Kriegal', 'Thomas Seidl', 'Hans-Peter Kriegel', 'Piotr Indyk', 'Rajeev Motwani', 'John T. Robinson', 'Kyuseok Shim', 'Ramakrishnan Srikant', 'Rakesh Agrawal', 'Stefan Berchtold', 'Bernhard Ertl', 'Daniel A. Keim', 'Hans-Peter Kriegel', 'Thomas Seidl', 'David A. White', 'Ramesh Jain', 'Aristides Gionis', 'Piotr Indyk', 'Rajeev Motwani', 'Stefan Berchtold', 'Daniel A. Keim', 'Hans-Peter Kriegel', 'Asim Roy', 'Lark Sang Kim', 'Somnath Mukhopadhyay', 'Asim Roy', 'Sandeep Govil', 'Raymond Miranda', 'Vladimir N. Vapnik', 'Corinna Cortes', 'Vladimir Vapnik', 'Erin J. Bredensteiner', 'Kristin P. Bennett', 'Paul S. Bradley', 'O. L. Mangasarian', 'Bernhard Schölkopf', 'Chris Burges', 'Vladimir Vapnik', 'Volker Blanz', 'Bernhard Schölkopf', 'Heinrich H. Bülthoff', 'Chris Burges', 'Vladimir Vapnik

['Norbert Beckmann', 'Hans-Peter Kriegel', 'Ralf Schneider', 'Bernhard Seeger', 'Christos Faloutsos', 'King-Ip Lin', 'Norio Katayama', "Shin'ichi Satoh", 'Stefan Berchtold', 'Christian Böhm', 'Hans-Peter Kriegal', 'Thomas Seidl', 'Hans-Peter Kriegel', 'Piotr Indyk', 'Rajeev Motwani', 'John T. Robinson', 'Kyuseok Shim', 'Ramakrishnan Srikant', 'Rakesh Agrawal', 'Stefan Berchtold', 'Bernhard Ertl', 'Daniel A. Keim', 'Hans-Peter Kriegel', 'Thomas Seidl', 'David A. White', 'Ramesh Jain', 'Aristides Gionis', 'Piotr Indyk', 'Rajeev Motwani', 'Stefan Berchtold', 'Daniel A. Keim', 'Hans-Peter Kriegel', 'Asim Roy', 'Lark Sang Kim', 'Somnath Mukhopadhyay', 'Asim Roy', 'Sandeep Govil', 'Raymond Miranda', 'Vladimir N. Vapnik', 'Corinna Cortes', 'Vladimir Vapnik', 'Erin J. Bredensteiner', 'Kristin P. Bennett', 'Paul S. Bradley', 'O. L. Mangasarian', 'Bernhard Schölkopf', 'Chris Burges', 'Vladimir Vapnik', 'Volker Blanz', 'Bernhard Schölkopf', 'Heinrich H. Bülthoff', 'Chris Burges', 'Vladimir Vapnik

['Ying Huang', 'Yanda Li', 'Kuo-Chen Chou', 'Kuo-Chen Chou', 'Yu-Dong Cai', 'Hong-Bin Shen', 'Kuo-Chen Chou', 'Piyushkumar Mundra', 'Madhan Kumar', 'K. Krishna Kumar', 'Valadi K. Jayaraman', 'Bhaskar D. Kulkarni', 'Tongliang Zhang', 'Yongsheng Ding', 'Kuo-Chen Chou', 'F. Bergadano', 'A. Puliafito', 'S. Riccobene', 'G. Ruffo', 'L. Vita', 'Kassem Saleh', 'Robert Probert', 'Hassib Khanafer', 'Timothy K. Shih', 'Paul D. Manuel', 'Jarallah AlGhamdi', 'Chunlin Li', 'Layuan Li', 'Patricia Paderewski-Rodríguez', 'Ma. José Rodríguez-Fortiz', 'José Parets-Llorca', 'Takao Komiya', 'Tomoya Enokido', 'Makoto Takizawa', 'Brahim Medjahed', 'Athman Bouguettaya', 'Ahmed K. Elmagarmid', 'Lei Gao', 'Yongsheng Ding', 'Lihong Ren', 'Young-Guk Ha', 'Joo-Chan Sohn', 'Young-Jo Cho', 'Hyunsoo Yoon', 'Yongsheng Ding', 'Lei Gao', 'Da Ruan', 'Seral Şahan', 'Kemal Polat', 'Halife Kodaz', 'Salih Güneş', 'Morton Swimmer', 'Nicolás García-Pedrajas', 'Colin Fyfe', 'Yongsheng Ding', 'Hongbin Sun', 'Kuangrong Hao', 'Xia

['Ying Huang', 'Yanda Li', 'Kuo-Chen Chou', 'Kuo-Chen Chou', 'Yu-Dong Cai', 'Hong-Bin Shen', 'Kuo-Chen Chou', 'Piyushkumar Mundra', 'Madhan Kumar', 'K. Krishna Kumar', 'Valadi K. Jayaraman', 'Bhaskar D. Kulkarni', 'Tongliang Zhang', 'Yongsheng Ding', 'Kuo-Chen Chou']
['Ying Huang', 'Yanda Li', 'Kuo-Chen Chou', 'Kuo-Chen Chou', 'Yu-Dong Cai', 'Hong-Bin Shen', 'Kuo-Chen Chou', 'Piyushkumar Mundra', 'Madhan Kumar', 'K. Krishna Kumar', 'Valadi K. Jayaraman', 'Bhaskar D. Kulkarni', 'Tongliang Zhang', 'Yongsheng Ding', 'Kuo-Chen Chou']
[]
[]
[]
[]
[]
[]
['Adele Goldberg', 'David Robson', 'Umeshwar Dayal', 'John M Smith', 'Michael Stonebraker', 'Jay Banerjee', 'Hong-Tai Chou', 'Jorge F. Garza', 'Won Kim', 'Darrell Woelk', 'Nat Ballou', 'Hyoung-Joo Kim', 'Alan Purdy', 'Bruce Schuchardt', 'David Maier', 'Jeff Conklin', 'Serge Abiteboul', 'Richard Hull', 'Timothy Andrews', 'Craig Harris', 'Richard Hull', 'Roger King', 'Klaus R. Dittrich', 'Frank Manola', 'Umeshwar Dayal', 'David W. Shipman', 'Mi

In [46]:
graph = nx.Graph(cite_edge)

In [47]:
visualize(graph, 'Kuo-Chen_Chou.html')