In [1]:
import networkx as nx
import matplotlib.pyplot as plt
from pyvis.network import Network

## Read and Store Data

In [2]:
'''
#index ---- index id of this author
#n ---- name  (separated by semicolons)
#a ---- affiliations  (separated by semicolons)
#pc ---- the count of published papers of this author
#cn ---- the total number of citations of this author
#hi ---- the H-index of this author
#pi ---- the P-index with equal A-index of this author
#upi ---- the P-index with unequal A-index of this author
#t ---- research interests of this author  (separated by semicolons)
'''

# Read the whole author dump and cut it into one text chunk per author;
# chunks are delimited by an empty line ('\n\n').
with open('data/AMiner-Author.txt', 'r') as file:
    data = file.read().split('\n\n')

author_dict = {}
# Field tags, in the order their lines are read from each record
# (line 0 -> 'index', line 1 -> 'n', ...). Parsing is positional.
detail = ['index', 'n', 'a', 'pc', 'cn', 'hi', 'pi', 'upi', 't']
for auth in data:
    info = auth.split('\n')
    # A chunk whose first line is empty stops the parse (this is what is left
    # after the file's final blank line). Compare by value: `is ''` tests
    # object identity, which only works by accident of string interning and
    # raises SyntaxWarning on Python 3.8+.
    if info[0] == '':
        break
    index = info[0].split(' ')[1]
    author_dict[index] = {}
    for num, each in enumerate(detail):
        tag = '#' + each
        line = info[num]
        if line.startswith(tag + ' '):
            # Strip the tag only where it leads the line; a blanket
            # str.replace() would also delete the same characters if they
            # happened to occur inside the value.
            value = line[len(tag) + 1:]
        elif line == tag:
            # Tag present but no value at all: store an empty string rather
            # than the tag text itself.
            value = ''
        else:
            value = line
        author_dict[index][each] = value
    # NOTE(review): the format notes at the top of this cell say affiliations
    # are separated by semicolons, yet the split here is on ', '. Kept as-is
    # because later cells may rely on the resulting pieces -- confirm intent.
    author_dict[index]['a'] = author_dict[index]['a'].split(', ')
    # Research interests: one list entry per ';'-separated interest.
    author_dict[index]['t'] = author_dict[index]['t'].split(';')
    # 'field' flattens every interest into its individual space-separated words.
    author_dict[index]['field'] = [
        item for field in author_dict[index]['t'] for item in field.split(' ')
    ]

In [3]:
# Build a list of (author1, author2, count) triples from the co-author file.
# The first character of every line is dropped (presumably a leading marker
# such as '#' -- verify against the data file), the remainder is split on
# tabs into exactly three fields, and the third field is converted to int.
with open('data/AMiner-Coauthor.txt', 'r') as coauthorfile:
    coauthors = [
        (first_author, second_author, int(times))
        for first_author, second_author, times in (
            row[1:].split('\t') for row in coauthorfile
        )
    ]

In [4]:
'''
#index ---- index id of this paper
#* ---- paper title
#@ ---- authors (separated by semicolons)
#o ---- affiliations (separated by semicolons, and each affiliation corresponds to an author in order)
#t ---- year
#c ---- publication venue
#% ---- the id of references of this paper (there are multiple lines, with each indicating a reference)
#! ---- abstract
'''

# Read the whole paper dump and cut it into one text chunk per paper;
# chunks are delimited by an empty line ('\n\n').
with open('data/Aminer-Paper.txt', 'r') as file:
    data = file.read().split('\n\n')

paper_dict = {}
# Single-line field tags, in the order their lines are read from each record
# (line 0 -> 'index', line 1 -> '*', ...). Parsing is positional.
detail = ['index', '*', '@', 'o', 't', 'c']
for paper in data:
    info = paper.split('\n')
    # A chunk whose first line is empty stops the parse (this is what is left
    # after the file's final blank line). Compare by value: `is ''` tests
    # object identity, which only works by accident of string interning and
    # raises SyntaxWarning on Python 3.8+.
    if info[0] == '':
        break
    index = info[0].split(' ')[1]
    paper_dict[index] = {}
    for num, each in enumerate(detail):
        tag = '#' + each
        line = info[num]
        if line.startswith(tag + ' '):
            # Strip the tag only where it leads the line; a blanket
            # str.replace() would also delete the same characters if they
            # happened to occur inside a title or venue.
            value = line[len(tag) + 1:]
        elif line == tag:
            # Tag present but no value at all: store an empty string rather
            # than the tag text itself.
            value = ''
        else:
            value = line
        paper_dict[index][each] = value
    # '%' collects reference ids, '!' collects abstract text; both stay lists
    # so existing consumers keep working.
    paper_dict[index]['%'] = []
    paper_dict[index]['!'] = []
    for each in info[len(detail):]:
        if each.startswith('#!'):
            # Keep the full abstract. The previous `split(' ')[1]` stored
            # only its first word.
            payload = each[2:].strip()
            if payload:
                paper_dict[index]['!'].append(payload)
        elif each.startswith('#%'):
            payload = each[2:].strip()
            if payload:
                paper_dict[index]['%'].append(payload)
        # Any other line (blank or unrecognised tag) is ignored instead of
        # being recorded as a bogus reference or raising IndexError.



In [5]:
'''Check dictionary size correctness'''
# Print the number of parsed entries in each lookup table, one line per table.
for label, table in (('length of author:', author_dict),
                     ('length of paper:', paper_dict)):
    print(label, len(table))

length of author: 1712433
length of paper: 2092356


## Get Kuo-Chen Chou's Data

In [6]:
def filter(dataset, by_type, keyword):
    """Return the keys of ``dataset`` whose ``by_type`` field contains ``keyword``.

    Note: this definition shadows Python's built-in ``filter`` for the rest
    of the notebook.

    Args:
        dataset: mapping from record id to a record (itself a mapping).
        by_type: key of the record field to inspect.
        keyword: value tested with ``in`` against that field, i.e. a
            substring test when the field is a string and an element test
            when it is a list.

    Returns:
        A list of the matching record ids, in the iteration order of
        ``dataset``.

    Raises:
        KeyError: if a record has no ``by_type`` field.
    """
    return [key for key in dataset if keyword in dataset[key][by_type]]

In [7]:
'''Filter Kuo-Chen Chou's data '''
# Collect the ids of author records and paper records that mention the name.
chou = {
    'author_id': filter(author_dict, 'n', 'Kuo-Chen Chou'),
    'paper': filter(paper_dict, '@', 'Kuo-Chen Chou'),
}
# The first matching author id is used as the profile; this raises
# IndexError if no author record matched.
chou['personal_info'] = author_dict[chou['author_id'][0]]

In [8]:
# For every paper id collected in chou['paper'], print the id together with
# the list stored under that paper's '%' key.
for paper in chou['paper']:
    citation_list = paper_dict[paper]['%']
    print('paper_id: ', paper, ' citation_list: ', citation_list)

paper_id:  96529  citation_list:  []
paper_id:  263321  citation_list:  []
paper_id:  282893  citation_list:  []
paper_id:  830830  citation_list:  []
paper_id:  830854  citation_list:  []
paper_id:  830896  citation_list:  []
paper_id:  833077  citation_list:  []
paper_id:  833623  citation_list:  []
paper_id:  906249  citation_list:  []
paper_id:  1021352  citation_list:  []
paper_id:  1291867  citation_list:  ['369236']
paper_id:  1291936  citation_list:  []
paper_id:  1497118  citation_list:  ['116149', '420077', '425063', '1083280', '1860761']
