In [1]:
import dgl
import torch
from tqdm import trange, tqdm

# Load the saved heterogeneous graph; the second value returned by
# load_graphs is not used here.
graph_list, _ = dgl.load_graphs('../save3/graph_vfc.graph')
graph = graph_list[0]

# Authors are restricted to those with at least 50 incoming 'written' edges;
# every other node type keeps its full id range.
author_ids = torch.where(graph.in_degrees(etype='written') >= 50)[0]
paper_ids, topic_ids, venue_ids, field_ids, org_ids, country_ids = (
    torch.arange(graph.number_of_nodes(ntype))
    for ntype in ('paper', 'topic', 'venue', 'field', 'org', 'country')
)

# Node ids to keep, keyed by node type.
kept_nodes = {
    'author': author_ids,
    'paper': paper_ids,
    'topic': topic_ids,
    'venue': venue_ids,
    'field': field_ids,
    'org': org_ids,
    'country': country_ids,
}
g = graph.subgraph(kept_nodes)
print(g)

Using backend: pytorch


Graph(num_nodes={'author': 36288, 'country': 54, 'field': 142726, 'org': 379, 'paper': 5319925, 'topic': 16081, 'venue': 46347},
      num_edges={('country', 'has', 'org'): 432, ('field', 'has', 'venue'): 29480166, ('org', 'has', 'author'): 721202, ('paper', 'cites', 'paper'): 25626821, ('paper', 'written', 'author'): 3728457, ('topic', 'contains', 'paper'): 13160952, ('venue', 'publishes', 'paper'): 3278873},
      metagraph=[('country', 'org', 'has'), ('org', 'author', 'has'), ('field', 'venue', 'has'), ('venue', 'paper', 'publishes'), ('paper', 'paper', 'cites'), ('paper', 'author', 'written'), ('topic', 'paper', 'contains')])


In [4]:
# Restrict the graph to the node types listed here; edge types touching any
# other node type no longer appear in the printed result.
kept_ntypes = ['paper', 'venue', 'author', 'topic', 'field']
g = dgl.node_type_subgraph(g, kept_ntypes)
print(g)

Graph(num_nodes={'author': 36288, 'field': 142726, 'paper': 5319925, 'topic': 16081, 'venue': 46347},
      num_edges={('field', 'has', 'venue'): 29480166, ('paper', 'cites', 'paper'): 25626821, ('paper', 'written', 'author'): 3728457, ('topic', 'contains', 'paper'): 13160952, ('venue', 'publishes', 'paper'): 3278873},
      metagraph=[('field', 'venue', 'has'), ('venue', 'paper', 'publishes'), ('paper', 'paper', 'cites'), ('paper', 'author', 'written'), ('topic', 'paper', 'contains')])


In [7]:
import pandas as pd
import numpy as np
# Per-edge 'year' feature stored on the 'cites' edges.
cite_year = g.edges['cites'].data['year']

# Summarise the year distribution with pandas.
# NOTE(review): the recorded output shows min 0 and max 2300, so some year
# values look invalid -- confirm before relying on them.
year_values = np.array(cite_year)
df_cite = pd.DataFrame({'year': year_values})
print(df_cite.describe())

               year
count  2.562682e+07
mean   2.009078e+03
std    1.578606e+01
min    0.000000e+00
25%    2.006000e+03
50%    2.011000e+03
75%    2.014000e+03
max    2.300000e+03


In [9]:
# For each year from 2000 through 2021, report how many entries of cite_year
# equal that year, together with their positions in the tensor.
for year in range(2000, 2022):
    idx = (cite_year == year).nonzero(as_tuple=True)[0]
    print(f'{year}: {len(idx)}, {idx}')

2000: 366305, tensor([2520059, 2520060, 2520061,  ..., 2886361, 2886362, 2886363])
2001: 391142, tensor([2886364, 2886365, 2886366,  ..., 3277503, 3277504, 3277505])
2002: 453307, tensor([3277506, 3277507, 3277508,  ..., 3730810, 3730811, 3730812])
2003: 500198, tensor([3730813, 3730814, 3730815,  ..., 4231008, 4231009, 4231010])
2004: 617796, tensor([4231011, 4231012, 4231013,  ..., 4848804, 4848805, 4848806])
2005: 848673, tensor([4848807, 4848808, 4848809,  ..., 5697477, 5697478, 5697479])
2006: 1029859, tensor([5697480, 5697481, 5697482,  ..., 6727336, 6727337, 6727338])
2007: 1163989, tensor([6727339, 6727340, 6727341,  ..., 7891325, 7891326, 7891327])
2008: 1293157, tensor([7891328, 7891329, 7891330,  ..., 9184482, 9184483, 9184484])
2009: 1445515, tensor([ 9184485,  9184486,  9184487,  ..., 10629997, 10629998, 10629999])
2010: 1579246, tensor([10630000, 10630001, 10630002,  ..., 12209243, 12209244, 12209245])
2011: 1670732, tensor([12209246, 12209247, 12209248,  ..., 13879975, 1

In [26]:
import sys
def retrieve_name_ex(var, depth=2):
    """Return a variable name bound to ``var`` in an enclosing call frame.

    The search starts ``depth`` frames above this function and walks outwards
    through ``f_back`` until a frame holds a local that *is* ``var`` (identity,
    not equality). With the default ``depth=2`` the first frame inspected is
    the caller of the function that called ``retrieve_name_ex``.

    Within a frame, a name that does not start with an underscore is
    preferred, so that IPython output caches such as ``_`` or ``_3`` do not
    shadow the user's own variable name. An underscore name is still returned
    when it is the only match in that frame.

    Args:
        var: The object whose name is wanted.
        depth: Number of frames to skip before the search begins.

    Returns:
        The matching name, or ``""`` when no frame binds ``var`` or the call
        stack is shallower than ``depth``.
    """
    try:
        frame = sys._getframe(depth)
    except ValueError:
        # sys._getframe raises when the stack is not deep enough; there is
        # simply nothing to search in that case.
        return ""

    while frame is not None:
        underscore_match = None
        for name, value in frame.f_locals.items():
            if value is not var:
                continue
            if not str(name).startswith('_'):
                return name
            if underscore_match is None:
                # Remember the first private/cache name as a fallback only.
                underscore_match = name
        if underscore_match is not None:
            return underscore_match
        frame = frame.f_back
    return ""
 

def myout(var, mode=''):
    """Print ``var`` prefixed by its variable name and, if known, its size.

    mode = 'len' or 'shape'. When left empty it is inferred from the type of
    ``var``: list/dict/set -> 'len', numpy array or torch tensor -> 'shape',
    anything else is printed as ``name = value``.
    """
    # Must be called directly from here: the name lookup skips a fixed number
    # of frames and expects this function to be exactly one of them.
    label = retrieve_name_ex(var)

    if mode == '':
        if isinstance(var, (list, dict, set)):
            mode = 'len'
        elif isinstance(var, (np.ndarray, torch.Tensor)):
            mode = 'shape'

    if mode == 'len':
        print(f'{label} : len={len(var)}, {var}')
        return
    if mode != 'shape':
        print(f"{label} = {var}")
        return

    dims = var.shape
    if len(dims) >= 2:
        # Multi-dimensional values go on their own lines below the header.
        print(f'{label} : shape={dims}')
        print(var)
    else:
        print(f'{label} : shape={dims}, {var}')

# Sample values, one per kind that myout() distinguishes. Each one keeps its
# own top-level name because myout() reports the name it finds by identity.
a = torch.ones(2)
c = torch.ones(2, 3)
d = np.ones((2, 3))
b = [1, 2]
e = {1: 2, 2: 3}
f = {1, 2}

# Separate calls (not a loop) so no extra name is bound to the samples.
myout(a)
myout(b)
myout(e)
myout(f)

a : shape=torch.Size([2]), tensor([1., 1.])
b : len=2, [1, 2]
e : len=2, {1: 2, 2: 3}
f : len=2, {1, 2}


In [16]:
# Check that isinstance recognises a numpy array and a torch tensor; each
# value is printed only when it matches its expected type.
a = torch.ones(2, 3)
b = np.ones((2, 3))
for candidate, expected_type in ((b, np.ndarray), (a, torch.Tensor)):
    if isinstance(candidate, expected_type):
        print(candidate)

[[1. 1. 1.]
 [1. 1. 1.]]
tensor([[1., 1., 1.],
        [1., 1., 1.]])
