In [None]:
# Hand-curated set of venue names. In this section it is only referenced by a
# commented-out filter in the co-author graph cell.
# NOTE(review): names look like DBLP-style venue abbreviations -- confirm
# they match the strings stored in the venue list before re-enabling the filter.
ri_confs = {
    'CVPR',
    'ICRA',
    'ICCV',
    'IROS',
    'SIGGRAPH',
    'NIPS',
    'ECCV',
    'I. J. Robotics Res.',
    'AAAI',
    'Robotics: Science and Systems',
    'IEEE Trans. Pattern Anal. Mach. Intell.',
    'International Journal of Computer Vision',
    'ICML',
    'Symposium on Computer Animation',
    'WAFR',
    'ACM Trans. Graph.',
    'AAMAS',
    'Auton. Robots',
    'Humanoids',
    'IEEE Trans. Robotics and Automation',
    'ISER',
    'ISRR',
    'WACV',
    'IJCAI',
    'AISTATS',
    'UAI',
    'CHI',
    'Comput. Graph. Forum',
    'SIGGRAPH Asia',
    'FGR',
    'BMVC',
    'ICAPS',
    'CVPR Workshops',
    'ACC',
    'FSR',
    'Commun. ACM',
    'HRI',
    '3DV',
    'ICLR',
    'MICCAI',
    'AI Magazine',
    'IEEE Robot. Automat. Mag.',
    'Proceedings of the IEEE',
    'IEEE Robotics and Automation Letters',
    'ICIP',
    'IEEE Intelligent Systems',
    'IEEE Computer Graphics and Applications',
    'ACCV',
    'CDC',
    'Artif. Intell.',
    'ICPR',
    'SOCS',
    'SMC',
    'AAAI Spring Symposia',
    'ICCV Workshops',
    'SSRR',
    'Robotics and Autonomous Systems',
    'RO-MAN',
    'CHI Extended Abstracts',
    'J. Field Robotics',
    'AAAI Fall Symposia',
    'EMBC',
    'Graphics Interface',
    'CoRL',
    'ECCV Workshops',
    'Computer Vision and Image Understanding',
    'HICSS',
    'ICCP',
    'ISBI',
    'Conference on Designing Interactive Systems',
    'DARS',
    'ICME',
    'Image Vision Comput.',
    'Ann. Math. Artif. Intell.',
    'ICWS',
    'AVBPA',
    'IEEE Trans. Systems, Man, and Cybernetics',
    'SAP',
    'Advanced Robotics',
    'FUSION',
    'ITSC',
    'CogSci',
    'IPMI',
    'International Semantic Web Conference',
    'IEEE Trans. Biomed. Engineering',
    'IAT',
    'CASE',
    'ROBIO',
    'TAP',
    'MVA',
    'Intelligent Vehicles Symposium',
    'IAS',
    'DICTA',
}

In [None]:
import os
import sys
import fnmatch
import zipfile
import xmltodict
import numpy as np
import pandas as pd
import json
import gzip
import pickle
import csv
import scipy.sparse

In [None]:
# Expose a bulk "load this dict of {(row, col): value}" routine on dok_matrix
# as `dok_matrix.my_update`. Which method actually works depends on the SciPy
# release: the public `update` may raise NotImplementedError, and the private
# `_update` is not guaranteed to exist, so each candidate is probed in turn.
def _select_dok_updater():
    """Return a callable ``f(matrix, entries)`` that bulk-loads a dict into a dok_matrix.

    Candidates are tried in order of preference: the public ``update``, the
    private ``_update``, and finally a plain per-item assignment loop that
    only relies on ``__setitem__``. A candidate is accepted only if it runs
    without error on a scratch matrix *and* the written value can be read back.

    Raises:
        RuntimeError: if none of the candidates manages to store a value.
    """
    def _assign_items(self, entries):
        # Last resort: one __setitem__ call per entry (slow for large dicts,
        # but independent of SciPy internals).
        for key, value in entries.items():
            self[key] = value

    candidates = [
        getattr(scipy.sparse.dok_matrix, 'update', None),
        getattr(scipy.sparse.dok_matrix, '_update', None),
        _assign_items,
    ]
    for candidate in candidates:
        if candidate is None:
            continue
        probe = scipy.sparse.dok_matrix((10, 10))
        try:
            candidate(probe, {(0, 0): 1.0})
        except (NotImplementedError, AttributeError, TypeError, ValueError):
            # This SciPy release does not support the candidate; try the next.
            continue
        if probe[0, 0] == 1.0:
            return candidate
    raise RuntimeError('no working bulk-update method found for scipy.sparse.dok_matrix')


scipy.sparse.dok_matrix.my_update = _select_dok_updater()

In [None]:
def _read_gz_pickle(path):
    """Unpickle and return the single object stored in the gzip file at `path`."""
    # NOTE(review): pickle.load executes arbitrary code from the file; only
    # use this on archives produced by a trusted pipeline.
    with gzip.open(path, 'rb') as handle:
        return pickle.load(handle)


# Inputs for the rest of the notebook: venues, authors and paper records.
all_venues = _read_gz_pickle('useful_venue_list.pkl.gz')
all_authors = _read_gz_pickle('useful_authors_list.pkl.gz')
all_papers = _read_gz_pickle('useful_papers.pkl.gz')

In [None]:
# Sizes of the venue and author lists, and name -> position lookups for both.
# If a name occurs more than once, the last position wins.
n_confs = len(all_venues)
n_auths = len(all_authors)
conf_idx = dict(zip(all_venues, range(n_confs)))
name_idx = dict(zip(all_authors, range(n_auths)))

In [None]:
# An author name is dropped from the lookup when the same name with a
# " 0001" suffix is also present in the lookup.
names_to_remove = [
    author for author in all_authors if author + ' 0001' in name_idx
]
for stale_name in names_to_remove:
    del name_idx[stale_name]

In [None]:
# Display how many un-suffixed author names were collected for removal.
len(names_to_remove)

In [None]:
# Load the faculty affiliation table from CSV.
# NOTE(review): `faculty_affil` is not used by any cell visible in this
# section -- confirm it is still needed.
faculty_affil = pd.read_csv('faculty-affiliations.csv')

In [None]:
def pg(M,alpha=0.85,tol=1e-6,max_iter=1,verbose=False):
    """Compute PageRank scores of a weighted graph by power iteration.

    Args:
        M: square matrix (SciPy sparse or array-like); ``M[i, j]`` is the
            weight of the edge i -> j. Rows are normalised to sum to one;
            rows that sum to zero are treated as dangling nodes whose mass is
            redistributed uniformly.
        alpha: damping factor (probability of following an edge).
        tol: per-node tolerance; iteration stops once the L1 change of the
            score vector drops below ``N * tol``.
        max_iter: maximum number of power-iteration steps. The default of 1
            is kept for backward compatibility; callers normally pass more.
        verbose: when true, print the iteration number and L1 change.

    Returns:
        1-D ``numpy.ndarray`` of length N with the scores (uniform teleport
        and uniform start vector). An empty array for an empty graph.

    Raises:
        ValueError: if ``M`` is not square.
    """
    # Accept dense or sparse input; for an existing float CSR matrix this
    # does not copy the data.
    M = scipy.sparse.csr_matrix(M, dtype=float)
    if M.shape[0] != M.shape[1]:
        raise ValueError('pg expects a square matrix, got shape %r' % (M.shape,))
    N = M.shape[0]
    if N == 0:
        # Nothing to rank; avoids the 1/N division below.
        return np.zeros(0)

    # Row-normalise: scale every non-empty row so that it sums to one.
    row_sums = np.asarray(M.sum(axis=1)).flatten()
    is_dangling = row_sums == 0
    inv_row_sums = np.zeros(N)
    inv_row_sums[~is_dangling] = 1.0 / row_sums[~is_dangling]
    M = scipy.sparse.diags(inv_row_sums, 0, format='csr') @ M
    # x @ M written as M.T @ x: an explicit matrix product that does not rely
    # on `*` meaning matrix multiplication for sparse matrices.
    MT = M.T

    # initial vector
    x = np.repeat(1.0 / N, N)

    # Personalization vector (uniform); also used to spread dangling mass.
    p = np.repeat(1.0 / N, N)
    dangling_weights = p

    # power iteration: make up to max_iter iterations
    for iteration in range(max_iter):
        xlast = x
        x = alpha * (MT @ x + x[is_dangling].sum() * dangling_weights) + \
            (1 - alpha) * p
        # check convergence, l1 norm
        err = np.absolute(x - xlast).sum()
        if verbose:
            print(iteration,err)
        if err < N * tol:
            break

    return x

In [None]:
import itertools

# Weighted co-authorship graph. For every paper, each ordered pair of its
# indexed authors (self-pairs included) gains 1/n, where n is the paper's
# total author count (authors missing from name_idx still count towards n).
# A venue filter on `ri_confs` used to live here and is currently disabled.
gauth_auth = scipy.sparse.dok_matrix((n_auths, n_auths))
g_auth = {}

for paper in all_papers:
    (tag, title, authors, conf, pages, startPage, paper_year,
     volume, number, url, publtype, eb_toofew, eb_skip) = paper
    n = len(authors)

    # Resolve names once per paper; order is preserved so that pairs are
    # visited in the same sequence as a product over the raw author list.
    indexed_authors = [name_idx[person] for person in authors if person in name_idx]
    for edge in itertools.product(indexed_authors, repeat=2):
        g_auth[edge] = g_auth.get(edge, 0) + 1/n

# Bulk-load the accumulated weights into the sparse matrix.
gauth_auth.my_update(g_auth)

In [None]:
# Switch to CSR for the matrix products inside pg, then rank the authors.
gauth_auth = gauth_auth.tocsr()
pr = pg(gauth_auth, tol=1e-12, max_iter=100, verbose=True)
# Number of rows (nodes) in the author graph.
print(gauth_auth.shape[0])

In [None]:
# Print the top_k authors by PageRank, best first.
# The previous loop advanced its cursor before reading pr_s, so the
# highest-ranked author (pr_s[0]) was never shown, and it raised IndexError
# when there were not more than top_k authors. Slicing fixes both.
pr_s = np.argsort(pr)[::-1]
top_k = 100
for idx in pr_s[:top_k]:
    # A per-author filter (previously sketched with `ri_scores`) can be
    # re-added here with `continue`; it would then need its own counter.
    print(all_authors[idx],pr[idx])

In [None]:
# Authors whose scores are inspected individually.
curious_names = [
    "Aditya Dhawale",
    "Tesca Fitzgerald",
    "Adam W. Harley",
    "Xiaolong Wang 0004",
    "Judy Hoffman",
    "Paris Siminelakis",
    "Roie Levin",
    "Leonid Keselman",
    "Rick Goldstein",
    "Nicholas Rhinehart",
    "Vincent Sitzmann",
    "Siddharth Ancha",
    "Xingyu Lin",
    "Humphrey Hu",
    "David F. Fouhey",
    "Chelsea Finn",
    "Dinesh Jayaraman",
    "Wen Sun 0002",
    "Lerrel Pinto",
    "Justin Johnson",
    "Amir Roshan Zamir",
    "Dominik Peters",
    "Jonathan T. Barron",
    "Dorsa Sadigh",
    "Derek Hoiem",
    "Vaggos Chatziafratis",
    "Brian Okorn",
    "David Held",
]
# (score, name) pairs in ascending order; names missing from name_idx are skipped.
sorted(
    (pr[name_idx[person]], person)
    for person in curious_names
    if person in name_idx
)
    

In [None]:
# Persist the author scores. The context manager guarantees the file is
# flushed and closed (the bare open() call previously leaked the handle).
with open('new_pagerank_people.pkl','wb') as fp:
    pickle.dump(pr,fp)

In [None]:
import gc

# Drop the references to the author graph, its scores and the edge dict
# (the names stay bound, to None), then run a garbage-collection pass.
gauth_auth = pr = g_auth = None
gc.collect()

In [None]:
from collections import defaultdict

# For every indexed author, collect the set of venue indices they published in.
auth_confs = defaultdict(set)

for paper in all_papers:
    (tag, title, authors, conf, pages, startPage, paper_year,
     volume, number, url, publtype, eb_toofew, eb_skip) = paper
    n = len(authors)
    for person in authors:
        if person in name_idx:
            # Venue lookup stays inside the guard, so a venue missing from
            # conf_idx only matters when the paper has an indexed author.
            auth_confs[person].add(conf_idx[conf])


In [None]:
# Freeze the per-author venue sets into plain lists inside a regular dict.
auth_confs = {person: [*venue_ids] for person, venue_ids in auth_confs.items()}

In [None]:
import itertools

# Per author: a lazy iterator over all unordered venue pairs, including a
# venue paired with itself.
# NOTE: these iterators are single-use; once a later cell consumes them this
# cell has to be re-run before they can be iterated again.
auth_confs_iter = {}
for person, venue_ids in auth_confs.items():
    auth_confs_iter[person] = itertools.combinations_with_replacement(venue_ids, 2)

In [None]:
import itertools

# Venue co-occurrence graph: each author adds 1 to every pair of venues they
# published in; both orientations of a pair get the same count.
gconf_conf = scipy.sparse.dok_matrix((n_confs, n_confs))
dconf = {}
for venue_pairs in auth_confs_iter.values():
    for first, second in venue_pairs:
        count = dconf.get((first, second), 0) + 1
        dconf[(first, second)] = count
        if first != second:
            # Mirror the entry so the matrix stays symmetric.
            dconf[(second, first)] = count

# Bulk-load the accumulated counts into the sparse matrix.
gconf_conf.my_update(dconf)

In [None]:
# Switch to CSR for the matrix products inside pg, then rank the venues.
gconf_conf = gconf_conf.tocsr()
prc = pg(gconf_conf, tol=1e-12, max_iter=100, verbose=True)

In [None]:
# Print the top_k venues by PageRank, best first. Slicing (instead of a
# counter-driven while loop) also copes with fewer than top_k venues, where
# indexing prc_s[i] used to raise IndexError.
prc_s = np.argsort(prc)[::-1]
top_k = 100
for idx in prc_s[:top_k]:
    print(all_venues[idx],prc[idx])

In [None]:
# Persist the venue scores. The context manager guarantees the file is
# flushed and closed (the bare open() call previously leaked the handle).
with open('new_pagerank_conf.pkl','wb') as fp:
    pickle.dump(prc,fp)