In [None]:
import os
import sys
import fnmatch
import numpy as np
import pandas as pd
import json
import gzip
import pickle
import csv
import scipy.sparse
Xauth = None
from collections import defaultdict
import matplotlib.pyplot as plt

In [None]:
# Expose a bulk "update from dict" method on dok_matrix as `my_update`.
# Depending on the installed scipy, the public `update` may refuse to run and
# the private `_update` may or may not exist, so each candidate is probed on a
# scratch matrix and the first one that really stores the value is used.
def _dok_update_by_item(self, data):
    """Fallback for ``my_update``: store each ``(row, col) -> value`` pair by item assignment."""
    for key, value in data.items():
        self[key] = value


for _method_name in ('update', '_update'):
    a = scipy.sparse.dok_matrix((10,10))
    try:
        getattr(a, _method_name)({(0,0):1.0})
        # A call that "succeeds" but does not store the value is useless too.
        _probe_ok = a[0,0] == 1.0
    except Exception:
        # Missing attribute or a deliberate refusal: try the next candidate.
        # `Exception` (not a bare `except`) so Ctrl-C / SystemExit still propagate.
        continue
    if _probe_ok:
        scipy.sparse.dok_matrix.my_update = getattr(scipy.sparse.dok_matrix, _method_name)
        break
else:
    # Neither candidate is usable on this scipy: assign entries one at a time.
    scipy.sparse.dok_matrix.my_update = _dok_update_by_item

In [None]:
def _load_gz_pickle(path):
    """Unpickle and return the object stored in the gzip-compressed file at `path`."""
    with gzip.open(path,'rb') as fp:
        return pickle.load(fp)


# Venue list, author list and paper records produced by an earlier step.
all_venues = _load_gz_pickle('useful_venue_list.pkl.gz')
all_authors = _load_gz_pickle('useful_authors_list.pkl.gz')
all_papers = _load_gz_pickle('useful_papers.pkl.gz')


In [None]:
# Field 6 of a paper record is its year (see the 13-field unpacking used in
# later cells). Taking the first/last record assumes all_papers is ordered by
# year -- TODO confirm.
first_paper, last_paper = all_papers[0], all_papers[-1]
min_year, max_year = first_paper[6], last_paper[6]
span_years = max_year - min_year + 1
print(min_year,max_year,span_years)

# Collection sizes.
n_confs = len(all_venues)
n_auths = len(all_authors)
n_papers = len(all_papers)

# Lookup tables: venue name -> position in all_venues, author name -> position in all_authors.
conf_idx = {venue: pos for pos, venue in enumerate(all_venues)}
name_idx = {author: pos for pos, author in enumerate(all_authors)}
print(n_confs,n_auths,n_papers)

In [None]:
# Array of total scores loaded from disk; a later cell indexes it with
# name_idx[<author name>], i.e. by position in all_authors.
total_scores = np.load('total.npy')

In [None]:
# Weight vector loaded from disk. Later cells index it as
# clf[years_per_conf*conf_idx[venue] + year_block], i.e. one run of
# `years_per_conf` entries per venue.
clf = np.load('clf_gold.pkl.npy')
n_clf_entries = clf.shape[0]
# Entries per venue, and how many calendar years each entry (block) covers.
years_per_conf = n_clf_entries // n_confs
YEAR_BLOCKS = span_years // years_per_conf

In [None]:
# Every distinct author-count that occurs in the corpus
# (the author list is field 2 of a paper record).
valid_ns = {len(paper[2]) for paper in all_papers}


def _harmonic_author_weights(n_authors):
    """Return weights proportional to 1, 1/2, ..., 1/n_authors, normalised to sum to 1."""
    raw = 1/np.arange(1, n_authors + 1)
    return raw/raw.sum()


# author count -> per-position credit share (first author gets the largest share).
per_author_val = {n_authors: _harmonic_author_weights(n_authors) for n_authors in valid_ns}

In [None]:
# Faculty name/affiliation table.
faculty_affil = pd.read_csv('faculty-affiliations.csv')

# CMU faculty list, missing values filled with 'Other', restricted to dept == 'RI'.
cmu_faculty_all = pd.read_csv('other_ranks/cmu_faculty.csv').fillna('Other')
cmu_uni = cmu_faculty_all[cmu_faculty_all.dept == 'RI']

# Alternative selections of the "focus" names; only the last assignment is in effect.
#uni_names = list(faculty_affil[faculty_affil.affiliation == 'Stanford University'].name)
uni_names = set(cmu_uni.name)
uni_names = ['Richard Newcombe']

In [None]:
from collections import Counter,defaultdict
def di():
    """Return a fresh ``defaultdict`` whose missing keys default to ``0.0``."""
    return defaultdict(float)

# For every paper that has at least one author in `uni_names`, credit each of
# its authors with (position share) * (venue weight for that year block),
# accumulated into a per-author vector with one slot per year block.
author_by_year = {}
for paper in all_papers:
    tag,title, authors, venue, pages, startPage,year,volume,number,url,publtype,eb_toofew,eb_skip = paper
    if not any(person in uni_names for person in authors):
        continue
    n = len(authors)
    year_block = (year-min_year)//YEAR_BLOCKS
    venue_weight = clf[years_per_conf*conf_idx[venue] + year_block]
    for a,c in zip(authors,per_author_val[n]):
        nv = author_by_year.get(a)
        if nv is None:
            # First time this author is seen: start from an all-zero vector.
            nv = np.zeros(years_per_conf)
        nv[year_block] += c*venue_weight
        author_by_year[a] = nv

In [None]:
# Import from `scipy.ndimage` directly: the `scipy.ndimage.filters` namespace
# is deprecated, while this import path works on old and new scipy alike.
from scipy.ndimage import gaussian_filter1d

# Standard deviation (in year blocks) of the Gaussian used for smoothing.
SMOOTHING_SIGMA = 1.4

# Smooth each author's per-block curve; mode='nearest' extends the edge
# values so the ends of the curve are not pulled toward zero.
smoothed_auth = {
    author: gaussian_filter1d(curve, SMOOTHING_SIGMA, mode='nearest')
    for author, curve in author_by_year.items()
}


In [None]:
# One (peak value, first year of the peak block, author) tuple per author,
# ordered from the highest peak downwards.
max_auth_per_year = []
for author, curve in smoothed_auth.items():
    peak_block = np.argmax(curve)
    max_auth_per_year.append((max(curve), peak_block*YEAR_BLOCKS+min_year, author))
max_auth_per_year.sort(reverse=True)


In [None]:
affil_set = set(faculty_affil.name)

# Faculty whose name appears in name_idx both bare and with a ' 0001' suffix
# (presumably a name-disambiguation suffix -- TODO confirm), together with the
# score stored for the bare name.
peeps = [
    (total_scores[name_idx[name]], name, affil)
    for _row_id, name, affil in faculty_affil.itertuples()
    if name + ' 0001' in name_idx and name in name_idx
]

# Displayed cell value: the ranked list plus one hard-coded author's score.
(sorted(peeps,reverse=True), total_scores[name_idx['Zhao Wang 0004']])

In [None]:
# Print the (up to) 180 highest-peaking authors.
#   '.'    prefix: the name also exists in name_idx with a ' 0001' suffix.
#   'woot' prefix: the name is listed in the faculty affiliation table.
# Membership is tested against the values of the `name` column: `x in
# faculty_affil` on the DataFrame itself would only look at column labels.
faculty_names = set(faculty_affil.name)
for entry in max_auth_per_year[:180]:
    author = entry[2]
    if author + ' 0001' in name_idx:
        print('.',end='')
    if author in faculty_names:
        print('woot',end='\t')
    print(entry)

In [None]:
#plt.plot(smoothed_auth['Sergey Levine'],label="levine")
#plt.plot(smoothed_auth['Pieter Abbeel'],label='Abbeel')

In [None]:
# Plot the smoothed per-block value curves of the top co-authors of uni_names[0].
plt.figure(figsize=(20,8))
import cycler
import matplotlib as mpl
mpl.rcParams.update({'font.size': 22})
n = 20  # maximum number of curves to draw (tab20 provides 20 colours)
color = plt.cm.tab20(np.linspace(0, 1,n))
mpl.rcParams['axes.prop_cycle'] = cycler.cycler('color', color)

excluded_names = set(uni_names)  # the focus people themselves are not plotted
j = 0  # number of curves drawn so far
# Iterate over the ranked list directly so that running out of candidates
# simply ends the loop instead of indexing past the end of the list.
for peak_value, peak_year, name in max_auth_per_year:
    if j >= n:
        break
    if name + ' 0001' in name_idx:
        # Skip names that also exist with a ' 0001' suffix in name_idx.
        continue
    if name in excluded_names:
        continue
    series = smoothed_auth[name]
    # First calendar year of each block; built from the series length so x and
    # y always match, using the same block*YEAR_BLOCKS+min_year convention as
    # the peak-year computation.
    block_start_years = min_year + YEAR_BLOCKS*np.arange(len(series))
    plt.plot(block_start_years,series,label='{:25s}({:d})'.format(name,peak_year),lw=6,alpha=0.9)
    j+=1
plt.legend(loc=3,fancybox=True, framealpha=0.8, borderpad=1,frameon=True,markerfirst=True,prop={'stretch':0,'size':15,'family': 'monospace','weight':500})
plt.title('Value from being a {} co-author'.format(uni_names[0]),size=20)
plt.tight_layout()
plt.xlim(1995,2020)
plt.ylim(bottom=0)
# Output file is named after the lower-cased first word of the focus name.
plt.savefig('{}.png'.format(uni_names[0].split()[0].lower()),edgecolor='w',facecolor='w')


In [None]:
# Ad-hoc check: is this exact author name a key of name_idx (i.e. present in all_authors)?
'Olivier Faugeras' in name_idx