In [1]:
import numpy as np
import json

In [2]:
# Load the email graph; the next cell reads its "emails" and "names" keys.
# The encoding is given explicitly so decoding does not depend on the
# platform's locale default.
with open("fauci-email-graph-2.json", encoding="utf-8") as f:
    data = json.load(f)

In [3]:
emails = data["emails"]
names = data["names"]

# Iteratively shrink the node set `keep`. In each pass an email contributes
# only if its sender is kept and it has at least one kept recipient and at
# least one kept cc. A node survives the pass only if it shows up as a sender,
# as a recipient, AND as a cc among the contributing emails. Passes repeat
# until the set stops changing or becomes empty.
#
# NOTE(review): the initial set spans len(names) + 1 ids, one more than
# range(len(names)). Presumably harmless because ids that never occur are
# dropped in the first pass -- confirm against the id scheme of the data file.
keep = set(range(len(names) + 1))
while True:
    # (sender, recipient, cc, weight) entries built for the current `keep`.
    inds = []
    all_s = []
    all_r = []
    all_c = []

    for chain in emails:
        for email in chain:
            s = email["sender"]
            if s not in keep:
                continue

            recipients = [r for r in email["recipients"] if r in keep]
            ccs = [c for c in email["cc"] if c in keep]

            # One entry per (recipient, cc) pair; skip emails with no pair.
            num_inds = len(recipients) * len(ccs)
            if num_inds == 0:
                continue

            all_s.append(s)
            all_r.extend(recipients)
            all_c.extend(ccs)

            # Each email spreads a total weight of 1 evenly over its pairs.
            for r in recipients:
                for c in ccs:
                    inds.append((s, r, c, 1.0 / num_inds))

    new_keep = set(all_s) & set(all_r) & set(all_c)
    no_updates = (new_keep == keep)
    keep = new_keep
    if not keep:
        # `inds` was built for the previous, larger node set. With nothing
        # left to keep, those entries would reference dropped ids, so clear
        # them to keep `inds` consistent with `keep`.
        inds = []
    if no_updates or not keep:
        break
print(len(keep), "nodes in SCC")

44 nodes in SCC


In [4]:
# Relabel the surviving node ids as contiguous positions 0..len(keep)-1,
# following the iteration order of `keep`, and rewrite the entries in `inds`
# in terms of those positions.
ordered_ids = list(keep)
clean_names = [names[node] for node in ordered_ids]
id_map = {node: pos for pos, node in enumerate(ordered_ids)}
rev_id_map = dict(enumerate(ordered_ids))
clean_inds = [
    (id_map[sender], id_map[recip], id_map[cc], weight)
    for sender, recip, cc, weight in inds
]

In [5]:
# Fixed-point iteration over the (sender, recipient, cc, weight) entries in
# `clean_inds`, producing one score per kept node in `x_curr`.

# Fixed seed so that Restart & Run All reproduces the same starting vector,
# and therefore the same iteration count and printed residual.
rng = np.random.default_rng(0)
x_curr = rng.random(len(keep))
x_curr /= np.sum(x_curr)  # scale the start vector to sum to 1
max_iters = 10000
tol = 1e-6
compute_hec = True  # if False, computes Z-eigenvector centrality

for i in range(max_iters):
    # x_next[c] = sum over entries (s, r, c, v) of v * x_curr[r] * x_curr[s]
    x_next = np.zeros(len(x_curr))
    for (s, r, c, v) in clean_inds:
        x_next[c] += v * x_curr[r] * x_curr[s]

    if compute_hec:
        # The update above is quadratic in x_curr; the square root brings it
        # back to degree one before normalizing.
        x_next = np.sqrt(x_next)

    x_next /= np.sum(x_next)
    # Stop once successive iterates differ by less than `tol` in 1-norm.
    diff = np.linalg.norm(x_next - x_curr, ord=1)
    x_curr = x_next
    if diff < tol:
        print("converged", i, diff)
        break
else:
    # Reached only when the loop ran out of iterations without a `break`.
    print("did not converge")

converged 20 9.106402192840708e-07


In [6]:
# Positions of the nodes sorted by descending score; print the full ranking
# with 1-based ranks.
sp = np.argsort(-x_curr)
for rank, ind in enumerate(sp, start=1):
    print(rank, clean_names[ind])

1 conrad, patricia
2 folkers, greg
3 billet, courtney
4 routh, jennifer
5 stover, kathy
6 marston, hilary
7 haskins, melinda
8 tabak, lawrence
9 fauci, anthony
10 mascola, john
11 antoniak, cynthia
12 lerner, andrea
13 lane, cliff
14 collins, francis
15 crawford, chase
16 burklow, john
17 pau, alice
18 myles, renate
19 selgrade, sara
20 schwetz, tara
21 goldner, shannah
22 cassetti, cristina
23 handley, gray
24 graham, barney
25 stecker, judy
26 zebley, kyle
27 kerr, lawrence
28 hall, bill
29 fine, amanda
30 grigsby, garrett
31 cetron, marty
32 fritz, craig
33 mellors, john w
34 lepore, loretta
35 harper, jill
36 johnson, martin
37 oakley, caitlin b
38 schneider, johanna
39 kadlec, robert
40 bright, rick
41 farrar, jeremey
42 galatas, kate
43 brennan, patrick
44 andersen, kristian


In [7]:
top_n = 10
# Print one table row for each of the `top_n` highest-scoring nodes:
# rank, name, and score to six decimals.
# NOTE(review): each row starts with a bare "textcolor". If these rows are
# meant to be pasted into LaTeX, the command presumably needs a leading
# backslash -- confirm before changing the emitted text.
for rank, ind in enumerate(sp[:top_n], start=1):
    name, score = clean_names[ind], x_curr[ind]
    print(f'textcolor{{LightGray}}{{{rank}}} & {name}, {score:.6f} \\\\')

textcolor{LightGray}{1} & conrad, patricia, 0.123106 \\
textcolor{LightGray}{2} & folkers, greg, 0.095163 \\
textcolor{LightGray}{3} & billet, courtney, 0.075963 \\
textcolor{LightGray}{4} & routh, jennifer, 0.064585 \\
textcolor{LightGray}{5} & stover, kathy, 0.061403 \\
textcolor{LightGray}{6} & marston, hilary, 0.055949 \\
textcolor{LightGray}{7} & haskins, melinda, 0.043622 \\
textcolor{LightGray}{8} & tabak, lawrence, 0.043443 \\
textcolor{LightGray}{9} & fauci, anthony, 0.037472 \\
textcolor{LightGray}{10} & mascola, john, 0.034682 \\


In [8]:
# Size summary: (number of kept nodes, number of entries in `clean_inds`).
(len(keep), len(clean_inds))

(44, 1413)