In [1]:
using HTTP, JSON, PrettyTables, JLD, DotEnv,  Random, Dates
# Read the saved inputs, requesting each stored variable by name:
# the institution list, the academic subset of it, and the 2023 adjacency data.
institutions = load("institutions.jld", "institutions")
academic_list = load("academic_list.jld", "academic_list")
adjacency = load("adjacency_2023.jld", "out");

In [3]:
# Zero-initialised indicator matrices:
#   C has 12 rows (communities) and one column per institution;
#   T has one row per academic institution and 5 columns (tiers).
C = fill(Int32(0), 12, length(institutions))
T = fill(Int32(0), length(academic_list), 5);

In [4]:
# Fill in the hard-coded sinks: every institution listed after the academic ones
# gets the community stored for it in `est_alloc`.
est_alloc = load("est_alloc.jld")["est_alloc"];
foreach((length(academic_list) + 1):length(institutions)) do i
    C[est_alloc[i], i] = 1
end

In [6]:
# The sinks are already fixed, so start the search by putting every academic
# institution in community 1 (row 1 of C) and tier 1 (column 1 of T).
# After this, all academic institutions share the same tier.
T[1:length(academic_list), 1] .= 1;
C[1, 1:length(academic_list)] .= 1;

In [7]:
# Sanity check: each institution was given exactly one community above,
# so this total should equal length(institutions).
sum(C)

991

In [8]:
# Sanity check: each academic institution was placed in exactly one tier,
# so this total should equal length(academic_list).
sum(T)

412

In [9]:
# Row totals of C: the number of institutions assigned to each community (12×1).
c = sum(C, dims = 2)

12×1 Matrix{Int64}:
 412
   0
   0
   0
   0
  66
  69
 188
  50
   1
  13
 192

In [10]:
# Column totals of T: the number of academic institutions in each tier (1×5).
t = sum(T, dims=1)

1×5 Matrix{Int64}:
 412  0  0  0  0

In [11]:
# Aggregate adjacency by group: entry (k, r) is the sum of adjacency[i, j] over
# institutions i in community k and academic institutions j in tier r.
C*adjacency*T

12×5 Matrix{Int32}:
 650  0  0  0  0
   0  0  0  0  0
   0  0  0  0  0
   0  0  0  0  0
   0  0  0  0  0
 153  0  0  0  0
 146  0  0  0  0
  81  0  0  0  0
  12  0  0  0  0
 266  0  0  0  0
  52  0  0  0  0
 236  0  0  0  0

In [12]:
"""
    likelihood(adjacency, c, t)

Return the objective value for a grouped count matrix.

For every cell `(i, j)` of `adjacency` the term

    -adjacency[i, j] * (log(max(adjacency[i, j] / max(c[i] * t[j], 1), 0.0001)) - 1)

is accumulated. `best_alloc` keeps a candidate allocation only when this value
decreases.

# Arguments
- `adjacency`: matrix of aggregated counts; callers in this file pass `C * adjacency * T`.
  Any number of rows and columns is accepted.
- `c`: size of each row group, read as `c[i]` for every row `i` of `adjacency`
  (callers pass `sum(C, dims = 2)`).
- `t`: size of each column group, read as `t[j]` for every column `j` of `adjacency`
  (callers pass `sum(T, dims = 1)`).

# Returns
The accumulated value as a `Float64`.
"""
function likelihood(adjacency, c, t)
    l = 0.0
    # Rows in the outer loop, columns in the inner loop: the accumulation order is
    # kept fixed so floating-point results are reproducible.
    for i in axes(adjacency, 1), j in axes(adjacency, 2)
        count = adjacency[i, j]
        # An empty group would give a zero denominator; clamp it to 1.
        pairs = max(c[i] * t[j], 1)
        # Floor the rate so that log never sees 0 (such cells have count == 0 and
        # therefore contribute nothing anyway).
        rate = max(count / pairs, 0.0001)
        l += -count * (log(rate) - 1)
    end
    return l
end
    

likelihood (generic function with 1 method)

In [13]:
# Objective value of the starting allocation (everything academic in tier 1).
likelihood(C*adjacency*T, c, t)

9150.45365384631

In [14]:
# Total of all entries in the raw adjacency matrix.
sum(adjacency)

1596

In [15]:
"""
    best_alloc(C, T, adjacency, iterations = 10; rng = Random.default_rng())

Greedy random search over tier assignments.

Starting from the allocation `(C, T)`, each iteration picks one random row of `T`
(an academic institution) and one random column of `T` (a tier), moves that
institution to that tier, and keeps the move only if
`likelihood(C * adjacency * T, sum(C, dims = 2), sum(T, dims = 1))` strictly
decreases. A move updates both matrices: the institution's row of `T` and its
column of `C` are cleared and a single `1` is written at the chosen tier (tiers
share their numbering with the first `size(T, 2)` rows of `C`).

The inputs are never mutated; every candidate is built in fresh `Int32` matrices.

# Arguments
- `C`: community-by-institution indicator matrix.
- `T`: academic-institution-by-tier indicator matrix.
- `adjacency`: matrix such that `C * adjacency * T` is defined.
- `iterations`: number of random moves to try (default `10`).

# Keywords
- `rng`: random number generator used to draw the moves; pass a seeded generator
  for reproducible runs. Defaults to the global generator.

# Returns
A 3-element `Vector{Any}`: `[best_C, best_T, best_value]`.

# Throws
- `DimensionMismatch` if `C` has fewer rows than `T` has columns, or fewer columns
  than `T` has rows (a move could then not be recorded in `C`).
"""
function best_alloc(C, T, adjacency, iterations = 10; rng = Random.default_rng())
    if size(C, 1) < size(T, 2) || size(C, 2) < size(T, 1)
        throw(DimensionMismatch(
            "C is $(size(C)) but T is $(size(T)): C needs at least size(T, 2) rows " *
            "and size(T, 1) columns",
        ))
    end

    best_C, best_T = C, T
    best_l = likelihood(C * adjacency * T, sum(C, dims = 2), sum(T, dims = 1))
    println("Starting value: ", best_l)

    n = 0
    while n < iterations
        # Draw the institution first and the tier second (one move per iteration).
        inst = rand(rng, 1:size(T, 1))
        tier = rand(rng, 1:size(T, 2))

        # Candidate = current best with `inst` reassigned to `tier`.
        new_C = copyto!(zeros(Int32, size(C)), best_C)
        new_T = copyto!(zeros(Int32, size(T)), best_T)
        new_C[:, inst] .= 0
        new_C[tier, inst] = 1
        new_T[inst, :] .= 0
        new_T[inst, tier] = 1

        new_l = likelihood(
            new_C * adjacency * new_T, sum(new_C, dims = 2), sum(new_T, dims = 1)
        )

        # Strict improvement only: ties (e.g. a move to the same tier) are rejected.
        if new_l < best_l
            println("Iteration ", n, ": ", new_l)
            best_C, best_T, best_l = new_C, new_T, new_l
        end
        n += 1
    end

    return Any[best_C, best_T, best_l]
end
        

best_alloc (generic function with 2 methods)

In [16]:
# Try 1000 random single-institution moves from the starting allocation.
# sq holds [best C, best T, best objective value].
sq = best_alloc(C,T,adjacency, 1000);



Starting value: 9150.45365384631
Iteration 0: 9140.866538550988
Iteration 2: 9136.928173418903
Iteration 4: 9131.914034922876
Iteration 5: 9130.489943749297
Iteration 6: 9130.03755434833
Iteration 8: 9128.404913008028
Iteration 10: 9128.227724518169
Iteration 11: 9125.361634730876
Iteration 13: 9122.702860865034
Iteration 15: 9120.375220034939
Iteration 16: 9113.437336042405
Iteration 19: 9112.431394792751
Iteration 25: 9110.354130443748
Iteration 26: 9109.767515466114
Iteration 28: 9106.577122518202
Iteration 29: 9104.3217092057
Iteration 34: 9102.963990119182
Iteration 36: 9100.90047145319
Iteration 38: 9099.12904242737
Iteration 40: 9095.410312693444
Iteration 41: 9092.513173276599
Iteration 42: 9090.430975627663
Iteration 43: 9087.037376311573
Iteration 44: 9084.31634312318
Iteration 45: 9081.056759324112
Iteration 53: 9080.666937419319
Iteration 56: 9078.74471184605
Iteration 58: 9075.160235742185
Iteration 59: 9072.95705138202
Iteration 62: 9069.94805315333
Iteration 63: 9069.335

In [17]:
# Community-by-tier totals of adjacency under the allocation returned by the search.
sq[1]*adjacency*sq[2]

12×5 Matrix{Int32}:
 366  87  12  12  10
  44  32   2   2   1
   2   0   4   2   1
  30   1   2  12   0
   8   2   2   1  15
 115  27   5   6   0
 102  38   1   5   0
  45  12   5   5  14
   9   0   2   0   1
 164  22  36  20  24
  38  12   0   0   2
 150  46  16   6  18

In [18]:
# List the academic institutions that the search placed in tier 2.
# Row i of sq[2] and entry i of `institutions` refer to the same institution.
for i in 1:size(adjacency, 2)
    sq[2][i, 2] == 1 || continue
    println(institutions[i])
end

Columbia University
Duke University
McGill University
Purdue University
Rutgers, The State University of New Jersey
Stockholm University
Texas A&M University, College Station
Universidad Carlos III de Madrid
University of Bonn
University of California, Irvine
University of California, San Diego
University of Lausanne
University of Mannheim
University of Maryland
University of Pennsylvania
University of Pittsburgh
University of Texas at Austin
University of Virginia
University of Wisconsin, Madison
University of Zurich
Université Paris 1 Panthéon-Sorbonne
Vanderbilt University
Yale University


In [19]:
# Number of academic institutions in each tier after the search.
sum(sq[2], dims = 1)

1×5 Matrix{Int64}:
 206  23  69  59  55