In [1]:
include("type_allocation_flexible.jl")

using Random

First we configure the settings:

In [2]:
# Global settings read by the cells below.
Random.seed!(0)            # for reproducibility: ensures random results are the same on script restart
YEAR_INTERVAL = 2019:2021  # range of placement years passed to SBM_flexible.get_builders; change to select the years of data to include in the estimation
NUMBER_OF_TYPES = 4        # number of types (shown as "Tier" rows/columns in the results table) to classify academic departments into

4

We also need to collect the placements data. You can either use a JSON file in your current directory:

In [3]:
# Load the raw placements from a JSON file in the current directory.
# The argument is not an http/https URL, so it is read as a file path.
to_from_by_year = SBM_flexible.fetch_data("to_from_by_year.json")

Dict{String, Any} with 19 entries:
  "2003" => Dict{String, Any}("17827"=>Dict{String, Any}("to_shortname"=>"Econo…
  "2005" => Dict{String, Any}("41691"=>Dict{String, Any}("to_shortname"=>"Compt…
  "2014" => Dict{String, Any}("20524"=>Dict{String, Any}("to_shortname"=>"Schoo…
  "2018" => Dict{String, Any}("37159"=>Dict{String, Any}("to_shortname"=>"Dalla…
  "2020" => Dict{String, Any}("45907"=>Dict{String, Any}("to_shortname"=>"Econo…
  "2010" => Dict{String, Any}("5422"=>Dict{String, Any}("to_shortname"=>"Manage…
  "2016" => Dict{String, Any}("29795"=>Dict{String, Any}("to_shortname"=>"Corne…
  "2019" => Dict{String, Any}("42218"=>Dict{String, Any}("to_shortname"=>"Econo…
  "2004" => Dict{String, Any}("24943"=>Dict{String, Any}("to_shortname"=>"Econo…
  "2007" => Dict{String, Any}("20070"=>Dict{String, Any}("to_shortname"=>"Econo…
  "2017" => Dict{String, Any}("27626"=>Dict{String, Any}("to_shortname"=>"Econo…
  "2021" => Dict{String, Any}("45950"=>Dict{String, Any}("to_shortname"=>"

Or you can call the API:

In [4]:
# Fetch the raw placements from the placement_data API endpoint.
# An http/https argument is treated as an API endpoint rather than a file path.
to_from_by_year_api = SBM_flexible.fetch_data("https://support.econjobmarket.org/api/placement_data")

DataStructures.DefaultDict{Any, Any, UnionAll} with 23 entries:
  "2003" => Dict{Any, Any}("17827"=>Dict{String, Any}("to_shortname"=>"Economic…
  "2005" => Dict{Any, Any}("11382"=>Dict{String, Any}("to_shortname"=>"Economic…
  "2014" => Dict{Any, Any}("56926"=>Dict{String, Any}("to_shortname"=>"Business…
  "2018" => Dict{Any, Any}("37159"=>Dict{String, Any}("to_shortname"=>"Dallas o…
  "2020" => Dict{Any, Any}("45907"=>Dict{String, Any}("to_shortname"=>"Economic…
  "2010" => Dict{Any, Any}("5422"=>Dict{String, Any}("to_shortname"=>"Managemen…
  "2016" => Dict{Any, Any}("1886"=>Dict{String, Any}("to_shortname"=>"Cons Fina…
  "2019" => Dict{Any, Any}("43054"=>Dict{String, Any}("to_shortname"=>"Computer…
  "2004" => Dict{Any, Any}("24943"=>Dict{String, Any}("to_shortname"=>"Economic…
  "2007" => Dict{Any, Any}("24350"=>Dict{String, Any}("to_shortname"=>"Economic…
  "2023" => Dict{Any, Any}("57428"=>Dict{String, Any}("to_shortname"=>"Health P…
  "2017" => Dict{Any, Any}("27626"=>Dict{Stri

`fetch_data` recognizes API endpoints automatically: any argument that is an `http` or `https` URL is treated as an endpoint, which must return a raw list of placements with a `year` field. Any other argument is treated as the path to a `json` file.

Using the raw placements, we can sort them into academic and sink placements, as well as collect some labels:

In [5]:
# Sort the raw placements for YEAR_INTERVAL into academic and sink placements and collect labels.
# In the cells below, `academic` and `academic_to` are used as collections of department names,
# `academic_builder` and `rough_sink_builder` as collections of placement records;
# `institution_mapping` is not used in the cells shown here.
academic, academic_to, academic_builder, rough_sink_builder, institution_mapping = SBM_flexible.get_builders(to_from_by_year_api, YEAR_INTERVAL);

The set of sinks to include is largely up to you. The one exception is teaching universities, which must always be included:

In [6]:
# Teaching universities: departments that hired an assistant professor but never
# graduated anyone, i.e. hiring departments (`academic_to`) that are absent from `academic`.
# This sink must be derived from the academic placements, not from the pre-defined
# sink placements.
teaching_universities = Set{Any}(
    dept_name for dept_name in academic_to if !(dept_name in academic)
);

The rest are built by standardized `if` statements:

In [7]:
# Each sink is a (display name, set of placements) pair. The set is filled below with
# (labelled institution name, outcome) tuples.
public_sector = ("Public Sector", Set())
private_sector = ("Private Sector", Set())
other_groups = ("Other Groups", Set())

postdocs = ("Postdocs", Set())
lecturers = ("Lecturers", Set())
other_academic = ("Other Academic", Set())

# Assign every non-academic outcome to exactly one sink. The first matching rule wins,
# so the position-type rules only see outcomes not already claimed by a recruiter-type rule.
# Every rule added here needs a matching sink tuple above and an entry in sinks_to_include.
for outcome in rough_sink_builder
    recruiter = outcome["recruiter_type"]
    sink = if recruiter == 5
        # government institution
        public_sector
    elseif recruiter in (6, 7)
        # private sector: for and not for profit
        private_sector
    elseif recruiter == 8
        # international organizations, think tanks, assorted
        other_groups
    elseif outcome["postype"] == 6
        # postdocs not covered above (i.e. academic; not public, private, or other)
        # note: the included JSON does not contain postdocs; use the API
        postdocs
    elseif outcome["postype"] in (5, 7)
        # lecturers not covered above
        # note: the included JSON does not contain lecturers; use the API
        lecturers
    else
        # everything else, including terminal academic positions;
        # this sink can only be expressed as the final fallback
        other_academic
    end

    # the sink name is appended to the institution name before it is stored
    label, members = sink
    push!(members, (string(outcome["to_name"], " ($label)"), outcome))
end

# Sorted vectors keep the institution ordering consistent from run to run.
academic_list = sort(collect(academic))
teaching_list = sort(collect(teaching_universities))

# Sinks to include, in the order they are appended to `sinks` and `sink_labels`.
sinks_to_include = (
    other_groups, public_sector, private_sector, postdocs, lecturers, other_academic,
)

sink_builder = []   # every (labelled name, outcome) pair from the included sinks
sinks = []          # one sorted vector of labelled institution names per sink
sink_labels = []    # sink display names, parallel to `sinks`

println("Including the following sinks:")
for (label, placements) in sinks_to_include
    println(" ", label)
    append!(sink_builder, placements)
    # keep each name once: the same institution can appear in several placements
    labelled_names = unique!(Any[name for (name, _) in placements])
    push!(sinks, sort!(labelled_names))
    push!(sink_labels, label)
end
println(" Teaching Universities")

# teaching_list must always be included; it is the last sink
push!(sinks, teaching_list)
push!(sink_labels, "Teaching Universities")

NUMBER_OF_SINKS = length(sink_labels)
numtotal = NUMBER_OF_TYPES + NUMBER_OF_SINKS
println("Total $(NUMBER_OF_SINKS) sinks")

# academic departments first, then the members of each sink in sink order
institutions = vcat(academic_list, sinks...)
println("$(length(academic_list)) academic departments, $(length(institutions)) total departments")

Including the following sinks:
 Other Groups
 Public Sector
 Private Sector
 Postdocs
 Lecturers
 Other Academic
 Teaching Universities
Total 7 sinks
681 academic departments, 2293 total departments


Next, the adjacency matrix:

In [8]:
# Number of academic plus sink placement records; expected to match the total reported by get_adjacency.
length(academic_builder) + length(sink_builder)

5588

In [9]:
# Build the placement adjacency matrix from the academic and sink placement records.
# The call prints a consistency check of the placement totals.
out = SBM_flexible.get_adjacency(academic_list, institutions, academic_builder, sink_builder);

Total 5588 Placements (found 5588 by sequence counting, 5588 by matrix sum)


In [10]:
# Sanity check: the matrix entries should sum to the total number of placements.
sum(out)

5588

We are now ready to run the SBM itself:

In [11]:
# Run the SBM estimation and report its run time via @time.
# Arguments: adjacency matrix, number of academic departments, number of institutions in
# each sink, number of types, number of types plus sinks, and a value that grows with
# NUMBER_OF_TYPES -- presumably an iteration/search budget; TODO confirm against SBM_flexible.doit.
# NOTE(review): est_obj looks like the attained objective value and est_alloc the per-institution
# type assignment (an Int32 vector in the output below) -- confirm against SBM_flexible.doit.
@time est_obj, est_alloc = SBM_flexible.doit(out, length(academic_list), [length(s) for s in sinks], NUMBER_OF_TYPES, numtotal, 500 * (NUMBER_OF_TYPES-2) + 1000)

132.766982 seconds (389.54 k allocations: 20.298 MiB, 0.19% compilation time)


(25307.5182131608, Int32[2, 1, 1, 1, 4, 1, 2, 4, 1, 4  …  11, 11, 11, 11, 11, 11, 11, 11, 11, 11])

In [12]:
# Summarize the estimated allocation. In the output below, placement_rates and counts are
# integer matrices with numtotal rows and NUMBER_OF_TYPES columns, sorted_allocation is a
# per-institution type vector, and full_likelihood is a scalar.
# NOTE(review): sorted_allocation appears to be est_alloc with the type labels renumbered -- confirm
# the ordering rule against SBM_flexible.get_allocation.
placement_rates, counts, sorted_allocation, full_likelihood = SBM_flexible.get_allocation(est_alloc, out, NUMBER_OF_TYPES, numtotal, institutions)

(Int32[314 71 12 0; 525 366 26 20; … ; 123 206 87 95; 252 316 87 71], Int32[1849 7740 10406 9288; 7740 32400 43560 38880; … ; 13244 55440 74536 66528; 17458 73080 98252 87696], Int32[3, 2, 2, 2, 4, 2, 3, 4, 2, 4  …  11, 11, 11, 11, 11, 11, 11, 11, 11, 11], -31377.249971524267)

In [13]:
# Show the matrix: one row per type followed by one row per sink, one column per type.
placement_rates

11×4 Matrix{Int32}:
 314   71   12   0
 525  366   26  20
  61   82   51   0
  10   28    0  37
  97   71   29   0
 256  196   25  36
 340  182   37   6
 435  453  243  22
  65  139   19  97
 123  206   87  95
 252  316   87  71

In [14]:
# Elementwise ratio of the two matrices. Despite its name, placement_rates holds integer
# counts (Matrix{Int32}); this quotient is the Float64 rate. Any zero entry in counts
# would produce NaN or Inf in the result.
placement_rates ./ counts

11×4 Matrix{Float64}:
 0.169822    0.00917313   0.00115318   0.0
 0.0678295   0.0112963    0.000596878  0.000514403
 0.005862    0.00188246   0.000870842  0.0
 0.00107666  0.000720165  0.0          0.000793038
 0.0777867   0.0136015    0.00413223   0.0
 0.0504533   0.00922787   0.000875473  0.00141243
 0.053789    0.00687831   0.00104009   0.000188964
 0.0257412   0.00640373   0.00255504   0.000259165
 0.00716411  0.00365982   0.000372097  0.00212831
 0.00928722  0.00371573   0.00116722   0.00142797
 0.0144346   0.00432403   0.000885478  0.000809615

In [15]:
# Show the likelihood value returned by get_allocation.
full_likelihood

-31377.249971524267

In [16]:
# Render placement_rates as a formatted table with tier and sink row labels and a row-totals column.
SBM_flexible.nice_table(placement_rates, NUMBER_OF_TYPES, NUMBER_OF_SINKS, sink_labels)

┌───────────────────────┬────────┬────────┬────────┬────────┬────────────┐
│[1m                       [0m│[1m Tier 1 [0m│[1m Tier 2 [0m│[1m Tier 3 [0m│[1m Tier 4 [0m│[1m Row Totals [0m│
├───────────────────────┼────────┼────────┼────────┼────────┼────────────┤
│[1m                Tier 1 [0m│    314 │     71 │     12 │      0 │        397 │
│[1m                Tier 2 [0m│    525 │    366 │     26 │     20 │        937 │
│[1m                Tier 3 [0m│     61 │     82 │     51 │      0 │        194 │
│[1m                Tier 4 [0m│     10 │     28 │      0 │     37 │         75 │
│[1m          Other Groups [0m│     97 │     71 │     29 │      0 │        197 │
│[1m         Public Sector [0m│    256 │    196 │     25 │     36 │        513 │
│[1m        Private Sector [0m│    340 │    182 │     37 │      6 │        565 │
│[1m              Postdocs [0m│    435 │    453 │    243 │     22 │       1153 │
│[1m             Lecturers [0m│     65 │    139 │     19 │   

In [17]:
# For each academic type, print its member institutions in sorted order and then the
# number of members. Sink types (numbered above NUMBER_OF_TYPES) are not listed.
for tier in 1:NUMBER_OF_TYPES
    println("TYPE $tier:")
    members = Any[
        institutions[idx] for
        (idx, assigned) in enumerate(sorted_allocation) if assigned == tier
    ]
    foreach(name -> println("  ", name), sort(members))
    println("Total Institutions: $(length(members))")
    println()
end

TYPE 1:
  Bocconi University
  Boston University
  Brown University
  Columbia University
  Cornell University
  Duke University
  Harvard University
  London School of Economics and Political Science
  Massachusetts Institute of Technology
  Michigan State University
  New York University
  Northwestern University
  Ohio State University
  Pennsylvania State University
  Princeton University
  Purdue University
  Stanford University
  Stockholm University
  Texas A&M University, College Station
  Tilburg University
  University College London
  University of British Columbia
  University of California Los Angeles (UCLA)
  University of California, Berkeley
  University of California, Davis
  University of California, San Diego
  University of Chicago
  University of Illinois at Urbana-Champaign
  University of Maryland
  University of Michigan
  University of Minnesota, Twin Cities
  University of Oxford
  University of Pennsylvania
  University of Rochester
  University of Southern C