In [1]:
include("type_allocation_flexible.jl")

using Random, JSON

First we configure the settings:

In [2]:
# Settings for this run: fix the RNG seed and choose the range of years to analyse.
# YEAR_INTERVAL is passed to `SBM_flexible.get_builders` further down.
Random.seed!(0)            # for reproducibility: ensures random results are the same on script restart
YEAR_INTERVAL = 2003:2025; # change this to select the years of data to include in the analysis

In this example, we want the most flexibility with the sinks, so we use the largest dataset:

In [3]:
# Fetch the placement records from the EconJobMarket API.
# As the displayed result shows, this is a DefaultDict keyed by year string ("2003", "2005", ...),
# whose values are dictionaries of individual placement records.
to_from_by_year_api = SBM_flexible.fetch_data("https://support.econjobmarket.org/api/placement_data")

DataStructures.DefaultDict{Any, Any, UnionAll} with 23 entries:
  "2003" => Dict{Any, Any}("17827"=>Dict{String, Any}("to_shortname"=>"Economic…
  "2005" => Dict{Any, Any}("11382"=>Dict{String, Any}("to_shortname"=>"Economic…
  "2014" => Dict{Any, Any}("56926"=>Dict{String, Any}("to_shortname"=>"Business…
  "2018" => Dict{Any, Any}("37159"=>Dict{String, Any}("to_shortname"=>"Dallas o…
  "2020" => Dict{Any, Any}("45907"=>Dict{String, Any}("to_shortname"=>"Economic…
  "2010" => Dict{Any, Any}("5422"=>Dict{String, Any}("to_shortname"=>"Managemen…
  "2016" => Dict{Any, Any}("1886"=>Dict{String, Any}("to_shortname"=>"Cons Fina…
  "2019" => Dict{Any, Any}("43054"=>Dict{String, Any}("to_shortname"=>"Computer…
  "2004" => Dict{Any, Any}("24943"=>Dict{String, Any}("to_shortname"=>"Economic…
  "2007" => Dict{Any, Any}("24350"=>Dict{String, Any}("to_shortname"=>"Economic…
  "2023" => Dict{Any, Any}("57428"=>Dict{String, Any}("to_shortname"=>"Health P…
  "2017" => Dict{Any, Any}("27626"=>Dict{Stri

We can then sort the placements into academic placements and sink placements, and collect some initial department labels:

In [4]:
# Split the placements for the selected years into the collections used by the cells below.
# NOTE(review): the exact meaning of each returned value is defined in type_allocation_flexible.jl;
# of the six, only `academic`, `academic_to`, `academic_builder` and `rough_sink_builder`
# are used by the cells shown in this notebook.
# The trailing semicolon suppresses the cell output.
academic, academic_to, academic_builder, rough_sink_builder, institution_mapping, reverse_mapping = SBM_flexible.get_builders(to_from_by_year_api, YEAR_INTERVAL);

We also need to retrieve the type allocation. We can either obtain this from a JSON file:

In [5]:
# tier_data = JSON.parsefile(".estimates/id_to_type_api.json")

or from the API, which is what we will use here:

In [6]:
# Download the tier ("type") allocation from the API.
# As the displayed result shows, each entry is a dictionary with "name", "institution_id" and "type" keys.
tier_data = SBM_flexible.fetch_api("https://support.econjobmarket.org/api/academic_tiers")

839-element Vector{Any}:
 Dict{String, Any}("name" => "Harvard University", "institution_id" => 350, "type" => 1)
 Dict{String, Any}("name" => "COMSATS University Islamabad", "institution_id" => 1197, "type" => 5)
 Dict{String, Any}("name" => "Florida State University", "institution_id" => 324, "type" => 3)
 Dict{String, Any}("name" => "Thammasat University", "institution_id" => 3576, "type" => 5)
 Dict{String, Any}("name" => "Heinrich Heine University Dusseldorf", "institution_id" => 1258, "type" => 5)
 Dict{String, Any}("name" => "University of Portsmouth", "institution_id" => 1981, "type" => 5)
 Dict{String, Any}("name" => "Deakin University", "institution_id" => 1818, "type" => 4)
 Dict{String, Any}("name" => "Université de Picardie Jules Verne", "institution_id" => 3591, "type" => 5)
 Dict{String, Any}("name" => "Jinan University", "institution_id" => 902, "type" => 4)
 Dict{String, Any}("name" => "University of Iowa", "institution_id" => 1542, "type" => 3)
 Dict{String, Any}("nam

In [7]:
# Map every department name in the tier data to its assigned type.
# If a name occurs more than once, the last entry wins.
api_allocation = Dict{Any, Any}(entry["name"] => entry["type"] for entry in tier_data)

# One extra type beyond the highest tier, to accommodate departments with no type allocation.
NUMBER_OF_TYPES = maximum(values(api_allocation)) + 1

6

Here we can control which sinks we wish to display. Teaching universities must always be included:

In [8]:
# Sink of teaching universities: departments that hired an assistant professor
# but never graduated anyone (i.e. they do not graduate PhDs).
# This must be derived from the academic placements themselves, not from the
# pre-defined sink placements: keep every hiring department that never appears
# among the graduating departments.
teaching_universities = Set{Any}(
    dept_name for dept_name in academic_to if !(dept_name in academic)
);

The remaining sinks are more flexible:

In [9]:
# Every sink is a (label, members) tuple. Members are stored as
# (display name, placement record) pairs, where the display name is the hiring
# institution's name followed by the sink label in parentheses.
public_sector = ("Public Sector", Set())
private_sector = ("Private Sector", Set())
other_groups = ("Other Groups", Set())

# Further example sinks. Any sink selected in the loop below needs a tuple here
# and an entry in `sinks_to_include`.
postdocs = ("Postdocs", Set())
lecturers = ("Lecturers", Set())
other_academic = ("Other Academic", Set())

for outcome in rough_sink_builder
    recruiter = outcome["recruiter_type"]

    # The recruiter type takes precedence; the position type is only consulted
    # for placements that are not public, private, or other-group hires.
    target_sink = if recruiter == 5
        # government institution
        public_sector
    elseif recruiter in (6, 7)
        # private sector: for and not for profit
        private_sector
    elseif recruiter == 8
        # international organizations, think tanks, assorted
        other_groups
    elseif outcome["postype"] == 6
        # postdocs that are not in the above (i.e. academic; not public, private, or other)
        # please note that the included JSON does not contain postdocs; use the API
        postdocs
    elseif outcome["postype"] in (5, 7)
        # lecturers that are not in the above
        # please note that the included JSON does not contain lecturers; use the API
        lecturers
    else
        # everything else, including terminal academic positions;
        # this sink can only be the final fall-through case
        other_academic
    end

    # Single insertion point: tag the hiring institution with the sink label.
    push!(target_sink[2], (string(outcome["to_name"], " ($(target_sink[1]))"), outcome))
end

# Sorted copies give a consistent institution ordering between runs.
academic_list = sort!(collect(academic))
teaching_list = sort!(collect(teaching_universities))
sinks_to_include = (public_sector, private_sector, other_groups, postdocs, lecturers, other_academic)

sink_builder, sinks, sink_labels = SBM_flexible.build_sinks(sinks_to_include, teaching_list)

NUMBER_OF_SINKS = length(sink_labels)
numtotal = NUMBER_OF_TYPES + NUMBER_OF_SINKS

# Academic departments come first, followed by the members of each sink in order.
institutions = vcat(academic_list, sinks...)
println("$(length(academic_list)) academic departments, $(length(institutions)) total departments")

Including the following sinks:
 Public Sector
 Private Sector
 Other Groups
 Postdocs
 Lecturers
 Other Academic
 Teaching Universities
Total 7 sinks
870 academic departments, 3647 total departments


We also need the adjacency matrix:

In [10]:
# Sanity check: combined number of academic and sink placement records.
# This is expected to match the placement total reported when the adjacency matrix is built.
length(academic_builder) + length(sink_builder)

17364

In [11]:
# Build the adjacency matrix from the academic and sink placement records.
# The call prints its own summary of the placement counts it found;
# the trailing semicolon suppresses display of the matrix itself.
out = SBM_flexible.get_adjacency(academic_list, institutions, academic_builder, sink_builder);

Total 17364 Placements (found 17364 by sequence counting, 17364 by matrix sum)


In [12]:
# Total of all entries in the adjacency matrix; should equal the number of placements.
sum(out)

17364

We also need the assembled mapping between institution names and types:

In [13]:
# Build `sorted_allocation`: the type of every institution, aligned index-for-index
# with `institutions` (academic departments first, then the members of each sink).
#
# This is written without a running index that is reassigned inside a top-level
# loop. Such a reassignment only works under the notebook/REPL soft-scope rules and
# raises an UndefVarError when the same code is run as a plain .jl script.

# Academic departments: use the API tier when the name is known,
# otherwise collect the department in the last ("missing") type.
academic_types = Int32[
    get(api_allocation, institution_name, NUMBER_OF_TYPES)
    for institution_name in academic_list
]

# Sinks must stay in fixed types: the i-th sink is assigned type NUMBER_OF_TYPES + i,
# repeated once for every member of that sink.
sink_types = Int32[]
for (i, sink_list) in enumerate(sinks)
    append!(sink_types, fill(Int32(NUMBER_OF_TYPES + i), length(sink_list)))
end

sorted_allocation = vcat(academic_types, sink_types)

# Guard against `institutions` having been rebuilt from different lists.
if length(sorted_allocation) != length(institutions)
    throw(DimensionMismatch(
        "type allocation has $(length(sorted_allocation)) entries " *
        "but there are $(length(institutions)) institutions"
    ))
end

This allows us to compute the results:

In [14]:
# Compute the results from the type allocation and the adjacency matrix.
# As the displayed result shows, the call returns two Int32 matrices and a scalar.
placement_rates, counts, full_likelihood = SBM_flexible.bucket_extract(sorted_allocation, out, NUMBER_OF_TYPES, numtotal)

(Int32[770 119 … 4 2; 709 373 … 15 10; … ; 254 280 … 214 21; 374 523 … 155 21], Int32[289 697 … 7718 544; 697 1681 … 18614 1312; … ; 9758 23534 … 260596 18368; 12121 29233 … 323702 22816], -81206.13779041327)

In [15]:
# Display the first matrix returned by `bucket_extract`.
# It is 13×6 here, which matches numtotal (6 types + 7 sinks) rows by NUMBER_OF_TYPES columns.
placement_rates

13×6 Matrix{Int32}:
 770  119   46   15    4   2
 709  373  121   45   15  10
 706  660  336  110   25  27
 493  702  422  242   44  20
  28   76   85   78  104   5
  18    7    6    2    3   5
 591  491  303  139   58  12
 768  449  262  127   48   7
 201  160  108   59   28   3
 654  689  690  495  293  47
 149  229  204  230  151  12
 254  280  309  294  214  21
 374  523  509  320  155  21

In [16]:
# Element-wise ratio of the two matrices returned by `bucket_extract`.
placement_rates ./ counts # means

13×6 Matrix{Float64}:
 2.66436     0.170732    0.0267909   0.00392157   0.000518269  0.00367647
 1.01722     0.221892    0.02922     0.00487805   0.000805845  0.00762195
 0.411182    0.159382    0.0329379   0.00484048   0.000545209  0.00835396
 0.128889    0.0760976   0.0185699   0.00478025   0.000430739  0.00277778
 0.00362788  0.00408295  0.00185371  0.000763583  0.00050457   0.000344163
 0.0330882   0.00533537  0.00185644  0.000277778  0.000206498  0.00488281
 0.203302    0.0700328   0.0175439   0.00361274   0.000747095  0.00219298
 0.187454    0.0454407   0.0107637   0.00234209   0.000438701  0.000907676
 0.257033    0.0848356   0.0232458   0.00570048   0.00134074   0.00203804
 0.0649841   0.0283866   0.01154     0.00371622   0.00109016   0.002481
 0.0199198   0.012694    0.00459046  0.00232323   0.000755907  0.000852273
 0.0260299   0.0118977   0.00532998  0.00227642   0.000821194  0.00114329
 0.0308555   0.0178907   0.00706817  0.0019947    0.000478835  0.000920407

In [17]:
# Third value returned by `bucket_extract`.
# If departments listed as missing below actually belong in higher tiers, this metric may not be useful.
# To fix that, filter the academic placements in the API data to only the institutions in tier_data,
# or create a new tier_data type allocation containing all academic institutions.
full_likelihood

-81206.13779041327

In [18]:
# Render the matrix as a labelled table with row totals.
# NOTE(review): `has_unassigned = true` presumably labels the last type as "Missing",
# as seen in the rendered output; confirm against nice_table's definition.
SBM_flexible.nice_table(placement_rates, NUMBER_OF_TYPES, NUMBER_OF_SINKS, sink_labels; has_unassigned = true)

┌───────────────────────┬────────┬────────┬────────┬────────┬────────┬─────────┬────────────┐
│                       │ Tier 1 │ Tier 2 │ Tier 3 │ Tier 4 │ Tier 5 │ Missing │ Row Totals │
├───────────────────────┼────────┼────────┼────────┼────────┼────────┼─────────┼────────────┤
│                Tier 1 │    770 │    119 │     46 │     15 │      4 │       2 │        956 │
│                Tier 2 │    709 │    373 │    121 │     45 │     15 │      10 │       1273 │
│                Tier 3 │    706 │    660 │    336 │    110 │     25 │      27 │       1864 │
│                Tier 4 │    493 │    702 │    422 │    242 │     44 │      20 │       1923 │
│                Tier 5 │     28 │     76 │     85 │     78 │    104 │       5 │        376 │
│               Missing │     18 │      7 │      6 │      2 │      3 │       5 │         41 │
│         Public Sector │    591 │

In [19]:
# List the institutions assigned to each academic type, alphabetically,
# followed by the size of the type. The last type holds the academic
# departments that have no tier allocation.
for sorted_type in 1:NUMBER_OF_TYPES
    heading = if sorted_type == NUMBER_OF_TYPES
        "MISSING (Unassigned Academic Departments):"
    else
        "TYPE $sorted_type:"
    end
    println(heading)

    # Positions in `sorted_allocation` line up with positions in `institutions`.
    members = institutions[findall(==(sorted_type), sorted_allocation)]
    foreach(inst -> println("  ", inst), sort(members))

    println("Total Institutions: $(length(members))")
    println()
end

TYPE 1:
  Columbia University
  Duke University
  Harvard University
  London School of Economics and Political Science
  Massachusetts Institute of Technology
  New York University
  Northwestern University
  Princeton University
  Stanford University
  University of California Los Angeles (UCLA)
  University of California, Berkeley
  University of Chicago
  University of Maryland
  University of Michigan
  University of Pennsylvania
  University of Wisconsin, Madison
  Yale University
Total Institutions: 17

TYPE 2:
  Arizona State University
  Bocconi University
  Boston College
  Boston University
  Brown University
  Carnegie Mellon University
  Cornell University
  Erasmus University Rotterdam
  European University Institute
  Johns Hopkins University
  Maastricht University
  Michigan State University
  National University of Singapore
  Ohio State University
  Pennsylvania State University
  Purdue University
  Stockholm School of Economics
  Texas A&M University, College Stati