In [1]:
using HTTP, JSON, PrettyTables, JLD, MySQL, DotEnv, DataFrames, LinearAlgebra
include("functions/type_allocation_flexible.jl")
# Read machine-specific settings from the .env file one directory up.
cfg = DotEnv.config(path="../.env")
# `refresh` controls where the adjustment information comes from:
#   true  - rebuild it (the cells guarded by `if refresh` query the database
#           and rewrite the saved files)
#   false - load the previously saved adjustments instead
refresh = false
# The other cells build file names as `files_path * "name"`, so make sure the
# directory ends with a path separator; `joinpath(dir, "")` appends one only
# when it is missing.
files_path = joinpath(cfg["files_path"], "")

"/home/peters/code/mapinator/estimation/current_estimates_and_files/"

The point of this worksheet is to rescale the counts in the placements adjacency matrix
to accommodate the fact that the highest tiers are oversampled: each column (type) is multiplied
by its own adjustment factor.  Eventually this should not be needed,
but at this point it is the only way to produce a reasonable set of estimates.

This first part of the file is just reading in classification information.

In [2]:
# Read back the adjacency matrix and the custom row titles that the
# create-classification step saved to disk.
placement_rates = load(string(files_path, "placement_rates.jld"))["placement_rates"]
row_names = load(string(files_path, "row_names.jld"))["names"]

# Rebuild the constants from create classification (needed for the LaTeX table):
# rows = all row categories, columns = types; the rows beyond the first
# NUM_TYPES are counted as sinks.
numtotal, NUM_TYPES = size(placement_rates)
NUM_SINKS = numtotal - NUM_TYPES

# Stop right away if there is not exactly one row title per matrix row.
numtotal === length(row_names) || throw(DomainError(length(row_names), "row_names doesn't match dimensions of the placements matrix"))
printstyled("names and placement matrix dimensions match\n"; color = :green)


[32mnames and placement matrix dimensions match[39m


This next part of the file is an attempt to compute the corrections.  

The first part attempts to find the coverage by tiers using information from econjobmarket 
registrations data.

Set refresh equal to false at the top of the worksheet if you just want to see the current corrections.

In [3]:
# Build (refresh == true) or reload (refresh == false) the adjustment table.
# `data` has one row per type and three columns:
#   1. number of registrations,
#   2. number of registrants without a recorded placement,
#   3. adjustment factor 1 / (1 - share of registrants without a placement).
# `s` is the third column; it is used to rescale the columns of `placement_rates`.
if refresh
    # Registrations per type.
    query = """
    select t.type,count(*) as count from ftrecruiter.applicants a join ftrecruiter.organizations o 
    join ftrecruiter.institutions i join t_distribution t on a.degreeinst_oid=o.oid and 
    o.institution_id=i.institution_id and t.institution_id=i.institution_id 
    where a.degreetype=? and a.degreeendyear>=? and a.degreeendyear<=? 
    and t.algorithm_run_id=? group by type
    """
    params = ["Phd", "2017", "2023", 4]
    registrations = SBM_flexible.db_query(query, params)
    regs = registrations[:, :count]

    # Registrants per type that have no row in applicant_results.
    query = """
    select t.type,count(*) as count from ftrecruiter.applicants a join ftrecruiter.organizations o 
    join ftrecruiter.institutions i join t_distribution t on a.degreeinst_oid=o.oid 
    and i.institution_id=o.institution_id and t.institution_id=i.institution_id 
    where year(enrolldate)>=? and year(enrolldate)<=? and degreetype=? 
    and t.algorithm_run_id=? and aid not in (select aid from applicant_results) group by t.type
    """
    params = ["2018", "2023", "Phd", 4]
    # Deliberately not called `missing`: that name would shadow Base's `missing`.
    unplaced = SBM_flexible.db_query(query, params)
    miss = unplaced[:, :count]

    # The two result sets are divided element by element, and neither query has
    # an ORDER BY, so refuse to continue unless both list the same types in the
    # same order.
    if !isequal(registrations[:, :type], unplaced[:, :type])
        error("registration and missing-placement queries returned different types or a different row order")
    end

    proportion_missing = miss ./ regs
    s = 1 ./ (1 .- proportion_missing)
    # Keep the table as a Matrix{Any} so the counts stay integers next to the
    # floating-point adjustment factors (a plain hcat would promote everything
    # to Float64 and change both the saved file and the displayed table).
    data = Any[regs miss s]
    save(files_path*"adjustment_matrix.jld", "data", data)
else
    data = load(files_path*"adjustment_matrix.jld")["data"]
    # The stored table has element type Any; give the factors a concrete type.
    s = Float64.(data[:, 3])
end

# Scale column j of the placement matrix by s[j] and round to whole placements.
# Done for both branches so the adjusted matrix exists (and is displayed)
# whether or not the adjustments were refreshed.
adjusted_placement_rates = round.(Int, placement_rates * Diagonal(s))

12×5 Matrix{Int64}:
  928   168    85    14   12
 1010   684   297    94   20
 1425  1935  1173   224   60
  388   670   661   642   64
    0    29    51     0  196
  800   877   602   231  100
 1041   649   515   177  112
 1031  1446  1694  1519  572
  177   346   441   458  380
    9    15    33    18   12
  594   817  1094  1003  749
  558  1073  1563  1010  636

In [4]:
# Show the adjustment table with one column per type and one row per quantity.
# Column headers are generated from NUM_TYPES rather than hard-coded, so they
# follow the number of types in the classification.
head = ["Type $i" for i in 1:NUM_TYPES]
rowh = ["Registrations - 2018-23", "Registrants without placements", "Adjustment"]
# `data` is stored with one row per type, so it is transposed for display.
pretty_table(data', header = head, row_names = rowh)

┌────────────────────────────────┬─────────┬─────────┬─────────┬─────────┬─────────┐
│[1m                                [0m│[1m  Type 1 [0m│[1m  Type 2 [0m│[1m  Type 3 [0m│[1m  Type 4 [0m│[1m  Type 5 [0m│
├────────────────────────────────┼─────────┼─────────┼─────────┼─────────┼─────────┤
│[1m        Registrations - 2018-23 [0m│    2591 │    3304 │    4102 │    2508 │    1357 │
│[1m Registrants without placements [0m│     933 │    1574 │    2501 │    1813 │    1018 │
│[1m                     Adjustment [0m│ 1.56273 │ 1.90983 │ 2.56215 │ 3.60863 │ 4.00295 │
└────────────────────────────────┴─────────┴─────────┴─────────┴─────────┴─────────┘


In [5]:
# Write the adjustment table to a LaTeX file; only done when the adjustments
# were refreshed.
if refresh
    # NOTE(review): pretty_table is reached through SBM_flexible here, while the
    # display cell calls it unqualified - presumably the same PrettyTables
    # function; confirm.
    open(string(files_path, "adjustment_table.tex"), "w") do io
        SBM_flexible.pretty_table(
            io, data'; header = head, row_names = rowh, backend = Val(:latex)
        )
    end
end


In [6]:
# Scale column j of the placement matrix by the adjustment factor s[j] and
# round to whole placements. `s` may come out of a Matrix{Any}, so it is given
# a concrete element type before it is wrapped in a Diagonal.
adjusted_placement_rates = round.(Int, placement_rates * Diagonal(Float64.(s)))

12×5 Matrix{Int64}:
  928   168    85    14   12
 1010   684   297    94   20
 1425  1935  1173   224   60
  388   670   661   642   64
    0    29    51     0  196
  800   877   602   231  100
 1041   649   515   177  112
 1031  1446  1694  1519  572
  177   346   441   458  380
    9    15    33    18   12
  594   817  1094  1003  749
  558  1073  1563  1010  636

In [7]:
# Store the adjusted matrix on disk; skipped unless the adjustments were refreshed.
if refresh
    save(
        string(files_path, "adjusted_placement_rates.jld"),
        "adjusted_placement_rates",
        adjusted_placement_rates,
    )
end

In [8]:
# Display the adjusted placement matrix together with its row titles.
# NOTE(review): nice_adjacency_table comes from the included SBM_flexible
# module; presumably it renders the matrix as a table with row totals - confirm.
SBM_flexible.nice_adjacency_table(adjusted_placement_rates,row_names)

┌───────────────────────────────────┬────────┬────────┬────────┬────────┬────────┬────────────┐
│[1m                                   [0m│[1m Tier 1 [0m│[1m Tier 2 [0m│[1m Tier 3 [0m│[1m Tier 4 [0m│[1m Tier 5 [0m│[1m Row Totals [0m│
├───────────────────────────────────┼────────┼────────┼────────┼────────┼────────┼────────────┤
│[1m                 TYPE 1 (19 insts) [0m│    928 │    168 │     85 │     14 │     12 │       1207 │
│[1m                 TYPE 2 (51 insts) [0m│   1010 │    684 │    297 │     94 │     20 │       2105 │
│[1m                TYPE 3 (159 insts) [0m│   1425 │   1935 │   1173 │    224 │     60 │       4817 │
│[1m                TYPE 4 (313 insts) [0m│    388 │    670 │    661 │    642 │     64 │       2425 │
│[1m                TYPE 5 (486 insts) [0m│      0 │     29 │     51 │      0 │    196 │        276 │
│[1m         Public Sector (141 insts) [0m│    800 │    877 │    602 │    231 │    100 │       2610 │
│[1m        Private Sector (200

In [9]:
# Same table as displayed above, but with a file path as third argument; only
# done when the adjustments were refreshed.
# NOTE(review): presumably the helper writes the LaTeX version of the table to
# that path - confirm against SBM_flexible.
if refresh
    SBM_flexible.nice_adjacency_table(
        adjusted_placement_rates,
        row_names,
        string(files_path, "adjusted_adjacency_table.tex"),
    )
end