In [1]:
using HTTP, JSON, PrettyTables, JLD, MySQL, DotEnv, DataFrames, LinearAlgebra
include("functions/type_allocation_flexible.jl")
# `refresh` selects where the adjustment information comes from:
#   true  -> recompute it and overwrite the saved files
#   false -> only read the adjustments that were saved previously
refresh = true
# Read the settings from the .env file one directory up; the output/input
# directory for all saved files is taken from its "files_path" entry.
cfg = DotEnv.config("../.env")
files_path = cfg["files_path"]

"/home/peters/code/mapinator/estimation/current_estimates_and_files/"

The point of this worksheet is to rescale the entries of the placements adjacency matrix
to correct for the fact that the highest tiers are oversampled. Eventually this should not be needed,
but at present it is the only way to produce a reasonable set of estimates.

This first part of the file is just reading in classification information.

In [2]:
# Load the adjacency matrix and the custom row titles from create classification.
placement_rates = load(string(files_path, "placement_rates.jld"))["placement_rates"]
row_names = load(string(files_path, "row_names.jld"))["names"]

# Recreate the constants from create classification (needed to build the new
# LaTeX table): rows of the matrix, columns of the matrix, and their difference.
numtotal, NUM_TYPES = size(placement_rates)
NUM_SINKS = numtotal - NUM_TYPES

# There must be exactly one row name per matrix row; stop here otherwise.
if numtotal === length(row_names)
    printstyled("names and placement matrix dimensions match\n"; color = :green)
else
    throw(DomainError(length(row_names), "row_names doesn't match dimensions of the placements matrix"))
end


names and placement matrix dimensions match


This next part of the file is an attempt to compute the corrections.  

The first step estimates the coverage of each tier using
EconJobMarket registration data.

Set refresh equal to false at the top of the worksheet if you just want to see the current corrections.

In [11]:
# Build (refresh == true) or reload (refresh == false) the adjustment data.
#
# `data` has one row per type and three columns:
#   1. number of registrations,
#   2. number of registrants with no row in applicant_results,
#   3. scaling factor 1 / (1 - column 2 / column 1).
# `s` is the third column as a Float64 vector, and `adjusted_placement_rates`
# is `placement_rates` with column j scaled by s[j] and rounded to integers.
# NOTE(review): `registrations` is only defined when refresh is true; it is
# not part of the saved file.
if refresh
    # Registrations per type, filtered on degree type and degree end year.
    query = """
    select t.type,count(*) as count from ftrecruiter.applicants a join ftrecruiter.organizations o 
    join ftrecruiter.institutions i join t_distribution t on a.degreeinst_oid=o.oid and 
    o.institution_id=i.institution_id and t.institution_id=i.institution_id 
    where a.degreetype=? and a.degreeendyear>=? and a.degreeendyear<=? 
    and t.algorithm_run_id=? group by type
    """
    params = ["Phd", "2017", "2023", 5]
    # Neither query has an ORDER BY, so sort by type to make the row order
    # deterministic before the two results are combined row by row.
    # Assumes db_query returns a DataFrame (it is indexed like one below).
    registrations = sort(SBM_flexible.db_query(query, params), :type)
    regs = registrations[:, :count]
    # Registrants per type (filtered on enrolment year) that do not appear in
    # applicant_results.
    query = """
    select t.type,count(*) as count from ftrecruiter.applicants a join ftrecruiter.organizations o 
    join ftrecruiter.institutions i join t_distribution t on a.degreeinst_oid=o.oid 
    and i.institution_id=o.institution_id and t.institution_id=i.institution_id 
    where year(enrolldate)>=? and year(enrolldate)<=? and degreetype=? 
    and t.algorithm_run_id=? and aid not in (select aid from applicant_results) group by t.type
    """
    params = ["2018", "2023", "Phd", 5]
    ms = sort(SBM_flexible.db_query(query, params), :type)
    miss = ms[:, :count]
    # The elementwise division below is only meaningful if both results list
    # exactly the same types in the same order.
    if !isequal(registrations[!, "type"], ms[!, "type"])
        throw(DimensionMismatch(
            "registration and missing-placement queries returned different sets of types",
        ))
    end
    proportion_missing = ms[!, "count"] ./ registrations[!, "count"]
    # A proportion of 1 or more would give an infinite or negative factor.
    if any(>=(1), proportion_missing)
        throw(DomainError(
            proportion_missing,
            "proportion of registrants without placements must be below 1 for every type",
        ))
    end
    s = [1 / (1 - x) for x in proportion_missing]
    # Keep the matrix element type as Any so the two count columns stay
    # integers next to the floating-point factors (same layout as the saved file).
    data = Any[regs miss s]
    save(files_path*"adjustment_matrix.jld", "data", data)
else
    data = load(files_path*"adjustment_matrix.jld")["data"]
    s = Float64.(data[:, 3])
end
# Computed on both paths so the variable exists regardless of `refresh`.
adjusted_placement_rates = round.(Int, placement_rates * Diagonal(s))

In [12]:
# Display the per-type registration counts returned by the first query.
# `registrations` is only assigned when `refresh` is true.
registrations

Row,type,count
Unnamed: 0_level_1,Int32?,Int64
1,1,2664
2,2,3632
3,3,4472
4,4,2282
5,5,1339


In [14]:
# Display the adjustment matrix: one row per type, with columns for
# registrations, registrants without placements, and the adjustment factor.
data

5×3 Matrix{Any}:
 2664   431  1.19301
 3632  1016  1.38838
 4472  2113  1.89572
 2282  1388  2.55257
 1339   863  2.81303

In [25]:
# Column headers, one per type: "Type 1", ..., "Type NUM_TYPES".
head = [string("Type ", i) for i in 1:NUM_TYPES]
# Row labels, in the same order as the three columns of `data`.
rowh = ["Registrations - 2018-23", "Registrants without placements", "Adjustment"]

# `data` holds one row per type, so transpose it to put the types across the
# columns and the three quantities down the rows.
pretty_table(transpose(data), header = head, row_labels = rowh, backend = Val(:text))

┌────────────────────────────────┬─────────┬─────────┬─────────┬─────────┬─────────┐
│                                │  Type 1 │  Type 2 │  Type 3 │  Type 4 │  Type 5 │
├────────────────────────────────┼─────────┼─────────┼─────────┼─────────┼─────────┤
│        Registrations - 2018-23 │    2664 │    3632 │    4472 │    2282 │    1339 │
│ Registrants without placements │     431 │    1016 │    2113 │    1388 │     863 │
│                     Adjustment │ 1.19301 │ 1.38838 │ 1.89572 │ 2.55257 │ 2.81303 │
└────────────────────────────────┴─────────┴─────────┴─────────┴─────────┴─────────┘


In [26]:
# When the data was refreshed, also write the adjustment table (types across
# the columns, labelled rows) to adjustment_table.tex using the LaTeX backend.
if refresh
    open(string(files_path, "adjustment_table.tex"), "w") do io
        SBM_flexible.pretty_table(
            io,
            adjoint(data);
            header = head,
            row_labels = rowh,
            backend = Val(:latex),
        )
    end
end


In [27]:
# Scale column j of `placement_rates` by the adjustment factor s[j] (right
# multiplication by a diagonal matrix), then round every entry to an integer.
adjusted_placement_rates = round.(Int,placement_rates*Diagonal(I*s))

12×5 Matrix{Int64}:
 1424   469   326   110   11
 1133  1184   595   232   28
 1280  1917  1869   373   96
  322   703   855  1044   93
    0    58   100    71  459
  678   757   540   145   65
  927   612   436   140   73
  103   180   256   143   76
   41    82   131   105  135
  438   832  1426  1054  959
  251   229   157    82   25
  286   523   796   406  194

In [28]:
# Overwrite the saved adjusted matrix only when it was recomputed in this run.
if refresh
    save(
        string(files_path, "adjusted_placement_rates.jld"),
        "adjusted_placement_rates",
        adjusted_placement_rates,
    )
end

In [29]:
# Show the adjusted placement matrix together with its row names, using the
# table helper from SBM_flexible (defined outside this worksheet).
SBM_flexible.nice_adjacency_table(adjusted_placement_rates,row_names)

┌───────────────────────────────────┬────────┬────────┬────────┬────────┬────────┬────────────┐
│[1m                                   [0m│[1m Tier 1 [0m│[1m Tier 2 [0m│[1m Tier 3 [0m│[1m Tier 4 [0m│[1m Tier 5 [0m│[1m Row Totals [0m│
├───────────────────────────────────┼────────┼────────┼────────┼────────┼────────┼────────────┤
│[1m                 TYPE 1 (20 insts) [0m│   1424 │    469 │    326 │    110 │     11 │       2340 │
│[1m                 TYPE 2 (58 insts) [0m│   1133 │   1184 │    595 │    232 │     28 │       3172 │
│[1m                TYPE 3 (180 insts) [0m│   1280 │   1917 │   1869 │    373 │     96 │       5535 │
│[1m                TYPE 4 (334 insts) [0m│    322 │    703 │    855 │   1044 │     93 │       3017 │
│[1m                TYPE 5 (522 insts) [0m│      0 │     58 │    100 │     71 │    459 │        688 │
│[1m         Public Sector (152 insts) [0m│    678 │    757 │    540 │    145 │     65 │       2185 │
│[1m        Private Sector (227

In [30]:
# When the data was refreshed, call the same table helper again with a third
# argument: the path adjusted_adjacency_table.tex under files_path.
if refresh
    SBM_flexible.nice_adjacency_table(
        adjusted_placement_rates,
        row_names,
        string(files_path, "adjusted_adjacency_table.tex"),
    )
end