In [7]:
using DataFrames
using SyntheticPopulation

# Down-scaling factor applied to the census counts in this notebook:
# with SCALE = 1e-4, each synthetic individual or household stands for
# 10,000 real individuals or households.
SCALE = 1e-4

# All values are based on China census data.
# Total number of individuals before scaling (the variable name's spelling is
# kept as-is because later statements refer to it).
individual_popoulation_size = 21_890_000

# Individuals: marginal population by sex and age.
# The raw counts are listed as (male, female) pairs, one pair per age value, so
# that they line up with the `sex` and `age` columns built below. The literal
# factor 10000 suggests the raw counts are given in units of 10,000 people
# (TODO confirm against the data source).
marginal_ind_age_sex = let
    # 18 age values in steps of 5; presumably midpoints of 5-year age bands
    # (TODO confirm).
    age_values = 2:5:87
    raw_counts = [
        52.6, 49.0,
        48.5, 44.8,
        33.6, 30.6,
        34.6, 28.8,
        71.6, 63.4,
        99.6, 90.9,
        130.9, 119.4,
        110.8, 103.5,
        83.8, 76.4,
        84.2, 77.7,
        84.2, 77.8,
        82.8, 79.9,
        67.7, 71.0,
        56.9, 62.6,
        31.5, 35.3,
        18.5, 23.0,
        15.2, 19.7,
        12.5, 16.0,
    ]
    DataFrame(
        sex = repeat(['M', 'F'], outer = length(age_values)),
        age = repeat(age_values, inner = 2),
        population = (SCALE * 10000) .* raw_counts,
    )
end

# Individuals: marginal population by sex and marital status.
marginal_ind_sex_maritalstatus = let
    statuses = ["Never_married", "Married", "Divorced", "Widowed"]
    # One (male, female) pair per marital status, in the order of `statuses`.
    raw_counts = [
        1679, 1611,
        5859, 5774,
        140, 206,
        128, 426,
    ]
    DataFrame(
        sex = repeat(['M', 'F'], outer = length(statuses)),
        maritalstatus = repeat(statuses, inner = 2),
        # NOTE(review): 0.00082 looks like the sampling fraction of the source
        # table (dividing by it scales the counts up) -- confirm and name it.
        population = SCALE .* raw_counts ./ 0.00082,
    )
end

# Individuals: marginal population by income level. The scaled total population
# is split evenly across the five income levels.
marginal_ind_income = let
    income_levels = [25394, 44855, 63969, 88026, 145915]
    per_level = individual_popoulation_size * SCALE / 5
    DataFrame(
        income = income_levels,
        population = fill(per_level, length(income_levels)),
    )
end

# Households: total number of households before scaling.
household_total_population = 8_230_000

# Marginal household counts by household size (1 to 5 members): the scaled
# total is distributed according to the share of each size and rounded to
# whole households.
marginal_hh_size = let
    size_shares = [0.299, 0.331, 0.217, 0.09, 0.063]
    scaled_total = SCALE * household_total_population
    DataFrame(
        hh_size = collect(1:5),
        population = round.(Int, scaled_total .* size_shares),
    )
end

#=
#areas
# NOTE: do not commit a real API key; substitute your own osm-boundaries.com key for <YOUR_API_KEY> before running.
URL = "https://osm-boundaries.com/api/v1/download?apiKey=<YOUR_API_KEY>&db=osm20240401&osmIds=-912940&boundary=administrative&format=GeoJSON&srid=4326"
areas_filepath = download_osm_boundaries(URL)

areas = generate_areas_dataframe_from_file(areas_filepath)

#aggregated_areas - population referenced from https://nj.tjj.beijing.gov.cn/nj/main/2021-tjnj/zk/indexeh.htm
aggregated_areas = copy(areas)
aggregated_areas.:population = SCALE .* 10000 .* [56.8, 313.2, 201.9, 345.1, 34.6, 184.0, 132.4, 45.7, 52.8, 39.3, 44.1, 131.3, 199.4, 226.9, 110.6, 70.9]
aggregated_areas
=#

# Build the aggregated table of individuals from the three individual-level
# marginals. The config file path is relative, so it is resolved against the
# current working directory.
aggregated_individuals = generate_joint_distribution(
    marginal_ind_sex_maritalstatus,
    marginal_ind_income,
    marginal_ind_age_sex;
    config_file = "tutorial_notebooks/config_file.json",
)

# Build the aggregated table of households from the household-size marginal.
aggregated_households = generate_joint_distribution(marginal_hh_size)


┌ Info: Inconsistent target margins, converting `X` and `mar` to proportions. Margin totals: [1930.0, 2190.0]
└ @ ProportionalFitting C:\Users\plzurekma\.julia\packages\ProportionalFitting\gNJEu\src\ipf.jl:61
┌ Info: Converged in 1 iterations.
└ @ ProportionalFitting C:\Users\plzurekma\.julia\packages\ProportionalFitting\gNJEu\src\ipf.jl:130
┌ Info: Inconsistent target margins, converting `X` and `mar` to proportions. Margin totals: [1085.0, 917.0]
└ @ ProportionalFitting C:\Users\plzurekma\.julia\packages\ProportionalFitting\gNJEu\src\ipf.jl:61
┌ Info: Converged in 1 iterations.
└ @ ProportionalFitting C:\Users\plzurekma\.julia\packages\ProportionalFitting\gNJEu\src\ipf.jl:130
┌ Info: Inconsistent target margins, converting `X` and `mar` to proportions. Margin totals: [1110.0, 951.0]
└ @ ProportionalFitting C:\Users\plzurekma\.julia\packages\ProportionalFitting\gNJEu\src\ipf.jl:61
┌ Info: Converged in 1 iterations.
└ @ ProportionalFitting C:\Users\plzurekma\.julia\packages\Proportiona

Row,id,hh_size,population
Unnamed: 0_level_1,Int64,Int64,Int64
1,1,1,246
2,2,2,272
3,3,3,179
4,4,4,74
5,5,5,52


In [8]:
# Allocate the aggregated individuals to the aggregated households; the call
# returns four values, which are unpacked in order.
(model, allocation_values, disaggregated_individuals, disaggregated_households) =
    assign_and_optimize_individuals_to_households(
        aggregated_individuals,
        aggregated_households,
    )

In [None]:
# Display the `model` object returned by assign_and_optimize_individuals_to_households.
model

In [None]:
# Total of the allocation values returned by the assignment step
# (presumably the number of individual-to-household allocations made -- TODO confirm).
sum(allocation_values)

In [3]:
# Display the per-individual table returned by the assignment step.
disaggregated_individuals

Row,id,household_id
Unnamed: 0_level_1,Int64,Int64?
1,1,58
2,2,59
3,3,60
4,4,61
5,5,62
6,6,26
7,7,27
8,8,28
9,9,29
10,10,30


In [5]:
# Display the aggregated individuals table produced by generate_joint_distribution.
aggregated_individuals

Row,id,maritalstatus,income,sex,age,population
Unnamed: 0_level_1,Int64,String?,Int64?,Char?,Int64?,Int64
1,1,Divorced,25394,F,22,0
2,2,Married,25394,F,22,1
3,3,Never_married,25394,F,22,0
4,4,Widowed,25394,F,22,0
5,5,Divorced,44855,F,22,0
6,6,Married,44855,F,22,1
7,7,Never_married,44855,F,22,0
8,8,Widowed,44855,F,22,0
9,9,Divorced,63969,F,22,0
10,10,Married,63969,F,22,1


In [4]:
# Per-column summary of the households table returned by the assignment step.
describe(disaggregated_households)

Row,variable,mean,min,median,max,nmissing,eltype
Unnamed: 0_level_1,Symbol,Float64,Union…,Union…,Union…,Int64,Type
1,id,41.5,1.0,41.5,82.0,0,Int64
2,agg_hh_id,2.26829,1.0,2.0,5.0,0,"Union{Missing, Int64}"
3,head_id,52.3537,6.0,51.5,98.0,0,"Union{Missing, Int64}"
4,partner_id,117.0,99.0,117.0,135.0,45,"Union{Missing, Int64}"
5,child1_id,85.1,1.0,84.0,149.0,52,"Union{Missing, Int64}"
6,child2_id,152.5,150.0,152.5,155.0,76,"Union{Missing, Int64}"
7,child3_id,,,,,82,"Union{Missing, Int64}"
8,child4_id,,,,,82,"Union{Missing, Int64}"
