In [1]:
# Load the watermark extension, then print a timestamp plus the interpreter and
# hardware details of the machine that produced this run.
%load_ext watermark
%watermark

Last updated: 2025-03-20T16:03:12.120203-04:00

Python implementation: CPython
Python version       : 3.12.9
IPython version      : 9.0.2

Compiler    : Clang 18.1.8 
OS          : Darwin
Release     : 24.3.0
Machine     : arm64
Processor   : arm
CPU cores   : 8
Architecture: 64bit



In [2]:
import time

import jax
import jax.numpy as jnp
import jaxlib
import jaxopt
import numpy as np
import pandas as pd

import pymedm
from pymedm.diagnostics import moe_fit_rate

# Turn on JAX's `jax_enable_x64` flag up front, before any solver work further down.
jax.config.update("jax_enable_x64", True)

# Record the watermark version (-w) and the versions of the imported packages (-iv).
%watermark -w
%watermark -iv

Watermark: 2.5.0

numpy : 2.2.4
jaxopt: 0.8.3
pandas: 2.2.3
jax   : 0.4.31
jaxlib: 0.4.31
pymedm: 2.2.3



In [3]:
# Report which processor types are visible to this session; the recorded output
# is a table with one row each for cpu, gpu, and tpu.
pymedm.processor_availability()

Unnamed: 0,device,available,count
0,cpu,yes,1
1,gpu,no,0
2,tpu,no,0


## Individual Constraints

In [4]:
# Individual-level constraint table; each row carries a SERIALNO identifier
# followed by the constraint columns.
cind = pd.read_csv("../data/knox/cind_4701603.csv.gz")
cind.head()

Unnamed: 0,SERIALNO,population,group_quarters_pop,housing_units,occhu,civ_noninst_pop,male_hours_GE35,male_hours_15.34,male_hours_1.14,female_hours_GE35,...,txv_own_02_vehicle,txv_own_03_vehicle,txv_own_04_vehicle,txv_own_GE05_vehicle,txv_rent_no_vehicle,txv_rent_01_vehicle,txv_rent_02_vehicle,txv_rent_03_vehicle,txv_rent_04_vehicle,txv_rent_GE05_vehicle
0,2015000000763,2.76,0.0,1,1,2.76,1.0,0.0,0.0,0.0,...,False,False,False,False,False,True,False,False,False,False
1,2015000001743,2.714286,0.0,1,1,2.714286,1.0,0.0,0.0,0.928571,...,False,True,False,False,False,False,False,False,False,False
2,2015000004434,5.875,0.0,1,1,5.875,1.0,0.0,0.0,0.0,...,False,False,False,False,False,False,False,False,False,False
3,2015000010209,2.177419,0.0,1,1,2.177419,0.0,0.0,0.0,0.0,...,False,False,False,False,True,False,False,False,False,False
4,2015000011609,1.0,1.0,0,0,1.0,0.0,0.0,0.0,0.0,...,False,False,False,False,False,False,False,False,False,False


In [5]:
## response IDs
# Must run before the next cell, which removes the SERIALNO column from `cind`.
serial = cind.SERIALNO.values

In [6]:
# Remove the ID column now that its values are held in `serial`.
# errors="ignore" keeps this cell re-runnable: `cind` is rebound here, so a second
# execution would otherwise raise KeyError because SERIALNO is already gone.
cind = cind.drop(columns="SERIALNO", errors="ignore")

In [7]:
## sample weights
# Read the weights file (first CSV column as the index) and flatten the values
# into a 1-D array.
wt = (
    pd.read_csv("../data/knox/wt_4701603.csv.gz", index_col=0)
    .to_numpy()
    .flatten()
)
wt

array([25., 14.,  8., ..., 10.,  9., 43.], shape=(5017,))

## Geographic Constraints

In [8]:
# Geographic constraint totals for the finer level, indexed by GEOID
# (12-digit codes in the recorded output — presumably census block groups; confirm).
cg2 = pd.read_csv("../data/knox/cg2_4701603.csv.gz", index_col=0)
cg2.head()

Unnamed: 0_level_0,population,group_quarters_pop,housing_units,occhu,civ_noninst_pop,male_hours_GE35,male_hours_15.34,male_hours_1.14,female_hours_GE35,female_hours_15.34,...,txv_own_02_vehicle,txv_own_03_vehicle,txv_own_04_vehicle,txv_own_GE05_vehicle,txv_rent_no_vehicle,txv_rent_01_vehicle,txv_rent_02_vehicle,txv_rent_03_vehicle,txv_rent_04_vehicle,txv_rent_GE05_vehicle
GEOID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
470930001001,2463,236,1837,1603,2282,686,265,68,484,115,...,180,4,0,0,355,710,144,8,11,0
470930008001,1697,7,1031,831,1659,305,195,72,243,170,...,19,10,0,0,146,309,162,125,17,0
470930008002,574,88,279,233,486,69,16,0,182,53,...,6,31,0,0,0,83,80,0,0,0
470930008003,1336,151,467,401,1336,251,245,0,225,37,...,48,0,0,40,8,58,76,68,69,0
470930009011,1965,1958,4,4,1965,68,351,142,145,380,...,0,0,0,0,0,4,0,0,0,0


In [9]:
# Geographic constraint totals for the coarser level, indexed by GEOID
# (11-digit codes in the recorded output — presumably census tracts; confirm).
# Rows aggregate the cg2 rows they contain, e.g. population 3607 for 47093000800
# equals 1697 + 574 + 1336 from cg2 rows 470930008001-470930008003.
cg1 = pd.read_csv("../data/knox/cg1_4701603.csv.gz", index_col=0)
cg1.head()

Unnamed: 0_level_0,population,group_quarters_pop,housing_units,occhu,civ_noninst_pop,male_hours_GE35,male_hours_15.34,male_hours_1.14,female_hours_GE35,female_hours_15.34,...,txv_own_02_vehicle,txv_own_03_vehicle,txv_own_04_vehicle,txv_own_GE05_vehicle,txv_rent_no_vehicle,txv_rent_01_vehicle,txv_rent_02_vehicle,txv_rent_03_vehicle,txv_rent_04_vehicle,txv_rent_GE05_vehicle
GEOID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
47093000100,2463,236,1837,1603,2282,686,265,68,484,115,...,180,4,0,0,355,710,144,8,11,0
47093000800,3607,246,1777,1465,3481,625,456,72,650,260,...,73,41,0,40,154,450,318,193,86,0
47093000901,1965,1958,4,4,1965,68,351,142,145,380,...,0,0,0,0,0,4,0,0,0,0
47093000902,3034,2581,66,39,3034,341,528,242,207,537,...,0,0,0,0,0,15,0,5,0,19
47093001400,2447,0,1069,791,2447,307,42,10,254,115,...,85,57,10,0,180,252,82,35,0,0


In [10]:
# Uncertainty table with the same GEOID index and columns as cg2.
# NOTE(review): values look like standard errors (MOE / 1.645) — e.g. 137.99 for
# population in 470930001001 vs. the 227 MOE shown in the validation output; confirm.
sg2 = pd.read_csv("../data/knox/sg2_4701603.csv.gz", index_col=0)
sg2.head()

Unnamed: 0_level_0,population,group_quarters_pop,housing_units,occhu,civ_noninst_pop,male_hours_GE35,male_hours_15.34,male_hours_1.14,female_hours_GE35,female_hours_15.34,...,txv_own_02_vehicle,txv_own_03_vehicle,txv_own_04_vehicle,txv_own_GE05_vehicle,txv_rent_no_vehicle,txv_rent_01_vehicle,txv_rent_02_vehicle,txv_rent_03_vehicle,txv_rent_04_vehicle,txv_rent_GE05_vehicle
GEOID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
470930001001,137.993921,62.613982,49.848024,66.869301,122.796353,82.674772,68.085106,34.650456,68.693009,44.3769,...,45.592705,4.255319,7.294833,7.294833,42.553191,74.772036,35.258359,8.510638,11.550152,7.294833
470930008001,203.039514,9.118541,76.595745,72.340426,198.784195,88.145897,76.595745,43.768997,58.358663,68.693009,...,13.37386,9.118541,7.294833,7.294833,48.632219,60.182371,55.927052,44.984802,11.550152,7.294833
470930008002,142.857143,52.887538,52.279635,46.808511,130.699088,24.316109,11.550152,7.294833,47.416413,37.082067,...,6.079027,20.668693,7.294833,7.294833,7.294833,25.531915,35.866261,7.294833,7.294833,7.294833
470930008003,272.340426,136.778116,64.43769,64.43769,272.340426,79.027356,114.285714,7.294833,91.18541,26.139818,...,26.74772,7.294833,7.294833,21.884498,7.294833,28.571429,32.218845,42.553191,37.082067,7.294833
470930009011,181.762918,181.762918,3.039514,3.039514,181.762918,46.200608,55.319149,57.142857,48.632219,110.030395,...,7.294833,7.294833,7.294833,7.294833,7.294833,3.039514,7.294833,7.294833,7.294833,7.294833


In [11]:
# Uncertainty table with the same GEOID index and columns as cg1.
# NOTE(review): presumably standard errors for the cg1 totals — confirm.
sg1 = pd.read_csv("../data/knox/sg1_4701603.csv.gz", index_col=0)
sg1.head()

Unnamed: 0_level_0,population,group_quarters_pop,housing_units,occhu,civ_noninst_pop,male_hours_GE35,male_hours_15.34,male_hours_1.14,female_hours_GE35,female_hours_15.34,...,txv_own_02_vehicle,txv_own_03_vehicle,txv_own_04_vehicle,txv_own_GE05_vehicle,txv_rent_no_vehicle,txv_rent_01_vehicle,txv_rent_02_vehicle,txv_rent_03_vehicle,txv_rent_04_vehicle,txv_rent_GE05_vehicle
GEOID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
47093000100,137.993921,62.613982,49.848024,66.869301,122.796353,82.674772,68.085106,34.650456,68.693009,44.3769,...,45.592705,4.255319,7.294833,7.294833,42.553191,74.772036,35.258359,8.510638,11.550152,7.294833
47093000800,330.091185,151.367781,35.258359,79.635258,316.109422,121.580547,136.778116,43.768997,119.756839,84.49848,...,29.179331,21.884498,7.294833,21.884498,49.240122,70.516717,66.869301,61.398176,39.513678,7.294833
47093000901,181.762918,181.762918,3.039514,3.039514,181.762918,46.200608,55.319149,57.142857,48.632219,110.030395,...,7.294833,7.294833,7.294833,7.294833,7.294833,3.039514,7.294833,7.294833,7.294833,7.294833
47093000902,713.069909,652.279635,23.100304,20.06079,713.069909,99.696049,164.741641,82.066869,121.580547,158.662614,...,7.294833,7.294833,7.294833,7.294833,7.294833,15.197568,7.294833,4.255319,7.294833,14.589666
47093001400,221.276596,7.294833,62.613982,64.43769,221.276596,82.066869,26.74772,10.334347,62.006079,55.319149,...,27.355623,23.708207,10.334347,7.294833,56.534954,51.671733,35.866261,21.884498,7.294833,7.294833


## Solve P-MEDM Problem

In [12]:
# setup
# Build the P-MEDM problem from the response IDs, sample weights, individual
# constraints, and both levels of geographic constraints (cg1/cg2) with their
# uncertainty tables (sg1/sg2).
# NOTE(review): the leading 2019 is presumably the ACS vintage year — confirm
# against the pymedm.PMEDM signature.
pmd = pymedm.PMEDM(2019, serial, wt, cind, cg1, cg2, sg1, sg2, verbose=True)

In [13]:
# Time the solve with a monotonic, high-resolution clock. time.time() follows the
# system wall clock and can jump if the clock is adjusted during this
# minutes-long run, which would corrupt the reported duration.
start_time = time.perf_counter()
pmd.solve()
exec_time = np.round(time.perf_counter() - start_time, 4)
print(f"\nCompleted in {exec_time} seconds.")

Initializing P-MEDM solver...
P-MEDM completed in 117.3602 seconds.

Completed in 117.5807 seconds.


## Validation

In [14]:
# Evaluate the solution against the cg2 constraints and their sg2 uncertainties.
# The result is a dict; its "Ycomp" entry (displayed below) tabulates acs, pmedm,
# err, moe, and in_moe for each geography/variable pair.
mfrs = moe_fit_rate(cind, cg2, sg2, pmd.almat)
mfrs

{'Ycomp':                            variable   acs        pmedm        err    moe  \
 470930001001             population  2463  2485.794320  22.794320  227.0   
 470930008001             population  1697  1709.609699  12.609699  334.0   
 470930008002             population   574   597.594241  23.594241  235.0   
 470930008003             population  1336  1367.214443  31.214443  448.0   
 470930009011             population  1965  1867.150477  97.849523  299.0   
 ...                             ...   ...          ...        ...    ...   
 470930070001  txv_rent_GE05_vehicle    36    34.473413   1.526587   55.0   
 470930070002  txv_rent_GE05_vehicle     0     0.807732   0.807732   12.0   
 470930071001  txv_rent_GE05_vehicle     0     0.761110   0.761110   12.0   
 470930071002  txv_rent_GE05_vehicle     0     0.317241   0.317241   12.0   
 470930071003  txv_rent_GE05_vehicle     0     0.368795   0.368795   12.0   
 
               in_moe  
 470930001001    True  
 470930008001    

### Compare Population and Housing Unit Totals

In [15]:
# Pull out the comparison table, then isolate the rows for the population and
# housing-unit variables.
Ycomps = mfrs["Ycomp"]
ycp = Ycomps.loc[Ycomps["variable"] == "population"]
ych = Ycomps.loc[Ycomps["variable"] == "housing_units"]

In [16]:
# Sum of the `acs` column over the population rows.
acs_totalpop = ycp["acs"].sum()
acs_totalpop

np.int64(178350)

In [17]:
# Sum of the `pmedm` column over the population rows, for comparison with the ACS total.
pmedm_totalpop = ycp["pmedm"].sum()
pmedm_totalpop

np.float64(177881.0973392912)

In [18]:
# Sum of the `acs` column over the housing-unit rows.
acs_totalhu = ych["acs"].sum()
acs_totalhu

np.int64(87369)

In [19]:
# Sum of the `pmedm` column over the housing-unit rows, for comparison with the ACS total.
pmedm_totalhu = ych["pmedm"].sum()
pmedm_totalhu

np.float64(87760.50704316913)