In [1]:
# Import standard packages.
import os
# Import installed packages.
import numpy as np
import pandas as pd

In [18]:
# Load the 2013 5-year PUMS housing records for Washington DC
# and the 2013 5-year PUMS user verification estimates.
path_acs = r'/mnt/disk-20151227t211000z/www2-census-gov/programs-surveys/acs/'
dfh = pd.read_csv(os.path.join(path_acs, r'data/pums/2013/5-Year/ss13hdc.csv'))
dfe = pd.read_csv(os.path.join(path_acs, r'tech_docs/pums/estimates/pums_estimates_9_13.csv'))
# Select the verification estimates for Washington DC.
tfmask_dc = dfe['state'] == 'District of Columbia'
dfe_dc = dfe.loc[tfmask_dc]
# Show the reference row for the "characteristic" being verified.
# The row is selected by its characteristic text rather than by a hard-coded
# index label, so the display does not depend on the row order of the
# estimates file.
char = 'Owner occupied units (TEN in 1,2)'
print("This exammple verifies these quantities:")
dfe_dc.loc[dfe_dc['characteristic'] == char]

This exammple verifies these quantities:


Unnamed: 0,st,state,characteristic,pums_est_09_to_13,pums_se_09_to_13,pums_moe_09_to_13
310,11,District of Columbia,"Owner occupied units (TEN in 1,2)",110362,1363,2242


In [16]:
# Housing-unit weight column and the names of its 80 replicate-weight columns.
hwt = 'WGTP'
hwts = [hwt + suffix for suffix in map(str, range(1, 81))] # ['WGTP1', ..., 'WGTP80']
# Pick out the reference (published) verification row for the characteristic.
char = 'Owner occupied units (TEN in 1,2)'
print("'{char}'".format(char=char))
tfmask_ref = dfe_dc['characteristic'] == char
ref_row = dfe_dc.loc[tfmask_ref]
# Pick out the housing records matching the characteristic (TEN is 1 or 2).
tfmask_test = (dfh['TEN'] == 1) | (dfh['TEN'] == 2)
# Estimate ('est'): the sum of the full-sample weight 'WGTP' over the
# matching records. The reference value is handled as text here: commas are
# stripped before it is converted to an int.
col = 'pums_est_09_to_13'
print("    '{col}':".format(col=col), end=' ')
ref_est = int(ref_row[col].values[0].replace(',', ''))
test_est = dfh.loc[tfmask_test, hwt].sum()
assert np.isclose(ref_est, test_est, atol=1, rtol=0)
print("(ref, test) = {tup}".format(tup=(ref_est, test_est)))
# Direct standard error ('se'): sum each replicate weight 'WGTP[1-80]' over
# the matching records, then take the square root of (4/80) times the sum of
# squared deviations of those replicate sums from the full-sample estimate.
col = 'pums_se_09_to_13'
print("    '{col}' :".format(col=col), end=' ')
ref_se = ref_row[col].values[0]
rep_sums = dfh.loc[tfmask_test, hwts].sum()
test_se = ((4/80)*((rep_sums - test_est)**2).sum())**0.5
assert np.isclose(ref_se, test_se, atol=1, rtol=0)
print("(ref, test) = {tup}".format(tup=(ref_se, test_se)))
# Margin of error ('moe') at the 90% confidence level:
# 1.645 standard errors on either side of the estimate.
col = 'pums_moe_09_to_13'
print("    '{col}':".format(col=col), end=' ')
ref_moe = ref_row[col].values[0]
test_moe = 1.645*test_se
assert np.isclose(ref_moe, test_moe, atol=1, rtol=0)
print("(ref, test) = {tup}".format(tup=(ref_moe, test_moe)))

'Owner occupied units (TEN in 1,2)'
    'pums_est_09_to_13': (ref, test) = (110362, 110362)
    'pums_se_09_to_13' : (ref, test) = (1363, 1363.1910174293257)
    'pums_moe_09_to_13': (ref, test) = (2242, 2242.449223671241)
