#### **Census TopDown algorithm**
Abowd, J., Ashmead, R., Garfinkel, S., Kifer, D., Leclerc, P., Machanavajjhala, A., & Sexton, W. (2019). Census TopDown: Differentially private data, incremental schemas, and consistency with public knowledge. US Census Bureau.

County level: initialization   
HHGQ (8) ∗ VOTINGAGE (2) ∗ HISPANIC (2) ∗ RACE (63)

In [27]:
import pandas as pd

filename_sf1 = 'franklin/microdata/franklin_hist_ppmf_20210608.csv'
hist_sf1 = pd.read_csv(filename_sf1)

## domain sizes of the four attributes:
## HHGQ (8) * VOTINGAGE (2) * HISPANIC (2) * RACE (63)
n1, n2, n3, n4 = 8, 2, 2, 63

## county-level $H^0$: add up every row of the input histogram, then discard
## the summed GEOID10 column so only the histogram-cell columns remain.
hist_cou = hist_sf1.sum(axis=0).to_frame().T
hist_cou = hist_cou.drop(['GEOID10'], axis=1)

## Each attribute is matched against a two-character window of the column
## name (the 8-character names visible in the printed output are consistent
## with the order HHGQ, VOTINGAGE, HISPANIC, RACE).
_HHGQ_SPAN = slice(0, 2)
_VOTINGAGE_SPAN = slice(2, 4)
_HISPANIC_SPAN = slice(4, 6)
_RACE_SPAN = slice(6, 8)


def _matching_columns(*criteria):
    """Return the names of the ``hist_cou`` columns that satisfy every criterion.

    Each criterion is a ``(span, value)`` pair: ``value`` is rendered as a
    zero-padded two-digit code and must occur in ``column_name[span]``.
    Columns are returned in the order they appear in ``hist_cou``.
    """
    wanted = [
        (span, '{number:0{width}d}'.format(width=2, number=value))
        for span, value in criteria
    ]
    return [
        name for name in hist_cou.columns
        if all(code in name[span] for span, code in wanted)
    ]


def _cell_total(col_names):
    """Sum the selected county-level columns into a single query answer."""
    return hist_cou[col_names].sum(axis=1).values[0]


## answers to workload queries
## Q1: TOTAL (1 cell) [invariant]
## Q2: CENRACE (63 cells)
m2 = [
    _cell_total(_matching_columns((_RACE_SPAN, race)))
    for race in range(n4)
]

## Q3: HISPANIC (2 cells)
m3 = [
    _cell_total(_matching_columns((_HISPANIC_SPAN, hisp)))
    for hisp in range(n3)
]

## Q4: VOTINGAGE (2 cells)
m4 = [
    _cell_total(_matching_columns((_VOTINGAGE_SPAN, age)))
    for age in range(n2)
]

## Q6: HHGQ (8 cells)
m6 = [
    _cell_total(_matching_columns((_HHGQ_SPAN, hhgq)))
    for hhgq in range(n1)
]

## Q7: HISPANIC*CENRACE (126 cells), hispanic-major order
m7 = [
    _cell_total(_matching_columns((_HISPANIC_SPAN, hisp), (_RACE_SPAN, race)))
    for hisp in range(n3)
    for race in range(n4)
]

## Q8: VOTINGAGE*CENRACE (126 cells), voting-age-major order
m8 = [
    _cell_total(_matching_columns((_VOTINGAGE_SPAN, age), (_RACE_SPAN, race)))
    for age in range(n2)
    for race in range(n4)
]

## Q9: VOTINGAGE*HISPANIC (4 cells), voting-age-major order
m9 = [
    _cell_total(_matching_columns((_VOTINGAGE_SPAN, age), (_HISPANIC_SPAN, hisp)))
    for age in range(n2)
    for hisp in range(n3)
]

## Q10: VOTINGAGE*HISPANIC*CENRACE (252 cells)
## Kept as an explicit loop because the matched column names are printed
## for every cell, interleaved with the accumulation.
m10 = []
for age in range(n2):
    for hisp in range(n3):
        for race in range(n4):
            col_names = _matching_columns(
                (_VOTINGAGE_SPAN, age),
                (_HISPANIC_SPAN, hisp),
                (_RACE_SPAN, race),
            )
            print(col_names)
            m10.append(_cell_total(col_names))

## Q11: HHGQ*VOTINGAGE*HISPANIC*CENRACE (2,016 cells)
## NOTE(review): hist_cou has a single row, so to_numpy() presumably yields a
## 2-D array and len(m11) below reports the row count rather than 2,016 --
## confirm that later cells expect this shape.
m11 = hist_cou.to_numpy()

print(len(m2), len(m3), len(m4), len(m6), len(m7), len(m8), len(m9), len(m10), len(m11))

['00000000', '01000000', '02000000', '03000000', '04000000', '05000000', '06000000', '07000000']
['00000001', '01000001', '02000001', '03000001', '04000001', '05000001', '06000001', '07000001']
['00000002', '01000002', '02000002', '03000002', '04000002', '05000002', '06000002', '07000002']
['00000003', '01000003', '02000003', '03000003', '04000003', '05000003', '06000003', '07000003']
['00000004', '01000004', '02000004', '03000004', '04000004', '05000004', '06000004', '07000004']
['00000005', '01000005', '02000005', '03000005', '04000005', '05000005', '06000005', '07000005']
['00000006', '01000006', '02000006', '03000006', '04000006', '05000006', '06000006', '07000006']
['00000007', '01000007', '02000007', '03000007', '04000007', '05000007', '06000007', '07000007']
['00000008', '01000008', '02000008', '03000008', '04000008', '05000008', '06000008', '07000008']
['00000009', '01000009', '02000009', '03000009', '04000009', '05000009', '06000009', '07000009']
['00000010', '01000010', '0200