In [1]:
import pandas as pd
import numpy as np
import glob
import matplotlib.pyplot as plt

In [2]:
# Number of runs assumed per .npy file; used to derive a run id from
# (file position, position within the file).
NUM_RUNS = 8

# Output column order: nine total-population columns followed by nine
# voting-age-population columns.
col_names = [
    # total population
    "TOTPOP", "HISP", "NH_WHITE", "NH_BLACK", "NH_AMIN",
    "NH_ASIAN", "NH_NHPI", "NH_OTHER", "NH_2MORE",
    # voting-age population
    "VAP", "HVAP", "WVAP", "BVAP", "AMINVAP", "ASIANVAP",
    "NHPIVAP", "OTHERVAP", "2MOREVAP",
]

# These values are formatted straight into file names, so the int/float
# spelling (1 rather than 1.0) matters.
epsilon_values = [0.25, 0.5, 1, 2]

# Split labels; also formatted into the input and output file names.
epsilon_splits = [
    "equal",
    "top_heavy",
    "mid_heavy",
    "bottom_heavy",
]

In [3]:
# Gather every noised run for each (epsilon, split) pair into one long table
# and write it to results/noised_runs_<eps>_<split>.csv.
for eps in epsilon_values:
    for split in epsilon_splits:
        pattern = "DallasRuns/dallas_county_2010_POP_VAP_eps_{}_{}_split_run_*.npy".format(eps, split)
        # glob returns matches in arbitrary order and the run id below depends
        # on a file's position in this list, so sort to make ids reproducible.
        fs = sorted(glob.glob(pattern))
        print(eps, split)
        # Collect one frame per run and concatenate once at the end:
        # DataFrame.append was removed in pandas 2.0 and re-copied the whole
        # accumulated table on every call.
        frames = []
        total_rows = 0
        for i, f in enumerate(fs):
            # NOTE: allow_pickle=True unpickles arbitrary objects from the file;
            # only load files from a trusted source.
            data = np.load(f, allow_pickle=True)
            for j in range(len(data)):
                # Assumes each file holds NUM_RUNS runs, so ids do not collide.
                run_id = i*NUM_RUNS+j
                # Keep only entries whose key is 15 characters long
                # (presumably block-level GEOIDs -- confirm against the writer).
                blocks = {k: v for k, v in data[j].items() if len(k) == 15}
                counts = np.zeros((len(blocks), len(col_names)))
                geoids = np.zeros(len(blocks), dtype=object)
                for k, (geoid, pop_cols) in enumerate(blocks.items()):
                    geoids[k] = geoid
                    # One row: TOTPOP values followed by VAP values.
                    counts[k] = np.append(pop_cols["TOTPOP"], pop_cols["VAP"])
                df = (
                    pd.DataFrame(counts, columns=col_names, index=geoids)
                    .reset_index()
                    .rename(columns={"index": "GEOID"})
                    .assign(run=run_id, epsilon=eps, split=split)
                )
                frames.append(df)
                total_rows += len(df)
                # Same progress line as before: run id and cumulative table shape.
                print(run_id, (total_rows, df.shape[1]))
        # pd.concat raises on an empty list; fall back to an empty frame when
        # no files matched the pattern.
        runs = pd.concat(frames) if frames else pd.DataFrame()
        runs.to_csv("results/noised_runs_{}_{}.csv".format(eps, split), index=False)

0.25 equal
0 (44113, 22)
1 (88226, 22)
2 (132339, 22)
3 (176452, 22)
4 (220565, 22)
5 (264678, 22)
6 (308791, 22)
7 (352904, 22)
8 (397017, 22)
9 (441130, 22)
10 (485243, 22)
11 (529356, 22)
12 (573469, 22)
13 (617582, 22)
14 (661695, 22)
15 (705808, 22)
16 (749921, 22)
17 (794034, 22)
18 (838147, 22)
19 (882260, 22)
20 (926373, 22)
21 (970486, 22)
22 (1014599, 22)
23 (1058712, 22)
24 (1102825, 22)
25 (1146938, 22)
26 (1191051, 22)
27 (1235164, 22)
28 (1279277, 22)
29 (1323390, 22)
30 (1367503, 22)
31 (1411616, 22)
0.25 top_heavy
0 (44113, 22)
1 (88226, 22)
2 (132339, 22)
3 (176452, 22)
4 (220565, 22)
5 (264678, 22)
6 (308791, 22)
7 (352904, 22)
8 (397017, 22)
9 (441130, 22)
10 (485243, 22)
11 (529356, 22)
12 (573469, 22)
13 (617582, 22)
14 (661695, 22)
15 (705808, 22)
16 (749921, 22)
17 (794034, 22)
18 (838147, 22)
19 (882260, 22)
20 (926373, 22)
21 (970486, 22)
22 (1014599, 22)
23 (1058712, 22)
24 (1102825, 22)
25 (1146938, 22)
26 (1191051, 22)
27 (1235164, 22)
28 (1279277, 22)
29 (1

22 (1014599, 22)
23 (1058712, 22)
24 (1102825, 22)
25 (1146938, 22)
26 (1191051, 22)
27 (1235164, 22)
28 (1279277, 22)
29 (1323390, 22)
30 (1367503, 22)
31 (1411616, 22)


In [4]:
# Preview the first rows of `runs`. `runs` is rebound for every (eps, split)
# pair in the preceding loop, so this shows only the last table that was built.
runs.head()

Unnamed: 0,GEOID,TOTPOP,HISP,NH_WHITE,NH_BLACK,NH_AMIN,NH_ASIAN,NH_NHPI,NH_OTHER,NH_2MORE,...,WVAP,BVAP,AMINVAP,ASIANVAP,NHPIVAP,OTHERVAP,2MOREVAP,run,epsilon,split
0,481130078091007,1.236751,0.5950679,4.547268e-10,0.1045124,0.0,1.779867e-07,0.0,2.193474e-09,0.5371708,...,4.037813e-10,1.602047e-10,0.0,1.779087e-07,0.0,8.290582e-11,0.5371708,0,2,bottom_heavy
1,481130078091004,3.045803e-09,1.879555e-10,7.149362e-10,4.449413e-10,0.0,1.379126e-09,0.0,1.700959e-10,1.487479e-10,...,6.589382e-10,5.684007e-11,0.0,1.26401e-09,0.0,5.425121e-11,1.487476e-10,0,2,bottom_heavy
2,481130078091008,278.777,62.77246,61.30985,137.3614,0.0,8.21907,0.0,3.527251,5.58698,...,59.62733,103.5651,0.0,7.725136,0.0,1.720089,5.58698,0,2,bottom_heavy
3,481130078091015,69.29919,32.1543,5.201583,31.13856,0.0,0.8047423,0.0,1.085193e-10,3.195054e-10,...,5.201583,24.12912,0.0,0.8047423,0.0,4.868202e-11,3.195051e-10,0,2,bottom_heavy
4,481130078091003,0.1779502,1.228253e-10,1.819826e-10,1.012685e-10,0.0,9.73123e-11,0.0,7.926049e-11,0.1779502,...,8.960681e-11,3.261437e-11,0.0,4.586374e-11,0.0,2.697051e-11,0.1779502,0,2,bottom_heavy


In [7]:
# Display epsilon_values with its first entry dropped.
epsilon_values[1:]

[0.5, 1, 2]

In [8]:
# Gather every allow_neg noised run for each (epsilon, split) pair into one
# long table and write it to results/noised_runs_allow_neg_<eps>_<split>.csv.
# The first entry of epsilon_values is skipped here.
for eps in epsilon_values[1:]:
    for split in epsilon_splits:
        pattern = "DallasRuns_allow_neg/dallas_county_2010_POP_VAP_eps_{}_{}_split_run_*_allow_neg.npy".format(eps, split)
        # glob returns matches in arbitrary order and the run id below depends
        # on a file's position in this list, so sort to make ids reproducible.
        fs = sorted(glob.glob(pattern))
        print(eps, split)
        # Collect one frame per run and concatenate once at the end:
        # DataFrame.append was removed in pandas 2.0 and re-copied the whole
        # accumulated table on every call.
        frames = []
        total_rows = 0
        for i, f in enumerate(fs):
            # NOTE: allow_pickle=True unpickles arbitrary objects from the file;
            # only load files from a trusted source.
            data = np.load(f, allow_pickle=True)
            for j in range(len(data)):
                # Assumes each file holds NUM_RUNS runs, so ids do not collide.
                run_id = i*NUM_RUNS+j
                # Keep only entries whose key is 15 characters long
                # (presumably block-level GEOIDs -- confirm against the writer).
                blocks = {k: v for k, v in data[j].items() if len(k) == 15}
                counts = np.zeros((len(blocks), len(col_names)))
                geoids = np.zeros(len(blocks), dtype=object)
                for k, (geoid, pop_cols) in enumerate(blocks.items()):
                    geoids[k] = geoid
                    # One row: TOTPOP values followed by VAP values.
                    counts[k] = np.append(pop_cols["TOTPOP"], pop_cols["VAP"])
                df = (
                    pd.DataFrame(counts, columns=col_names, index=geoids)
                    .reset_index()
                    .rename(columns={"index": "GEOID"})
                    .assign(run=run_id, epsilon=eps, split=split)
                )
                frames.append(df)
                total_rows += len(df)
                # Same progress line as before: run id and cumulative table shape.
                print(run_id, (total_rows, df.shape[1]))
        # pd.concat raises on an empty list; fall back to an empty frame when
        # no files matched the pattern.
        runs = pd.concat(frames) if frames else pd.DataFrame()
        runs.to_csv("results/noised_runs_allow_neg_{}_{}.csv".format(eps, split), index=False)

0.5 equal
0 (44113, 22)
1 (88226, 22)
2 (132339, 22)
3 (176452, 22)
4 (220565, 22)
5 (264678, 22)
6 (308791, 22)
7 (352904, 22)
8 (397017, 22)
9 (441130, 22)
10 (485243, 22)
11 (529356, 22)
12 (573469, 22)
13 (617582, 22)
14 (661695, 22)
15 (705808, 22)
16 (749921, 22)
17 (794034, 22)
18 (838147, 22)
19 (882260, 22)
20 (926373, 22)
21 (970486, 22)
22 (1014599, 22)
23 (1058712, 22)
24 (1102825, 22)
25 (1146938, 22)
26 (1191051, 22)
27 (1235164, 22)
28 (1279277, 22)
29 (1323390, 22)
30 (1367503, 22)
31 (1411616, 22)
0.5 top_heavy
0 (44113, 22)
1 (88226, 22)
2 (132339, 22)
3 (176452, 22)
4 (220565, 22)
5 (264678, 22)
6 (308791, 22)
7 (352904, 22)
8 (397017, 22)
9 (441130, 22)
10 (485243, 22)
11 (529356, 22)
12 (573469, 22)
13 (617582, 22)
14 (661695, 22)
15 (705808, 22)
16 (749921, 22)
17 (794034, 22)
18 (838147, 22)
19 (882260, 22)
20 (926373, 22)
21 (970486, 22)
22 (1014599, 22)
23 (1058712, 22)
24 (1102825, 22)
25 (1146938, 22)
26 (1191051, 22)
27 (1235164, 22)
28 (1279277, 22)
29 (132

### Gather reconstructed data

In [4]:
# Column order for the reconstructed data: eight total-population columns
# followed by eight voting-age-population columns.
# NOTE: this rebinds `col_names`; cells that expect the earlier 18-column
# list must not be re-run after this one without resetting it.
col_names = [
    # total population
    "TOTPOP", "HISP", "NH_WHITE", "NH_BLACK", "NH_AMIN",
    "NH_ASIAN", "NH_NHPI", "NH_OTHER*",
    # voting-age population
    "VAP", "HVAP", "WVAP", "BVAP", "AMINVAP", "ASIANVAP",
    "NHPIVAP", "OTHERVAP*",
]

In [5]:
# Gather every reconstructed-data run for each (neg, epsilon, split)
# combination into one long table and write it to
# results/noised_recon_runs_<neg>_<eps>_<split>.csv.
for neg in ["allow_neg", "non_neg"]:
    for eps in epsilon_values:
        for split in epsilon_splits:
            pattern = "DallasRunsRecon/dallas_county_recon_POP_VAP_eps_{}_{}_split_run_*_{}.npy".format(eps, split, neg)
            # glob returns matches in arbitrary order and the run id below
            # depends on a file's position in this list, so sort to make ids
            # reproducible.
            fs = sorted(glob.glob(pattern))
            print(eps, split)
            # Collect one frame per run and concatenate once at the end:
            # DataFrame.append was removed in pandas 2.0 and re-copied the
            # whole accumulated table on every call.
            frames = []
            total_rows = 0
            for i, f in enumerate(fs):
                # NOTE: allow_pickle=True unpickles arbitrary objects from the
                # file; only load files from a trusted source.
                data = np.load(f, allow_pickle=True)
                for j in range(len(data)):
                    # Assumes each file holds NUM_RUNS runs, so ids do not collide.
                    run_id = i*NUM_RUNS+j
                    # Keep only entries whose key is 15 characters long
                    # (presumably block-level GEOIDs -- confirm against the writer).
                    blocks = {k: v for k, v in data[j].items() if len(k) == 15}
                    counts = np.zeros((len(blocks), len(col_names)))
                    geoids = np.zeros(len(blocks), dtype=object)
                    for k, (geoid, pop_cols) in enumerate(blocks.items()):
                        geoids[k] = geoid
                        # One row: TOTPOP values followed by VAP values.
                        counts[k] = np.append(pop_cols["TOTPOP"], pop_cols["VAP"])
                    df = (
                        pd.DataFrame(counts, columns=col_names, index=geoids)
                        .reset_index()
                        .rename(columns={"index": "GEOID"})
                        .assign(run=run_id, epsilon=eps, split=split)
                    )
                    frames.append(df)
                    total_rows += len(df)
                    # Same progress line as before: run id and cumulative table shape.
                    print(run_id, (total_rows, df.shape[1]))
            # pd.concat raises on an empty list; fall back to an empty frame
            # when no files matched the pattern.
            runs = pd.concat(frames) if frames else pd.DataFrame()
            runs.to_csv("results/noised_recon_runs_{}_{}_{}.csv".format(neg, eps, split), index=False)

0.25 equal
0 (44113, 20)
1 (88226, 20)
2 (132339, 20)
3 (176452, 20)
4 (220565, 20)
5 (264678, 20)
6 (308791, 20)
7 (352904, 20)
8 (397017, 20)
9 (441130, 20)
10 (485243, 20)
11 (529356, 20)
12 (573469, 20)
13 (617582, 20)
14 (661695, 20)
15 (705808, 20)
16 (749921, 20)
17 (794034, 20)
18 (838147, 20)
19 (882260, 20)
20 (926373, 20)
21 (970486, 20)
22 (1014599, 20)
23 (1058712, 20)
24 (1102825, 20)
25 (1146938, 20)
26 (1191051, 20)
27 (1235164, 20)
28 (1279277, 20)
29 (1323390, 20)
30 (1367503, 20)
31 (1411616, 20)
0.25 top_heavy
0 (44113, 20)
1 (88226, 20)
2 (132339, 20)
3 (176452, 20)
4 (220565, 20)
5 (264678, 20)
6 (308791, 20)
7 (352904, 20)
8 (397017, 20)
9 (441130, 20)
10 (485243, 20)
11 (529356, 20)
12 (573469, 20)
13 (617582, 20)
14 (661695, 20)
15 (705808, 20)
16 (749921, 20)
17 (794034, 20)
18 (838147, 20)
19 (882260, 20)
20 (926373, 20)
21 (970486, 20)
22 (1014599, 20)
23 (1058712, 20)
24 (1102825, 20)
25 (1146938, 20)
26 (1191051, 20)
27 (1235164, 20)
28 (1279277, 20)
29 (1

22 (1014599, 20)
23 (1058712, 20)
24 (1102825, 20)
25 (1146938, 20)
26 (1191051, 20)
27 (1235164, 20)
28 (1279277, 20)
29 (1323390, 20)
30 (1367503, 20)
31 (1411616, 20)
0.25 equal
0 (44113, 20)
1 (88226, 20)
2 (132339, 20)
3 (176452, 20)
4 (220565, 20)
5 (264678, 20)
6 (308791, 20)
7 (352904, 20)
8 (397017, 20)
9 (441130, 20)
10 (485243, 20)
11 (529356, 20)
12 (573469, 20)
13 (617582, 20)
14 (661695, 20)
15 (705808, 20)
16 (749921, 20)
17 (794034, 20)
18 (838147, 20)
19 (882260, 20)
20 (926373, 20)
21 (970486, 20)
22 (1014599, 20)
23 (1058712, 20)
24 (1102825, 20)
25 (1146938, 20)
26 (1191051, 20)
27 (1235164, 20)
28 (1279277, 20)
29 (1323390, 20)
30 (1367503, 20)
31 (1411616, 20)
0.25 top_heavy
0 (44113, 20)
1 (88226, 20)
2 (132339, 20)
3 (176452, 20)
4 (220565, 20)
5 (264678, 20)
6 (308791, 20)
7 (352904, 20)
8 (397017, 20)
9 (441130, 20)
10 (485243, 20)
11 (529356, 20)
12 (573469, 20)
13 (617582, 20)
14 (661695, 20)
15 (705808, 20)
16 (749921, 20)
17 (794034, 20)
18 (838147, 20)
19

12 (573469, 20)
13 (617582, 20)
14 (661695, 20)
15 (705808, 20)
16 (749921, 20)
17 (794034, 20)
18 (838147, 20)
19 (882260, 20)
20 (926373, 20)
21 (970486, 20)
22 (1014599, 20)
23 (1058712, 20)
24 (1102825, 20)
25 (1146938, 20)
26 (1191051, 20)
27 (1235164, 20)
28 (1279277, 20)
29 (1323390, 20)
30 (1367503, 20)
31 (1411616, 20)
