In [46]:
"""
ACS Collection
Author: Dominic Ridley
"""

import pandas as pd
pd.set_option("display.max_rows", None)
import pickle
import numpy as np
import math

In [2]:
#Imports test table
test = pickle.load(open('test_table.p', 'rb'))
test.iloc[:, 3:] = test.iloc[:, 3:].applymap(lambda x: float(x)).clip(lower=0)


In [55]:
test

Unnamed: 0,MUNI_ID,MUNI,NAME,001E,001M,002E,002M,003E,003M,004E,...,105E,105M,106E,106M,107E,107M,108E,108M,109E,109M
78,1,ABINGTON,"Abington town, Plymouth County, Massachusetts",16330.0,56.0,0.0,19.0,13.0,21.0,0.0,...,0.0,19.0,0.0,19.0,0.0,19.0,1542.0,391.0,2545.0,479.0
57,2,ACTON,"Acton town, Middlesex County, Massachusetts",23561.0,26.0,0.0,23.0,0.0,23.0,0.0,...,32.0,53.0,0.0,23.0,3.0,6.0,7589.0,603.0,3450.0,582.0
105,3,ACUSHNET,"Acushnet town, Bristol County, Massachusetts",10483.0,18.0,0.0,19.0,0.0,19.0,0.0,...,0.0,19.0,0.0,19.0,0.0,19.0,661.0,344.0,1017.0,320.0
320,4,ADAMS,"Adams town, Berkshire County, Massachusetts",8172.0,22.0,0.0,17.0,5.0,8.0,0.0,...,0.0,17.0,0.0,17.0,0.0,17.0,1282.0,329.0,1124.0,245.0
329,5,AGAWAM,"Agawam Town city, Hampden County, Massachusetts",28744.0,28.0,0.0,23.0,303.0,233.0,0.0,...,0.0,23.0,0.0,23.0,0.0,23.0,3414.0,602.0,4046.0,709.0
295,6,ALFORD,"Alford town, Berkshire County, Massachusetts",421.0,80.0,0.0,12.0,0.0,12.0,0.0,...,0.0,12.0,0.0,12.0,0.0,12.0,49.0,21.0,78.0,32.0
207,7,AMESBURY,"Amesbury Town city, Essex County, Massachusetts",17378.0,22.0,0.0,19.0,0.0,19.0,0.0,...,0.0,19.0,0.0,19.0,0.0,19.0,2279.0,497.0,2287.0,442.0
8,8,AMHERST,"Amherst town, Hampshire County, Massachusetts",39741.0,46.0,7.0,16.0,125.0,59.0,0.0,...,53.0,71.0,5.0,9.0,0.0,26.0,8543.0,768.0,10835.0,912.0
227,9,ANDOVER,"Andover town, Essex County, Massachusetts",35609.0,35.0,0.0,26.0,8.0,16.0,26.0,...,13.0,22.0,0.0,26.0,8.0,12.0,7484.0,760.0,4751.0,605.0
58,10,ARLINGTON,"Arlington town, Middlesex County, Massachusetts",45147.0,27.0,0.0,26.0,82.0,78.0,0.0,...,80.0,91.0,13.0,21.0,15.0,26.0,10995.0,892.0,5695.0,676.0


In [36]:
def acs_est(estimates):
    
    return round(estimates.sum(axis=1), 2)


def acs_est_pct(estimates, unis):
    
    est_sum = estimates.sum(axis=1)
    
    uni_sum = unis.sum(axis=1)

    return round((est_sum.divide(uni_sum) * 100), 2)
    
def acs_moe(estimates):
    
    cols = estimates.columns
    zero_ind = set()
    
    for c in cols:
        zero_ind.update(set(estimates[(estimates[c] == 0)].index))

    #Isolates the indexes of rows where the estimate is zero
    nonzero_ind = set(estimates.index).difference(zero_ind)
                  
    moe_cols = [c.replace('E', 'M') for c in cols]
    
    #Applies first part of moe formula [1] to non-zero rows
    g = test.loc[nonzero_ind, moe_cols].applymap(np.square).sum(axis=1)
    #display(test.loc[:, moe_cols])
    
    t = estimates[(estimates[cols] == 0)].dropna(how='all')

    t_2 = t.apply(lambda x: getMOE2(x))
    t_3 = t.apply(lambda x: getMOE(x))
    t_2 = t_2.max(axis=1)

    #display(g, t, t_2, t_3)
    m = (t_3.apply(np.square).sum(axis=1) + t_2.apply(np.square)) #.apply(np.sqrt)
    
    iz = g.append(m)
    iz = iz.apply(np.sqrt)
    iz.name = 'moe'
    estimates = estimates.join(iz)
        
    return round(estimates['moe'], 2)

def acs_moe_pct(estimates, unis):
    
    est_sum = acs_est(estimates)
    uni_sum = acs_est(unis)
    moe_sum = acs_moe(estimates)
    moe_uni_sum = acs_moe(unis)
    
    #display(est_sum)
    print((est_sum **2 / uni_sum **2))
    tbl_1 = moe_sum**2 - (est_sum **2 / uni_sum **2) * moe_uni_sum ** 2
    tbl_2 = moe_sum**2 + (est_sum **2 / uni_sum **2) * moe_uni_sum ** 2
    check_1 = ((moe_sum**2 - (est_sum **2 / uni_sum **2) * moe_uni_sum ** 2) < 0)
    check_2 = ((moe_sum**2 - (est_sum **2 / uni_sum **2) * moe_uni_sum ** 2) >= 0)

    y_1 = 100 / uni_sum[check_1] * np.sqrt(tbl_2[check_1])
    y_2 = 100 / uni_sum[check_2] * np.sqrt(tbl_1[check_2])
        
    return np.round_(y_1.append(y_2), 2)
    
    
    
""" Helper Functions """
def getMOE(x): #Replaces NaN with MOE estimates
    has_value = x[pd.isnull(x)].index
    x[has_value] = test.loc[has_value, x.name.replace('E', 'M')]
    return x

def getMOE2(x): #Replaces zero values with MOE estimates
    #Get values that are NaN
    not_zero = x[(np.isnan(x))].index
    df = test.loc[x.index, x.name.replace('E', 'M')]
    df[not_zero] = np.nan
    
    
    
    
    
    
    
    
    
    return df
    

In [58]:
#g, t, t_2, t_3, df = acs_moe(test[['002E']])
(100/16330) * math.sqrt(21**2 + (13**2 / 16339**2) * (56 **2))

0.12859796244601843

In [51]:
56**2

3136

In [40]:
g = acs_moe_pct(test[['003E']], test[['001E']])

78     6.337447e-07
57     0.000000e+00
105    0.000000e+00
320    3.743547e-07
329    1.111196e-04
295    0.000000e+00
207    0.000000e+00
8      9.893329e-06
227    5.047315e-08
58     3.298906e-06
142    0.000000e+00
28     0.000000e+00
251    0.000000e+00
69     1.088970e-05
138    7.316387e-09
124    9.876422e-08
155    2.255864e-05
278    2.234631e-04
72     0.000000e+00
203    7.211274e-06
185    0.000000e+00
301    0.000000e+00
63     0.000000e+00
22     0.000000e+00
294    1.827009e-06
43     1.074844e-06
120    0.000000e+00
150    6.322880e-05
260    0.000000e+00
209    1.698788e-05
32     3.934615e-07
171    0.000000e+00
332    0.000000e+00
139    1.765412e-05
1      7.608017e-06
195    8.844320e-06
29     3.443506e-05
228    0.000000e+00
181    5.508549e-05
281    2.472260e-05
205    0.000000e+00
84     8.526562e-08
338    0.000000e+00
85     9.883461e-08
165    0.000000e+00
283    3.448603e-08
265    0.000000e+00
37     0.000000e+00
75     6.604680e-07
284    5.052487e-06


In [41]:
g

78      0.13
57      0.10
105     0.18
320     0.10
329     0.81
295     2.85
207     0.11
8       0.15
227     0.04
58      0.17
142     0.27
28      0.37
251     0.78
69      0.28
138     0.02
124     0.05
155     0.34
278     1.94
72      0.21
203     0.24
185     0.31
301     0.65
63      0.13
22      0.13
294     0.16
43      0.10
120     0.25
150     1.02
260     0.59
209     0.35
32      0.06
171     0.18
332     1.02
139     0.59
1       0.08
195     0.23
29      0.81
228     0.21
181     0.57
281     0.36
205     0.17
84      0.05
338     0.32
85      0.03
165     0.35
283     0.03
265     0.63
37      0.08
75      0.06
284     0.38
70      0.33
88      0.16
252     1.06
140     0.24
193     0.28
52      0.05
2       0.24
323     0.38
346     0.81
10      0.89
348     0.08
126     1.24
317     0.69
177     0.14
270     0.20
266     0.67
59      0.14
253     0.64
16      1.37
326     0.26
232     0.71
121     0.11
273     0.48
264     0.34
196     0.11
108     0.22
166     1.67