In [4]:
"""
ACS Collection
Author: Dominic Ridley
"""

import pandas as pd
pd.set_option("display.max_rows", None)
import pickle
import 
import numpy as np


In [19]:
#Imports test table
# NOTE(review): pickle.load can execute arbitrary code while unpickling;
# only load 'test_table.p' if it comes from a trusted source.
with open('test_table.p', 'rb') as table_file:  # context manager closes the handle
    test = pickle.load(table_file)
# Cast every column from position 3 onward to float and raise negative values to 0.
test.iloc[:, 3:] = test.iloc[:, 3:].applymap(float).clip(lower=0)


In [18]:
# Show the loaded table; display.max_rows is set to None above, so every row is rendered.
test


Unnamed: 0,MUNI_ID,MUNI,NAME,001E,001M,002E,002M,003E,003M,004E,...,045E,045M,046E,046M,047E,047M,048E,048M,049E,049M
258,1,ABINGTON,"Abington town, Plymouth County, Massachusetts",16330,56,7732,344,527,150,308,...,175,80,324,110,217,96,89,48,243,103
330,2,ACTON,"Acton town, Middlesex County, Massachusetts",23561,26,11434,330,460,152,781,...,420,106,465,124,472,149,141,62,284,123
123,3,ACUSHNET,"Acushnet town, Bristol County, Massachusetts",10483,18,5207,260,331,172,190,...,180,82,308,107,336,127,64,42,144,69
168,4,ADAMS,"Adams town, Berkshire County, Massachusetts",8172,22,4136,180,254,126,199,...,136,57,165,58,214,67,113,51,118,45
177,5,AGAWAM,"Agawam Town city, Hampden County, Massachusetts",28744,28,13873,408,693,192,633,...,459,130,744,167,534,146,415,176,850,184
143,6,ALFORD,"Alford town, Berkshire County, Massachusetts",421,80,212,39,6,7,4,...,19,13,29,15,12,8,9,6,0,12
46,7,AMESBURY,"Amesbury Town city, Essex County, Massachusetts",17378,22,8582,305,533,174,341,...,385,99,365,111,200,77,294,109,223,80
4,8,AMHERST,"Amherst town, Hampshire County, Massachusetts",39741,46,19641,491,406,102,364,...,199,77,438,116,248,81,231,92,386,122
66,9,ANDOVER,"Andover town, Essex County, Massachusetts",35609,35,17176,359,787,217,923,...,491,113,675,154,412,105,389,100,395,102
331,10,ARLINGTON,"Arlington town, Middlesex County, Massachusetts",45147,27,21008,479,1563,260,1475,...,698,168,989,152,882,150,604,150,724,166


In [30]:
def acs_est(estimates):
    """Return the row-wise total of ``estimates``, rounded to 2 decimals.

    estimates: DataFrame whose columns are summed across each row.
    Returns a Series indexed like ``estimates``.
    """
    row_totals = estimates.sum(axis=1)
    return row_totals.round(2)


def acs_est_pct(estimates, unis):
    """Return sum(estimates) as a percentage of sum(unis), row by row.

    estimates: DataFrame of numerator columns (summed across each row).
    unis: DataFrame of denominator columns (summed across each row).
    Returns a Series of percentages rounded to 2 decimals.
    """
    numerator = estimates.sum(axis=1)
    denominator = unis.sum(axis=1)
    percent = numerator / denominator * 100
    return percent.round(2)
    
def acs_moe(estimates, table=None):
    """Margin of error for the row-wise sum of ``estimates``.

    For each row the result is sqrt(sum of squared MOEs), where:
      * every non-zero estimate contributes its own MOE, and
      * all zero estimates together contribute only the single largest
        of their MOEs (counted once).

    estimates: DataFrame of estimate columns. The matching MOE column name
        is derived by replacing 'E' with 'M' in each column name.
    table: DataFrame holding the MOE columns, indexed like ``estimates``.
        Defaults to the notebook-level ``test`` table.
    Returns a Series named 'moe', indexed like ``estimates``, rounded to
    2 decimals.
    """
    if table is None:
        table = test  # notebook-level ACS table

    est_cols = list(estimates.columns)
    moe_cols = [c.replace('E', 'M') for c in est_cols]

    # Pull the MOEs for exactly these rows and relabel them with the
    # estimate column names so they align cell-for-cell with `estimates`.
    moes = table.loc[estimates.index, moe_cols].astype(float)
    moes.columns = est_cols

    is_zero = estimates.eq(0)

    # Squared MOEs of the non-zero estimates (zero estimates masked to 0).
    nonzero_var = moes.where(~is_zero, 0).pow(2).sum(axis=1)
    # Largest MOE among the zero estimates; rows without any zero add nothing.
    zero_var = moes.where(is_zero).max(axis=1).fillna(0).pow(2)

    # Take the square root exactly once for every row. The previous version
    # rooted rows containing a zero estimate twice.
    moe = np.sqrt(nonzero_var + zero_var)
    moe.name = 'moe'

    return moe.round(2)

def acs_moe_pct(estimates, unis):
    """Margin of error, in percentage points, of sum(estimates) / sum(unis).

    With p = est_sum / uni_sum, the value under the square root is
    moe_sum**2 - p**2 * moe_uni_sum**2. When that is negative the formula
    with a plus sign is used instead.

    estimates: DataFrame of numerator estimate columns.
    unis: DataFrame of denominator estimate columns.
    Returns a Series rounded to 2 decimals. Rows that took the plus-sign
    formula come first, followed by the rest; rows whose radicand is NaN
    (for example a zero denominator) match neither mask and are omitted.
    """
    est_sum = acs_est(estimates)
    uni_sum = acs_est(unis)
    moe_sum = acs_moe(estimates)
    moe_uni_sum = acs_moe(unis)

    # p^2 * MOE_den^2, shared by both variants of the formula.
    ratio_term = (est_sum ** 2 / uni_sum ** 2) * moe_uni_sum ** 2
    radicand_minus = moe_sum ** 2 - ratio_term
    radicand_plus = moe_sum ** 2 + ratio_term

    use_plus = radicand_minus < 0
    use_minus = radicand_minus >= 0

    y_1 = 100 / uni_sum[use_plus] * np.sqrt(radicand_plus[use_plus])
    y_2 = 100 / uni_sum[use_minus] * np.sqrt(radicand_minus[use_minus])

    # pd.concat / Series.round replace Series.append and np.round_, which
    # were removed in pandas 2.0 and NumPy 2.0 respectively.
    return pd.concat([y_1, y_2]).round(2)
    
    
    
""" Helper Functions """
def getMOE(x, table=None): #Replaces NaN with MOE estimates
    """Return a copy of ``x`` with its NaN cells filled from the matching MOE column.

    x: Series named after an estimate column; the MOE column name is obtained
        by replacing 'E' with 'M' in ``x.name``.
    table: DataFrame holding the MOE column, looked up by ``x``'s index.
        Defaults to the notebook-level ``test`` table.

    ``x`` itself is left untouched: functions passed to ``DataFrame.apply``
    must not mutate their argument.
    """
    if table is None:
        table = test  # notebook-level ACS table
    moes = table.loc[x.index, x.name.replace('E', 'M')]
    # fillna aligns on the index, so each NaN gets the MOE of its own row.
    return x.fillna(moes)

def getMOE2(x, table=None): #Replaces zero values with MOE estimates
    """Return the MOE column for ``x``, blanked to NaN wherever ``x`` is NaN.

    x: Series named after an estimate column; the MOE column name is obtained
        by replacing 'E' with 'M' in ``x.name``.
    table: DataFrame holding the MOE column, looked up by ``x``'s index.
        Defaults to the notebook-level ``test`` table.
    Returns a Series carrying the MOE column's name, indexed like ``x``.
    """
    if table is None:
        table = test  # notebook-level ACS table
    moes = table.loc[x.index, x.name.replace('E', 'M')]
    # Keep the MOE only where x holds a value; `where` upcasts integer MOE
    # columns cleanly instead of assigning NaN into them.
    return moes.where(x.notna())
    

In [31]:
# Columns 013E through 019E combined, as a percentage of column 001E.
df = acs_est_pct(
    test[['{:03d}E'.format(n) for n in range(13, 20)]],
    test[['001E']],
)
df

88     20.84
195    22.62
142    20.53
267    21.22
117    20.72
242    17.34
316    20.80
219     7.25
336    21.84
196    20.37
38     21.33
166    23.11
300    20.79
207    21.03
34     20.51
161    21.37
51     21.47
11     22.13
210    23.65
285    19.81
81     20.98
248    25.44
201    21.56
233    21.66
27     22.54
181    19.82
157    24.32
46     20.26
309    21.31
318    20.01
170    22.11
67     23.38
120    25.32
35     23.05
139    16.04
277    19.26
167    23.18
337    23.05
77     21.20
14     21.16
287    19.66
94     19.59
126    21.21
95     17.98
61     19.35
16     15.83
314    22.69
175    19.14
213    13.84
17     19.18
208    21.73
98     19.48
301    20.00
36     22.06
274    17.03
189    21.78
140    20.46
270    21.28
134    22.68
221    19.63
136    18.71
236    17.20
264    20.24
73     20.77
3      21.74
315    20.28
197    23.12
302    22.02
227    21.94
273    21.18
341    18.56
158    17.77
6      18.26
313    20.59
278    17.71
145    23.21
62     23.54