In [1]:
"""
ACS Collection
Author: Dominic Ridley
"""
from builtins import any as b_any
import sphinx
import pandas as pd
pd.set_option("display.max_rows", None)
import requests
import pickle
import psycopg2
import sys
from psycopg2 import OperationalError, errorcodes, errors
from IPython.display import display 
import time
import numpy as np
import acs_functions
from config import config

# CensusAPI 
- class
## Methods:
- init
- load_table_censusvars
- load_geoids
- connect_database
- call_api

In [2]:
class CensusAPI:
    """
    Contains methods for accessing Census API, loading geo ids of municipalities and retrieving Census tables.
    """

    def __init__(self, year):
        """
        Initializes API parameter variables and loads the column-id lookups.

        Parameters:
            year: the year of the Census table that's being retrieved. It is
                substituted into the request URL template stored in self.call.

        Returns:
            class object

        Side effects:
            Reads 'pickles/cols_ids_dict.p' and 'csv/column_ids2.csv', both
            relative to the current working directory.
        """
        # Function-scope import: keeps this block independent of the file-level import cell.
        import os

        # NOTE(review): an API key is committed in source. The CENSUS_API_KEY
        # environment variable now takes precedence; the literal is kept only as a
        # backward-compatible fallback and should be rotated and removed.
        self.api_key = os.environ.get('CENSUS_API_KEY', '766973dcdc26460a63ee43b8bfed1d1c4692486a')
        self.year = year
        self.call = self._call_template(year)

        # Column definitions in dict form, keyed by table name.
        # NOTE(review): pickle.load can execute arbitrary code; only load trusted files.
        with open('pickles/cols_ids_dict.p', "rb") as handle:
            self.cols_dict = pickle.load(handle)

        # Census ids for each column in each table. Index is set to table name.
        self.column_ids = pd.read_csv('csv/column_ids2.csv', index_col=['table'], skipinitialspace=True) \
                            .drop(columns=['Unnamed: 0'])

        # Processes ids as a list of id strings (per-column map works on every pandas version).
        id_columns = ['ids', 'estimates', 'universe']
        self.column_ids[id_columns] = self.column_ids[id_columns] \
                            .apply(lambda column: column.map(self._parse_id_list))

    @staticmethod
    def _call_template(year):
        """
        Builds the ACS 5-year request template for the given year.

        Placeholders left for str.format: {0} state, {1} county,
        {2} county subdivision, {3} API key, {4} table group.
        """
        return 'https://api.census.gov/data/' + str(year) + '/acs/acs5?get=group({4})&' \
                        + 'for=county%20subdivision:{2}&in=state:{0}%20county:{1}&key={3}'

    @staticmethod
    def _parse_id_list(value):
        """
        Converts a stringified id list such as "['003', '014']" into ['003', '014'].

        Only digits and commas are kept before splitting, so a value without
        any digits (for example '[]') yields [''] rather than an empty list.
        """
        return "".join([ch for ch in str(value) if ch.isdigit() or ch == ',']).split(',')

    def load_table_censusvars(self, table):
        """
        Placeholder for loading the census variables used for a table.

        Parameters:
            table: name of table

        Returns:
            None. The method body is not implemented; the docstring it
            originally carried promised (ids, names) lists.
        """
        pass

    def load_geoids(self):
        """
        Loads geoids for geographical level (municipality, tract, block)

        Returns:
            full_table: pandas DataFrame with the columns GEOID (read as str),
            MUNI_ID and MUNI
        """
        # Raw string: the backslashes are path separators, not escape sequences.
        full_table = pd.read_excel(r'K:\DataServices\Datasets\Data Keys\Census_MuniName_County_State_ID.xls',
                                   dtype={'GEOID': str})
        return full_table[['GEOID', 'MUNI_ID', 'MUNI']]

    def connect_database(self, server):
        """
        Connect to the PostgreSQL database server.

        Parameters:
            server: section name passed to config() to look up the connection
                parameters

        Returns:
            (conn, cur): connection and cursor. Any error is printed rather than
            raised, in which case one or both values are None.
        """
        conn = None
        cur = None
        try:
            # read connection parameters
            params = config(section=server)

            # connect to the PostgreSQL server
            conn = psycopg2.connect(**params)
            print("Connected to " + params['database'])

            # create a cursor
            cur = conn.cursor()

        except Exception as error:
            # Deliberate best-effort: callers receive None values instead of an exception.
            print(error)

        return conn, cur

    def call_api(self, table, group='B01001'):
        """
        Calls Census API by group for every county found in the geoid table.

        Parameters:
            table: name of table (accepted for interface compatibility; it is
                not used by the request)
            group: Census table group requested, e.g. 'B01001'

        Returns:
            final_df: pandas DataFrame with MUNI_ID, MUNI, NAME and one column
            per estimate/margin variable (named by the last four characters of
            the Census variable, e.g. '001E'), sorted by MUNI_ID. Values are
            left exactly as returned by the API.

        Raises:
            requests.HTTPError: when the API answers with an error status.
        """
        # Loads GEOIDS to map to municipalities
        geoid_table = self.load_geoids()

        # Unique county codes (characters 3-5 of the GEOID), sorted so the
        # request order is deterministic
        counties = sorted({str(geoid)[2:5] for geoid in geoid_table['GEOID']})

        header = []
        rows = []

        # Calls the API once per county and collects the rows; the frame is built
        # once at the end (DataFrame.append was removed in pandas 2.0)
        for county in counties:
            # State code: 25, county: county, subdivisions: * (all), and the group
            request_url = self.call.format('25', county, '*', self.api_key, group)
            response = requests.get(request_url, timeout=120)
            response.raise_for_status()
            payload = response.json()

            # First element of every payload is the header row
            if not header:
                header = payload[0]
            rows.extend(payload[1:])

        out_df = pd.DataFrame(rows, columns=header)

        ### Data Preprocessing ###
        # Keep variables ending in E or M and shorten them to their last four characters
        value_columns = [name for name in out_df.columns if name[-1] in ('E', 'M') and name != 'NAME']
        short_names = [name[-4:] for name in value_columns]

        out_df = out_df.rename(columns=dict(zip(value_columns, short_names)))
        out_df = out_df[['NAME', 'GEO_ID'] + short_names].rename(columns={'GEO_ID': 'GEOID'})

        # The last ten characters of GEO_ID are matched against the geoid table's GEOID
        out_df['GEOID'] = out_df['GEOID'].str[-10:]

        # Joins tables on 'GEOID' to get municipality names
        final_df = out_df.merge(geoid_table, on='GEOID', how='inner')
        final_df = final_df[['MUNI_ID', 'MUNI', 'NAME'] + short_names].sort_values('MUNI_ID')
        return final_df

In [3]:
# Notebook-level CensusAPI instance for the 2018 tables; later cells refer to it as `c`.
c = CensusAPI('2018')

In [4]:
#pickle.dump(c.col_uni_dict, open('col_uni_dict2.p', 'wb'))
# Show the dict-form column definitions for the B05003 table.
c.cols_dict['b05003_citizenship_nativity_by_age_gender_acs_m']

{'pop': {'ids': {'001'}, 'moe': False, 'percent': False},
 'pop_me': {'ids': {'001'}, 'moe': True, 'percent': False},
 'pop_u18': {'ids': {'003', '014'}, 'moe': False, 'percent': False},
 'pop_u18me': {'ids': {'003', '014'}, 'moe': True, 'percent': False},
 'ntv_u18': {'ids': {'004', '015'}, 'moe': False, 'percent': False},
 'ntv_u18me': {'ids': {'004', '015'}, 'moe': True, 'percent': False},
 'fb_u18': {'ids': {'005', '016'}, 'moe': False, 'percent': False},
 'fb_u18me': {'ids': {'005', '016'}, 'moe': True, 'percent': False},
 'ntr_u18': {'ids': {'006', '017'}, 'moe': False, 'percent': False},
 'ntr_u18me': {'ids': {'006', '017'}, 'moe': True, 'percent': False},
 'non_u18': {'ids': {'007', '018'}, 'moe': False, 'percent': False},
 'non_u18me': {'ids': {'007', '018'}, 'moe': True, 'percent': False},
 'pop_u18_p': {'ids': {'001', '003', '014'}, 'moe': False, 'percent': True},
 'pop_u18mep': {'ids': {'001', '003', '014'}, 'moe': True, 'percent': True},
 'ntv_u18_p': {'ids': {'003', '004'

In [5]:
#[x for x in c.ids_dict.keys()]
# Show the rows of the column-id table that belong to the B05003 table.
c.column_ids.loc['b05003_citizenship_nativity_by_age_gender_acs_m']

Unnamed: 0_level_0,col,group,ids,moe,percent,estimates,universe
table,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
b05003_citizenship_nativity_by_age_gender_acs_m,pop,b05003,[001],False,False,[001],[]
b05003_citizenship_nativity_by_age_gender_acs_m,pop_me,b05003,[001],True,False,[001],[]
b05003_citizenship_nativity_by_age_gender_acs_m,pop_u18,b05003,"[003, 014]",False,False,"[003, 014]",[]
b05003_citizenship_nativity_by_age_gender_acs_m,pop_u18me,b05003,"[003, 014]",True,False,"[003, 014]",[]
b05003_citizenship_nativity_by_age_gender_acs_m,ntv_u18,b05003,"[015, 004]",False,False,"[004, 015]",[]
b05003_citizenship_nativity_by_age_gender_acs_m,ntv_u18me,b05003,"[015, 004]",True,False,"[004, 015]",[]
b05003_citizenship_nativity_by_age_gender_acs_m,fb_u18,b05003,"[016, 005]",False,False,"[005, 016]",[]
b05003_citizenship_nativity_by_age_gender_acs_m,fb_u18me,b05003,"[016, 005]",True,False,"[005, 016]",[]
b05003_citizenship_nativity_by_age_gender_acs_m,ntr_u18,b05003,"[006, 017]",False,False,"[006, 017]",[]
b05003_citizenship_nativity_by_age_gender_acs_m,ntr_u18me,b05003,"[006, 017]",True,False,"[006, 017]",[]


In [6]:
# Open a connection and cursor using the 'sdvm' section of the config.
conn, cur = c.connect_database('sdvm')

Connected to ds


In [7]:
# Fetch the raw B04006 group for every municipality.
hello = c.call_api('b04006_reported_ancestry_acs_m', 'B04006')

In [8]:
#pickle.dump(hello, open('test_table.p', 'wb'))

In [9]:
# Cast every column from the fourth onward to float and floor negative values at 0.
# NOTE(review): this overwrites `hello` in place, so re-running the cell operates on already-converted data.
hello.iloc[:, 3:] = hello.iloc[:, 3:].applymap(lambda x: float(x)).clip(lower=0)
hello

Unnamed: 0,MUNI_ID,MUNI,NAME,001E,001M,002E,002M,003E,003M,004E,...,105E,105M,106E,106M,107E,107M,108E,108M,109E,109M
46,1,ABINGTON,"Abington town, Plymouth County, Massachusetts",16330.0,56.0,0.0,19.0,13.0,21.0,0.0,...,0.0,19.0,0.0,19.0,0.0,19.0,1542.0,391.0,2545.0,479.0
194,2,ACTON,"Acton town, Middlesex County, Massachusetts",23561.0,26.0,0.0,23.0,0.0,23.0,0.0,...,32.0,53.0,0.0,23.0,3.0,6.0,7589.0,603.0,3450.0,582.0
0,3,ACUSHNET,"Acushnet town, Bristol County, Massachusetts",10483.0,18.0,0.0,19.0,0.0,19.0,0.0,...,0.0,19.0,0.0,19.0,0.0,19.0,661.0,344.0,1017.0,320.0
239,4,ADAMS,"Adams town, Berkshire County, Massachusetts",8172.0,22.0,0.0,17.0,5.0,8.0,0.0,...,0.0,17.0,0.0,17.0,0.0,17.0,1282.0,329.0,1124.0,245.0
329,5,AGAWAM,"Agawam Town city, Hampden County, Massachusetts",28744.0,28.0,0.0,23.0,303.0,233.0,0.0,...,0.0,23.0,0.0,23.0,0.0,23.0,3414.0,602.0,4046.0,709.0
214,6,ALFORD,"Alford town, Berkshire County, Massachusetts",421.0,80.0,0.0,12.0,0.0,12.0,0.0,...,0.0,12.0,0.0,12.0,0.0,12.0,49.0,21.0,78.0,32.0
73,7,AMESBURY,"Amesbury Town city, Essex County, Massachusetts",17378.0,22.0,0.0,19.0,0.0,19.0,0.0,...,0.0,19.0,0.0,19.0,0.0,19.0,2279.0,497.0,2287.0,442.0
250,8,AMHERST,"Amherst town, Hampshire County, Massachusetts",39741.0,46.0,7.0,16.0,125.0,59.0,0.0,...,53.0,71.0,5.0,9.0,0.0,26.0,8543.0,768.0,10835.0,912.0
93,9,ANDOVER,"Andover town, Essex County, Massachusetts",35609.0,35.0,0.0,26.0,8.0,16.0,26.0,...,13.0,22.0,0.0,26.0,8.0,12.0,7484.0,760.0,4751.0,605.0
195,10,ARLINGTON,"Arlington town, Middlesex County, Massachusetts",45147.0,27.0,0.0,26.0,82.0,78.0,0.0,...,80.0,91.0,13.0,21.0,15.0,26.0,10995.0,892.0,5695.0,676.0


In [10]:
#c.column_ids.loc['b01001_population_by_age_gender_acs_m', 'fpop_u18'] #Not doing multiindex anymore

In [35]:
class WriteTables:
    """
    Contains methods for initializing, performing calculations and populating ACS tables.
    """
    def __init__(self, year='2018'):
        self.year = year
        c = CensusAPI(year)
        self.con, self.cur = c.connect_database('sdvm')
        
    
    def initializeTable(self, tbl_name):
        """
        Initializes specified table with columns and returns table with no rows.
        
        Parameters:
            tbl_name: string
        
        Returns:
            final_table: pandas DataFrame
        """
        
        #Grabs the column ids for the specified table
        tbl_dict = c.column_ids.loc[tbl_name].to_dict(orient='records')
        tbl_dict_cols= [x['col'] for x in tbl_dict]
        for x in tbl_dict: x.pop('col')
        tbl_dict = dict(zip(tbl_dict_cols, tbl_dict))
        
        keys = list(tbl_dict.keys())
        
        default_cols = ['muni_id']
        
        #Initializes final table dataframe with final columns and no rows
        final_table = pd.DataFrame(columns=default_cols + keys)
        
        return final_table, tbl_dict
    
    def mapCategories(self, row):
        """ 
        Maps the moe:bool, percent:bool properties of table columns to their category 
        (estimate, moe, percent or moe percent)

        Returns: string
        """
        
        #Gets ['moe', 'percent'] as row
        tpl = tuple(row)
        
        
        if tpl == (False, False): #moe: false, percent: false
            return 'est'
        
        elif tpl == (False, True): #moe: false, percent: true
            return 'pct'
        
        elif tpl == (True, False): #moe: true, percent: false
            return 'moe'
        
        elif tpl == (True, True): #moe: true, percent: true
            return 'moe_pct'
        
        else:
            return None
        

    def populateFullTable(self, tbl_name):
        
        final_table, tbl_dict = self.initializeTable(tbl_name)
        
        display(final_table)
        #Grabs the muni uni, muni names and geoids
        init_table = c.load_geoids()
        
        #Populates raw data from Census Subject Tables
        census_table = c.call_api(tbl_name, 'B05003')
        
        #Convert values to float and also set a lower bound at 0
        census_table.iloc[:, 3:] = census_table.iloc[:, 3:].applymap(lambda x: float(x)).clip(lower=0)
        
        final_table['muni_id'] = census_table['MUNI_ID']
        
        #List of columns with errors
        error_cols = set()
        
        #Converts bit flags to calculation categories
        mapped_cols = c.column_ids.loc[tbl_name].set_index('col')[['moe', 'percent']].apply(p.mapCategories, axis=1)

        #Get column names for each calculation categories
        est_cols = mapped_cols[(mapped_cols == 'est')].index.values
        pct_cols = mapped_cols[(mapped_cols == 'pct')].index.values
        moe_cols = mapped_cols[(mapped_cols == 'moe')].index.values
        moe_pct_cols = mapped_cols[(mapped_cols == 'moe_pct')].index.values
        
        #print(est_cols, pct_cols)
        """ ------------------------------ Calculations ------------------------------------- """
        ### Estimate ###
        for col in est_cols:
            try:
                #List of census ids that will be used in the final table
                ids = tbl_dict

                #Append 'E' to find it in the Census dataframe 
                ids_ = [idx + 'E' for idx in ids[col]['estimates']]
                #Performs calculations and updates column values
                final_table[col] = acs_functions.acs_est(census_table[ids_])
            except:
                error_cols.add(col)
                continue
        #display(final_table) 
        
        ### Percentages (non-MOE) ###       
        for col in pct_cols:
            try:
                ids_key = col

                ids = tbl_dict

                
                #Append 'E' to find it in the Census dataframe 
                ids_ = [idx + 'E' for idx in ids[ids_key]['estimates']]
                uni_ids_ = [u + 'E' for u in ids[ids_key]['universe']]

                #Performs calculations and updates column values
                result = acs_functions.acs_est_pct(census_table[ids_], census_table[uni_ids_])
                final_table[col] = result #- 100
                
            except:
                break
        #display(final_table)
        
        ### MOE ###
        for col in moe_cols:
            try:
                #List of census ids that will be used in the final table
                ids = tbl_dict

                #Append 'E' to find it in the Census dataframe 
                ids_ = [idx + 'E' for idx in ids[col]['estimates']]

                #print(ids_)
                
                #Performs calculations and updates column values
                final_table[col] = acs_functions.acs_moe(census_table[ids_], census_table)
                #print(final_table[col])
            except:
                break
            #    error_cols.add(col)
            #    continue
            
        print('MOE done!')
        #display(final_table)
        ### MOE Percentage ###
        for col in moe_pct_cols:
            try:

                #List of census ids that will be used in the final table
                ids = tbl_dict

                print(ids[col])
                #Append 'E' to find it in the Census dataframe 
                ids_ = [idx + 'E' for idx in ids[col]['estimates']]
                uni_ids_ = [u + 'E' for u in ids[col]['universe']]

                print(ids_, uni_ids_)
                #Performs calculations and updates column values
                result = acs_functions.acs_moe_pct(census_table[ids_], census_table[uni_ids_], census_table)
                final_table[col] = result
                
            except:
                error_cols.add(col)
                continue
                #break
                
                
        display(final_table)
        final_table = final_table.set_index([final_table['muni_id'] - 1])

        print(error_cols)
        return final_table
    
  
    def populateSummGroups(self, tbl_name, df_in):
        """
            Aggregates the rows of municipalities belonging to summary groups. Appends row to final table.
        """
        
        
        df_out = df_in.copy()
        
        muni_summ_table = pd.read_csv('K:\DataServices\Datasets\Data Keys\RegionalSummary\muni_405_summary.csv')
        muni_dict = {}
        cats = ['subregion', 'comm_type', 'county', 'subtype', 'rpa_name', 'region']
        for c in cats:
            cat_types = set(muni_summ_table[c].values)
            cat_types = [x for x in cat_types if isinstance(x, str)]
            for i in cat_types:
                muni_dict[i] = muni_summ_table[(muni_summ_table[c] == i)]['muni_Id'].values
        
        
        
    def compareTables(self, tbl_name, df_in):
        
        #Connect to Postgres database 
        self.con, self.cur = c.connect_database('sdvm')
        
        #Get columns at a string list
        tbl_dict = c.column_ids.loc[tbl_name].to_dict(orient='records')
        cols= [x['col'] for x in tbl_dict]
        
        cols = 'muni_id, ' + ",".join(cols)
        
        query = \
        """
        SELECT {1} from tabular.{0} WHERE acs_year = '2014-18';
        """
        #Get table from Postgres as pandas dataframe
        pg_df = pd.read_sql(query.format(tbl_name, cols), self.con, coerce_float=True)
        pg_df.index = pg_df['muni_id'] - 1
        
        #Casts all values as floats except for the GEO_ID column
        df_in.iloc[:, 1:] = df_in.iloc[:, 1:].astype('float', errors='ignore')
        
        #Removes municipalities from return Postres dataframe that aren't in the comparison dataframe
        pg_df = pg_df[(pg_df['muni_id'].isin(df_in['muni_id'].values))]

        #Drops columns that aren't in df_in
        pg_df = pg_df.drop(columns=[col for col in set(pg_df.columns).difference(df.columns)])
        
        #Ensures that the column names match in both tables
        #pg_df = pg_df.reindex(columns=df_in.columns.values)
        pg_df = pg_df.reindex_like(df_in)
        display(pg_df)
        display(df_in)
        #Creates comparison table and returns it with the PG dataframe
        comparison_table = pg_df.compare(df_in, align_axis=0)
        
        return pg_df, comparison_table


In [36]:
# Notebook-level WriteTables instance with the default year; later cells refer to it as `p`.
p = WriteTables()

Connected to ds


In [65]:
# NOTE(review): `muni_dict` is only assigned inside WriteTables.populateSummGroups in the cells shown;
# nothing visible defines it at notebook scope, so this cell presumably relies on leftover kernel state — confirm.
muni_dict

{'Three Rivers (TRIC) Subregion': array([ 50,  73,  78,  99, 175, 199, 220, 244, 266, 285, 307, 335],
       dtype=int64),
 'Metrowest Subregion': array([ 14, 100, 136, 170, 198, 277, 315, 317, 333], dtype=int64),
 'MAGIC Subregion': array([  2,  23,  34,  37,  51,  67, 141, 155, 157, 158, 174, 286, 288],
       dtype=int64),
 'South Shore (SSC) Subregion': array([ 40,  65,  82, 122, 131, 133, 142, 171, 219, 231, 251, 264, 336],
       dtype=int64),
 'North Suburban (NSPC) Subregion': array([ 48, 164, 213, 246, 284, 305, 342, 344, 347], dtype=int64),
 'Inner Core (ICC) Subregion': array([ 10,  26,  35,  46,  49,  57,  93, 163, 165, 176, 178, 189, 207,
        243, 248, 262, 274, 308, 314, 346], dtype=int64),
 'South West (SWAP) Subregion': array([ 25, 101, 139, 177, 185, 187, 208, 269, 350], dtype=int64),
 'North Shore (NSTF) Subregion': array([ 30,  71,  92, 107, 119, 144, 166, 168, 184, 196, 229, 252, 258,
        291, 298, 320], dtype=int64),
 'Maturing Suburb': array([  2,  14,  18

In [15]:
# Time a full population of the B05003 table (wall-clock seconds are printed).
start = time.time()
df = p.populateFullTable('b05003_citizenship_nativity_by_age_gender_acs_m')
end = time.time()
print(end-start)

# Display the populated table.
df

Unnamed: 0,muni_id,pop,pop_me,pop_u18,pop_u18me,ntv_u18,ntv_u18me,fb_u18,fb_u18me,ntr_u18,...,fnon18ome,fpop18o_p,fpop18omep,fntv18o_p,fntv18omep,ffb18o_p,ffb18omep,fntr18o_p,fntr18omep,fnon18o_p


MOE done!
{'group': 'b05003', 'ids': ['003', '001', '014'], 'moe': True, 'percent': True, 'estimates': ['003', '014'], 'universe': ['001']}
['003E', '014E'] ['001E']
{'group': 'b05003', 'ids': ['015', '004', '003', '014'], 'moe': True, 'percent': True, 'estimates': ['004', '015'], 'universe': ['003', '014']}
['004E', '015E'] ['003E', '014E']
{'group': 'b05003', 'ids': ['005', '003', '016', '014'], 'moe': True, 'percent': True, 'estimates': ['005', '016'], 'universe': ['003', '014']}
['005E', '016E'] ['003E', '014E']
{'group': 'b05003', 'ids': ['003', '006', '017', '014'], 'moe': True, 'percent': True, 'estimates': ['006', '017'], 'universe': ['003', '014']}
['006E', '017E'] ['003E', '014E']
{'group': 'b05003', 'ids': ['003', '018', '007', '014'], 'moe': True, 'percent': True, 'estimates': ['007', '018'], 'universe': ['003', '014']}
['007E', '018E'] ['003E', '014E']
{'group': 'b05003', 'ids': ['019', '008', '001'], 'moe': True, 'percent': True, 'estimates': ['008', '019'], 'universe': [

Unnamed: 0,muni_id,pop,pop_me,pop_u18,pop_u18me,ntv_u18,ntv_u18me,fb_u18,fb_u18me,ntr_u18,...,fnon18ome,fpop18o_p,fpop18omep,fntv18o_p,fntv18omep,ffb18o_p,ffb18omep,fntr18o_p,fntr18omep,fnon18o_p
46,1,16330.0,56.0,3039.0,347.96,3016.0,347.8,23.0,29.15,23.0,...,142.0,82.67,2.02,90.53,2.72,9.47,2.58,5.81,2.32,3.66
194,2,23561.0,26.0,5798.0,347.71,5157.0,352.15,641.0,198.12,202.0,...,209.0,76.28,1.08,70.1,3.6,29.9,2.81,17.75,2.35,12.15
0,3,10483.0,18.0,1791.0,265.66,1693.0,259.79,98.0,94.94,0.0,...,65.0,83.19,1.31,87.1,3.81,12.9,3.77,10.8,3.12,2.1
239,4,8172.0,22.0,1444.0,215.87,1405.0,215.06,39.0,59.48,0.0,...,66.0,84.96,1.7,94.84,1.95,5.16,2.11,2.27,1.1,2.89
329,5,28744.0,28.0,5582.0,511.59,5462.0,490.03,120.0,101.87,105.0,...,161.0,83.03,1.27,91.28,2.39,8.72,2.05,6.11,1.52,2.62
214,6,421.0,80.0,58.0,20.81,58.0,20.81,0.0,12.0,0.0,...,12.0,83.25,25.42,93.1,5.58,6.9,3.78,6.9,3.78,0.0
73,7,17378.0,22.0,3666.0,341.48,3561.0,342.9,105.0,101.79,99.0,...,50.0,81.54,1.97,96.6,1.38,3.4,1.27,2.38,1.03,1.02
250,8,39741.0,46.0,3537.0,335.84,3166.0,328.34,371.0,145.6,58.0,...,242.0,91.03,1.16,83.08,1.92,16.92,1.83,8.05,1.64,8.87
93,9,35609.0,35.0,8885.0,434.19,8499.0,434.19,386.0,179.08,143.0,...,246.0,75.31,0.77,78.09,2.49,21.91,2.44,14.33,2.03,7.59
195,10,45147.0,27.0,9889.0,512.04,9222.0,510.53,667.0,164.77,249.0,...,354.0,79.63,0.88,77.27,1.68,22.73,2.05,11.75,1.64,10.98


set()
15.523096323013306


Unnamed: 0_level_0,muni_id,pop,pop_me,pop_u18,pop_u18me,ntv_u18,ntv_u18me,fb_u18,fb_u18me,ntr_u18,...,fnon18ome,fpop18o_p,fpop18omep,fntv18o_p,fntv18omep,ffb18o_p,ffb18omep,fntr18o_p,fntr18omep,fnon18o_p
muni_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,1,16330.0,56.0,3039.0,347.96,3016.0,347.8,23.0,29.15,23.0,...,142.0,82.67,2.02,90.53,2.72,9.47,2.58,5.81,2.32,3.66
1,2,23561.0,26.0,5798.0,347.71,5157.0,352.15,641.0,198.12,202.0,...,209.0,76.28,1.08,70.1,3.6,29.9,2.81,17.75,2.35,12.15
2,3,10483.0,18.0,1791.0,265.66,1693.0,259.79,98.0,94.94,0.0,...,65.0,83.19,1.31,87.1,3.81,12.9,3.77,10.8,3.12,2.1
3,4,8172.0,22.0,1444.0,215.87,1405.0,215.06,39.0,59.48,0.0,...,66.0,84.96,1.7,94.84,1.95,5.16,2.11,2.27,1.1,2.89
4,5,28744.0,28.0,5582.0,511.59,5462.0,490.03,120.0,101.87,105.0,...,161.0,83.03,1.27,91.28,2.39,8.72,2.05,6.11,1.52,2.62
5,6,421.0,80.0,58.0,20.81,58.0,20.81,0.0,12.0,0.0,...,12.0,83.25,25.42,93.1,5.58,6.9,3.78,6.9,3.78,0.0
6,7,17378.0,22.0,3666.0,341.48,3561.0,342.9,105.0,101.79,99.0,...,50.0,81.54,1.97,96.6,1.38,3.4,1.27,2.38,1.03,1.02
7,8,39741.0,46.0,3537.0,335.84,3166.0,328.34,371.0,145.6,58.0,...,242.0,91.03,1.16,83.08,1.92,16.92,1.83,8.05,1.64,8.87
8,9,35609.0,35.0,8885.0,434.19,8499.0,434.19,386.0,179.08,143.0,...,246.0,75.31,0.77,78.09,2.49,21.91,2.44,14.33,2.03,7.59
9,10,45147.0,27.0,9889.0,512.04,9222.0,510.53,667.0,164.77,249.0,...,354.0,79.63,0.88,77.27,1.68,22.73,2.05,11.75,1.64,10.98


In [37]:
# Compare the freshly computed table against the copy stored in Postgres.
pg_df, comparison_table = p.compareTables('b05003_citizenship_nativity_by_age_gender_acs_m', df)

Connected to ds


Unnamed: 0_level_0,muni_id,pop,pop_me,pop_u18,pop_u18me,ntv_u18,ntv_u18me,fb_u18,fb_u18me,ntr_u18,...,fnon18ome,fpop18o_p,fpop18omep,fntv18o_p,fntv18omep,ffb18o_p,ffb18omep,fntr18o_p,fntr18omep,fnon18o_p
muni_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,1,16330.0,56.0,3039.0,347.96,3016.0,347.8,23.0,29.15,23.0,...,142.0,82.67,2.02,90.53,2.72,9.47,2.58,5.81,2.32,3.66
1,2,23561.0,26.0,5798.0,347.71,5157.0,352.15,641.0,198.12,202.0,...,209.0,76.28,1.08,70.1,3.6,29.9,2.81,17.75,2.35,12.15
2,3,10483.0,18.0,1791.0,265.66,1693.0,259.79,98.0,94.94,0.0,...,65.0,83.19,1.31,87.1,3.81,12.9,3.77,10.8,3.12,2.1
3,4,8172.0,22.0,1444.0,215.87,1405.0,215.06,39.0,59.48,0.0,...,66.0,84.96,1.7,94.84,1.95,5.16,2.11,2.27,1.1,2.89
4,5,28744.0,28.0,5582.0,511.59,5462.0,490.03,120.0,101.87,105.0,...,161.0,83.03,1.27,91.28,2.39,8.72,2.05,6.11,1.52,2.62
5,6,421.0,80.0,58.0,20.81,58.0,20.81,0.0,12.0,0.0,...,12.0,83.25,25.42,93.1,5.58,6.9,3.78,6.9,3.78,0.0
6,7,17378.0,22.0,3666.0,341.48,3561.0,342.9,105.0,101.79,99.0,...,50.0,81.54,1.97,96.6,1.38,3.4,1.27,2.38,1.03,1.02
7,8,39741.0,46.0,3537.0,335.84,3166.0,328.34,371.0,145.6,58.0,...,242.0,91.03,1.16,83.08,1.92,16.92,1.83,8.05,1.64,8.87
8,9,35609.0,35.0,8885.0,434.19,8499.0,434.19,386.0,179.08,143.0,...,246.0,75.31,0.77,78.09,2.49,21.91,2.44,14.33,2.03,7.59
9,10,45147.0,27.0,9889.0,512.04,9222.0,510.53,667.0,164.77,249.0,...,354.0,79.63,0.88,77.27,1.68,22.73,2.05,11.75,1.64,10.98


Unnamed: 0_level_0,muni_id,pop,pop_me,pop_u18,pop_u18me,ntv_u18,ntv_u18me,fb_u18,fb_u18me,ntr_u18,...,fnon18ome,fpop18o_p,fpop18omep,fntv18o_p,fntv18omep,ffb18o_p,ffb18omep,fntr18o_p,fntr18omep,fnon18o_p
muni_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,1,16330.0,56.0,3039.0,347.96,3016.0,347.8,23.0,29.15,23.0,...,142.0,82.67,2.02,90.53,2.72,9.47,2.58,5.81,2.32,3.66
1,2,23561.0,26.0,5798.0,347.71,5157.0,352.15,641.0,198.12,202.0,...,209.0,76.28,1.08,70.1,3.6,29.9,2.81,17.75,2.35,12.15
2,3,10483.0,18.0,1791.0,265.66,1693.0,259.79,98.0,94.94,0.0,...,65.0,83.19,1.31,87.1,3.81,12.9,3.77,10.8,3.12,2.1
3,4,8172.0,22.0,1444.0,215.87,1405.0,215.06,39.0,59.48,0.0,...,66.0,84.96,1.7,94.84,1.95,5.16,2.11,2.27,1.1,2.89
4,5,28744.0,28.0,5582.0,511.59,5462.0,490.03,120.0,101.87,105.0,...,161.0,83.03,1.27,91.28,2.39,8.72,2.05,6.11,1.52,2.62
5,6,421.0,80.0,58.0,20.81,58.0,20.81,0.0,12.0,0.0,...,12.0,83.25,25.42,93.1,5.58,6.9,3.78,6.9,3.78,0.0
6,7,17378.0,22.0,3666.0,341.48,3561.0,342.9,105.0,101.79,99.0,...,50.0,81.54,1.97,96.6,1.38,3.4,1.27,2.38,1.03,1.02
7,8,39741.0,46.0,3537.0,335.84,3166.0,328.34,371.0,145.6,58.0,...,242.0,91.03,1.16,83.08,1.92,16.92,1.83,8.05,1.64,8.87
8,9,35609.0,35.0,8885.0,434.19,8499.0,434.19,386.0,179.08,143.0,...,246.0,75.31,0.77,78.09,2.49,21.91,2.44,14.33,2.03,7.59
9,10,45147.0,27.0,9889.0,512.04,9222.0,510.53,667.0,164.77,249.0,...,354.0,79.63,0.88,77.27,1.68,22.73,2.05,11.75,1.64,10.98


In [38]:
#[x for x in comparison_table.columns]
# Lift the column display limit so every differing column is visible.
pd.set_option("display.max_columns", None)
comparison_table

Unnamed: 0_level_0,Unnamed: 1_level_0,pop_u18mep,ntv_u18_p,ntv_u18mep,fb_u18mep,ntr_u18mep,non_u18mep,non_18ome,pop_18omep,ntv_18omep,fb_18omep,ntr_18omep,non_18omep,mntvu18mep,mfbu18_p,mntru18_p,mpop18omep,mntv18omep,fntvu18mep,fpop18omep,fntv18omep,fnon18o_p
muni_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
0,self,,,48.0,,,,,,,,,,,,,,,97.86,,,
0,other,,,1.36,,,,,,,,,,,,,,,0.82,,,
1,self,,,44.19,,,,,,,,,,,,,,,88.96,,,
1,other,,,2.9,,,,,,,,,,,,,,,3.38,,,
2,self,,,47.27,,,,,,,,,,,,,,,95.25,,,
2,other,,,3.72,,,,,,,,,,,,,,,4.17,,,
3,self,,,41.26,,,,,,,,,,,,,,,100.0,,,
3,other,,,3.2,,,,,,,,,,,,,,,0.0,,,
4,self,,,44.48,,,0.6,,,,,,,,,,,,98.46,,,
4,other,,,12.55,,,0.59,,,,,,,,,,,,0.27,,,


# For every row label of the comparison table, list the columns holding a
# non-NaN entry, i.e. the columns where the two tables disagree for that row.
cols = comparison_table.columns

error_dict = {
    str(idx): [name for name in cols if not np.isnan(comparison_table[name].loc[idx])]
    for idx in comparison_table.index
}

error_dict
#[i for i in cols if not np.isnan(list(comparison_table[i].values)).all()]

# Appendix

## class: CensusAPI 
    - Initializes the Census API parameters
    - Gets Census ids that correspond to different columns names
   ### functions:
   ### variables:
    
## class: WriteTables
   ### functions:
   ### variables:
