In [2]:
"""
ACS Collection
Author: Dominic Ridley
"""
from builtins import any as b_any
import sphinx
import pandas as pd
pd.set_option("display.max_rows", None)
import requests
import pickle
import psycopg2
import sys
from psycopg2 import OperationalError, errorcodes, errors
from IPython.display import display 
import time
import numpy as np
import acs_functions
from config import config

In [3]:
class CensusAPI:
    """
    Client for the Census ACS 5-year API.

    Contains methods for accessing the Census API, loading the geo ids of
    municipalities, connecting to PostgreSQL and retrieving Census tables.

    Attributes set by ``__init__``:
        api_key: key appended to every API request.
        call: URL template with positional slots {0}=state, {1}=county,
            {2}=county subdivision, {3}=api key, {4}=table group.
        year: the ACS year the instance was created for.
        ids_dict, cols_dict, col_uni_dict: lookup objects unpickled from
            'table_to_censusid.p', 'cols_ids_dict.p' and 'p_cols_dict.p'.
        column_ids: DataFrame read from 'column_ids.csv' whose 'ids' column is
            converted to sets of id strings.
    """

    # NOTE(review): this key is committed in source control. It is kept only as a
    # fallback so existing notebooks keep running; set CENSUS_API_KEY in the
    # environment instead, then rotate and delete this value.
    _FALLBACK_API_KEY = '766973dcdc26460a63ee43b8bfed1d1c4692486a'

    # Raw string: the original non-raw literal relied on invalid escape sequences
    # ('\D', '\C') being passed through unchanged. The value is identical.
    GEOID_WORKBOOK = r'K:\DataServices\Datasets\Data Keys\Census_MuniName_County_State_ID.xls'

    # State code sent with every request (the original call hard-coded '25').
    STATE_FIPS = '25'

    def __init__(self, year):
        """
        Initializes the API parameters and loads the lookup files.

        Args:
            year: ACS year; it is stored on the instance and inserted into the
                request URL.

        Raises:
            Whatever ``open``, ``pickle.load`` or ``pandas.read_csv`` raise when
            a lookup file is missing or unreadable.
        """
        import os  # local import so the class does not depend on a file-level one

        self.api_key = os.environ.get('CENSUS_API_KEY', self._FALLBACK_API_KEY)
        self.year = year

        # The original template hard-coded 2018 and ignored `year`.
        self.call = (
            'https://api.census.gov/data/' + str(year) + '/acs/acs5?get=group({4})&'
            + 'for=county%20subdivision:{2}&in=state:{0}%20county:{1}&key={3}'
        )

        # Loads the census variables used for each table
        self.ids_dict = self._load_pickle('table_to_censusid.p')
        self.cols_dict = self._load_pickle('cols_ids_dict.p')
        self.col_uni_dict = self._load_pickle('p_cols_dict.p')

        self.column_ids = pd.read_csv('column_ids.csv', index_col=[0, 1], skipinitialspace=True)
        # Each 'ids' cell becomes a set of id strings split on ','.
        self.column_ids['ids'] = self.column_ids['ids'].apply(
            lambda x: set(str(x).zfill(3).replace(' ', '').split(','))
        )

    @staticmethod
    def _load_pickle(path):
        """Unpickles ``path`` and closes the file handle afterwards."""
        # NOTE(review): pickle.load can execute arbitrary code; only use files
        # produced by this project.
        with open(path, 'rb') as handle:
            return pickle.load(handle)

    def load_table_censusvars(self, table):
        """
        Loads the census variables used for ``table``.

        Args:
            table: key into ``self.ids_dict``; its value must be an iterable of
                (id, name) pairs.

        Returns:
            ``(ids, names)`` as two lists; two empty lists when the table has no
            entries.

        Raises:
            KeyError: if ``table`` is not in ``self.ids_dict``.
        """
        # The original read the notebook-level global `c` instead of this instance.
        pairs = self.ids_dict[table]
        if not pairs:
            return [], []
        ids, names = (list(part) for part in zip(*pairs))
        return ids, names

    def load_geoids(self):
        """
        Loads geoids for the geographical level (municipality, tract, block).

        Returns:
            DataFrame with the 'GEOID', 'MUNI_ID' and 'MUNI' columns of the
            workbook at ``GEOID_WORKBOOK``; 'GEOID' is read as a string.
        """
        full_table = pd.read_excel(self.GEOID_WORKBOOK, dtype={'GEOID': str})
        return full_table[['GEOID', 'MUNI_ID', 'MUNI']]

    def connect_database(self, server):
        """
        Connects to the PostgreSQL database server.

        Args:
            server: section name passed to ``config`` to read the connection
                parameters.

        Returns:
            ``(conn, cur)``. On any failure the error is printed and whichever of
            the two was not created is returned as None.
        """
        conn = None
        cur = None
        try:
            # read connection parameters
            params = config(section=server)

            # connect to the PostgreSQL server
            conn = psycopg2.connect(**params)
            print("Connected to " + params['database'])

            # create a cursor
            cur = conn.cursor()
        except Exception as error:  # psycopg2.DatabaseError is an Exception subclass
            print(error)

        return conn, cur

    def call_api(self, table, group='B01001'):
        """
        Calls the Census API once per county and returns the municipal table.

        Args:
            table: currently unused; only ``group`` selects the data requested.
            group: Census table group placed in the ``get=group(...)`` clause.

        Returns:
            DataFrame with 'MUNI_ID', 'MUNI', 'NAME' and one column per estimate
            (suffix 'E') and margin (suffix 'M') variable, named by the last four
            characters of the Census variable id, sorted by 'MUNI_ID'.

        Raises:
            requests.HTTPError: if a request returns an error status.
        """
        # Loads GEOIDs to map to municipalities
        geoid_table = self.load_geoids()

        # Characters 2-4 of each GEOID form the county code; the set drops
        # duplicates and sorting keeps the request order stable between runs.
        counties = sorted({str(geoid)[2:5] for geoid in geoid_table['GEOID']})

        # Collect all rows first and build the frame once (DataFrame.append was
        # removed in pandas 2.0 and copied the frame on every call).
        rows = []
        header = None
        for county in counties:
            request_url = self.call.format(self.STATE_FIPS, county, '*', self.api_key, group)
            response = requests.get(request_url)
            response.raise_for_status()
            payload = response.json()

            # The first element of every response is the header row.
            if header is None:
                header = payload[0]
            rows.extend(payload[1:])

        out_df = pd.DataFrame(rows, columns=header)

        # Columns ending in 'E' or 'M', except 'NAME', which also ends in 'E'.
        value_cols = [name for name in out_df.columns
                      if name[-1] in ('E', 'M') and name != 'NAME']
        e_m_columns = [name[-4:] for name in value_cols]

        out_df = out_df.rename(columns=dict(zip(value_cols, e_m_columns)))
        out_df = out_df[['NAME', 'GEO_ID'] + e_m_columns]
        out_df = out_df.rename(columns={'GEO_ID': 'GEOID'})

        # Keep the last 10 characters so the id matches the workbook's GEOID.
        out_df['GEOID'] = out_df['GEOID'].str[-10:]

        # Joins tables on 'GEOID' to get municipality names
        final_df = out_df.merge(geoid_table, on='GEOID', how='inner')
        final_df = final_df[['MUNI_ID', 'MUNI', 'NAME'] + e_m_columns].sort_values('MUNI_ID')
        return final_df

In [4]:
# Read the column-id lookup from CSV; the first two CSV columns form a two-level index.
column_ids = pd.read_csv('column_ids.csv', index_col=[0,1], skipinitialspace=True)

In [5]:
# Turn every 'ids' cell into a set of id strings: stringify, left-pad to 3 characters,
# strip spaces and split on commas.
column_ids['ids'] = [
    set(str(raw).zfill(3).replace(' ', '').split(','))
    for raw in column_ids['ids']
]

In [6]:
# Show the rows of the lookup whose first index level is this table name.
column_ids.loc['b01001_population_by_age_gender_acs_m']

Unnamed: 0,ids,moe,percent
pop,{001},False,False
popm,{001},True,False
pop_u5,"{027, 003}",False,False
pop_u5m,"{027, 003}",True,False
pop_5_9,"{028, 004}",False,False
pop_5_9m,"{028, 004}",True,False
pop1014,"{029, 005}",False,False
pop1014m,"{029, 005}",True,False
pop1517,"{030, 006}",False,False
pop1517m,"{030, 006}",True,False


In [7]:
# Instantiate the Census API client for 2018.
c = CensusAPI('2018')

In [8]:
# Open a connection and cursor using the 'sdvm' section of the database config.
conn, cur = c.connect_database('sdvm')

Connected to ds


In [9]:
# Call the database procedure 'acs_est_pct' with two single-element lists and show the result.
cur.callproc('acs_est_pct', ([1], [1]))
cur.fetchall()

[(Decimal('100.00'),)]

In [10]:
# Fetch the municipal table from the Census API.
# NOTE(review): call_api does not use its first argument; the data comes from its
# default group, 'B01001'.
hello = c.call_api('b01001_population_by_age_gender_acs_m')

In [11]:
# Persist the raw API result to 'test_table.p'. The context manager closes the file
# handle, which the bare open() call never did explicitly.
with open('test_table.p', 'wb') as cache_file:
    pickle.dump(hello, cache_file)

In [12]:
# Every column from the fourth onward is converted to float, then negative values
# are raised to 0.
hello.iloc[:, 3:] = (
    hello.iloc[:, 3:]
    .applymap(float)
    .clip(lower=0)
)
hello

Unnamed: 0,MUNI_ID,MUNI,NAME,001E,001M,002E,002M,003E,003M,004E,...,045E,045M,046E,046M,047E,047M,048E,048M,049E,049M
323,1,ABINGTON,"Abington town, Plymouth County, Massachusetts",16330.0,56.0,7732.0,344.0,527.0,150.0,308.0,...,175.0,80.0,324.0,110.0,217.0,96.0,89.0,48.0,243.0,103.0
225,2,ACTON,"Acton town, Middlesex County, Massachusetts",23561.0,26.0,11434.0,330.0,460.0,152.0,781.0,...,420.0,106.0,465.0,124.0,472.0,149.0,141.0,62.0,284.0,123.0
277,3,ACUSHNET,"Acushnet town, Bristol County, Massachusetts",10483.0,18.0,5207.0,260.0,331.0,172.0,190.0,...,180.0,82.0,308.0,107.0,336.0,127.0,64.0,42.0,144.0,69.0
270,4,ADAMS,"Adams town, Berkshire County, Massachusetts",8172.0,22.0,4136.0,180.0,254.0,126.0,199.0,...,136.0,57.0,165.0,58.0,214.0,67.0,113.0,51.0,118.0,45.0
117,5,AGAWAM,"Agawam Town city, Hampden County, Massachusetts",28744.0,28.0,13873.0,408.0,693.0,192.0,633.0,...,459.0,130.0,744.0,167.0,534.0,146.0,415.0,176.0,850.0,184.0
245,6,ALFORD,"Alford town, Berkshire County, Massachusetts",421.0,80.0,212.0,39.0,6.0,7.0,4.0,...,19.0,13.0,29.0,15.0,12.0,8.0,9.0,6.0,0.0,12.0
138,7,AMESBURY,"Amesbury Town city, Essex County, Massachusetts",17378.0,22.0,8582.0,305.0,533.0,174.0,341.0,...,385.0,99.0,365.0,111.0,200.0,77.0,294.0,109.0,223.0,80.0
176,8,AMHERST,"Amherst town, Hampshire County, Massachusetts",39741.0,46.0,19641.0,491.0,406.0,102.0,364.0,...,199.0,77.0,438.0,116.0,248.0,81.0,231.0,92.0,386.0,122.0
158,9,ANDOVER,"Andover town, Essex County, Massachusetts",35609.0,35.0,17176.0,359.0,787.0,217.0,923.0,...,491.0,113.0,675.0,154.0,412.0,105.0,389.0,100.0,395.0,102.0
226,10,ARLINGTON,"Arlington town, Middlesex County, Massachusetts",45147.0,27.0,21008.0,479.0,1563.0,260.0,1475.0,...,698.0,168.0,989.0,152.0,882.0,150.0,604.0,150.0,724.0,166.0


In [13]:
# Same lookup as above, read from the client instance this time.
c.column_ids.loc['b01001_population_by_age_gender_acs_m']

Unnamed: 0,ids,moe,percent
pop,{001},False,False
popm,{001},True,False
pop_u5,"{027, 003}",False,False
pop_u5m,"{027, 003}",True,False
pop_5_9,"{028, 004}",False,False
pop_5_9m,"{028, 004}",True,False
pop1014,"{029, 005}",False,False
pop1014m,"{029, 005}",True,False
pop1517,"{030, 006}",False,False
pop1517m,"{030, 006}",True,False


In [14]:
#ids, names = c.load_table_censusvars('b01001_population_by_age_gender_acs_m')
# Keys stored for this table in the client's cols_dict.
keys = list(c.cols_dict['b01001_population_by_age_gender_acs_m'].keys())


In [15]:
#list(c.cols_dict[tbl_name].keys())

In [16]:
class WriteTables:
    
    def __init__(self, year='2018'):
        self.year = year
        c = CensusAPI(year)
        self.con, self.cur = c.connect_database('sdvm')
        
    
    def initializeTable(self, tbl_name):
        
        #Grabs the column ids for the specified table
        tbl_dict = c.column_ids.loc[tbl_name].to_dict(orient='index')
        keys = list(tbl_dict.keys())
        
        default_cols = ['muni_id']
        
        #Initializes final table dataframe with final columns and no rows
        final_table = pd.DataFrame(columns=default_cols + keys)
        
        return final_table
    
    def mapCategories(self, row):
        """ 
        Maps the moe:bool, percent:bool properties of table columns to their category 
        (estimate, moe, percent or moe percent)

        Returns: string
        """
        
        #Gets ['moe', 'percent'] as row
        tpl = tuple(row)
        
        
        if tpl == (False, False): #moe: false, percent: false
            return 'est'
        
        elif tpl == (False, True): #moe: false, percent: true
            return 'pct'
        
        elif tpl == (True, False): #moe: true, percent: false
            return 'moe'
        
        elif tpl == (True, True): #moe: true, percent: true
            return 'moe_pct'
        
        else:
            return None
        

    def populateFullTable(self, tbl_name):
        
        final_table = self.initializeTable(tbl_name)
        
        #Grabs the muni uni, muni names and geoids
        init_table = c.load_geoids()
        
        #Populates raw data from Census Subject Tables
        census_table = c.call_api(tbl_name)
        
        #Convert values to float and also set a lower bound at 0
        census_table.iloc[:, 3:] = census_table.iloc[:, 3:].applymap(lambda x: float(x)).clip(lower=0)
        
        final_table['muni_id'] = census_table['MUNI_ID']
        
        #List of census ids that will be used in the final table
        ids = c.column_ids.loc[tbl_name].to_dict(orient='index')
        
        #List of universe ids for percentage calculations
        col_uni = c.col_uni_dict[tbl_name]
        
        #List of columns with errors
        error_cols = set()
        
        mapped_cols = c.column_ids.loc[tbl_name][['moe', 'percent']].apply(self.mapCategories, axis=1)

        est_cols = mapped_cols[(mapped_cols == 'est')].index.values
        pct_cols = mapped_cols[(mapped_cols == 'pct')].index.values
        moe_cols = mapped_cols[(mapped_cols == 'moe')].index.values
        moe_pct_cols = mapped_cols[(mapped_cols == 'moe_pct')].index.values
        

        for col in est_cols:
            #List of census ids that will be used in the final table
            ids = c.column_ids.loc[tbl_name].to_dict(orient='index')

            #Append 'E' to find it in the Census dataframe 
            ids_ = [idx + 'E' for idx in ids[col]['ids']]
            
            final_table[col] = acs_functions.acs_est(census_table[ids_])
            
        for col in pct_cols:
            try:
                #List of census ids that will be used in the final table
                ids = c.column_ids.loc[tbl_name].to_dict(orient='index')

                #Append 'E' to find it in the Census dataframe 
                ids_ = [idx + 'E' for idx in ids[col]['ids']]
                uni_ids_ = [u + 'E' for u in col_uni[col][1]['universe']]
                
                result = acs_functions.acs_est_pct(census_table[ids_], census_table[uni_ids_])
                print(result[:5])
                final_table[col] = result - 100
            except:
                continue
                
        for col in moe_cols:
            #List of census ids that will be used in the final table
            ids = c.column_ids.loc[tbl_name].to_dict(orient='index')

            #Append 'E' to find it in the Census dataframe 
            ids_ = [idx + 'E' for idx in ids[col]['ids']]
            
            final_table[col] = acs_functions.acs_moe(census_table[ids_], census_table)
            
        for col in moe_pct_cols:
            try:
                col_uni_key = col[:-2] + 'p'

                #List of census ids that will be used in the final table
                ids = c.column_ids.loc[tbl_name].to_dict(orient='index')

                #Append 'E' to find it in the Census dataframe 
                ids_ = [idx + 'E' for idx in ids[col_uni_key]['ids']]
                uni_ids_ = [u + 'E' for u in col_uni[col_uni_key][1]['universe']]


                result = acs_functions.acs_moe_pct(census_table[ids_], census_table[uni_ids_], census_table)
                print(result[:5])
                final_table[col] = result
            except:
                continue
        
        final_table = final_table.set_index([final_table['muni_id'] - 1])

        
        return final_table
    
  
    def compareTables(self, tbl_name, df_in):
        
        #Connect to Postgres database 
        self.con, self.cur = c.connect_database('sdvm')
        
        #Get columns at a string list
        keys = list(c.cols_dict[tbl_name].keys())
        cols = ", ".join(keys).replace('mpop5559mmp', 'mpop5559mp')
        
        cols = 'muni_id, ' + cols
        
        query = \
        """
        SELECT {1} from tabular.{0} WHERE acs_year = '2014-18';
        """
        #Get table from Postgres as pandas dataframe
        pg_df = pd.read_sql(query.format(tbl_name, cols), self.con, coerce_float=True)
        pg_df.index = pg_df['muni_id'] - 1
        
        #Casts all values as floats except for the GEO_ID column
        df_in.iloc[:, 1:] = df_in.iloc[:, 1:].astype('float', errors='ignore')
        
        #Removes municipalities from return Postres dataframe that aren't in the comparison dataframe
        pg_df = pg_df[(pg_df['muni_id'].isin(df_in['muni_id'].values))]

        #Ensures that the column names match in both tables
        pg_df = pg_df.reindex(columns=df_in.columns.values)
        
        #Creates comparison table and returns it with the PG dataframe
        comparison_table = pg_df.compare(df_in, align_axis=0)
        
        return pg_df, comparison_table


In [17]:
# Create the table writer with its default year ('2018'); its constructor also connects to the database.
p = WriteTables()

Connected to ds


In [18]:
# Build the full table and print how many seconds the call took.
start = time.time()
df = p.populateFullTable('b01001_population_by_age_gender_acs_m')
end = time.time()
print(end-start)

# Display the resulting table.
df

323    105.30
225    104.74
277    105.38
270    104.94
117    104.50
dtype: float64
323    103.58
225    106.24
277    104.95
270    103.70
117    105.07
dtype: float64
323    106.33
225    108.73
277    103.02
270    105.52
117    106.01
dtype: float64
323    103.40
225    104.91
277    103.73
270    103.51
117    103.84
dtype: float64
323    102.00
225    102.47
277    102.77
270    100.94
117    101.44
dtype: float64
323    102.27
225    100.73
277    101.20
270    100.83
117    100.52
dtype: float64
323    100.77
225    100.31
277    101.82
270    101.19
117    100.74
dtype: float64
323    106.28
225    102.64
277    103.15
270    103.46
117    102.29
dtype: float64
323    106.32
225    103.03
277    105.57
270    108.06
117    105.71
dtype: float64
323    105.97
225    104.73
277    105.73
270    106.01
117    106.11
dtype: float64
323    106.64
225    105.48
277    104.54
270    106.53
117    105.12
dtype: float64
323    105.98
225    108.29
277    107.06
270    104.67
117    10

dtype: float64
245    19.12
10     14.55
240     0.34
251    13.27
19     11.34
dtype: float64
245    19.12
10     10.23
37      0.68
240     0.35
251    13.20
dtype: float64
245    19.12
196     0.62
10     14.37
37      0.69
107     0.68
dtype: float64
245    19.26
10     14.41
251    13.23
19      8.00
120    21.88
dtype: float64
245    27.99
10     14.91
251    13.87
19     11.54
120    22.14
dtype: float64
245    19.39
10     14.64
251    13.67
19     11.77
120    22.37
dtype: float64
245    27.28
10     14.44
251    13.58
19     11.60
120    21.87
dtype: float64
245    27.17
10     14.81
251    13.59
19     11.73
120    22.33
dtype: float64
245    27.45
10     15.01
251    13.73
19     11.64
120    22.47
dtype: float64
245    28.27
10     15.10
251    13.98
19     12.02
120    22.59
dtype: float64
245    28.14
10     15.20
251    14.22
19     12.06
120    22.75
dtype: float64
245    27.31
10     14.74
251    13.40
19     11.48
120    22.15
dtype: float64
245    27.90
10     14.69

Unnamed: 0_level_0,muni_id,pop,popm,pop_u5,pop_u5m,pop_5_9,pop_5_9m,pop1014,pop1014m,pop1517,...,mpop2534mp,mpop3564p,mpop3564mp,mpop65ovp,mpop65ovmp,mpop1834p,mpop1834mp,mpop3544p,mpop3544mp,mpop4564p
muni_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,1,16330.0,56.0,865.0,216.42,585.0,152.84,1033.0,275.18,556.0,...,1.32,20.84,2.41,5.96,1.21,11.07,2.19,6.27,1.46,14.57
1,2,23561.0,26.0,1116.0,242.54,1470.0,224.88,2056.0,270.96,1156.0,...,0.93,22.62,1.7,6.54,0.95,6.97,1.19,6.96,1.02,15.66
2,3,10483.0,18.0,564.0,210.3,519.0,144.2,317.0,112.04,391.0,...,1.91,20.53,2.9,8.9,1.75,11.62,2.79,4.8,1.1,15.73
3,4,8172.0,22.0,404.0,146.12,302.0,100.8,451.0,115.97,287.0,...,1.37,21.22,2.78,9.14,1.49,10.01,2.05,6.03,1.51,15.19
4,5,28744.0,28.0,1293.0,293.51,1458.0,293.6,1728.0,332.02,1103.0,...,0.95,20.72,1.68,9.17,1.2,7.74,1.15,5.89,0.87,14.83
5,6,421.0,80.0,17.0,10.63,16.0,14.32,13.0,8.54,12.0,...,27.52,17.34,29.83,23.52,30.93,4.04,19.89,2.38,27.24,14.96
6,7,17378.0,22.0,824.0,202.2,748.0,180.42,1266.0,255.62,828.0,...,1.27,20.8,2.28,6.48,1.22,10.36,1.72,5.49,0.98,15.31
7,8,39741.0,46.0,851.0,170.0,879.0,183.93,933.0,177.1,874.0,...,0.93,7.25,0.78,3.64,0.55,34.18,2.27,2.22,0.5,5.03
8,9,35609.0,35.0,1906.0,285.81,2140.0,299.61,2947.0,310.42,1892.0,...,0.61,21.84,1.37,6.71,0.76,7.51,0.9,6.37,0.74,15.47
9,10,45147.0,27.0,3128.0,343.84,2944.0,309.89,2142.0,275.79,1675.0,...,0.82,20.37,1.34,6.74,0.8,8.41,0.95,7.4,0.79,12.98


In [19]:
# Display the populated table again.
df

Unnamed: 0_level_0,muni_id,pop,popm,pop_u5,pop_u5m,pop_5_9,pop_5_9m,pop1014,pop1014m,pop1517,...,mpop2534mp,mpop3564p,mpop3564mp,mpop65ovp,mpop65ovmp,mpop1834p,mpop1834mp,mpop3544p,mpop3544mp,mpop4564p
muni_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,1,16330.0,56.0,865.0,216.42,585.0,152.84,1033.0,275.18,556.0,...,1.32,20.84,2.41,5.96,1.21,11.07,2.19,6.27,1.46,14.57
1,2,23561.0,26.0,1116.0,242.54,1470.0,224.88,2056.0,270.96,1156.0,...,0.93,22.62,1.7,6.54,0.95,6.97,1.19,6.96,1.02,15.66
2,3,10483.0,18.0,564.0,210.3,519.0,144.2,317.0,112.04,391.0,...,1.91,20.53,2.9,8.9,1.75,11.62,2.79,4.8,1.1,15.73
3,4,8172.0,22.0,404.0,146.12,302.0,100.8,451.0,115.97,287.0,...,1.37,21.22,2.78,9.14,1.49,10.01,2.05,6.03,1.51,15.19
4,5,28744.0,28.0,1293.0,293.51,1458.0,293.6,1728.0,332.02,1103.0,...,0.95,20.72,1.68,9.17,1.2,7.74,1.15,5.89,0.87,14.83
5,6,421.0,80.0,17.0,10.63,16.0,14.32,13.0,8.54,12.0,...,27.52,17.34,29.83,23.52,30.93,4.04,19.89,2.38,27.24,14.96
6,7,17378.0,22.0,824.0,202.2,748.0,180.42,1266.0,255.62,828.0,...,1.27,20.8,2.28,6.48,1.22,10.36,1.72,5.49,0.98,15.31
7,8,39741.0,46.0,851.0,170.0,879.0,183.93,933.0,177.1,874.0,...,0.93,7.25,0.78,3.64,0.55,34.18,2.27,2.22,0.5,5.03
8,9,35609.0,35.0,1906.0,285.81,2140.0,299.61,2947.0,310.42,1892.0,...,0.61,21.84,1.37,6.71,0.76,7.51,0.9,6.37,0.74,15.47
9,10,45147.0,27.0,3128.0,343.84,2944.0,309.89,2142.0,275.79,1675.0,...,0.82,20.37,1.34,6.74,0.8,8.41,0.95,7.4,0.79,12.98


In [20]:
# Load the stored table from Postgres and compare it with the frame built from the API.
pg_df, comparison_table = p.compareTables('b01001_population_by_age_gender_acs_m', df)

Connected to ds


In [21]:
# Spot-check a single value of the Postgres frame (index label 0, column 'pop2534mp').
pg_df.loc[0, 'pop2534mp']

2.11

In [22]:
# Display the comparison result; each index label has a 'self' row and an 'other' row.
comparison_table

Unnamed: 0_level_0,Unnamed: 1_level_0,pop_u5m,pop_5_9m,pop1014m,pop1517m,pop1819m,pop20m,pop21m,pop2224m,pop2529m,pop3034m,...,mpop2534mp,mpop3564p,mpop3564mp,mpop65ovp,mpop65ovmp,mpop1834p,mpop1834mp,mpop3544p,mpop3544mp,mpop4564p
muni_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
0,self,,,,,,,,,,,...,1.33,20.84,2.42,5.96,1.22,11.07,2.2,6.27,,14.57
0,other,,,,,,,,,,,...,1.32,20.84,2.41,5.96,1.21,11.07,2.19,6.27,,14.57
1,self,,,,,,,,,,,...,,22.62,,6.54,,6.97,,6.96,,15.66
1,other,,,,,,,,,,,...,,22.62,,6.54,,6.97,,6.96,,15.66
2,self,,,,,,,,,,,...,,,,8.9,,11.62,,4.8,1.11,15.73
2,other,,,,,,,,,,,...,,,,8.9,,11.62,,4.8,1.1,15.73
3,self,,,,,,,,,,,...,1.38,,,,,10.01,2.06,6.03,,15.19
3,other,,,,,,,,,,,...,1.37,,,,,10.01,2.05,6.03,,15.19
4,self,,,,,,,,,,,...,,,,9.17,,7.74,,5.89,,14.83
4,other,,,,,,,,,,,...,,,,9.17,,7.74,,5.89,,14.83


# For every row of the comparison table, list the columns that hold a value.
# Keys are the stringified index labels of the rows.
error_dict = {}
cols = comparison_table.columns

for idx, row in comparison_table.iterrows():
    # Read from the row that iterrows already yields instead of looking each cell up
    # again by label; pd.notna also copes with cells that are not floats, where
    # np.isnan would raise.
    error_dict[str(idx)] = [i for i in cols if pd.notna(row[i])]

error_dict

# Appendix

## class: CensusAPI 
    - Initializes the Census API parameters
    - Gets the Census ids that correspond to the different column names
   ### functions:
   ### variables:
    
## class: WriteTables
   ### functions:
   ### variables:
