In [1]:
import psycopg2
from config import config
import pandas as pd
import os
import pickle
import re
import numpy as np

In [2]:
#Postgres Connection Setup
# Module-level handle to the connection opened by the most recent connect() call.
# close() reads it, so it has to live at module scope rather than inside connect().
conn = None


def connect(server):
    """Connect to the PostgreSQL database server and return a cursor.

    Parameters
    ----------
    server : str
        Section name handed to ``config(section=...)`` to obtain the keyword
        arguments for ``psycopg2.connect``.

    Returns
    -------
    cursor
        The cursor created from the new connection. The connection itself is
        kept in the module-level ``conn`` so that ``close()`` can release it.

    Raises
    ------
    Exception
        Any error raised while reading the configuration, connecting or
        creating the cursor is printed and then re-raised. Previously the
        function fell through to ``return cur`` with ``cur`` unbound, which
        replaced the real error with an UnboundLocalError.
    """
    global conn
    try:
        # read connection parameters
        params = config(section=server)

        # connect to the PostgreSQL server
        conn = psycopg2.connect(**params)

        # create a cursor
        return conn.cursor()

    except (Exception, psycopg2.DatabaseError) as error:
        print(error)
        raise


def close():
    """Close the connection opened by the last ``connect()`` call, if there is one.

    Does nothing when no connection is open. After closing, the module-level
    handle is reset so a repeated call is a no-op.
    """
    global conn
    if conn is not None:
        conn.close()
        conn = None
        print('Database connection closed.')


In [3]:
def get_acs_table_metadata():
    """
    Collects descriptive metadata for the ACS tables registered in the metadata schema.

    Table names are read from information_schema.tables (schema 'metadata',
    name LIKE '%acs_m'). For every table found, the `details` values of the rows
    with orderid 2, 4, 5 and 6 are stored as one row of the result.

    Returns:
    -------------
    pandas.DataFrame
        Columns 'title', 'tbl_name', 'tbl_num' and 'geography'. The index is the
        position of the table in the name-sorted table list. Tables for which
        the row assignment raises ValueError (for example when the query does
        not yield four values) are reported with an "Error at ..." message and
        skipped, so the index can have gaps.
    """

    #Establish connection to Postgres server
    cur = connect('sdvm')

    try:
        #Matches tables in the metadata schema whose name ends in 'acs' + any single
        #character + 'm' (the '_' in a LIKE pattern is a one-character wildcard).
        cur.execute(
            """
        SELECT table_name
        FROM information_schema.tables
        WHERE table_schema = 'metadata' AND table_name LIKE '%acs_m'
        ORDER BY table_name;
        """)

        #Assigns query results
        table_names = cur.fetchall()

        table_meta = pd.DataFrame(columns=['title', 'tbl_name', 'tbl_num', 'geography'])

        for idx, table in enumerate(table_names):
            #Table names come from information_schema; quotes are stripped before
            #the name is interpolated into the query text.
            proc_table = table[0].replace("'", '')

            # NOTE(review): the query has no ORDER BY, so mapping the four values onto
            # title/tbl_name/tbl_num/geography relies on the order the rows come back in
            # - confirm against the metadata tables.
            query = """
        SELECT details 
        FROM metadata.{0}
        WHERE orderid IN (2, 4, 5, 6);
        """.format(proc_table)

            cur.execute(query)
            metadata = cur.fetchall()
            try:
                table_meta.loc[idx] = [row[0] for row in metadata]
            except ValueError:
                print("Error at " + str(table_names[idx]))
                continue
    finally:
        #Release the cursor even when one of the queries raises.
        cur.close()

    return table_meta

In [4]:
# Build the ACS table-metadata DataFrame; tables that could not be stored are reported as "Error at ...".
table_meta = get_acs_table_metadata()

Error at ('b16002_hh_linguistic_isolation_acs_m',)


In [5]:
def _parse_id_set(raw_ids):
    """Turn one raw `ids` cell (e.g. 3 or "3, 27") into a set of 3-character, zero-padded ids.

    Padding is applied to each comma-separated id. Padding the whole cell text,
    as before, only worked for cells holding a single id.
    """
    return {token.zfill(3) for token in str(raw_ids).replace(' ', '').split(',')}


# Load the column-to-census-id lookup and normalise every `ids` cell to a set of ids.
column_ids = pd.read_csv('column_ids.csv', skipinitialspace=True)
column_ids['ids'] = column_ids['ids'].apply(_parse_id_set)

In [6]:
# Assign the six column labels positionally; the loaded frame must have exactly six columns.
column_ids.columns = ['table', 'col', 'group', 'ids', 'moe', 'percent']

In [7]:
#Uncomment to save new pickle of table_meta Dataframe
#pickle.dump(table_meta, open('pickles/table_meta.p', 'wb'))

In [8]:
#census_vars = pd.read_json('census_variables/variables.json', orient='index')
#census_vars

In [9]:
# Show the distinct values of the 'title' column of table_meta.
set(table_meta['title'])

{'Aggregate Gross Rent by Units in Structure',
 'Aggregate travel time to work (in minutes) of workers by travel time',
 'American Indian And Alaska Native Alone Population by Age and Gender',
 'Asian Alone Population by Age and Gender',
 'Average Household Size',
 'Black Or African American Alone Population by Age and Gender',
 'Children in Poverty by Family Type',
 'Citizenship Status by Native and Foreign born',
 'Citizenship Status by Native and Foreign born with Place of Birth',
 'Citizenship and Foreign Born by Age and Gender',
 'Contract Rent',
 'Disability Status by Age and Gender',
 'Disability Status by Age and Race/Ethnicity',
 'Disability Status by Difficulty Type',
 'Educational Attainment Overall and by Gender',
 'Educational Attainment by Age',
 'Educational Attainment by Employment',
 'Educational Attainment by Place of Birth',
 'Educational Attainment by Race/Ethinicity',
 'Employment',
 'Employment Status by Disability Status',
 'Employment Status by Race and Age',
 '

In [10]:
# Number of rows (tables) held in table_meta.
len(list(table_meta['tbl_name']))

102

In [11]:

def get_col_names(sql_name, visited_ids, lines):
    """Map every column alias found in `lines` to the census ids collected before it.

    The lines are scanned in order. Three-digit ids from the first `e._...]`
    span of a line are accumulated until a line containing "as" is reached.
    If that line holds an `as <alias>,` clause, the accumulated ids are stored
    under the alias. The accumulator is then cleared whether or not an alias
    was found.

    Parameters
    ----------
    sql_name : str
        Key under which the per-column mapping is returned.
    visited_ids
        Unused; kept so existing callers keep working.
    lines : iterable of str
        Lines of the SQL text to scan. Lines containing "SELECT" are skipped.

    Returns
    -------
    dict
        ``{sql_name: {alias: {'ids': set of str, 'moe': bool, 'percent': bool}}}``.
        ``moe`` is True for aliases ending in 'm', 'me', 'mp' or 'mep'
        ('mp' was previously missed, so margin-of-error percentages such as
        ``pop_u5mp`` were flagged as estimates). ``percent`` is True for
        aliases ending in 'p'.
    """
    columns = {}
    pending_ids = []
    for line in lines:
        if "SELECT" in line:
            continue

        if "e._" in line:
            # Everything from the first estimate reference up to the closing bracket.
            id_span = re.search(r'e\._(.+?)\]', line)
            if id_span:
                pending_ids += [
                    digits for digits in re.findall(r'\d+', id_span.group())
                    if len(digits) == 3
                ]

        # Once "as" is in the line, store the collected ids under the alias that follows it.
        if "as" in line:
            alias = re.search(r'as (.+?),', line)
            if alias:
                col_name = alias.group(1)
                columns[col_name] = {
                    'ids': set(pending_ids),
                    'moe': col_name.endswith(('m', 'me', 'mp', 'mep')),
                    'percent': col_name.endswith('p'),
                }
            # Cleared on every "as" line, even when no alias was matched.
            pending_ids = []

    return {sql_name: columns}

In [12]:
# List the table names held in table_meta.
list(table_meta['tbl_name'])

['b01001_population_by_age_gender_acs_m',
 'b01001a_whi_population_by_age_gender_acs_m',
 'b01001b_aa_population_by_age_gender_acs_m',
 'b01001c_na_population_by_age_gender_acs_m',
 'b01001d_as_population_by_age_gender_acs_m',
 'b01001e_pi_population_by_age_gender_acs_m',
 'b01001f_oth_population_by_age_gender_acs_m',
 'b01001g_mlt_population_by_age_gender_acs_m',
 'b01001h_nhw_population_by_age_gender_acs_m',
 'b01001i_lat_population_by_age_gender_acs_m',
 'b01002_med_age_acs_m',
 'b03002_race_ethnicity_acs_m',
 'b04001_first_reported_ancestry_acs_m',
 'b04006_reported_ancestry_acs_m',
 'b05002_citizenship_nativity_acs_m',
 'b05002_place_of_birth_citizenship_nativity_acs_m',
 'b05003_citizenship_nativity_by_age_gender_acs_m',
 'b05006_foreign_born_place_of_birth_acs_m',
 'b05011_naturalization_by_year_acs_m',
 'b06009_educational_attainment_by_placeofbirth_acs_m',
 'b07001_geomobility_in_migration_by_age_acs_m',
 'b07001_geomobility_migration_curr_res_by_age_acs_m',
 'b07204_geomobili

In [32]:
def _first_alias(sql_text):
    """Return the first column alias written as ``as <name>,`` in sql_text, or '' if there is none."""
    aliases = re.findall(r'as (.+?),', sql_text)
    return aliases[0] if aliases else ''


def _bracket_ids(bracket_text):
    """Return the last three digits of every comma-separated item of one ``[...]`` group."""
    return [
        ''.join(ch for ch in item if ch.isdigit())[-3:]
        for item in bracket_text.split(',')
    ]


# One record per parsed output column:
# {'table', 'group', 'col', 'moe', 'percent', 'estimate', 'universe'}.
uni_dict = []
# Raw string: the backslashes are literal path separators, not escape sequences.
root = r'K:\DataServices\Code\SQL\ACS\Tables'
# Built once instead of re-creating a list for every file in the directory.
known_tables = set(table_meta['tbl_name'])

for file in os.listdir(root):
    name = file
    name_wo_ext = name[:-4]
    group = name_wo_ext[:6]
    # Only .sql files whose base name is a known metadata table are parsed.
    if name_wo_ext not in known_tables or not name.endswith(".sql"):
        continue

    with open(os.path.join(root, name)) as sql_file:
        lines = sql_file.readlines()

    # start   -> inside an acs_est_pct( / muni_sum_est_pct( call
    # m_start -> inside an acs_moe_pct( call
    # e_start -> inside an acs_est( / acs_moe( call
    start, e_start, m_start = False, False, False
    moe, percent = False, False
    visited_cols = set()
    f = ''  # text accumulated for the call that is currently being read

    for i, line in enumerate(lines):

        if 'acs_est_pct(' in line:
            start = True
            moe, percent = False, True

        if 'acs_moe_pct(' in line:
            m_start = True
            # Keep only the text after the last 'acs_moe_pct' already in the buffer.
            f = f.split('acs_moe_pct')[-1]
            moe, percent = True, True

        if 'muni_sum_est_pct(' in line:
            start = True
            moe, percent = False, True

        if 'acs_est(' in line:
            e_start = True
            moe, percent = False, False

        if 'acs_moe(' in line:
            e_start = True
            moe, percent = True, False

        if start or e_start or m_start:
            f += line

        # The alias may sit on the following line; the last line of a file has no
        # successor, which previously raised IndexError.
        next_line = lines[i + 1] if i + 1 < len(lines) else ''

        # Estimate / MOE call: complete once one or two bracket groups were read.
        if e_start and f.count(']') in (1, 2):
            e_start = False
            if 'as' not in f and 'as' in next_line:
                w_bracs = re.findall(r'\[.*?\]', f + next_line)
                # Falls back to '' when no alias is found; this branch used to keep an
                # empty list, which cannot be looked up in visited_cols.
                col = _first_alias(next_line)
            else:
                w_bracs = re.findall(r'\[.*?\]', f)
                col = _first_alias(f)

            if not w_bracs:
                continue

            # Each alias is recorded once per file. Note that `f` is deliberately
            # not cleared on these early exits.
            if col in visited_cols:
                continue
            visited_cols.add(col)

            uni_dict.append({
                'table': name_wo_ext, 'group': group, 'col': col,
                'moe': moe, 'percent': percent,
                'estimate': _bracket_ids(w_bracs[0]),
                'universe': [],
            })
            f = ''

        # Percentage call: two groups for an estimate percentage, four for an MOE percentage.
        if (start and f.count(']') == 2) or (m_start and f.count(']') == 4):
            start = False
            m_start = False
            if 'as' not in f and 'as' in next_line:
                call_text = f + next_line
                col = _first_alias(next_line)
            else:
                call_text = f
                col = _first_alias(f)

            w_bracs = re.findall(r'\[.*?\]', call_text)
            # Both the estimate group and the universe group are required.
            if len(w_bracs) < 2:
                continue

            # Trace of every MOE-percentage call that was parsed.
            if f.count(']') == 4:
                print(col, '\t', call_text, '\n -----END-----')

            if col in visited_cols:
                continue
            visited_cols.add(col)

            uni_dict.append({
                'table': name_wo_ext, 'group': group, 'col': col,
                'moe': moe, 'percent': percent,
                'estimate': _bracket_ids(w_bracs[0]),
                'universe': _bracket_ids(w_bracs[1]),
            })
            f = ''


pop_u5mp 	 acs1418.acs_moe_pct(
	array[e._003,e._018], --estimate
	array[e._001], --universe estimate
	array[m._003,m._018], --moe
	array[m._001]  --universe moe
	)as pop_u5mp,
 
 -----END-----
pop_5_9mp 	 acs1418.acs_moe_pct(
	array[e._004,e._019], --estimate
	array[e._001], --universe estimate
	array[m._004,m._019], --moe
	array[m._001]  --universe moe
	)as pop_5_9mp,
 
 -----END-----
pop1014mp 	 acs1418.acs_moe_pct(
	array[e._005,e._020], --estimate
	array[e._001], --universe estimate
	array[m._005,m._020], --moe
	array[m._001]  --universe moe
	)as pop1014mp,
 
 -----END-----
pop1517mp 	 acs1418.acs_moe_pct(
	array[e._006,e._021], --estimate
	array[e._001], --universe estimate
	array[m._006,m._021], --moe
	array[m._001]  --universe moe
	)as pop1517mp,
 
 -----END-----
pop1819mp 	 acs1418.acs_moe_pct(
	array[e._007,e._022], --estimate
	array[e._001], --universe estimate
	array[m._007,m._022], --moe
	array[m._001]  --universe moe
	)as pop1819mp,
 
 -----END-----
pop2024mp 	 acs1418.ac

pop_u5mp 	 acs1418.acs_moe_pct(
	array[e._003,e._018], --estimate
	array[e._001], --universe estimate
	array[m._003,m._018], --moe
	array[m._001]  --universe moe
	)as pop_u5mp,
 
 -----END-----
pop_5_9mp 	 acs1418.acs_moe_pct(
	array[e._004,e._019], --estimate
	array[e._001], --universe estimate
	array[m._004,m._019], --moe
	array[m._001]  --universe moe
	)as pop_5_9mp,
 
 -----END-----
pop1014mp 	 acs1418.acs_moe_pct(
	array[e._005,e._020], --estimate
	array[e._001], --universe estimate
	array[m._005,m._020], --moe
	array[m._001]  --universe moe
	)as pop1014mp,
 
 -----END-----
pop1517mp 	 acs1418.acs_moe_pct(
	array[e._006,e._021], --estimate
	array[e._001], --universe estimate
	array[m._006,m._021], --moe
	array[m._001]  --universe moe
	)as pop1517mp,
 
 -----END-----
pop1819mp 	 acs1418.acs_moe_pct(
	array[e._007,e._022], --estimate
	array[e._001], --universe estimate
	array[m._007,m._022], --moe
	array[m._001]  --universe moe
	)as pop1819mp,
 
 -----END-----
pop2024mp 	 acs1418.ac

pop_u5mp 	 acs1418.acs_moe_pct(
	array[e._003,e._018], --estimate
	array[e._001], --universe estimate
	array[m._003,m._018], --moe
	array[m._001]  --universe moe
	)as pop_u5mp,
 
 -----END-----
pop_5_9mp 	 acs1418.acs_moe_pct(
	array[e._004,e._019], --estimate
	array[e._001], --universe estimate
	array[m._004,m._019], --moe
	array[m._001]  --universe moe
	)as pop_5_9mp,
 
 -----END-----
pop1014mp 	 acs1418.acs_moe_pct(
	array[e._005,e._020], --estimate
	array[e._001], --universe estimate
	array[m._005,m._020], --moe
	array[m._001]  --universe moe
	)as pop1014mp,
 
 -----END-----
pop1517mp 	 acs1418.acs_moe_pct(
	array[e._006,e._021], --estimate
	array[e._001], --universe estimate
	array[m._006,m._021], --moe
	array[m._001]  --universe moe
	)as pop1517mp,
 
 -----END-----
pop1819mp 	 acs1418.acs_moe_pct(
	array[e._007,e._022], --estimate
	array[e._001], --universe estimate
	array[m._007,m._022], --moe
	array[m._001]  --universe moe
	)as pop1819mp,
 
 -----END-----
pop2024mp 	 acs1418.ac

pop_u5mp 	 acs1418.acs_moe_pct(
	array[e._003,e._018], --estimate
	array[e._001], --universe estimate
	array[m._003,m._018], --moe
	array[m._001]  --universe moe
	)as pop_u5mp,
 
 -----END-----
pop_5_9mp 	 acs1418.acs_moe_pct(
	array[e._004,e._019], --estimate
	array[e._001], --universe estimate
	array[m._004,m._019], --moe
	array[m._001]  --universe moe
	)as pop_5_9mp,
 
 -----END-----
pop1014mp 	 acs1418.acs_moe_pct(
	array[e._005,e._020], --estimate
	array[e._001], --universe estimate
	array[m._005,m._020], --moe
	array[m._001]  --universe moe
	)as pop1014mp,
 
 -----END-----
pop1517mp 	 acs1418.acs_moe_pct(
	array[e._006,e._021], --estimate
	array[e._001], --universe estimate
	array[m._006,m._021], --moe
	array[m._001]  --universe moe
	)as pop1517mp,
 
 -----END-----
pop1819mp 	 acs1418.acs_moe_pct(
	array[e._007,e._022], --estimate
	array[e._001], --universe estimate
	array[m._007,m._022], --moe
	array[m._001]  --universe moe
	)as pop1819mp,
 
 -----END-----
pop2024mp 	 acs1418.ac

pop_u5mp 	 acs1418.acs_moe_pct(
	array[e._003,e._018], --estimate
	array[e._001], --universe estimate
	array[m._003,m._018], --moe
	array[m._001]  --universe moe
	)as pop_u5mp,
 
 -----END-----
pop_5_9mp 	 acs1418.acs_moe_pct(
	array[e._004,e._019], --estimate
	array[e._001], --universe estimate
	array[m._004,m._019], --moe
	array[m._001]  --universe moe
	)as pop_5_9mp,
 
 -----END-----
pop1014mp 	 acs1418.acs_moe_pct(
	array[e._005,e._020], --estimate
	array[e._001], --universe estimate
	array[m._005,m._020], --moe
	array[m._001]  --universe moe
	)as pop1014mp,
 
 -----END-----
pop1517mp 	 acs1418.acs_moe_pct(
	array[e._006,e._021], --estimate
	array[e._001], --universe estimate
	array[m._006,m._021], --moe
	array[m._001]  --universe moe
	)as pop1517mp,
 
 -----END-----
pop1819mp 	 acs1418.acs_moe_pct(
	array[e._007,e._022], --estimate
	array[e._001], --universe estimate
	array[m._007,m._022], --moe
	array[m._001]  --universe moe
	)as pop1819mp,
 
 -----END-----
pop2024mp 	 acs1418.ac

nh_mep 	 acs1418.acs_moe_pct(
	array[e._002], --estimate
	array[e._001], --universe estimate
	array[m._002], --moe
	array[m._001]  --universe moe
	)as nh_mep,
 
 -----END-----
nhwhi_mep 	 acs1418.acs_moe_pct(
	array[e._003],--estimate
	array[e._001], --universe estimate
	array[m._003],--moe
	array[m._001] --universe moe
	)as nhwhi_mep,
 
 -----END-----
nhaa_mep 	 acs1418.acs_moe_pct(
	array[e._004],--estimate
	array[e._001], --universe estimate
	array[m._004],--moe
	array[m._001] --universe moe
	)as nhaa_mep,
 
 -----END-----
nhna_mep 	 acs1418.acs_moe_pct(
	array[e._005],--estimate
	array[e._001], --universe estimate
	array[m._005],--moe
	array[m._001] --universe moe
	)as nhna_mep,
 
 -----END-----
nhas_mep 	 acs1418.acs_moe_pct(
	array[e._006],--estimate
	array[e._001], --universe estimate
	array[m._006],--moe
	array[m._001] --universe moe
	)as nhas_mep,
 
 -----END-----
nhpi_mep 	 acs1418.acs_moe_pct(
	array[e._007],--estimate
	array[e._001], --universe estimate
	array[m._007],--moe

sameu18mp 	 acs1418.acs_moe_pct(
	array[e._018,e._019],--estimate
	array[e._017], --universe estimate
	array[m._018,m._019],--moe
	array[m._017] --universe moe
	)as sameu18mp,
 
 -----END-----
mp1824same 	 acs1418.acs_moe_pct(
	array[e._020,e._021], --estimate
	array[e._004,e._005], --universe estimate
	array[m._020,m._021], --moe
	array[m._004,m._005]  --universe moe
	)as mp1824same,
 
 -----END-----
same1824mp 	 acs1418.acs_moe_pct(
	array[e._020,e._021],--estimate
	array[e._017], --universe estimate
	array[m._020,m._021],--moe
	array[m._017] --universe moe
	)as same1824mp,
 
 -----END-----
mp2564same 	 acs1418.acs_moe_pct(
	array[e._022,e._023,e._024,e._025,e._026,e._027,e._028,e._029], --estimate
	array[e._006,e._007,e._008,e._009,e._010,e._011,e._012,e._013], --universe estimate
	array[m._022,m._023,m._024,m._025,m._026,m._027,m._028,m._029], --moe
	array[m._006,m._007,m._008,m._009,m._010,m._011,m._012,m._013]  --universe moe
	)as mp2564same,
 
 -----END-----
same2564mp 	 acs1418

s_cnty_mep 	 acs1418.acs_moe_pct(
	array[e._007::numeric],--estimate
	array[e._001::numeric], --universe estimate
	array[m._007::numeric],--moe
	array[m._001::numeric] --universe moe
	)as s_cnty_mep,
 
 -----END-----
s_statemep 	 acs1418.acs_moe_pct(
	array[e._010::numeric],--estimate
	array[e._001::numeric], --universe estimate
	array[m._010::numeric],--moe
	array[m._001::numeric] --universe moe
	)as s_statemep,
 
 -----END-----
 	 acs1418.acs_moe_pct(
	array[e._013::numeric],--estimate
	array[e._001::numeric], --universe estimate
	array[m._013::numeric],--moe
	array[m._001::numeric] --universe moe
	)as d_statemep
 
 -----END-----
mlt15mp 	 acs1418.acs_moe_pct(
	array[e._002,e._003],--estimate
	array[e._001], --universe estimate
	array[m._002,m._003],--moe
	array[m._001] --universe moe
	)as mlt15mp,
 
 -----END-----
m15_29_mp 	 acs1418.acs_moe_pct(
	array[e._004,e._005,e._006],--estimate
	array[e._001], --universe estimate
	array[m._004,m._005,m._006],--moe
	array[m._001] --universe m

ctv_mep 	 acs1418.acs_moe_pct(
	array[e._002::numeric],--estimate
	array[e._001::numeric], --universe estimate
	array[m._002::numeric],--moe
	array[m._001::numeric] --universe moe
	)as ctv_mep,
 
 -----END-----
ctvsnglmep 	 acs1418.acs_moe_pct(
	array[e._003::numeric],--estimate
	array[e._002::numeric], --universe estimate
	array[m._003::numeric],--moe
	array[m._002::numeric] --universe moe
	)as ctvsnglmep,
 
 -----END-----
carpoolmep 	 acs1418.acs_moe_pct(
	array[e._004::numeric],--estimate
	array[e._002::numeric], --universe estimate
	array[m._004::numeric],--moe
	array[m._002::numeric] --universe moe
	)as carpoolmep,
 
 -----END-----
pub_mep 	 acs1418.acs_moe_pct(
	array[e._010::numeric],--estimate
	array[e._001::numeric], --universe estimate
	array[m._010::numeric],--moe
	array[m._001::numeric] --universe moe
	)as pub_mep,
 
 -----END-----
taxi_mep 	 acs1418.acs_moe_pct(
	array[e._016::numeric],--estimate
	array[e._001::numeric], --universe estimate
	array[m._016::numeric],--moe
	a

sc4564_mep 	 acs1418.acs_moe_pct(
	array[e._031,e._032,e._072,e._073],--estimate
	array[e._027,e._068], --universe estimate
	array[m._031,m._032,m._072,m._073],--moe
	array[m._027,m._068] --universe moe
	)as sc4564_mep,
 
 -----END-----
bd4564_mep 	 acs1418.acs_moe_pct(
	array[e._033,e._034,e._074,e._075],--estimate
	array[e._027,e._068], --universe estimate
	array[m._033,m._034,m._074,m._075],--moe
	array[m._027,m._068] --universe moe
	)as bd4564_mep,
 
 -----END-----
pop2564mep 	 acs1418.acs_moe_pct(
	array[e._011,e._052,e._019,e._060,e._027,e._068],--estimate
	array[e._001], --universe estimate
	array[m._011,m._052,m._019,m._060,m._027,m._068],--moe
	array[m._001] --universe moe
	)as pop2564mep,
 
 -----END-----
lh2564_mep 	 acs1418.acs_moe_pct(
	array[e._012,e._013,e._053,e._054,e._020,e._021,e._061,e._062,e._028,e._029,e._069,e._070],--estimate
	array[e._011,e._052,e._019,e._060,e._027,e._068], --universe estimate
	array[m._012,m._013,m._053,m._054,m._020,m._021,m._061,m._062,m._0

pov_mep 	 acs1418.acs_moe_pct(
	array[e._002], --estimate
	array[e._001], --universe estimate
	array[m._002], --moe
	array[m._001]  --universe moe
	)as pov_mep,
 
 -----END-----
pov_u6mep 	 acs1418.acs_moe_pct(
	array[e._004,e._005,e._018,e._019], --estimate
	array[e._004,e._005,e._018,e._019,e._033,e._034,e._047,e._048], --universe estimate
	array[m._004,m._005,m._018,m._019], --moe
	array[m._004,m._005,m._018,m._019,m._033,m._034,m._047,m._048]  --universe moe
	)as pov_u6mep,
 
 -----END-----
pov_u18mep 	 acs1418.acs_moe_pct(
	array[e._004,e._005,e._006,e._007,e._008,e._009,e._018,e._019,e._020,e._021,e._022,e._023], --estimate
	array[e._004,e._005,e._006,e._007,e._008,e._009,e._018,e._019,e._020,e._021,e._022,e._023,e._033,e._034,e._035,e._036,e._037,e._038,e._047,e._048,e._049,e._050,e._051,e._052], --universe estimate
	array[m._004,m._005,m._006,m._007,m._008,m._009,m._018,m._019,m._020,m._021,m._022,m._023], --moe
	array[m._004,m._005,m._006,m._007,m._008,m._009,m._018,m._019,m._

naou5mep 	 acs1418.acs_moe_pct(
	array[na_e._012,na_e._018],--estimate
	array[na_e._012,na_e._018,na_e._032,na_e._038], --universe estimate
	array[na_m._012,na_m._018],--moe
	array[na_m._012,na_m._018,na_m._032,na_m._038] --universe moe
	)as naou5mep,
 
 -----END-----
nao517mep 	 acs1418.acs_moe_pct(
	array[na_e._014,na_e._020],--estimate
	array[na_e._014,na_e._020,na_e._034,na_e._040], --universe estimate
	array[na_m._014,na_m._020],--moe
	array[na_m._014,na_m._020,na_m._034,na_m._040] --universe moe
	)as nao517mep,
 
 -----END-----
 	 acs1418.acs_moe_pct(
	array[as_e._002],--estimate
	array[as_e._001], --universe estimate
	array[as_m._002],--moe
	array[as_m._001] --universe moe
 
 -----END-----
 	 acs1418.acs_moe_pct(
	array[as_e._003],--estimate
	array[as_e._003,as_e._023], --universe estimate
	array[as_m._003],--moe
	array[as_m._003,as_m._023] --universe moe
 
 -----END-----
pov_hhmep 	 acs1418.acs_moe_pct(
	array[e._002],--estimate
	array[e._001], --universe estimate
	array[m._002

iu25mp 	 acs1418.acs_moe_pct(
	array[e._002],--estimate
	array[e._001], --universe estimate
	array[m._002],--moe
	array[m._001] --universe moe
	)as iu25mp,
 
 -----END-----
iu25u20mp 	 acs1418.acs_moe_pct(
	array[e._003,e._004,e._005],--estimate
	array[e._002], --universe estimate
	array[m._003,m._004,m._005],--moe
	array[m._002] --universe moe
	)as iu25u20mp,
 
 -----END-----
iu252039mp 	 acs1418.acs_moe_pct(
	array[e._006,e._007,e._008,e._009],--estimate
	array[e._002], --universe estimate
	array[m._006,m._007,m._008,m._009],--moe
	array[m._002] --universe moe
	)as iu252039mp,
 
 -----END-----
iu254059mp 	 acs1418.acs_moe_pct(
	array[e._010,e._011,e._012],--estimate
	array[e._002], --universe estimate
	array[m._010,m._011,m._012],--moe
	array[m._002] --universe moe
	)as iu254059mp,	
 
 -----END-----
iu256074mp 	 acs1418.acs_moe_pct(
	array[e._013],--estimate
	array[e._002], --universe estimate
	array[m._013],--moe
	array[m._002] --universe moe
	)as iu256074mp,	
 
 -----END-----
iu257

assist_mep 	 acs1418.acs_moe_pct(
	array[e._002],--estimate
	array[e._001], --universe estimate
	array[m._002],--moe
	array[m._001] --universe moe
	)as assist_mep,
 
 -----END-----
 	 acs1418.acs_moe_pct(
	array[e._003],--estimate
	array[e._001], --universe estimate
	array[m._003],--moe
	array[m._001] --universe moe
	)as noasst_mep
 
 -----END-----
snap_mp 	 acs1418.acs_moe_pct(
	array[e._002],--estimate
	array[e._001], --universe estimate
	array[m._002],--moe
	array[m._001] --universe moe
	)as snap_mp,
 
 -----END-----
snap60mp 	 acs1418.acs_moe_pct(
	array[e._003],--estimate
	array[e._001], --universe estimate
	array[m._003],--moe
	array[m._001] --universe moe
	)as snap60mp,
 
 -----END-----
snapno60mp 	 acs1418.acs_moe_pct(
	array[e._004],--estimate
	array[e._001], --universe estimate
	array[m._004],--moe
	array[m._001] --universe moe
	)as snapno60mp,
 
 -----END-----
nsnp_mp 	 acs1418.acs_moe_pct(
	array[e._005],--estimate
	array[e._001], --universe estimate
	array[m._005],--moe
	a

 -----END-----
hs_e_mep 	 acs1418.acs_moe_pct(
	array[e._013],--estimate
	array[e._012], --universe estimate
	array[m._013],--moe
	array[m._012] --universe moe
	)as hs_e_mep,
 
 -----END-----
hs_ue_mep 	 acs1418.acs_moe_pct(
	array[e._014],--estimate
	array[e._012], --universe estimate
	array[m._014],--moe
	array[m._012] --universe moe
	)as hs_ue_mep,
 
 -----END-----
sc_mep 	 acs1418.acs_moe_pct(
	array[e._016],--estimate
	array[e._001], --universe estimate
	array[m._016],--moe
	array[m._001] --universe moe
	)as sc_mep,
 
 -----END-----
sclf_mep 	 acs1418.acs_moe_pct(
	array[e._017],--estimate
	array[e._016], --universe estimate
	array[m._017],--moe
	array[m._016] --universe moe
	)as sclf_mep,
 
 -----END-----
sc_e_mep 	 acs1418.acs_moe_pct(
	array[e._020],--estimate
	array[e._019], --universe estimate
	array[m._020],--moe
	array[m._019] --universe moe
	)as sc_e_mep,
 
 -----END-----
sc_ue_mep 	 acs1418.acs_moe_pct(
	array[e._021],--estimate
	array[e._019], --universe estimate
	array[

r_mf_mep 	 acs1418.acs_moe_pct(
	array[e._016,e._017,e._018,e._019,e._020,e._021], --estimate
	array[e._013], --universe estimate
	array[m._016,m._017,m._018,m._019,m._020,m._021], --moe
	array[m._013]  --universe moe
	)as r_mf_mep,
 
 -----END-----
r_u2mp 	 acs1418.acs_moe_pct(
	array[e._016], 	--estimate
	array[e._013], 	--universe estimate
	array[m._016], 	--moe
	array[m._013]  	--universe moe
	)as r_u2mp,
 
 -----END-----
r_u2_4mp 	 acs1418.acs_moe_pct(
	array[e._016,e._017], 	--estimate
	array[e._013], 	--universe estimate
	array[m._016,m._017], 	--moe
	array[m._013]  	--universe moe
	)as r_u2_4mp,
 
 -----END-----
r_u3_4mp 	 acs1418.acs_moe_pct(
	array[e._017], 	--estimate
	array[e._013], 	--universe estimate
	array[m._017], 	--moe
	array[m._013]  	--universe moe
	)as r_u3_4mp,
 
 -----END-----
r_u5ovmp 	 acs1418.acs_moe_pct(
	array[e._018,e._019,e._020,e._021], 	--estimate
	array[e._013], 	--universe estimate
	array[m._018,m._019,m._020,m._021], 	--moe
	array[m._013]  	--univers

whilh_mep 	 acs1418.acs_moe_pct(
	array[whi_e._003,whi_e._008],--estimate
	array[whi_e._001], --universe estimate
	array[whi_m._003,whi_m._008],--moe
	array[whi_m._001] --universe moe
	)as whilh_mep,
 
 -----END-----
whihs_mep 	 acs1418.acs_moe_pct(
	array[whi_e._004,whi_e._009],--estimate
	array[whi_e._001], --universe estimate
	array[whi_m._004,whi_m._009],--moe
	array[whi_m._001] --universe moe
	)as whihs_mep,
 
 -----END-----
whisc_mep 	 acs1418.acs_moe_pct(
	array[whi_e._005,whi_e._010],--estimate
	array[whi_e._001], --universe estimate
	array[whi_m._005,whi_m._010],--moe
	array[whi_m._001] --universe moe
	)as whisc_mep,
 
 -----END-----
whibd_mep 	 acs1418.acs_moe_pct(
	array[whi_e._006,whi_e._011],--estimate
	array[whi_e._001], --universe estimate
	array[whi_m._006,whi_m._011],--moe
	array[whi_m._001] --universe moe
	)as whibd_mep,
 
 -----END-----
aalh_mep 	 acs1418.acs_moe_pct(
	array[aa_e._003,aa_e._008],--estimate
	array[aa_e._001], --universe estimate
	array[aa_m._003,aa_m.

In [33]:
# Display all parsed records.
uni_dict

# Records parsed for one specific table.
[x for x in uni_dict if x['table'] == 'b01001_population_by_age_gender_acs_m']

[{'table': 'b01001_population_by_age_gender_acs_m',
  'group': 'b01001',
  'col': 'pop',
  'moe': False,
  'percent': False,
  'estimate': ['001'],
  'universe': []},
 {'table': 'b01001_population_by_age_gender_acs_m',
  'group': 'b01001',
  'col': 'popm',
  'moe': True,
  'percent': False,
  'estimate': ['001'],
  'universe': []},
 {'table': 'b01001_population_by_age_gender_acs_m',
  'group': 'b01001',
  'col': 'pop_u5',
  'moe': False,
  'percent': False,
  'estimate': ['003', '027'],
  'universe': []},
 {'table': 'b01001_population_by_age_gender_acs_m',
  'group': 'b01001',
  'col': 'pop_u5m',
  'moe': True,
  'percent': False,
  'estimate': ['003', '027'],
  'universe': []},
 {'table': 'b01001_population_by_age_gender_acs_m',
  'group': 'b01001',
  'col': 'pop_5_9',
  'moe': False,
  'percent': False,
  'estimate': ['004', '028'],
  'universe': []},
 {'table': 'b01001_population_by_age_gender_acs_m',
  'group': 'b01001',
  'col': 'pop_5_9m',
  'moe': True,
  'percent': False,
  'es

In [None]:
#pickle.dump(ids_dict, open('table_to_censusid.p', 'wb'))

In [None]:
#col_names_dict['b01001_population_by_age_gender_acs_m']
#pickle.dump(col_names_dict, open('cols_ids_dict.p', 'wb'))

In [34]:
# Display all parsed records.
uni_dict

[{'table': 'b01001a_whi_population_by_age_gender_acs_m',
  'group': 'b01001',
  'col': 'pop',
  'moe': False,
  'percent': False,
  'estimate': ['001'],
  'universe': []},
 {'table': 'b01001a_whi_population_by_age_gender_acs_m',
  'group': 'b01001',
  'col': 'popm',
  'moe': True,
  'percent': False,
  'estimate': ['001'],
  'universe': []},
 {'table': 'b01001a_whi_population_by_age_gender_acs_m',
  'group': 'b01001',
  'col': 'pop_u5',
  'moe': False,
  'percent': False,
  'estimate': ['003', '018'],
  'universe': []},
 {'table': 'b01001a_whi_population_by_age_gender_acs_m',
  'group': 'b01001',
  'col': 'pop_u5m',
  'moe': True,
  'percent': False,
  'estimate': ['003', '018'],
  'universe': []},
 {'table': 'b01001a_whi_population_by_age_gender_acs_m',
  'group': 'b01001',
  'col': 'pop_5_9',
  'moe': False,
  'percent': False,
  'estimate': ['004', '019'],
  'universe': []},
 {'table': 'b01001a_whi_population_by_age_gender_acs_m',
  'group': 'b01001',
  'col': 'pop_5_9m',
  'moe': T

In [35]:
# One row per uni_dict record; the record keys become the column names.
# (An earlier variant built this frame from ids_dict with orient='index'.)
table_ids = pd.DataFrame.from_records(data=uni_dict)

In [36]:
# Display the frame built from uni_dict (pandas elides the middle rows of a long frame).
table_ids

Unnamed: 0,table,group,col,moe,percent,estimate,universe
0,b01001a_whi_population_by_age_gender_acs_m,b01001,pop,False,False,[001],[]
1,b01001a_whi_population_by_age_gender_acs_m,b01001,popm,True,False,[001],[]
2,b01001a_whi_population_by_age_gender_acs_m,b01001,pop_u5,False,False,"[003, 018]",[]
3,b01001a_whi_population_by_age_gender_acs_m,b01001,pop_u5m,True,False,"[003, 018]",[]
4,b01001a_whi_population_by_age_gender_acs_m,b01001,pop_5_9,False,False,"[004, 019]",[]
...,...,...,...,...,...,...,...
7572,c18130_poverty_status_by_disability_status_acs_m,c18130,nd65omp,True,True,[020],[016]
7573,c18130_poverty_status_by_disability_status_acs_m,c18130,nd65opvp,False,True,[021],[020]
7574,c18130_poverty_status_by_disability_status_acs_m,c18130,nd65opvmp,True,True,[021],[020]
7575,c18130_poverty_status_by_disability_status_acs_m,c18130,nd65onpvp,False,True,[022],[020]


In [254]:
# Sanity check for the B05003 table: print how many 'col' entries it has and
# how many distinct (non-list) names those entries contain. Equal numbers
# mean no column name is repeated.
_is_b05003 = table_ids['table'] == 'b05003_citizenship_nativity_by_age_gender_acs_m'
_b05003_cols = table_ids.loc[_is_b05003, 'col'].values
print(len(_b05003_cols))
print(len({name for name in _b05003_cols if not isinstance(name, list)}))

65
65


In [41]:
# Show every table_ids row that belongs to the B05003 citizenship/nativity table.
table_ids.loc[table_ids['table'] == 'b05003_citizenship_nativity_by_age_gender_acs_m']

Unnamed: 0,table,group,col,moe,percent,estimate,universe
2877,b05003_citizenship_nativity_by_age_gender_acs_m,b05003,pop,False,False,[001],[]
2878,b05003_citizenship_nativity_by_age_gender_acs_m,b05003,pop_me,True,False,[001],[]
2879,b05003_citizenship_nativity_by_age_gender_acs_m,b05003,pop_u18,False,False,"[003, 014]",[]
2880,b05003_citizenship_nativity_by_age_gender_acs_m,b05003,pop_u18me,True,False,"[003, 014]",[]
2881,b05003_citizenship_nativity_by_age_gender_acs_m,b05003,ntv_u18,False,False,"[004, 015]",[]
...,...,...,...,...,...,...,...
3001,b05003_citizenship_nativity_by_age_gender_acs_m,b05003,ffb18o_p,False,True,[021],[019]
3002,b05003_citizenship_nativity_by_age_gender_acs_m,b05003,ffb18omep,True,True,[021],[019]
3003,b05003_citizenship_nativity_by_age_gender_acs_m,b05003,fntr18o_p,False,True,[022],[019]
3004,b05003_citizenship_nativity_by_age_gender_acs_m,b05003,fntr18omep,True,True,[022],[019]


In [38]:
# dropna() and sort_index() each return a new frame. Called on its own line,
# the dropna result was thrown away, so chain the two calls to make the
# displayed frame really exclude all-NaN rows. table_ids itself is unchanged.
table_ids.dropna(how='all').sort_index()

Unnamed: 0,table,group,col,moe,percent,estimate,universe
0,b01001a_whi_population_by_age_gender_acs_m,b01001,pop,False,False,[001],[]
1,b01001a_whi_population_by_age_gender_acs_m,b01001,popm,True,False,[001],[]
2,b01001a_whi_population_by_age_gender_acs_m,b01001,pop_u5,False,False,"[003, 018]",[]
3,b01001a_whi_population_by_age_gender_acs_m,b01001,pop_u5m,True,False,"[003, 018]",[]
4,b01001a_whi_population_by_age_gender_acs_m,b01001,pop_5_9,False,False,"[004, 019]",[]
...,...,...,...,...,...,...,...
7572,c18130_poverty_status_by_disability_status_acs_m,c18130,nd65omp,True,True,[020],[016]
7573,c18130_poverty_status_by_disability_status_acs_m,c18130,nd65opvp,False,True,[021],[020]
7574,c18130_poverty_status_by_disability_status_acs_m,c18130,nd65opvmp,True,True,[021],[020]
7575,c18130_poverty_status_by_disability_status_acs_m,c18130,nd65onpvp,False,True,[022],[020]


In [None]:
#table_ids.sort_index().index.values
# NOTE(review): this asks for the row *labelled* with the table name and the
# column labelled 0. The table_ids frame shown in this notebook has an integer
# row index and string column names ('table', 'group', 'col', ...), so this
# lookup presumably raises KeyError. It looks like a leftover from the
# commented-out from_dict(orient='index') variant -- confirm before running.
table_ids.loc['b01001_population_by_age_gender_acs_m', 0]

In [39]:
# Build new_table: a copy of column_ids with each row's group / moe / percent
# refreshed from uni_dict, plus two new text columns holding the estimate and
# universe ids as comma-joined strings. column_ids itself is left untouched.
new_table = column_ids.copy()

# Create the two text columns with object dtype. Seeding them with a bare
# np.nan makes them float64, and writing the joined id strings into a float
# column is an incompatible-dtype assignment on recent pandas versions.
# Rows that no uni_dict record matches still end up as NaN.
new_table['estimates'] = pd.Series(np.nan, index=new_table.index, dtype=object)
new_table['universe'] = pd.Series(np.nan, index=new_table.index, dtype=object)

for li in uni_dict:
    table, col, group = li['table'], li['col'], li['group']
    est, uni = li['estimate'], li['universe']
    moe, percent = li['moe'], li['percent']

    # Rows are matched on the (table, col) pair. Neither of those columns is
    # written inside the loop, so a single mask per record serves all five
    # assignments instead of re-evaluating the same condition five times.
    matches = (new_table['table'] == table) & (new_table['col'] == col)

    new_table.loc[matches, 'group'] = group
    new_table.loc[matches, 'estimates'] = ",".join(est)
    new_table.loc[matches, 'universe'] = ",".join(uni)
    new_table.loc[matches, 'moe'] = moe
    new_table.loc[matches, 'percent'] = percent


new_table

Unnamed: 0,table,col,group,ids,moe,percent,estimates,universe
0,b25046_b25044_b01003_hh_vehicle_ownership_acs_m,occ_hu,,{001},False,False,,
1,b25046_b25044_b01003_hh_vehicle_ownership_acs_m,occ_hu_me,,{001},True,False,,
2,b25046_b25044_b01003_hh_vehicle_ownership_acs_m,no_car,,"{010, 003}",False,False,,
3,b25046_b25044_b01003_hh_vehicle_ownership_acs_m,no_car_me,,"{010, 003}",True,False,,
4,b25046_b25044_b01003_hh_vehicle_ownership_acs_m,car,,"{015, 008, 005, 004, 012, 011, 006, 013, 007, ...",False,False,,
...,...,...,...,...,...,...,...,...
11256,c18130_poverty_status_by_disability_status_acs_m,nd65op,c18130,"{020, 016}",False,True,020,016
11257,c18130_poverty_status_by_disability_status_acs_m,nd65omp,c18130,"{020, 016}",True,True,020,016
11258,c18130_poverty_status_by_disability_status_acs_m,nd65opvp,c18130,"{020, 021}",False,True,021,020
11259,c18130_poverty_status_by_disability_status_acs_m,nd65opvmp,c18130,"{020, 021}",True,True,021,020


In [None]:
# Preview only the first few uni_dict records rather than echoing the whole
# list, which holds thousands of per-column dicts.
uni_dict[:5]

In [45]:
# Inspect the new_table rows of the B05003 citizenship/nativity table.
# To look up a single column name instead (e.g. 'mnonu18mep'), filter on 'col':
#     new_table.loc[new_table['col'] == 'mnonu18mep']
new_table.loc[new_table['table'] == 'b05003_citizenship_nativity_by_age_gender_acs_m']

Unnamed: 0,table,col,group,ids,moe,percent,estimates,universe
601,b05003_citizenship_nativity_by_age_gender_acs_m,pop,b05003,{001},False,False,001,
602,b05003_citizenship_nativity_by_age_gender_acs_m,pop_me,b05003,{001},True,False,001,
603,b05003_citizenship_nativity_by_age_gender_acs_m,pop_u18,b05003,"{003, 014}",False,False,003014,
604,b05003_citizenship_nativity_by_age_gender_acs_m,pop_u18me,b05003,"{003, 014}",True,False,003014,
605,b05003_citizenship_nativity_by_age_gender_acs_m,ntv_u18,b05003,"{015, 004}",False,False,004015,
...,...,...,...,...,...,...,...,...
724,b05003_citizenship_nativity_by_age_gender_acs_m,ffb18o_p,b05003,"{019, 021}",False,True,021,019
725,b05003_citizenship_nativity_by_age_gender_acs_m,ffb18omep,b05003,"{019, 021}",True,True,021,019
726,b05003_citizenship_nativity_by_age_gender_acs_m,fntr18o_p,b05003,"{019, 022}",False,True,022,019
727,b05003_citizenship_nativity_by_age_gender_acs_m,fntr18omep,b05003,"{019, 022}",True,True,022,019


In [46]:
#new_table.loc[(new_table['table'] =='b01001_population_by_age_gender_acs_m')]
# Persist new_table as CSV at a path relative to the working directory.
# to_csv is called with its defaults, so the row index is written as the
# first (unnamed) column. NOTE(review): the "csv/" directory presumably has
# to exist already -- to_csv does not create it.
new_table.to_csv("csv/column_ids2.csv")