# Census Data for San Antonio Districts

In [1]:
import pandas as pd
import numpy as np
import os
import re
import math

## District Data

This sheet tells which district each census tract aligns with and what percentage of the district is included. District 0 are census tracts outside of San Antonio.

In [2]:
dist = pd.read_excel('CMO_CDTractsPrecincts_220406.xlsx')
dist = dist[['Council District',
             'Tract 2020 ID\n(Use with 2020 Decennial Census)',
             'Percentage of Tract 2020 in Council District']]
dist.columns = ['council','tract','percent']
dist = dist.iloc[1:].reset_index(drop=True)
dist.tract = dist.tract.astype(str)
dist.council = np.where(dist.council == 'Outside CoSA', 0, dist.council)
dist.head()

Unnamed: 0,council,tract,percent
0,1,110100,0.999534
1,1,110300,0.499153
2,1,110500,0.00015
3,1,110600,0.002611
4,1,110700,0.953113


## Employment Data

This is all the demographic data pulled from the census. 

In [44]:
dataall = pd.read_csv('ACSDP5Y2021.DP03-Data.csv')
dataall.columns = dataall.columns.str.lower()

dataall = dataall.iloc[1:].reset_index(drop=True)
dataall = dataall.replace('(X)', np.nan).replace('-',np.nan).replace('**',np.nan)
# dataall = dataall.dropna(axis=1, how='all')

dataall.geo_id = dataall.geo_id.str[-6:]
dataall.geo_id = dataall.geo_id.astype(str)

dataall.head()

Unnamed: 0,geo_id,name,dp03_0001e,dp03_0001m,dp03_0001ma,dp03_0001ea,dp03_0002e,dp03_0002ea,dp03_0002m,dp03_0002ma,...,dp03_0135pma,dp03_0136pe,dp03_0136pm,dp03_0136pma,dp03_0136pea,dp03_0137pe,dp03_0137pm,dp03_0137pma,dp03_0137pea,unnamed: 1098
0,110100,"Census Tract 1101, Bexar County, Texas",2779,583,,,1778,,430,,...,,12.1,19.1,,,21.7,7.8,,,
1,110300,"Census Tract 1103, Bexar County, Texas",2327,455,,,1718,,433,,...,,5.0,6.8,,,32.7,12.3,,,
2,110500,"Census Tract 1105, Bexar County, Texas",1376,175,,,679,,128,,...,,65.3,8.8,,,73.3,12.8,,,
3,110600,"Census Tract 1106, Bexar County, Texas",5152,1603,,,939,,216,,...,,41.3,19.6,,,50.5,13.8,,,
4,110700,"Census Tract 1107, Bexar County, Texas",878,204,,,434,,196,,...,,25.0,13.5,,,66.0,18.2,,,


## Labels

These are the labels for the census data.

In [45]:
labels = pd.read_csv('ACSDP5Y2021.DP03-Column-Metadata.csv')
labels.columns = labels.columns.str.lower().str.replace('\W+','_', regex=True)
labels.column_name = labels.column_name.str.lower()

labels = labels [labels.label.str.startswith('Estimate')]
labels = labels [~labels.label.str.contains('ratio')]

In [46]:
groups = labels.label.str.split('!', expand=True)[2].unique()
groups

array(['EMPLOYMENT STATUS', 'COMMUTING TO WORK', 'OCCUPATION', 'INDUSTRY',
       'CLASS OF WORKER',
       'INCOME AND BENEFITS (IN 2021 INFLATION-ADJUSTED DOLLARS)',
       'HEALTH INSURANCE COVERAGE',
       'PERCENTAGE OF FAMILIES AND PEOPLE WHOSE INCOME IN THE PAST 12 MONTHS IS BELOW THE POVERTY LEVEL'],
      dtype=object)

## Functions to combine everything

In [47]:
def cal_counts(label):
    #pull out estimate and moa
    name_est = label + 'e'
    name_moa = label + 'm'

    #find the corresponding title to census label
    string = labels [labels.column_name == name_est].label.iloc[0]
#     label_title = re.findall('!!([\w\s]+)$',string)[0]
    label_title = string.split('!!')[-1]

    #isolate just one indicator
    data = dataall[['geo_id', name_est, name_moa]].copy()

    #join districts and indicator together
    df = dist.merge(data, how='inner', right_on='geo_id', left_on='tract')
    df = df.replace(np.nan, 0)

    #get count percentages for each census tract per district
    df['tru_count'] = df[name_est].astype(float) * df.percent
    df['tru_error'] = df[name_moa].astype(float) * df.percent

    #for each council district, sum up counts, and calculate new moe
    council_counts = []
    total_counts = df.tru_count.sum()

    for x in df.council.unique():
        subset = df [df.council == x]

        count = round(subset.tru_count.sum(), 2)
        count_perc = round(count / total_counts, 2)

        error = round((subset.tru_error ** 2).sum()**.5, 2)
        error_perc = round(math.sqrt(sum((subset.tru_error / 1.645)**2)) / count, 2)

        council_counts.append([x, count, count_perc, error, error_perc])

    #convert to dataframe and format
    dff = pd.DataFrame(council_counts).sort_values(0)
    dff.columns = [label_title, 'counts', 'count_perc', 'moe', 'moe_perc']
    dff = dff.set_index(label_title, drop=True)
#     print(dff)

    return dff

In [48]:
def cal_district_numbers(labels_check):
    district_totals = pd.DataFrame(np.arange(11))
    totals = []

    for label in labels_check:
        dff = cal_counts(label)
        name = dff.index.name
        dff.columns = [name + ' counts', name + ' counts percent', name + ' moe', name + ' moa percent']
        dff = dff.reset_index(drop=True)

        district_totals = pd.concat([district_totals,dff],axis=1)

    district_totals = district_totals.drop(columns=0)
    return district_totals.T

## Now calculate! 

### Employment status

In [40]:
grouping = groups[0]

current_labels = labels [labels.label.str.contains(grouping)]
# current_labels = current_labels.iloc[:-13]
labels_check = current_labels.column_name.str[:-1]

for titles in current_labels.label:
    print(titles)

Estimate!!EMPLOYMENT STATUS!!Population 16 years and over
Estimate!!EMPLOYMENT STATUS!!Population 16 years and over!!In labor force
Estimate!!EMPLOYMENT STATUS!!Population 16 years and over!!In labor force!!Civilian labor force
Estimate!!EMPLOYMENT STATUS!!Population 16 years and over!!In labor force!!Civilian labor force!!Employed
Estimate!!EMPLOYMENT STATUS!!Population 16 years and over!!In labor force!!Civilian labor force!!Unemployed
Estimate!!EMPLOYMENT STATUS!!Population 16 years and over!!In labor force!!Armed Forces
Estimate!!EMPLOYMENT STATUS!!Population 16 years and over!!Not in labor force
Estimate!!EMPLOYMENT STATUS!!Civilian labor force
Estimate!!EMPLOYMENT STATUS!!Civilian labor force!!Unemployment Rate
Estimate!!EMPLOYMENT STATUS!!Females 16 years and over
Estimate!!EMPLOYMENT STATUS!!Females 16 years and over!!In labor force
Estimate!!EMPLOYMENT STATUS!!Females 16 years and over!!In labor force!!Civilian labor force
Estimate!!EMPLOYMENT STATUS!!Females 16 years and over

In [41]:
final = cal_district_numbers(labels_check)
final

  count_perc = round(count / total_counts, 2)
  error_perc = round(math.sqrt(sum((subset.tru_error / 1.645)**2)) / count, 2)


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10
Population 16 years and over counts,404666.26,102401.21,108638.52,108029.32,97527.89,94468.13,127932.21,116881.10,136397.09,124085.79,118005.49
Population 16 years and over counts percent,0.26,0.07,0.07,0.07,0.06,0.06,0.08,0.08,0.09,0.08,0.08
Population 16 years and over moe,7035.94,2954.23,3296.56,3082.81,2941.07,3227.91,3943.50,3261.94,3185.89,3199.61,2733.26
Population 16 years and over moa percent,0.01,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.01,0.02,0.01
In labor force counts,273048.17,64192.99,69007.17,63874.05,62205.32,51814.21,91292.00,74454.17,98828.80,86856.84,80986.29
...,...,...,...,...,...,...,...,...,...,...,...
Own children of the householder 6 to 17 years moa percent,0.02,0.05,0.04,0.04,0.04,0.04,0.04,0.05,0.04,0.04,0.04
All parents in family in labor force counts,67196.23,9485.10,15383.61,16343.47,17157.05,12818.81,20912.43,15388.19,13506.60,17558.81,15679.71
All parents in family in labor force counts percent,0.30,0.04,0.07,0.07,0.08,0.06,0.09,0.07,0.06,0.08,0.07
All parents in family in labor force moe,2922.22,1072.17,1372.49,1268.53,1437.53,1110.16,1726.57,1420.85,1206.03,1626.90,1356.95


In [43]:
with pd.ExcelWriter('sa2020_ccd.xlsx') as writer:
    final.to_excel(writer, sheet_name='employment_status')

### Race

In [11]:
grouping = groups[1]
print(grouping)

current_labels = labels [labels.label.str.contains(grouping)]
current_labels = current_labels.iloc[:-16]
labels_check = current_labels.column_name.str[:-1]

for titles in current_labels.label:
    print(titles)

RACE
Estimate!!RACE!!Total population
Estimate!!RACE!!Total population!!One race
Estimate!!RACE!!Total population!!Two or more races
Estimate!!RACE!!Total population!!One race
Estimate!!RACE!!Total population!!One race!!White
Estimate!!RACE!!Total population!!One race!!Black or African American
Estimate!!RACE!!Total population!!One race!!American Indian and Alaska Native
Estimate!!RACE!!Total population!!One race!!American Indian and Alaska Native!!Cherokee tribal grouping
Estimate!!RACE!!Total population!!One race!!American Indian and Alaska Native!!Chippewa tribal grouping
Estimate!!RACE!!Total population!!One race!!American Indian and Alaska Native!!Navajo tribal grouping
Estimate!!RACE!!Total population!!One race!!American Indian and Alaska Native!!Sioux tribal grouping
Estimate!!RACE!!Total population!!One race!!Asian
Estimate!!RACE!!Total population!!One race!!Asian!!Asian Indian
Estimate!!RACE!!Total population!!One race!!Asian!!Chinese
Estimate!!RACE!!Total population!!One race

In [12]:
final = cal_district_numbers(labels_check)
final

  error_perc = round(math.sqrt(sum((subset.tru_error / 1.645)**2)) / count, 2)
  error_perc = round(math.sqrt(sum((subset.tru_error / 1.645)**2)) / count, 2)
  error_perc = round(math.sqrt(sum((subset.tru_error / 1.645)**2)) / count, 2)
  error_perc = round(math.sqrt(sum((subset.tru_error / 1.645)**2)) / count, 2)
  error_perc = round(math.sqrt(sum((subset.tru_error / 1.645)**2)) / count, 2)
  error_perc = round(math.sqrt(sum((subset.tru_error / 1.645)**2)) / count, 2)


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10
Total population counts,538323.84,125014.68,143060.08,140873.10,132566.94,125056.03,167877.44,147037.45,166422.74,155155.67,149134.04
Total population counts percent,0.27,0.06,0.07,0.07,0.07,0.06,0.08,0.07,0.08,0.08,0.07
Total population moe,9093.05,3793.28,4486.09,4040.89,4164.44,4441.76,5753.03,4444.57,4049.76,4723.08,3877.11
Total population moa percent,0.01,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.01,0.02,0.02
One race counts,456572.04,95274.41,122339.36,120482.49,113456.78,98391.94,132112.92,118362.07,141119.77,131732.20,128582.99
...,...,...,...,...,...,...,...,...,...,...,...
White and Asian moa percent,0.10,0.27,0.29,0.41,0.32,3.24,0.18,0.21,0.17,0.19,0.19
Black or African American and American Indian and Alaska Native counts,528.29,26.99,111.54,107.48,53.26,15.00,191.12,40.84,60.91,184.74,25.83
Black or African American and American Indian and Alaska Native counts percent,0.39,0.02,0.08,0.08,0.04,0.01,0.14,0.03,0.05,0.14,0.02
Black or African American and American Indian and Alaska Native moe,251.62,80.29,98.27,142.17,88.41,86.12,166.35,85.74,107.72,170.02,84.82


In [13]:
with pd.ExcelWriter('sa2020_ccd.xlsx') as writer:
    final.to_excel(writer, sheet_name='employment_status')

### More Race

In [14]:
grouping = groups[2]
print(grouping)

current_labels = labels [labels.label.str.contains(grouping)]
# current_labels = current_labels.iloc[:-16]
labels_check = current_labels.column_name.str[:-1]

for titles in current_labels.label:
    print(titles)

Race alone or in combination with one or more other races
Estimate!!Race alone or in combination with one or more other races!!Total population
Estimate!!Race alone or in combination with one or more other races!!Total population!!White
Estimate!!Race alone or in combination with one or more other races!!Total population!!Black or African American
Estimate!!Race alone or in combination with one or more other races!!Total population!!American Indian and Alaska Native
Estimate!!Race alone or in combination with one or more other races!!Total population!!Asian
Estimate!!Race alone or in combination with one or more other races!!Total population!!Native Hawaiian and Other Pacific Islander
Estimate!!Race alone or in combination with one or more other races!!Total population!!Some other race


In [15]:
final = cal_district_numbers(labels_check)
final

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10
Total population counts,538323.84,125014.68,143060.08,140873.1,132566.94,125056.03,167877.44,147037.45,166422.74,155155.67,149134.04
Total population counts percent,0.27,0.06,0.07,0.07,0.07,0.06,0.08,0.07,0.08,0.08,0.07
Total population moe,9093.05,3793.28,4486.09,4040.89,4164.44,4441.76,5753.03,4444.57,4049.76,4723.08,3877.11
Total population moa percent,0.01,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.01,0.02,0.02
White counts,423107.5,109544.86,94313.67,109738.61,104863.11,109089.1,140467.02,121622.85,128371.34,131821.82,119569.12
White counts percent,0.27,0.07,0.06,0.07,0.07,0.07,0.09,0.08,0.08,0.08,0.08
White moe,8757.59,3809.03,4092.75,3803.89,4208.4,4322.78,5763.56,4009.74,3859.52,4472.51,3740.14
White moa percent,0.01,0.02,0.03,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02
Black or African American counts,66234.65,4295.24,32491.85,6662.21,7396.7,3314.27,14709.8,10863.63,13417.52,9943.92,16015.2
Black or African American counts percent,0.36,0.02,0.18,0.04,0.04,0.02,0.08,0.06,0.07,0.05,0.09


In [16]:
with pd.ExcelWriter('sa2020_ccd.xlsx') as writer:
    final.to_excel(writer, sheet_name='employment_status')

### Hispanic or Latino Race

In [17]:
grouping = groups[3]
print(grouping)

current_labels = labels [labels.label.str.contains(grouping)]
# current_labels = current_labels.iloc[:-16]
labels_check = current_labels.column_name.str[:-1]

for titles in current_labels.label:
    print(titles)

HISPANIC OR LATINO AND RACE
Estimate!!HISPANIC OR LATINO AND RACE!!Total population
Estimate!!HISPANIC OR LATINO AND RACE!!Total population!!Hispanic or Latino (of any race)
Estimate!!HISPANIC OR LATINO AND RACE!!Total population!!Hispanic or Latino (of any race)!!Mexican
Estimate!!HISPANIC OR LATINO AND RACE!!Total population!!Hispanic or Latino (of any race)!!Puerto Rican
Estimate!!HISPANIC OR LATINO AND RACE!!Total population!!Hispanic or Latino (of any race)!!Cuban
Estimate!!HISPANIC OR LATINO AND RACE!!Total population!!Hispanic or Latino (of any race)!!Other Hispanic or Latino
Estimate!!HISPANIC OR LATINO AND RACE!!Total population!!Not Hispanic or Latino
Estimate!!HISPANIC OR LATINO AND RACE!!Total population!!Not Hispanic or Latino!!White alone
Estimate!!HISPANIC OR LATINO AND RACE!!Total population!!Not Hispanic or Latino!!Black or African American alone
Estimate!!HISPANIC OR LATINO AND RACE!!Total population!!Not Hispanic or Latino!!American Indian and Alaska Native alone
Est

In [18]:
final = cal_district_numbers(labels_check)
final

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10
Total population counts,538323.84,125014.68,143060.08,140873.10,132566.94,125056.03,167877.44,147037.45,166422.74,155155.67,149134.04
Total population counts percent,0.27,0.06,0.07,0.07,0.07,0.06,0.08,0.07,0.08,0.08,0.07
Total population moe,9093.05,3793.28,4486.09,4040.89,4164.44,4441.76,5753.03,4444.57,4049.76,4723.08,3877.11
Total population moa percent,0.01,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.01,0.02,0.02
Hispanic or Latino (of any race) counts,264506.88,93544.34,82778.29,116404.14,108722.19,115811.24,120243.64,96942.67,80812.27,65148.44,67082.90
...,...,...,...,...,...,...,...,...,...,...,...
Two races including Some other race moa percent,0.23,0.40,0.32,0.50,0.51,1.01,0.45,0.40,0.32,0.30,0.33
"Two races excluding Some other race, and Three or more races counts",15631.85,965.03,2635.65,848.36,802.95,317.08,3919.99,1651.00,4261.30,4120.55,3809.25
"Two races excluding Some other race, and Three or more races counts percent",0.40,0.02,0.07,0.02,0.02,0.01,0.10,0.04,0.11,0.11,0.10
"Two races excluding Some other race, and Three or more races moe",1557.21,269.28,588.28,327.15,240.88,156.80,1065.79,367.64,790.26,751.58,740.74


In [19]:
with pd.ExcelWriter('sa2020_ccd.xlsx') as writer:
    final.to_excel(writer, sheet_name='employment_status')

### Housing Units

In [20]:
grouping = groups[4]
print(grouping)

current_labels = labels [labels.label.str.contains(grouping)]
# current_labels = current_labels.iloc[:-16]
labels_check = current_labels.column_name.str[:-1]

for titles in current_labels.label:
    print(titles)

Total housing units
Estimate!!Total housing units


In [21]:
final = cal_district_numbers(labels_check)
final

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10
Total housing units counts,188661.64,57123.02,57254.27,53818.39,43705.62,45540.96,61319.53,61352.55,77166.25,69283.6,62837.16
Total housing units counts percent,0.24,0.07,0.07,0.07,0.06,0.06,0.08,0.08,0.1,0.09,0.08
Total housing units moe,2501.64,1317.08,1360.16,1189.09,939.71,1163.11,1410.2,1362.88,1474.12,1445.48,1184.3
Total housing units moa percent,0.01,0.01,0.01,0.01,0.01,0.02,0.01,0.01,0.01,0.01,0.01


In [22]:
with pd.ExcelWriter('sa2020_ccd.xlsx') as writer:
    final.to_excel(writer, sheet_name='employment_status')

### Citizen, Voting Age

In [23]:
grouping = groups[5]
print(grouping)

current_labels = labels [labels.label.str.contains(grouping)]
current_labels = current_labels.iloc[:-2]
labels_check = current_labels.column_name.str[:-1]

for titles in current_labels.label:
    print(titles)

CITIZEN, VOTING AGE POPULATION
Estimate!!CITIZEN, VOTING AGE POPULATION!!Citizen, 18 and over population


In [24]:
final = cal_district_numbers(labels_check)
final

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10
"Citizen, 18 and over population counts",365619.97,83863.01,93394.9,93487.42,81995.24,75691.26,113452.6,103135.74,118668.7,112022.4,107375.75
"Citizen, 18 and over population counts percent",0.27,0.06,0.07,0.07,0.06,0.06,0.08,0.08,0.09,0.08,0.08
"Citizen, 18 and over population moe",6535.02,2665.77,3123.29,2839.13,2569.17,2925.69,3633.71,2973.91,2967.35,3042.5,2552.95
"Citizen, 18 and over population moa percent",0.01,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.01


In [25]:
with pd.ExcelWriter('sa2020_ccd.xlsx') as writer:
    final.to_excel(writer, sheet_name='employment_status')