# Census Data for San Antonio Districts

In [1]:
import pandas as pd
import numpy as np
import os
import re
import math

## District Data

This sheet maps each census tract to the council district(s) it overlaps and gives the share of the tract (as a fraction between 0 and 1) that falls inside each district. District 0 denotes census tracts outside of San Antonio.

In [2]:
# Load the tract / council-district crosswalk, keeping only the three columns used below.
dist = (
    pd.read_excel('CMO_CDTractsPrecincts_220406.xlsx')
    .loc[:, ['Council District',
             'Tract 2020 ID\n(Use with 2020 Decennial Census)',
             'Percentage of Tract 2020 in Council District']]
    .set_axis(['council', 'tract', 'percent'], axis=1)
    # discard the first row of the sheet and renumber from zero
    .iloc[1:]
    .reset_index(drop=True)
    # tract ids are compared as strings when joined to the census data
    .astype({'tract': str})
    # tracts flagged 'Outside CoSA' are assigned to district 0
    .assign(council=lambda d: np.where(d.council == 'Outside CoSA', 0, d.council))
)
dist.head()

Unnamed: 0,council,tract,percent
0,1,110100,0.999534
1,1,110300,0.499153
2,1,110500,0.00015
3,1,110600,0.002611
4,1,110700,0.953113


## Demographic Data

This is the full tract-level demographic table pulled from the census (the `ACSDP5Y2021.DP05` data file), one row per census tract.

In [3]:
# Read the census extract, lower-case the header and discard the first data row.
# No placeholder clean-up is applied here: a step that would replace '(X)', '-' and
# '**' with NaN and then drop all-empty columns was left disabled.
dataall = (
    pd.read_csv('ACSDP5Y2021.DP05-Data.csv')
    .rename(columns=str.lower)
    .iloc[1:]
    .reset_index(drop=True)
    # keep only the last six characters of geo_id, as a string, so it can be
    # joined against the tract ids of the district sheet
    .assign(geo_id=lambda d: d.geo_id.str[-6:].astype(str))
)

dataall.head()

Unnamed: 0,geo_id,name,dp05_0001e,dp05_0001ea,dp05_0001m,dp05_0001ma,dp05_0002e,dp05_0002m,dp05_0002ma,dp05_0002ea,...,dp05_0087pea,dp05_0088pe,dp05_0088pm,dp05_0088pma,dp05_0088pea,dp05_0089pe,dp05_0089pm,dp05_0089pma,dp05_0089pea,unnamed: 714
0,110100,"Census Tract 1101, Bexar County, Texas",2934,,565,,1698,365,,,...,,53.2,8.7,,,46.8,8.7,,,
1,110300,"Census Tract 1103, Bexar County, Texas",2930,,652,,1444,456,,,...,,49.8,6.8,,,50.2,6.8,,,
2,110500,"Census Tract 1105, Bexar County, Texas",2201,,309,,1030,198,,,...,,39.2,6.8,,,60.8,6.8,,,
3,110600,"Census Tract 1106, Bexar County, Texas",5384,,1620,,4117,1585,,,...,,77.6,8.5,,,22.4,8.5,,,
4,110700,"Census Tract 1107, Bexar County, Texas",982,,246,,525,148,,,...,,52.8,6.9,,,47.2,6.9,,,


## Labels

These are the descriptive labels for the census data columns. Only the estimate columns are kept, and ratio measures are excluded.

In [4]:
# Column metadata: one row per census column, with its code and descriptive label.
labels = pd.read_csv('ACSDP5Y2021.DP05-Column-Metadata.csv')
# Normalise the header to lower snake_case. The pattern is a raw string because
# '\W' inside a plain literal is an invalid escape sequence.
labels.columns = labels.columns.str.lower().str.replace(r'\W+', '_', regex=True)
# Column codes are lower-cased to match the lower-cased header of the data table.
labels.column_name = labels.column_name.str.lower()

# Keep estimate columns only, then drop ratio measures. na=False treats a missing
# label as "no match" instead of producing an unusable NaN mask; 'ratio' is matched
# as plain text.
labels = labels[labels.label.str.startswith('Estimate', na=False)]
labels = labels[~labels.label.str.contains('ratio', regex=False, na=False)]

## Combine it all together

In [12]:
def cal_counts(label, labels_df=None, data_df=None, dist_df=None):
    """Apportion one census indicator to council districts.

    Each tract's estimate and margin of error are multiplied by the share of the
    tract that lies in a district. Per district, the weighted estimates are summed
    and the weighted margins are combined as the square root of the sum of squares.

    Parameters
    ----------
    label : str
        Census column stem without its final suffix letter (e.g. ``'dp05_0001'``).
        ``label + 'e'`` is read as the estimate column and ``label + 'm'`` as the
        margin-of-error column.
    labels_df : pandas.DataFrame, optional
        Label metadata with ``column_name`` and ``label`` columns. Defaults to the
        notebook-level ``labels`` frame.
    data_df : pandas.DataFrame, optional
        Tract data with ``geo_id`` plus the two indicator columns. Defaults to the
        notebook-level ``dataall`` frame.
    dist_df : pandas.DataFrame, optional
        Crosswalk with ``council``, ``tract`` and ``percent`` columns. Defaults to
        the notebook-level ``dist`` frame.

    Returns
    -------
    pandas.DataFrame
        One row per council district, sorted by district id. The index holds the
        district ids and is named after the indicator title (the text after the
        last ``'!!'`` of the label). Columns: ``counts``, ``count_perc`` (district
        count / total count), ``moe`` and ``moe_perc`` (district margin / overall
        margin). All values are rounded to two decimals.

    Raises
    ------
    IndexError
        If the label metadata has no row for ``label + 'e'``.
    """
    # Fall back to the notebook globals only when no frame is passed in, so the
    # function can also be exercised on explicit inputs.
    labels_df = labels if labels_df is None else labels_df
    data_df = dataall if data_df is None else data_df
    dist_df = dist if dist_df is None else dist_df

    # column codes of the estimate and of its margin of error
    name_est = label + 'e'
    name_moa = label + 'm'

    # find the descriptive title that belongs to the census column code
    matches = labels_df.loc[labels_df.column_name == name_est, 'label']
    if matches.empty:
        raise IndexError(f'no label metadata found for column {name_est!r}')
    # Titles may contain punctuation such as parentheses or commas, so take
    # everything after the last '!!' separator instead of matching word characters.
    label_title = matches.iloc[0].rsplit('!!', 1)[-1]

    # isolate just one indicator and attach it to every tract/district pairing
    data = data_df[['geo_id', name_est, name_moa]]
    df = dist_df.merge(data, how='inner', right_on='geo_id', left_on='tract')
    df = df.fillna(0)

    # weight each tract's estimate and margin by the share of the tract in the district
    share = df['percent'].astype(float)
    df['tru_count'] = df[name_est].astype(float) * share
    df['tru_error'] = df[name_moa].astype(float) * share
    df['tru_error_sq'] = df['tru_error'] ** 2

    # totals over all districts, used as denominators below
    total_counts = df['tru_count'].sum()
    total_errors = df['tru_error_sq'].sum() ** .5

    # per district: sum the counts, combine the margins as root-sum-of-squares
    by_council = df.groupby('council', sort=True)
    counts = by_council['tru_count'].sum().round(2)
    moe = by_council['tru_error_sq'].sum().pow(.5).round(2)

    # NOTE(review): moe_perc divides a district margin by the overall margin; these
    # ratios do not sum to 1 across districts.
    dff = pd.DataFrame({
        'counts': counts,
        'count_perc': (counts / total_counts).round(2),
        'moe': moe,
        'moe_perc': (moe / total_errors).round(2),
    })
    # rebuild the index from plain values so its dtype is inferred from the ids
    dff.index = pd.Index(dff.index.tolist(), name=label_title)

    return dff

In [6]:
# Restrict the label metadata to one grouping of indicators.
grouping = 'SEX AND AGE'
# Number of rows cut from the end of the grouping.
# NOTE(review): presumably these are the summary bands that overlap the age
# brackets kept below — confirm against the metadata file.
N_TRAILING_LABELS_DROPPED = 13

# NOTE: this cell overwrites `labels`, so it is not idempotent: running it a second
# time trims a further 13 rows. Re-run the Labels cell before re-running this one.
# regex=False matches the grouping name as plain text rather than as a pattern.
labels = labels[labels.label.str.contains(grouping, regex=False)]
labels = labels.iloc[:-N_TRAILING_LABELS_DROPPED]
# Column codes with the last character removed: the stems that cal_counts takes.
labels_check = labels.column_name.str[:-1]

# list the indicators that will be processed
for label in labels.label:
    print(label)

Estimate!!SEX AND AGE!!Total population
Estimate!!SEX AND AGE!!Total population!!Male
Estimate!!SEX AND AGE!!Total population!!Female
Estimate!!SEX AND AGE!!Total population!!Under 5 years
Estimate!!SEX AND AGE!!Total population!!5 to 9 years
Estimate!!SEX AND AGE!!Total population!!10 to 14 years
Estimate!!SEX AND AGE!!Total population!!15 to 19 years
Estimate!!SEX AND AGE!!Total population!!20 to 24 years
Estimate!!SEX AND AGE!!Total population!!25 to 34 years
Estimate!!SEX AND AGE!!Total population!!35 to 44 years
Estimate!!SEX AND AGE!!Total population!!45 to 54 years
Estimate!!SEX AND AGE!!Total population!!55 to 59 years
Estimate!!SEX AND AGE!!Total population!!60 to 64 years
Estimate!!SEX AND AGE!!Total population!!65 to 74 years
Estimate!!SEX AND AGE!!Total population!!75 to 84 years
Estimate!!SEX AND AGE!!Total population!!85 years and over


In [14]:
# Build one wide table: a row per council district, four columns per indicator.
district_frames = []

for label in labels_check:
    dff = cal_counts(label)

    # prefix the four result columns with the indicator title
    name = dff.index.name
    # NOTE(review): 'moa percent' looks like a typo for 'moe percent'. It is left
    # unchanged because these headers end up in the exported workbook.
    dff.columns = [name + ' counts', name + ' counts percent', name + ' moe', name + ' moa percent']
    # Keep the council district id as the (unnamed) index so the frames are joined
    # by district rather than by row position.
    dff = dff.rename_axis(None)

    district_frames.append(dff)

# Concatenate once at the end instead of growing the frame inside the loop.
district_totals = pd.concat(district_frames, axis=1) if district_frames else pd.DataFrame()

In [15]:
# Inspect the frame left over from the final iteration of the loop above
# (i.e. the last indicator that was processed).
dff

Unnamed: 0,85 years and over counts,85 years and over counts percent,85 years and over moe,85 years and over moa percent
0,5495.86,0.2,690.83,0.46
1,2440.87,0.09,404.75,0.27
2,1486.36,0.05,233.34,0.16
3,1892.45,0.07,355.55,0.24
4,1401.12,0.05,288.08,0.19
5,2356.97,0.09,405.54,0.27
6,1423.56,0.05,356.83,0.24
7,3394.51,0.13,684.98,0.46
8,2270.17,0.08,464.81,0.31
9,2159.46,0.08,366.01,0.25


In [16]:
# df_total

In [19]:
# Wide result table: one row per council district, four columns per indicator.
district_totals

Unnamed: 0,Total population counts,Total population counts percent,Total population moe,Total population moa percent,Male counts,Male counts percent,Male moe,Male moa percent,Female counts,Female counts percent,...,65 to 74 years moe,65 to 74 years moa percent,75 to 84 years counts,75 to 84 years counts percent,75 to 84 years moe,75 to 84 years moa percent,85 years and over counts,85 years and over counts percent,85 years and over moe,85 years and over moa percent
0,538323.84,0.27,9093.05,0.55,269224.97,0.27,5418.28,0.54,269098.87,0.27,...,1665.13,0.54,15636.13,0.24,960.35,0.48,5495.86,0.2,690.83,0.46
1,125014.68,0.06,3793.28,0.23,60939.87,0.06,2149.34,0.21,64074.81,0.06,...,740.97,0.24,4875.83,0.07,569.78,0.28,2440.87,0.09,404.75,0.27
2,143060.08,0.07,4486.09,0.27,71401.0,0.07,2609.73,0.26,71659.08,0.07,...,782.79,0.26,4228.27,0.06,431.11,0.21,1486.36,0.05,233.34,0.16
3,140873.1,0.07,4040.89,0.24,70799.65,0.07,2556.41,0.26,70073.46,0.07,...,884.16,0.29,5309.37,0.08,561.12,0.28,1892.45,0.07,355.55,0.24
4,132566.94,0.07,4164.44,0.25,65484.7,0.07,2364.47,0.24,67082.24,0.07,...,625.07,0.2,4206.69,0.06,563.59,0.28,1401.12,0.05,288.08,0.19
5,125056.03,0.06,4441.76,0.27,63334.33,0.06,2863.35,0.29,61721.7,0.06,...,757.25,0.25,4808.97,0.07,442.28,0.22,2356.97,0.09,405.54,0.27
6,167877.44,0.08,5753.03,0.35,82438.15,0.08,3359.64,0.34,85439.29,0.09,...,979.26,0.32,4007.55,0.06,552.57,0.27,1423.56,0.05,356.83,0.24
7,147037.45,0.07,4444.57,0.27,71437.55,0.07,2643.22,0.26,75599.9,0.08,...,820.38,0.27,5668.48,0.09,522.94,0.26,3394.51,0.13,684.98,0.46
8,166422.74,0.08,4049.76,0.24,84107.49,0.09,2766.16,0.28,82315.26,0.08,...,831.62,0.27,5026.89,0.08,532.95,0.26,2270.17,0.08,464.81,0.31
9,155155.67,0.08,4723.08,0.28,76641.17,0.08,2934.49,0.29,78514.5,0.08,...,928.42,0.3,6647.89,0.1,763.68,0.38,2159.46,0.08,366.01,0.25


In [20]:
# Sanity check: print the column total of every 'percent' column next to its name.
is_percent = district_totals.columns.str.contains('percent')
for column_name in district_totals.columns[is_percent]:
    column_sum = district_totals[column_name].sum()
    print(round(column_sum, 2), column_name)

0.98 Total population counts percent
3.18 Total population moa percent
0.99 Male counts percent
3.19 Male moa percent
1.01 Female counts percent
3.18 Female moa percent
1.0 Under 5 years counts percent
3.19 Under 5 years moa percent
1.01 5 to 9 years counts percent
3.13 5 to 9 years moa percent
1.0 10 to 14 years counts percent
3.14 10 to 14 years moa percent
1.01 15 to 19 years counts percent
3.12 15 to 19 years moa percent
1.0 20 to 24 years counts percent
3.13 20 to 24 years moa percent
0.99 25 to 34 years counts percent
3.19 25 to 34 years moa percent
1.01 35 to 44 years counts percent
3.17 35 to 44 years moa percent
1.01 45 to 54 years counts percent
3.16 45 to 54 years moa percent
1.0 55 to 59 years counts percent
3.2 55 to 59 years moa percent
1.0 60 to 64 years counts percent
3.11 60 to 64 years moa percent
1.01 65 to 74 years counts percent
3.18 65 to 74 years moa percent
1.0 75 to 84 years counts percent
3.21 75 to 84 years moa percent
0.99 85 years and over counts percent
3.

In [22]:
# Transposed view: one row per indicator column, one column per council district.
district_totals.T

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10
Total population counts,538323.84,125014.68,143060.08,140873.10,132566.94,125056.03,167877.44,147037.45,166422.74,155155.67,149134.04
Total population counts percent,0.27,0.06,0.07,0.07,0.07,0.06,0.08,0.07,0.08,0.08,0.07
Total population moe,9093.05,3793.28,4486.09,4040.89,4164.44,4441.76,5753.03,4444.57,4049.76,4723.08,3877.11
Total population moa percent,0.55,0.23,0.27,0.24,0.25,0.27,0.35,0.27,0.24,0.28,0.23
Male counts,269224.97,60939.87,71401.00,70799.65,65484.70,63334.33,82438.15,71437.55,84107.49,76641.17,71491.14
...,...,...,...,...,...,...,...,...,...,...,...
75 to 84 years moa percent,0.48,0.28,0.21,0.28,0.28,0.22,0.27,0.26,0.26,0.38,0.29
85 years and over counts,5495.86,2440.87,1486.36,1892.45,1401.12,2356.97,1423.56,3394.51,2270.17,2159.46,2735.68
85 years and over counts percent,0.20,0.09,0.05,0.07,0.05,0.09,0.05,0.13,0.08,0.08,0.10
85 years and over moe,690.83,404.75,233.34,355.55,288.08,405.54,356.83,684.98,464.81,366.01,459.62


In [24]:
# Export the transposed table (indicator columns as rows, districts as columns) to Excel.
district_totals.T.to_excel('CCD_test2.xlsx')