# Census Data for San Antonio Districts

In [1]:
import pandas as pd
import numpy as np
import os
import re
import math

## District Data

This sheet tells which district each census tract aligns with and what percentage of the district is included. District 0 are census tracts outside of San Antonio.

In [2]:
dist = pd.read_excel('CMO_CDTractsPrecincts_220406.xlsx')
dist = dist[['Council District',
             'Tract 2020 ID\n(Use with 2020 Decennial Census)',
             'Percentage of Tract 2020 in Council District']]
dist.columns = ['council','tract','percent']
dist = dist.iloc[1:].reset_index(drop=True)
dist.tract = dist.tract.astype(str)
dist.council = np.where(dist.council == 'Outside CoSA', 0, dist.council)
dist.head()

Unnamed: 0,council,tract,percent
0,1,110100,0.999534
1,1,110300,0.499153
2,1,110500,0.00015
3,1,110600,0.002611
4,1,110700,0.953113


## Demographic Data

This is all the demographic data pulled from the census. 

In [3]:
dataall = pd.read_csv('ACSDP5Y2021.DP05-Data.csv')
dataall.columns = dataall.columns.str.lower()

dataall = dataall.iloc[1:].reset_index(drop=True)
# dataall = dataall.replace('(X)', np.nan).replace('-',np.nan).replace('**',np.nan)
# dataall = dataall.dropna(axis=1, how='all')

dataall.geo_id = dataall.geo_id.str[-6:]
dataall.geo_id = dataall.geo_id.astype(str)

dataall.head()

Unnamed: 0,geo_id,name,dp05_0001e,dp05_0001ea,dp05_0001m,dp05_0001ma,dp05_0002e,dp05_0002m,dp05_0002ma,dp05_0002ea,...,dp05_0087pea,dp05_0088pe,dp05_0088pm,dp05_0088pma,dp05_0088pea,dp05_0089pe,dp05_0089pm,dp05_0089pma,dp05_0089pea,unnamed: 714
0,110100,"Census Tract 1101, Bexar County, Texas",2934,,565,,1698,365,,,...,,53.2,8.7,,,46.8,8.7,,,
1,110300,"Census Tract 1103, Bexar County, Texas",2930,,652,,1444,456,,,...,,49.8,6.8,,,50.2,6.8,,,
2,110500,"Census Tract 1105, Bexar County, Texas",2201,,309,,1030,198,,,...,,39.2,6.8,,,60.8,6.8,,,
3,110600,"Census Tract 1106, Bexar County, Texas",5384,,1620,,4117,1585,,,...,,77.6,8.5,,,22.4,8.5,,,
4,110700,"Census Tract 1107, Bexar County, Texas",982,,246,,525,148,,,...,,52.8,6.9,,,47.2,6.9,,,


## Labels

These are the labels for the census data.

In [141]:
labels = pd.read_csv('ACSDP5Y2021.DP05-Column-Metadata.csv')
labels.columns = labels.columns.str.lower().str.replace('\W+','_', regex=True)
labels.column_name = labels.column_name.str.lower()

labels = labels [labels.label.str.startswith('Estimate')]
labels = labels [~labels.label.str.contains('ratio')]

In [142]:
groups = labels.label.str.split('!', expand=True)[2].unique()
groups

array(['SEX AND AGE', 'RACE',
       'Race alone or in combination with one or more other races',
       'HISPANIC OR LATINO AND RACE', 'Total housing units',
       'CITIZEN, VOTING AGE POPULATION'], dtype=object)

## Functions to combine everything

In [143]:
def cal_counts(label):
    #pull out estimate and moa
    name_est = label + 'e'
    name_moa = label + 'm'

    #find the corresponding title to census label
    string = labels [labels.column_name == name_est].label.iloc[0]
    label_title = re.findall('!!([\w\s]+)$',string)[0]

    #isolate just one indicator
    data = dataall[['geo_id', name_est, name_moa]].copy()

    #join districts and indicator together
    df = dist.merge(data, how='inner', right_on='geo_id', left_on='tract')
    df = df.replace(np.nan, 0)

    #get count percentages for each census tract per district
    df['tru_count'] = df[name_est].astype(float) * df.percent
    df['tru_error'] = df[name_moa].astype(float) * df.percent

    #for each council district, sum up counts, and calculate new moe
    council_counts = []
    total_counts = df.tru_count.sum()

    for x in df.council.unique():
        subset = df [df.council == x]

        count = round(subset.tru_count.sum(), 2)
        count_perc = round(count / total_counts, 2)

        error = round((subset.tru_error ** 2).sum()**.5, 2)
        error_perc = round(math.sqrt(sum((subset.tru_error / 1.645)**2)) / count, 2)

        council_counts.append([x, count, count_perc, error, error_perc])

    #convert to dataframe and format
    dff = pd.DataFrame(council_counts).sort_values(0)
    dff.columns = [label_title, 'counts', 'count_perc', 'moe', 'moe_perc']
    dff = dff.set_index(label_title, drop=True)
    print(dff)

    return dff

In [144]:
def cal_district_numbers(labels_check):
    district_totals = pd.DataFrame(np.arange(11))
    totals = []

    for label in labels_check:
        dff = cal_counts(label)
        name = dff.index.name
        dff.columns = [name + ' counts', name + ' counts percent', name + ' moe', name + ' moa percent']
        dff = dff.reset_index(drop=True)

        district_totals = pd.concat([district_totals,dff],axis=1)

    district_totals = district_totals.drop(columns=0)
    return district_totals.T

## Now calculate! 

### Sex and Age

In [145]:
grouping = groups[0]

current_labels = labels [labels.label.str.contains(grouping)]
current_labels = current_labels.iloc[:-13]
labels_check = current_labels.column_name.str[:-1]

for titles in current_labels.label:
    print(titles)
    
cal_district_numbers(labels_check)

Estimate!!SEX AND AGE!!Total population
Estimate!!SEX AND AGE!!Total population!!Male
Estimate!!SEX AND AGE!!Total population!!Female
Estimate!!SEX AND AGE!!Total population!!Under 5 years
Estimate!!SEX AND AGE!!Total population!!5 to 9 years
Estimate!!SEX AND AGE!!Total population!!10 to 14 years
Estimate!!SEX AND AGE!!Total population!!15 to 19 years
Estimate!!SEX AND AGE!!Total population!!20 to 24 years
Estimate!!SEX AND AGE!!Total population!!25 to 34 years
Estimate!!SEX AND AGE!!Total population!!35 to 44 years
Estimate!!SEX AND AGE!!Total population!!45 to 54 years
Estimate!!SEX AND AGE!!Total population!!55 to 59 years
Estimate!!SEX AND AGE!!Total population!!60 to 64 years
Estimate!!SEX AND AGE!!Total population!!65 to 74 years
Estimate!!SEX AND AGE!!Total population!!75 to 84 years
Estimate!!SEX AND AGE!!Total population!!85 years and over
                     counts  count_perc      moe  moe_perc
Total population                                          
0                 53

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10
Total population counts,538323.84,125014.68,143060.08,140873.10,132566.94,125056.03,167877.44,147037.45,166422.74,155155.67,149134.04
Total population counts percent,0.27,0.06,0.07,0.07,0.07,0.06,0.08,0.07,0.08,0.08,0.07
Total population moe,9093.05,3793.28,4486.09,4040.89,4164.44,4441.76,5753.03,4444.57,4049.76,4723.08,3877.11
Total population moa percent,0.01,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.01,0.02,0.02
Male counts,269224.97,60939.87,71401.00,70799.65,65484.70,63334.33,82438.15,71437.55,84107.49,76641.17,71491.14
...,...,...,...,...,...,...,...,...,...,...,...
75 to 84 years moa percent,0.04,0.07,0.06,0.06,0.08,0.06,0.08,0.06,0.06,0.07,0.06
85 years and over counts,5495.86,2440.87,1486.36,1892.45,1401.12,2356.97,1423.56,3394.51,2270.17,2159.46,2735.68
85 years and over counts percent,0.20,0.09,0.05,0.07,0.05,0.09,0.05,0.13,0.08,0.08,0.10
85 years and over moe,690.83,404.75,233.34,355.55,288.08,405.54,356.83,684.98,464.81,366.01,459.62


In [146]:
# export

### Race

In [147]:
grouping = groups[1]
print(grouping)

current_labels = labels [labels.label.str.contains(grouping)]
current_labels = current_labels.iloc[:-16]
labels_check = current_labels.column_name.str[:-1]

for titles in current_labels.label:
    print(titles)
    
cal_district_numbers(labels_check)

RACE
Estimate!!RACE!!Total population
Estimate!!RACE!!Total population!!One race
Estimate!!RACE!!Total population!!Two or more races
Estimate!!RACE!!Total population!!One race
Estimate!!RACE!!Total population!!One race!!White
Estimate!!RACE!!Total population!!One race!!Black or African American
Estimate!!RACE!!Total population!!One race!!American Indian and Alaska Native
Estimate!!RACE!!Total population!!One race!!American Indian and Alaska Native!!Cherokee tribal grouping
Estimate!!RACE!!Total population!!One race!!American Indian and Alaska Native!!Chippewa tribal grouping
Estimate!!RACE!!Total population!!One race!!American Indian and Alaska Native!!Navajo tribal grouping
Estimate!!RACE!!Total population!!One race!!American Indian and Alaska Native!!Sioux tribal grouping
Estimate!!RACE!!Total population!!One race!!Asian
Estimate!!RACE!!Total population!!One race!!Asian!!Asian Indian
Estimate!!RACE!!Total population!!One race!!Asian!!Chinese
Estimate!!RACE!!Total population!!One race

10               1601.94        0.13  506.77      0.19
                                                    counts  count_perc  \
Black or African American and American Indian a...                       
0                                                   528.29        0.39   
1                                                    26.99        0.02   
2                                                   111.54        0.08   
3                                                   107.48        0.08   
4                                                    53.26        0.04   
5                                                    15.00        0.01   
6                                                   191.12        0.14   
7                                                    40.84        0.03   
8                                                    60.91        0.05   
9                                                   184.74        0.14   
10                                                   25.8

  error_perc = round(math.sqrt(sum((subset.tru_error / 1.645)**2)) / count, 2)
  error_perc = round(math.sqrt(sum((subset.tru_error / 1.645)**2)) / count, 2)
  error_perc = round(math.sqrt(sum((subset.tru_error / 1.645)**2)) / count, 2)
  error_perc = round(math.sqrt(sum((subset.tru_error / 1.645)**2)) / count, 2)
  error_perc = round(math.sqrt(sum((subset.tru_error / 1.645)**2)) / count, 2)
  error_perc = round(math.sqrt(sum((subset.tru_error / 1.645)**2)) / count, 2)


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10
Total population counts,538323.84,125014.68,143060.08,140873.10,132566.94,125056.03,167877.44,147037.45,166422.74,155155.67,149134.04
Total population counts percent,0.27,0.06,0.07,0.07,0.07,0.06,0.08,0.07,0.08,0.08,0.07
Total population moe,9093.05,3793.28,4486.09,4040.89,4164.44,4441.76,5753.03,4444.57,4049.76,4723.08,3877.11
Total population moa percent,0.01,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.01,0.02,0.02
One race counts,456572.04,95274.41,122339.36,120482.49,113456.78,98391.94,132112.92,118362.07,141119.77,131732.20,128582.99
...,...,...,...,...,...,...,...,...,...,...,...
White and Asian moa percent,0.10,0.27,0.29,0.41,0.32,3.24,0.18,0.21,0.17,0.19,0.19
Black or African American and American Indian and Alaska Native counts,528.29,26.99,111.54,107.48,53.26,15.00,191.12,40.84,60.91,184.74,25.83
Black or African American and American Indian and Alaska Native counts percent,0.39,0.02,0.08,0.08,0.04,0.01,0.14,0.03,0.05,0.14,0.02
Black or African American and American Indian and Alaska Native moe,251.62,80.29,98.27,142.17,88.41,86.12,166.35,85.74,107.72,170.02,84.82


In [148]:
groups

array(['SEX AND AGE', 'RACE',
       'Race alone or in combination with one or more other races',
       'HISPANIC OR LATINO AND RACE', 'Total housing units',
       'CITIZEN, VOTING AGE POPULATION'], dtype=object)

In [151]:
grouping = groups[2]
print(grouping)

current_labels = labels [labels.label.str.contains(grouping)]
current_labels = current_labels.iloc[:-16]
labels_check = current_labels.column_name.str[:-1]
print(labels_check)

for titles in current_labels.label:
    print(titles)
    
cal_district_numbers(labels_check)

Race alone or in combination with one or more other races
Series([], Name: column_name, dtype: object)


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10


In [152]:
labels.iloc[55:80]

Unnamed: 0,column_name,label
234,dp05_0059e,Estimate!!RACE!!Total population!!Two or more ...
238,dp05_0060e,Estimate!!RACE!!Total population!!Two or more ...
242,dp05_0061e,Estimate!!RACE!!Total population!!Two or more ...
246,dp05_0062e,Estimate!!RACE!!Total population!!Two or more ...
250,dp05_0063e,Estimate!!Race alone or in combination with on...
254,dp05_0064e,Estimate!!Race alone or in combination with on...
258,dp05_0065e,Estimate!!Race alone or in combination with on...
262,dp05_0066e,Estimate!!Race alone or in combination with on...
266,dp05_0067e,Estimate!!Race alone or in combination with on...
270,dp05_0068e,Estimate!!Race alone or in combination with on...


In [153]:
dataall['dp05_0063e']

0      2934
1      2930
2      2201
3      5384
4       982
       ... 
370       0
371    1182
372       0
373       9
374    1085
Name: dp05_0063e, Length: 375, dtype: object