# Census Data for San Antonio Districts

In [1]:
import pandas as pd
import numpy as np
import os
import re
import math

## District Data

This sheet tells which council district each census tract aligns with and what percentage of the tract falls inside that district. District 0 denotes census tracts outside of San Antonio.

In [2]:
# Tract-to-district crosswalk: keep only the three columns used downstream
# and give them short names.
crosswalk_columns = {
    'Council District': 'council',
    'Tract 2020 ID\n(Use with 2020 Decennial Census)': 'tract',
    'Percentage of Tract 2020 in Council District': 'percent',
}
dist = pd.read_excel('CMO_CDTractsPrecincts_220406.xlsx')
dist = dist[list(crosswalk_columns)].rename(columns=crosswalk_columns)

# The first row of the sheet is skipped (presumably a non-data row -- TODO confirm).
dist = dist.iloc[1:].reset_index(drop=True)

# Tract ids are kept as strings so they can be joined to the census geo ids.
dist['tract'] = dist['tract'].astype(str)

# Tracts flagged 'Outside CoSA' are assigned to district 0.
dist['council'] = np.where(dist['council'] == 'Outside CoSA', 0, dist['council'])
dist.head()

Unnamed: 0,council,tract,percent
0,1,110100,0.999534
1,1,110300,0.499153
2,1,110500,0.00015
3,1,110600,0.002611
4,1,110700,0.953113


## Census Data

This is the tract-level data pulled from the census (file `ACSDP5Y2021.DP05-Data.csv`).

In [3]:
# Census data table; read into `dataall` in the next cell.
filename = 'ACSDP5Y2021.DP05-Data.csv'
# Column metadata (column name -> label); read into `labels` further down.
filename2 = 'ACSDP5Y2021.DP05-Column-Metadata.csv'

In [4]:
# Read the census table, lower-case the column names, skip the first row
# (presumably the descriptive label row of the export -- TODO confirm) and
# drop the free-text 'name' column.
dataall = (
    pd.read_csv(filename)
    .rename(columns=str.lower)
    .iloc[1:]
    .reset_index(drop=True)
    .drop(columns='name')
)

# NOTE: annotation markers such as '(X)', '-' and '**' are NOT replaced here;
# the values stay exactly as read from the file.

# Keep only the last six characters of the geo id (the tract code) as a string.
dataall['geo_id'] = dataall['geo_id'].str[-6:].astype(str)

dataall.head()

Unnamed: 0,geo_id,name,dp05_0001e,dp05_0001ea,dp05_0001m,dp05_0001ma,dp05_0002e,dp05_0002m,dp05_0002ma,dp05_0002ea,...,dp05_0087pea,dp05_0088pe,dp05_0088pm,dp05_0088pma,dp05_0088pea,dp05_0089pe,dp05_0089pm,dp05_0089pma,dp05_0089pea,unnamed: 714
0,110100,"Census Tract 1101, Bexar County, Texas",2934,,565,,1698,365,,,...,,53.2,8.7,,,46.8,8.7,,,
1,110300,"Census Tract 1103, Bexar County, Texas",2930,,652,,1444,456,,,...,,49.8,6.8,,,50.2,6.8,,,
2,110500,"Census Tract 1105, Bexar County, Texas",2201,,309,,1030,198,,,...,,39.2,6.8,,,60.8,6.8,,,
3,110600,"Census Tract 1106, Bexar County, Texas",5384,,1620,,4117,1585,,,...,,77.6,8.5,,,22.4,8.5,,,
4,110700,"Census Tract 1107, Bexar County, Texas",982,,246,,525,148,,,...,,52.8,6.9,,,47.2,6.9,,,


## Labels

These are the labels for the census data.

In [5]:
# Column metadata: one row per census column with its human-readable label.
labels = pd.read_csv(filename2)

# Normalise the header names: lower-case, runs of non-word characters -> '_'.
# The pattern is a raw string so that '\W' is not parsed as an (invalid)
# string escape sequence, which newer Python versions warn about.
labels.columns = labels.columns.str.lower().str.replace(r'\W+', '_', regex=True)
labels.column_name = labels.column_name.str.lower()

# Keep only estimate columns, and drop the ratio indicators.
labels = labels[labels.label.str.startswith('Estimate')]
labels = labels[~labels.label.str.contains('ratio')]

# Labels look like 'Estimate!!GROUP!!...'. Splitting on a single '!' turns each
# '!!' separator into an empty field, so field 2 is the group name.
groups = labels.label.str.split('!', expand=True)[2].unique()
groups

array(['SEX AND AGE', 'RACE',
       'Race alone or in combination with one or more other races',
       'HISPANIC OR LATINO AND RACE', 'Total housing units',
       'CITIZEN, VOTING AGE POPULATION'], dtype=object)

## Functions to combine everything

In [6]:
def cal_counts(label):
    """Aggregate one census indicator from tracts up to council districts.

    Reads three module-level frames: ``labels`` (column metadata),
    ``dataall`` (tract-level census values) and ``dist`` (share of each tract
    that lies in each council district).

    Parameters
    ----------
    label : str
        Census column stem without its suffix (e.g. ``'dp05_0001'``).
        ``label + 'e'`` is the estimate column, ``label + 'm'`` its margin of
        error.

    Returns
    -------
    pandas.DataFrame
        One row per council district, sorted by district. The index holds the
        district and is named after the indicator title. Columns:

        * ``counts``     - sum of tract estimates weighted by tract share.
        * ``count_perc`` - district count divided by the count over all rows.
        * ``moe``        - root of the summed squares of the weighted tract
          margins of error.
        * ``moe_perc``   - ``sqrt(sum((weighted MOE / 1.645)**2)) / counts``;
          1.645 presumably converts a 90% margin of error into a standard
          error, which would make this a relative standard error rather than
          a relative MOE -- TODO confirm the intended meaning.

        ``count_perc`` / ``moe_perc`` are NaN when their denominator is zero.

    Raises
    ------
    IndexError
        If no row of ``labels`` has ``column_name == label + 'e'``.
    KeyError
        If the estimate or margin-of-error column is missing from ``dataall``.
    """
    # Estimate and margin-of-error column names for this indicator.
    name_est = label + 'e'
    name_moa = label + 'm'

    # Title = everything after the third '!!'-separated field, or the last
    # field when the label has three fields or fewer.
    string = labels[labels.column_name == name_est].label.iloc[0]
    title_parts = string.split('!!')[3:]
    label_title = '!!'.join(title_parts) if title_parts else string.split('!!')[-1]

    # Isolate just this indicator.
    data = dataall[['geo_id', name_est, name_moa]].copy()

    # Census exports can carry non-numeric annotation markers ('(X)', '-',
    # '**', ...) in value columns. Coerce them to NaN so they are treated as 0
    # below, like any other missing value, instead of crashing the float cast.
    for column in (name_est, name_moa):
        data[column] = pd.to_numeric(data[column], errors='coerce')

    # Join districts and indicator together; missing values count as 0.
    df = dist.merge(data, how='inner', right_on='geo_id', left_on='tract')
    df = df.replace(np.nan, 0)

    # Weight each tract's estimate and MOE by the share of the tract that lies
    # in the district.
    df['tru_count'] = df[name_est].astype(float) * df.percent
    df['tru_error'] = df[name_moa].astype(float) * df.percent

    council_counts = []
    total_counts = df.tru_count.sum()
    nan = float('nan')

    # For each council district, sum the counts and combine the MOEs.
    for x in df.council.unique():
        subset = df[df.council == x]

        count = round(subset.tru_count.sum(), 2)
        # Guard the ratios: a zero denominator would raise or yield inf/NaN
        # with a runtime warning, depending on the dtype of the inputs.
        count_perc = round(count / total_counts, 2) if total_counts else nan

        error = round((subset.tru_error ** 2).sum() ** .5, 2)
        if count:
            error_perc = round(math.sqrt(sum((subset.tru_error / 1.645) ** 2)) / count, 2)
        else:
            error_perc = nan

        council_counts.append([x, count, count_perc, error, error_perc])

    # Convert to a dataframe. Explicit positional columns keep the sort below
    # valid even when the merge produced no rows at all.
    dff = pd.DataFrame(council_counts, columns=range(5)).sort_values(0)
    dff.columns = [label_title, 'counts', 'count_perc', 'moe', 'moe_perc']
    dff = dff.set_index(label_title, drop=True)

    return dff

In [7]:
def cal_district_numbers(labels_check):
    """Build a metrics-by-district table for several census indicators.

    Calls ``cal_counts`` once per label and places the results side by side,
    aligned on the council district, then transposes the table.

    Parameters
    ----------
    labels_check : iterable of str
        Census column stems (without the trailing ``'e'``/``'m'`` suffix).

    Returns
    -------
    pandas.DataFrame
        Rows are ``'<title> counts'``, ``'<title> counts percent'``,
        ``'<title> moe'`` and ``'<title> moa percent'`` for every label, in
        input order; columns are the council districts reported by
        ``cal_counts``. An empty DataFrame when ``labels_check`` is empty.
    """
    frames = []

    for label in labels_check:
        dff = cal_counts(label)
        name = dff.index.name
        # The 'moa percent' spelling is kept on purpose so that existing row
        # labels (and anything exported from them) stay unchanged.
        dff.columns = [name + ' counts', name + ' counts percent', name + ' moe', name + ' moa percent']
        # Keep the district ids as the index (only the index *name* is
        # dropped) so frames are aligned by district rather than by row
        # position, and no fixed number of districts is assumed.
        frames.append(dff.rename_axis(None))

    if not frames:
        return pd.DataFrame()

    # Single concat instead of growing a frame inside the loop.
    return pd.concat(frames, axis=1).T

## Now calculate! 

### Total Population

In [8]:
grouping = groups[0]

# Only the first label matching the group is kept for this section.
current_labels = labels.loc[labels.label.str.contains(grouping)].iloc[:1]
# Drop the last character of each column name to get the column stem.
labels_check = current_labels.column_name.str[:-1]

# Show which labels were selected.
for titles in current_labels.label:
    print(titles)

Estimate!!SEX AND AGE!!Total population


In [9]:
# Aggregate the selected label(s) to council districts and display the
# transposed table returned by cal_district_numbers.
final0 = cal_district_numbers(labels_check)
final0

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10
Total population counts,538323.84,125014.68,143060.08,140873.1,132566.94,125056.03,167877.44,147037.45,166422.74,155155.67,149134.04
Total population counts percent,0.27,0.06,0.07,0.07,0.07,0.06,0.08,0.07,0.08,0.08,0.07
Total population moe,9093.05,3793.28,4486.09,4040.89,4164.44,4441.76,5753.03,4444.57,4049.76,4723.08,3877.11
Total population moa percent,0.01,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.01,0.02,0.02


### Sex and Age

In [10]:
grouping = groups[0]

# From the labels matching the group, skip the first one and the last 13.
current_labels = labels.loc[labels.label.str.contains(grouping)].iloc[1:-13]
# Drop the last character of each column name to get the column stem.
labels_check = current_labels.column_name.str[:-1]

# Show which labels were selected.
for titles in current_labels.label:
    print(titles)

Estimate!!SEX AND AGE!!Total population!!Male
Estimate!!SEX AND AGE!!Total population!!Female
Estimate!!SEX AND AGE!!Total population!!Under 5 years
Estimate!!SEX AND AGE!!Total population!!5 to 9 years
Estimate!!SEX AND AGE!!Total population!!10 to 14 years
Estimate!!SEX AND AGE!!Total population!!15 to 19 years
Estimate!!SEX AND AGE!!Total population!!20 to 24 years
Estimate!!SEX AND AGE!!Total population!!25 to 34 years
Estimate!!SEX AND AGE!!Total population!!35 to 44 years
Estimate!!SEX AND AGE!!Total population!!45 to 54 years
Estimate!!SEX AND AGE!!Total population!!55 to 59 years
Estimate!!SEX AND AGE!!Total population!!60 to 64 years
Estimate!!SEX AND AGE!!Total population!!65 to 74 years
Estimate!!SEX AND AGE!!Total population!!75 to 84 years
Estimate!!SEX AND AGE!!Total population!!85 years and over


In [11]:
# Aggregate the selected labels to council districts and display the
# transposed table returned by cal_district_numbers.
final1 = cal_district_numbers(labels_check)
final1

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10
Male counts,269224.97,60939.87,71401.0,70799.65,65484.7,63334.33,82438.15,71437.55,84107.49,76641.17,71491.14
Male counts percent,0.27,0.06,0.07,0.07,0.07,0.06,0.08,0.07,0.09,0.08,0.07
Male moe,5418.28,2149.34,2609.73,2556.41,2364.47,2863.35,3359.64,2643.22,2766.16,2934.49,2168.79
Male moa percent,0.01,0.02,0.02,0.02,0.02,0.03,0.02,0.02,0.02,0.02,0.02
Female counts,269098.87,64074.81,71659.08,70073.46,67082.24,61721.7,85439.29,75599.9,82315.26,78514.5,77642.9
Female counts percent,0.27,0.06,0.07,0.07,0.07,0.06,0.09,0.08,0.08,0.08,0.08
Female moe,5298.0,2404.66,2629.87,2228.66,2492.2,2406.98,3332.97,2515.49,2434.23,2654.38,2522.2
Female moa percent,0.01,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02
Under 5 years counts,38539.69,7610.08,11313.28,9199.65,9717.95,9483.11,12491.07,9346.64,10367.4,8217.69,9974.42
Under 5 years counts percent,0.28,0.06,0.08,0.07,0.07,0.07,0.09,0.07,0.08,0.06,0.07


### Race

In [12]:
grouping = groups[1]
print(grouping)

# Select labels whose group segment (the field right after 'Estimate!!') is
# exactly this group. A plain substring search for 'RACE' would also pick up
# the 'HISPANIC OR LATINO AND RACE' labels; the former hard-coded
# `iloc[:-16]` presumably existed to cut those extra rows off again --
# verify that the printed list below is unchanged.
current_labels = labels[labels.label.str.split('!!').str[1] == grouping]
# Skip the first label of the group.
current_labels = current_labels.iloc[1:]
# Drop the last character of each column name to get the column stem.
labels_check = current_labels.column_name.str[:-1]

# Show which labels were selected.
for titles in current_labels.label:
    print(titles)

RACE
Estimate!!RACE!!Total population!!One race
Estimate!!RACE!!Total population!!Two or more races
Estimate!!RACE!!Total population!!One race
Estimate!!RACE!!Total population!!One race!!White
Estimate!!RACE!!Total population!!One race!!Black or African American
Estimate!!RACE!!Total population!!One race!!American Indian and Alaska Native
Estimate!!RACE!!Total population!!One race!!American Indian and Alaska Native!!Cherokee tribal grouping
Estimate!!RACE!!Total population!!One race!!American Indian and Alaska Native!!Chippewa tribal grouping
Estimate!!RACE!!Total population!!One race!!American Indian and Alaska Native!!Navajo tribal grouping
Estimate!!RACE!!Total population!!One race!!American Indian and Alaska Native!!Sioux tribal grouping
Estimate!!RACE!!Total population!!One race!!Asian
Estimate!!RACE!!Total population!!One race!!Asian!!Asian Indian
Estimate!!RACE!!Total population!!One race!!Asian!!Chinese
Estimate!!RACE!!Total population!!One race!!Asian!!Filipino
Estimate!!RACE!

In [13]:
# Aggregate the selected labels to council districts and display the
# transposed table returned by cal_district_numbers.
final2 = cal_district_numbers(labels_check)
final2



Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10
One race counts,456572.04,95274.41,122339.36,120482.49,113456.78,98391.94,132112.92,118362.07,141119.77,131732.20,128582.99
One race counts percent,0.28,0.06,0.07,0.07,0.07,0.06,0.08,0.07,0.09,0.08,0.08
One race moe,8024.58,2944.94,3750.48,3824.06,3692.82,3469.65,4638.05,3745.41,3589.11,3938.01,3501.92
One race moa percent,0.01,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02
Two or more races counts,81751.80,29740.26,20720.71,20390.61,19110.16,26664.09,35764.51,28675.38,25302.97,23423.46,20551.05
...,...,...,...,...,...,...,...,...,...,...,...
Two or more races!!White and Asian moa percent,0.10,0.27,0.29,0.41,0.32,3.24,0.18,0.21,0.17,0.19,0.19
Two or more races!!Black or African American and American Indian and Alaska Native counts,528.29,26.99,111.54,107.48,53.26,15.00,191.12,40.84,60.91,184.74,25.83
Two or more races!!Black or African American and American Indian and Alaska Native counts percent,0.39,0.02,0.08,0.08,0.04,0.01,0.14,0.03,0.05,0.14,0.02
Two or more races!!Black or African American and American Indian and Alaska Native moe,251.62,80.29,98.27,142.17,88.41,86.12,166.35,85.74,107.72,170.02,84.82


### More Race

In [14]:
grouping = groups[2]
print(grouping)

# From the labels matching the group, skip the first one.
current_labels = labels.loc[labels.label.str.contains(grouping)].iloc[1:]
# Drop the last character of each column name to get the column stem.
labels_check = current_labels.column_name.str[:-1]

# Show which labels were selected.
for titles in current_labels.label:
    print(titles)

Race alone or in combination with one or more other races
Estimate!!Race alone or in combination with one or more other races!!Total population!!White
Estimate!!Race alone or in combination with one or more other races!!Total population!!Black or African American
Estimate!!Race alone or in combination with one or more other races!!Total population!!American Indian and Alaska Native
Estimate!!Race alone or in combination with one or more other races!!Total population!!Asian
Estimate!!Race alone or in combination with one or more other races!!Total population!!Native Hawaiian and Other Pacific Islander
Estimate!!Race alone or in combination with one or more other races!!Total population!!Some other race


In [15]:
# Aggregate the selected labels to council districts and display the
# transposed table returned by cal_district_numbers.
final3 = cal_district_numbers(labels_check)
final3

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10
White counts,423107.5,109544.86,94313.67,109738.61,104863.11,109089.1,140467.02,121622.85,128371.34,131821.82,119569.12
White counts percent,0.27,0.07,0.06,0.07,0.07,0.07,0.09,0.08,0.08,0.08,0.08
White moe,8757.59,3809.03,4092.75,3803.89,4208.4,4322.78,5763.56,4009.74,3859.52,4472.51,3740.14
White moa percent,0.01,0.02,0.03,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02
Black or African American counts,66234.65,4295.24,32491.85,6662.21,7396.7,3314.27,14709.8,10863.63,13417.52,9943.92,16015.2
Black or African American counts percent,0.36,0.02,0.18,0.04,0.04,0.02,0.08,0.06,0.07,0.05,0.09
Black or African American moe,3361.08,700.91,2385.98,1041.52,915.63,858.9,1915.42,1986.32,1480.36,1240.83,1599.97
Black or African American moa percent,0.03,0.1,0.04,0.1,0.08,0.16,0.08,0.11,0.07,0.08,0.06
American Indian and Alaska Native counts,10718.6,2612.54,2721.61,2624.15,2722.3,2197.94,3729.09,2668.43,3195.37,2162.37,2815.59
American Indian and Alaska Native counts percent,0.28,0.07,0.07,0.07,0.07,0.06,0.1,0.07,0.08,0.06,0.07


### Hispanic or Latino Race

In [16]:
grouping = groups[3]
print(grouping)

# From the labels matching the group, skip the first one.
current_labels = labels.loc[labels.label.str.contains(grouping)].iloc[1:]
# Drop the last character of each column name to get the column stem.
labels_check = current_labels.column_name.str[:-1]

# Show which labels were selected.
for titles in current_labels.label:
    print(titles)

HISPANIC OR LATINO AND RACE
Estimate!!HISPANIC OR LATINO AND RACE!!Total population!!Hispanic or Latino (of any race)
Estimate!!HISPANIC OR LATINO AND RACE!!Total population!!Hispanic or Latino (of any race)!!Mexican
Estimate!!HISPANIC OR LATINO AND RACE!!Total population!!Hispanic or Latino (of any race)!!Puerto Rican
Estimate!!HISPANIC OR LATINO AND RACE!!Total population!!Hispanic or Latino (of any race)!!Cuban
Estimate!!HISPANIC OR LATINO AND RACE!!Total population!!Hispanic or Latino (of any race)!!Other Hispanic or Latino
Estimate!!HISPANIC OR LATINO AND RACE!!Total population!!Not Hispanic or Latino
Estimate!!HISPANIC OR LATINO AND RACE!!Total population!!Not Hispanic or Latino!!White alone
Estimate!!HISPANIC OR LATINO AND RACE!!Total population!!Not Hispanic or Latino!!Black or African American alone
Estimate!!HISPANIC OR LATINO AND RACE!!Total population!!Not Hispanic or Latino!!American Indian and Alaska Native alone
Estimate!!HISPANIC OR LATINO AND RACE!!Total population!!No

In [17]:
# Aggregate the selected labels to council districts and display the
# transposed table returned by cal_district_numbers.
final4 = cal_district_numbers(labels_check)
final4

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10
Hispanic or Latino (of any race) counts,264506.88,93544.34,82778.29,116404.14,108722.19,115811.24,120243.64,96942.67,80812.27,65148.44,67082.9
Hispanic or Latino (of any race) counts percent,0.22,0.08,0.07,0.1,0.09,0.1,0.1,0.08,0.07,0.05,0.06
Hispanic or Latino (of any race) moe,6952.41,3574.49,3846.32,3847.78,3948.13,4166.89,5489.76,3484.99,3243.88,3939.98,3096.7
Hispanic or Latino (of any race) moa percent,0.02,0.02,0.03,0.02,0.02,0.02,0.03,0.02,0.02,0.04,0.03
Hispanic or Latino (of any race)!!Mexican counts,223862.62,82935.34,71315.48,102771.06,92908.81,106012.01,101433.19,83720.03,67160.08,54654.58,54655.8
Hispanic or Latino (of any race)!!Mexican counts percent,0.21,0.08,0.07,0.1,0.09,0.1,0.1,0.08,0.06,0.05,0.05
Hispanic or Latino (of any race)!!Mexican moe,6674.63,3439.15,3783.61,3709.41,3578.53,4160.57,5093.05,3335.86,3084.04,3904.78,2912.41
Hispanic or Latino (of any race)!!Mexican moa percent,0.02,0.03,0.03,0.02,0.02,0.02,0.03,0.02,0.03,0.04,0.03
Hispanic or Latino (of any race)!!Puerto Rican counts,10342.35,1243.15,2799.97,1008.99,1068.42,408.17,3356.06,1972.53,2147.8,2996.63,3291.93
Hispanic or Latino (of any race)!!Puerto Rican counts percent,0.34,0.04,0.09,0.03,0.03,0.01,0.11,0.06,0.07,0.1,0.11


### Housing Units

In [29]:
grouping = groups[4]
# Group name followed by one blank line.
print(grouping, end='\n\n')

# Every label matching the group is kept for this section.
current_labels = labels.loc[labels.label.str.contains(grouping)]
# Drop the last character of each column name to get the column stem.
labels_check = current_labels.column_name.str[:-1]

# Show which labels were selected.
for titles in current_labels.label:
    print(titles)

Total housing units

Estimate!!Total housing units


In [19]:
# Aggregate the selected label(s) to council districts and display the
# transposed table returned by cal_district_numbers.
final5 = cal_district_numbers(labels_check)
final5

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10
Total housing units counts,188661.64,57123.02,57254.27,53818.39,43705.62,45540.96,61319.53,61352.55,77166.25,69283.6,62837.16
Total housing units counts percent,0.24,0.07,0.07,0.07,0.06,0.06,0.08,0.08,0.1,0.09,0.08
Total housing units moe,2501.64,1317.08,1360.16,1189.09,939.71,1163.11,1410.2,1362.88,1474.12,1445.48,1184.3
Total housing units moa percent,0.01,0.01,0.01,0.01,0.01,0.02,0.01,0.01,0.01,0.01,0.01


### Citizen, Voting Age

In [20]:
grouping = groups[5]
print(grouping)

# From the labels matching the group, leave out the last two.
current_labels = labels.loc[labels.label.str.contains(grouping)].iloc[:-2]
# Drop the last character of each column name to get the column stem.
labels_check = current_labels.column_name.str[:-1]

# Show which labels were selected.
for titles in current_labels.label:
    print(titles)

CITIZEN, VOTING AGE POPULATION
Estimate!!CITIZEN, VOTING AGE POPULATION!!Citizen, 18 and over population


In [21]:
# Aggregate the selected label(s) to council districts and display the
# transposed table returned by cal_district_numbers.
final6 = cal_district_numbers(labels_check)
final6

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10
"Citizen, 18 and over population counts",365619.97,83863.01,93394.9,93487.42,81995.24,75691.26,113452.6,103135.74,118668.7,112022.4,107375.75
"Citizen, 18 and over population counts percent",0.27,0.06,0.07,0.07,0.06,0.06,0.08,0.08,0.09,0.08,0.08
"Citizen, 18 and over population moe",6535.02,2665.77,3123.29,2839.13,2569.17,2925.69,3633.71,2973.91,2967.35,3042.5,2552.95
"Citizen, 18 and over population moa percent",0.01,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.01


In [28]:
# Write every result table to its own sheet of one workbook.
with pd.ExcelWriter('sa2020_ccd_demo.xlsx') as writer:
    # Sheets are named after the census groups; the long groups[2] name is
    # cut to its first 30 characters (Excel caps sheet-name length).
    sheet_names = [
        'Total Population',
        groups[0],
        groups[1],
        groups[2][:30],
        groups[3],
        groups[4],
        groups[5],
    ]
    result_frames = [final0, final1, final2, final3, final4, final5, final6]

    for sheet_name, frame in zip(sheet_names, result_frames):
        frame.to_excel(writer, sheet_name=sheet_name)