# Census Data for San Antonio Districts

In [1]:
import pandas as pd
import numpy as np
import re

from ccd_functions import read_distict, cal_counts, cal_district_numbers

## District Data

This sheet shows which district each census tract aligns with and what percentage of the district is included. District 0 represents census tracts outside of San Antonio.

In [2]:
# Load the district/tract table through the project helper
# (the helper is spelled `read_distict` in ccd_functions).
dist = read_distict()
# Preview the first rows of the table.
dist.head()

Unnamed: 0,council,tract,percent
0,1.0,192000,0.221309
1,1.0,191900,4.9e-05
2,1.0,192300,0.185373
3,1.0,192100,0.572719
4,1.0,191304,0.786238


## Census Data

This is all the data pulled from the census. 

In [3]:
# Input files for the census pull; both are later passed to pd.read_csv.
filename = 'ACSDP5Y2021.DP05-Data.csv'  # data table, loaded into `dataall`
filename2 = 'ACSDP5Y2021.DP05-Column-Metadata.csv'  # column labels, loaded into `labels`

In [4]:
# Build the census frame in a single pass:
#   * lower-case every column name,
#   * drop the first data row (presumably the descriptive label row that
#     sits under the header in the export -- TODO confirm),
#   * keep only the last six characters of `geo_id`, as strings,
#   * remove the free-text `name` column.
# Placeholder strings such as '(X)', '-' and '**' are NOT converted to NaN here.
dataall = (
    pd.read_csv(filename)
    .rename(columns=str.lower)
    .iloc[1:]
    .reset_index(drop=True)
    .assign(geo_id=lambda frame: frame['geo_id'].str[-6:].astype(str))
    .drop(columns='name')
)

dataall.head()

Unnamed: 0,geo_id,dp05_0001e,dp05_0001ea,dp05_0001m,dp05_0001ma,dp05_0002e,dp05_0002m,dp05_0002ma,dp05_0002ea,dp05_0003e,...,dp05_0087pea,dp05_0088pe,dp05_0088pm,dp05_0088pma,dp05_0088pea,dp05_0089pe,dp05_0089pm,dp05_0089pma,dp05_0089pea,unnamed: 714
0,110100,2934,,565,,1698,365,,,1236,...,,53.2,8.7,,,46.8,8.7,,,
1,110300,2930,,652,,1444,456,,,1486,...,,49.8,6.8,,,50.2,6.8,,,
2,110500,2201,,309,,1030,198,,,1171,...,,39.2,6.8,,,60.8,6.8,,,
3,110600,5384,,1620,,4117,1585,,,1267,...,,77.6,8.5,,,22.4,8.5,,,
4,110700,982,,246,,525,148,,,457,...,,52.8,6.9,,,47.2,6.9,,,


## Labels

These are the labels for the census data.

In [5]:
# Load the column metadata that maps census column codes to readable labels.
labels = pd.read_csv(filename2)

# Normalise the header: lower-case, with every run of non-word characters
# collapsed to '_'. The pattern is a raw string because '\W' is not a valid
# Python escape sequence and triggers a warning in a normal string literal.
labels.columns = labels.columns.str.lower().str.replace(r'\W+', '_', regex=True)

# Lower-case the column codes so they line up with the lower-cased data header.
labels['column_name'] = labels['column_name'].str.lower()

# Keep only the "Estimate..." rows, then discard any label mentioning 'ratio'.
labels = labels[labels['label'].str.startswith('Estimate')]
labels = labels[~labels['label'].str.contains('ratio')]

# Labels look like 'Estimate!!GROUP!!...'; splitting on a single '!' leaves
# empty strings between the doubled marks, so the group name lands at index 2.
groups = labels['label'].str.split('!', expand=True)[2].unique()
groups

array(['SEX AND AGE', 'RACE',
       'Race alone or in combination with one or more other races',
       'HISPANIC OR LATINO AND RACE', 'Total housing units',
       'CITIZEN, VOTING AGE POPULATION'], dtype=object)

## Now combine and calculate! 

### Sex and Age

In [6]:
# First label group (printed below, followed by a blank line).
grouping = groups[0]
print(grouping, end='\n\n')

# Labels mentioning the group, minus the trailing 13 rows, so the selection
# ends at '85 years and over' (see the printed list).
current_labels = labels[labels['label'].str.contains(grouping)].iloc[:-13]
# Column codes without their final character (e.g. 'dp05_0001e' -> 'dp05_0001').
labels_check = current_labels['column_name'].str[:-1]

for title in current_labels['label']:
    print(title)

# Per-district figures for the selected columns, computed by the project helper.
final1 = cal_district_numbers(dist, dataall, labels, labels_check, 'DP05_0001E')
final1

SEX AND AGE

Estimate!!SEX AND AGE!!Total population
Estimate!!SEX AND AGE!!Total population!!Male
Estimate!!SEX AND AGE!!Total population!!Female
Estimate!!SEX AND AGE!!Total population!!Under 5 years
Estimate!!SEX AND AGE!!Total population!!5 to 9 years
Estimate!!SEX AND AGE!!Total population!!10 to 14 years
Estimate!!SEX AND AGE!!Total population!!15 to 19 years
Estimate!!SEX AND AGE!!Total population!!20 to 24 years
Estimate!!SEX AND AGE!!Total population!!25 to 34 years
Estimate!!SEX AND AGE!!Total population!!35 to 44 years
Estimate!!SEX AND AGE!!Total population!!45 to 54 years
Estimate!!SEX AND AGE!!Total population!!55 to 59 years
Estimate!!SEX AND AGE!!Total population!!60 to 64 years
Estimate!!SEX AND AGE!!Total population!!65 to 74 years
Estimate!!SEX AND AGE!!Total population!!75 to 84 years
Estimate!!SEX AND AGE!!Total population!!85 years and over


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10
Total population counts,484420.83,137461.90,130625.49,140848.63,132734.78,141064.66,129776.89,152535.62,145193.17,120314.07,141125.95
Total population counts percent,0.26,0.07,0.07,0.08,0.07,0.08,0.07,0.08,0.08,0.06,0.08
Total population moe,8608.46,3922.65,4282.35,4040.20,4164.93,4677.30,4401.06,4572.60,3805.53,3340.46,3831.35
Total population moa percent,0.01,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02
Male counts,242448.52,66884.38,64660.66,70788.79,65588.17,71440.58,63237.06,74315.74,73678.74,58915.46,67451.91
...,...,...,...,...,...,...,...,...,...,...,...
75 to 84 years moa percent,0.04,0.07,0.06,0.06,0.08,0.05,0.09,0.05,0.07,0.08,0.07
85 years and over counts,5047.90,2856.40,1327.99,1891.75,1401.79,2604.57,1117.18,3337.94,1986.00,1436.80,2626.68
85 years and over counts percent,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00
85 years and over moe,676.09,427.42,208.76,355.48,288.01,422.32,286.34,642.64,454.51,276.44,457.30


### Race

In [7]:
# Second label group.
grouping = groups[1]
print(grouping)

# Labels mentioning the group, minus the trailing 16 rows.
# NOTE(review): presumably the trim removes 'HISPANIC OR LATINO AND RACE'
# labels, which also contain the text 'RACE' -- confirm.
current_labels = labels[labels['label'].str.contains(grouping)].iloc[:-16]
# Column codes without their final character.
labels_check = current_labels['column_name'].str[:-1]

for title in current_labels['label']:
    print(title)

# Per-district figures for the selected columns, computed by the project helper.
# NOTE(review): the saved output shows a warning on an `error_perc` computation,
# presumably raised inside the helper -- confirm.
final2 = cal_district_numbers(dist, dataall, labels, labels_check, 'DP05_0033E')
final2

RACE
Estimate!!RACE!!Total population
Estimate!!RACE!!Total population!!One race
Estimate!!RACE!!Total population!!Two or more races
Estimate!!RACE!!Total population!!One race
Estimate!!RACE!!Total population!!One race!!White
Estimate!!RACE!!Total population!!One race!!Black or African American
Estimate!!RACE!!Total population!!One race!!American Indian and Alaska Native
Estimate!!RACE!!Total population!!One race!!American Indian and Alaska Native!!Cherokee tribal grouping
Estimate!!RACE!!Total population!!One race!!American Indian and Alaska Native!!Chippewa tribal grouping
Estimate!!RACE!!Total population!!One race!!American Indian and Alaska Native!!Navajo tribal grouping
Estimate!!RACE!!Total population!!One race!!American Indian and Alaska Native!!Sioux tribal grouping
Estimate!!RACE!!Total population!!One race!!Asian
Estimate!!RACE!!Total population!!One race!!Asian!!Asian Indian
Estimate!!RACE!!Total population!!One race!!Asian!!Chinese
Estimate!!RACE!!Total population!!One race

  error_perc = round(math.sqrt(sum((subset.tru_error / 1.645)**2)) / count, 2)


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10
Total population counts,484420.83,137461.90,130625.49,140848.63,132734.78,141064.66,129776.89,152535.62,145193.17,120314.07,141125.95
Total population counts percent,0.26,0.07,0.07,0.08,0.07,0.08,0.07,0.08,0.08,0.06,0.08
Total population moe,8608.46,3922.65,4282.35,4040.20,4164.93,4677.30,4401.06,4572.60,3805.53,3340.46,3831.35
Total population moa percent,0.01,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02
One race counts,411420.97,106806.23,111762.96,120457.55,113582.90,110427.12,101370.31,123298.08,123151.80,102051.69,121739.40
...,...,...,...,...,...,...,...,...,...,...,...
Two or more races!!White and Asian moa percent,0.10,0.26,0.30,0.41,0.32,3.19,0.20,0.20,0.19,0.20,0.20
Two or more races!!Black or African American and American Indian and Alaska Native counts,481.89,26.98,97.77,107.49,53.30,15.02,158.23,40.86,61.11,184.74,25.64
Two or more races!!Black or African American and American Indian and Alaska Native counts percent,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00
Two or more races!!Black or African American and American Indian and Alaska Native moe,237.17,84.34,92.06,142.15,88.30,91.53,152.46,88.51,102.89,164.56,83.57


### More Race

In [8]:
# Third label group (printed below, followed by a blank line).
grouping = groups[2]
print(grouping, end='\n\n')

# Every label mentioning the group is used; no rows are trimmed here.
current_labels = labels[labels['label'].str.contains(grouping)]
# Column codes without their final character.
labels_check = current_labels['column_name'].str[:-1]

for title in current_labels['label']:
    print(title)

# Per-district figures for the selected columns, computed by the project helper.
final3 = cal_district_numbers(dist, dataall, labels, labels_check, 'DP05_0063E')
final3

Race alone or in combination with one or more other races

Estimate!!Race alone or in combination with one or more other races!!Total population
Estimate!!Race alone or in combination with one or more other races!!Total population!!White
Estimate!!Race alone or in combination with one or more other races!!Total population!!Black or African American
Estimate!!Race alone or in combination with one or more other races!!Total population!!American Indian and Alaska Native
Estimate!!Race alone or in combination with one or more other races!!Total population!!Asian
Estimate!!Race alone or in combination with one or more other races!!Total population!!Native Hawaiian and Other Pacific Islander
Estimate!!Race alone or in combination with one or more other races!!Total population!!Some other race


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10
Total population counts,484420.83,137461.9,130625.49,140848.63,132734.78,141064.66,129776.89,152535.62,145193.17,120314.07,141125.95
Total population counts percent,0.26,0.07,0.07,0.08,0.07,0.08,0.07,0.08,0.08,0.06,0.08
Total population moe,8608.46,3922.65,4282.35,4040.2,4164.93,4677.3,4401.06,4572.6,3805.53,3340.46,3831.35
Total population moa percent,0.01,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02
White counts,380492.05,119145.64,84315.21,109724.27,105008.08,123211.69,110177.31,126566.09,110726.58,101038.56,112853.54
White counts percent,0.21,0.06,0.05,0.06,0.06,0.07,0.06,0.07,0.06,0.05,0.06
White moe,8303.87,3852.14,3887.29,3803.39,4207.41,4575.62,4415.6,4196.36,3579.12,3154.69,3682.97
White moa percent,0.01,0.02,0.03,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02
Black or African American counts,59084.61,5721.92,30883.94,6660.39,7427.99,3832.17,10791.83,10956.99,12039.31,8175.25,15308.58
Black or African American counts percent,0.03,0.0,0.02,0.0,0.0,0.0,0.01,0.01,0.01,0.0,0.01


### Hispanic or Latino Race

In [9]:
# Fourth label group (printed below, followed by a blank line).
grouping = groups[3]
print(grouping, end='\n\n')

# Labels mentioning the group, skipping the first row and the last 10 rows,
# which leaves the 'Hispanic or Latino (of any race)' labels (see printed list).
current_labels = labels[labels['label'].str.contains(grouping)].iloc[1:-10]
# Column codes without their final character.
labels_check = current_labels['column_name'].str[:-1]

for title in current_labels['label']:
    print(title)

# Per-district figures for the selected columns, computed by the project helper.
final4 = cal_district_numbers(dist, dataall, labels, labels_check, 'DP05_0071E')
final4

HISPANIC OR LATINO AND RACE

Estimate!!HISPANIC OR LATINO AND RACE!!Total population!!Hispanic or Latino (of any race)
Estimate!!HISPANIC OR LATINO AND RACE!!Total population!!Hispanic or Latino (of any race)!!Mexican
Estimate!!HISPANIC OR LATINO AND RACE!!Total population!!Hispanic or Latino (of any race)!!Puerto Rican
Estimate!!HISPANIC OR LATINO AND RACE!!Total population!!Hispanic or Latino (of any race)!!Cuban
Estimate!!HISPANIC OR LATINO AND RACE!!Total population!!Hispanic or Latino (of any race)!!Other Hispanic or Latino


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10
Hispanic or Latino (of any race) counts,242281.66,98380.56,76262.95,116383.92,108774.86,130520.94,95530.87,96771.5,70080.04,48006.2,64506.51
Hispanic or Latino (of any race) counts percent,0.21,0.09,0.07,0.1,0.09,0.11,0.08,0.08,0.06,0.04,0.06
Hispanic or Latino (of any race) moe,6635.74,3624.54,3701.85,3847.16,3947.03,4417.7,4209.16,3532.91,3046.34,2641.81,3070.53
Hispanic or Latino (of any race) moa percent,0.02,0.02,0.03,0.02,0.02,0.02,0.03,0.02,0.03,0.03,0.03
Hispanic or Latino (of any race)!!Mexican counts,206026.31,86565.97,65659.69,102754.84,92940.85,119340.58,79478.8,82942.13,58478.93,39064.04,52413.86
Hispanic or Latino (of any race)!!Mexican counts percent,0.18,0.08,0.06,0.09,0.08,0.1,0.07,0.07,0.05,0.03,0.05
Hispanic or Latino (of any race)!!Mexican moe,6389.43,3490.41,3624.84,3708.79,3576.29,4406.67,3726.32,3319.73,2944.43,2591.16,2884.23
Hispanic or Latino (of any race)!!Mexican moa percent,0.02,0.02,0.03,0.02,0.02,0.02,0.03,0.02,0.03,0.04,0.03
Hispanic or Latino (of any race)!!Puerto Rican counts,9417.96,1504.9,2449.52,1009.12,1073.74,419.18,2717.08,2130.5,1836.71,2613.73,3173.55
Hispanic or Latino (of any race)!!Puerto Rican counts percent,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


### Not Hispanic or Latino Race

In [10]:
# Same label group as the previous section; a different slice of it is used.
grouping = groups[3]
print(grouping, end='\n\n')

# Labels mentioning the group, skipping the first 6 rows, so the selection
# starts at 'Not Hispanic or Latino' (see the printed list).
current_labels = labels[labels['label'].str.contains(grouping)].iloc[6:]
# Column codes without their final character.
labels_check = current_labels['column_name'].str[:-1]

for title in current_labels['label']:
    print(title)

# Per-district figures for the selected columns, computed by the project helper.
final5 = cal_district_numbers(dist, dataall, labels, labels_check, 'DP05_0076E')
final5

HISPANIC OR LATINO AND RACE

Estimate!!HISPANIC OR LATINO AND RACE!!Total population!!Not Hispanic or Latino
Estimate!!HISPANIC OR LATINO AND RACE!!Total population!!Not Hispanic or Latino!!White alone
Estimate!!HISPANIC OR LATINO AND RACE!!Total population!!Not Hispanic or Latino!!Black or African American alone
Estimate!!HISPANIC OR LATINO AND RACE!!Total population!!Not Hispanic or Latino!!American Indian and Alaska Native alone
Estimate!!HISPANIC OR LATINO AND RACE!!Total population!!Not Hispanic or Latino!!Asian alone
Estimate!!HISPANIC OR LATINO AND RACE!!Total population!!Not Hispanic or Latino!!Native Hawaiian and Other Pacific Islander alone
Estimate!!HISPANIC OR LATINO AND RACE!!Total population!!Not Hispanic or Latino!!Some other race alone
Estimate!!HISPANIC OR LATINO AND RACE!!Total population!!Not Hispanic or Latino!!Two or more races
Estimate!!HISPANIC OR LATINO AND RACE!!Total population!!Not Hispanic or Latino!!Two or more races!!Two races including Some other race
Est

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10
Not Hispanic or Latino counts,242139.17,39081.34,54362.54,24464.72,23959.92,10543.72,34246.02,55764.11,75113.14,72307.87,76619.45
Not Hispanic or Latino counts percent,0.34,0.06,0.08,0.03,0.03,0.01,0.05,0.08,0.11,0.1,0.11
Not Hispanic or Latino moe,6194.83,1828.1,2756.55,1850.89,1795.1,1602.03,2090.39,3225.0,2886.6,2332.26,2756.43
Not Hispanic or Latino moa percent,0.02,0.03,0.03,0.05,0.05,0.09,0.04,0.04,0.02,0.02,0.02
Not Hispanic or Latino!!White alone counts,166617.1,30403.41,23186.23,16718.65,15109.73,6718.59,21576.29,38613.07,47112.23,56090.23,56118.48
Not Hispanic or Latino!!White alone counts percent,0.24,0.04,0.03,0.02,0.02,0.01,0.03,0.05,0.07,0.08,0.08
Not Hispanic or Latino!!White alone moe,5289.2,1559.04,1746.09,1478.45,1551.59,1029.34,1634.21,2502.69,2134.3,1945.8,2458.73
Not Hispanic or Latino!!White alone moa percent,0.02,0.03,0.05,0.05,0.06,0.09,0.05,0.04,0.03,0.02,0.03
Not Hispanic or Latino!!Black or African American alone counts,43874.37,4291.26,25784.93,5204.54,5703.62,2936.88,6430.54,8222.17,9133.46,5047.22,11714.02
Not Hispanic or Latino!!Black or African American alone counts percent,0.06,0.01,0.04,0.01,0.01,0.0,0.01,0.01,0.01,0.01,0.02


### Housing Units

In [11]:
# Fifth label group (printed below, followed by a blank line).
grouping = groups[4]
print(grouping, end='\n\n')

# Every label mentioning the group is used; no rows are trimmed here.
current_labels = labels[labels['label'].str.contains(grouping)]
# Column codes without their final character.
labels_check = current_labels['column_name'].str[:-1]

for title in current_labels['label']:
    print(title)

# Per-district figures for the selected columns, computed by the project helper.
final6 = cal_district_numbers(dist, dataall, labels, labels_check, 'DP05_0086E')
final6

Total housing units

Estimate!!Total housing units


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10
Total housing units counts,169004.87,62543.29,51190.97,53805.84,43728.79,51617.79,46648.37,63340.62,68283.71,54064.33,58664.41
Total housing units counts percent,0.23,0.09,0.07,0.07,0.06,0.07,0.06,0.09,0.09,0.07,0.08
Total housing units moe,2357.91,1418.24,1164.17,1189.01,939.73,1208.81,1182.5,1426.58,1404.72,1189.99,1151.38
Total housing units moa percent,0.01,0.01,0.01,0.01,0.01,0.01,0.02,0.01,0.01,0.01,0.01


### Citizen, Voting Age

In [12]:
# Sixth label group (printed below, followed by a blank line).
grouping = groups[5]
print(grouping, end='\n\n')

# Every label mentioning the group is used; no rows are trimmed here.
current_labels = labels[labels['label'].str.contains(grouping)]
# Column codes without their final character.
labels_check = current_labels['column_name'].str[:-1]

for title in current_labels['label']:
    print(title)

# Per-district figures for the selected columns, computed by the project helper.
final7 = cal_district_numbers(dist, dataall, labels, labels_check, 'DP05_0087E')
final7

CITIZEN, VOTING AGE POPULATION

Estimate!!CITIZEN, VOTING AGE POPULATION!!Citizen, 18 and over population
Estimate!!CITIZEN, VOTING AGE POPULATION!!Citizen, 18 and over population!!Male
Estimate!!CITIZEN, VOTING AGE POPULATION!!Citizen, 18 and over population!!Female


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10
"Citizen, 18 and over population counts",329936.39,93199.18,84577.51,93468.52,82157.35,85127.68,88543.4,108491.83,103530.04,86383.81,101280.29
"Citizen, 18 and over population counts percent",0.26,0.07,0.07,0.07,0.07,0.07,0.07,0.09,0.08,0.07,0.08
"Citizen, 18 and over population moe",6243.25,2803.03,2903.5,2838.71,2570.36,3049.32,2838.2,3069.34,2833.1,2378.62,2518.59
"Citizen, 18 and over population moa percent",0.01,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02
Male counts,163395.52,43929.39,40479.47,45602.63,39127.92,41675.92,42727.34,52202.5,50858.14,41595.66,48791.5
Male counts percent,0.13,0.03,0.03,0.04,0.03,0.03,0.03,0.04,0.04,0.03,0.04
Male moe,3913.18,1670.49,1712.44,1826.92,1547.36,2205.46,1809.97,1966.68,1937.66,1430.0,1570.43
Male moa percent,0.01,0.02,0.03,0.02,0.02,0.03,0.03,0.02,0.02,0.02,0.02
Female counts,166540.87,49269.79,44098.04,47865.89,43029.43,43451.75,45816.06,56289.33,52671.91,44788.15,52488.79
Female counts percent,0.13,0.04,0.04,0.04,0.03,0.03,0.04,0.04,0.04,0.04,0.04


In [13]:
# One worksheet per result table, written in this order. Sheet names taken
# from `groups` are cut to 30 characters (presumably to stay within Excel's
# sheet-name length limit -- confirm); the two Hispanic tables get fixed names.
sheet_plan = [
    (groups[0][:30], final1),
    (groups[1][:30], final2),
    (groups[2][:30], final3),
    ('HISPANIC OR LATINO', final4),
    ('NOT HISPANIC OR LATINO', final5),
    (groups[4][:30], final6),
    (groups[5][:30], final7),
]

with pd.ExcelWriter('sa2020_ccd_demo_update.xlsx') as writer:
    for sheet_title, table in sheet_plan:
        table.to_excel(writer, sheet_name=sheet_title)