forked from alanyuma/waedd-material-design
-
Notifications
You must be signed in to change notification settings - Fork 0
/
section_5.py
183 lines (152 loc) · 7.01 KB
/
section_5.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
"""
section_5.py
Written by: Aaron Finocchiaro
Gathers data for section 5 from the US Census Bureau and the Bureau of Labor Statistics
and fills a Jinja template to update the HTML document.
Usage:
python section_5.py
Adding more data:
- To add a new source (so a new place where data needs to be requested from), add it
to the data request section.
- To add a new data point to the context_dict to be referenced in the jinja template,
just add a section to this script to pull that data out from the dataframe and add
it to the context_dict. Be sure to follow the sections and formatting to keep it easy
to follow.
"""
import calendar
import datetime
import locale
import pandas as pd
from jinja2 import FileSystemLoader, Environment
from bls_data.bls import BlsData
from pyCensus.censusdata import censusData
# constants

# Switch the whole process to the user's default locale settings.
locale.setlocale(locale.LC_ALL, '')

# Region name -> area; used below as the divisor for population density.
# NOTE(review): the unit is not stated in this file (presumably square
# miles) -- confirm. The commented-out city entries are currently disabled.
area_dict = {
    # 'Kingman city, Arizona' : 2133,
    # 'Lake Havasu City city, Arizona' : 2396,
    # 'Bullhead City city, Arizona' : 2591,
    'Yuma County, Arizona': 5519,
    'La Paz County, Arizona': 4514,
}

# Everything the Jinja template receives is collected in this dict.
context_dict = {}
def comma_separated(number: int) -> str:
    """
    Custom Jinja filter that renders a number with commas as thousands separators.

    The "," format type is used instead of the locale-aware "n" type so the
    result always contains commas, whatever locale the process runs under
    (with "n", a C/POSIX locale produces no separators at all and other
    locales may produce a different separator character).

    Args:
        - number; int passed from jinja template
    returns str, e.g. 1234567 -> "1,234,567"
    """
    return f"{number:,}"
#####
#### Data Requests ####
#####

# Census Bureau request: the whole DP03 group from the 2019 ACS 5-year
# profile, for counties 027 and 012 in state 04.
county_econ_data = censusData(
    ['acs', 'acs5', 'profile'],
    2019,
    {'get': "group(DP03)", 'in': "state:04", 'for': "county:027,012"},
)

# Same dataset and geography, but only the NAME and DP05_0001E columns.
county_pop_data = censusData(
    ['acs', 'acs5', 'profile'],
    2019,
    {'get': "NAME,DP05_0001E", 'in': "state:04", 'for': "county:027,012"},
)

# BLS request: three series, from ten years ago through the current year.
this_year = datetime.date.today().year
bls_employment_data = BlsData(
    ['LAUCN040120000000003', 'LAUCN040270000000003', 'LASST040000000000003'],
    this_year - 10,
    this_year,
)

# Cleaned dataframes to work with; the Census frames used below are indexed
# by their NAME column so rows can be looked up by region name.
clean_acs_df = county_econ_data.clean_df().set_index("NAME")
# NOTE(review): clean_county_acs_df does not appear to be used anywhere else
# in the visible script -- confirm before removing.
clean_county_acs_df = county_econ_data.clean_df()
clean_pop_df = county_pop_data.clean_df().set_index("NAME")
clean_bls_employment_df = bls_employment_data.clean_df()

# Expose the ACS survey year (mainly so the page can show which year the
# data is pertinent to).
context_dict['acs_year'] = county_econ_data.year
#####
#### Employment and Unemployment ####
#####

# Current employment data from the Census frame, one entry per row label.
context_dict['employment'] = dict(
    clean_acs_df['Estimate!!EMPLOYMENT STATUS!!Population 16 years and over!!In labor force!!Civilian labor force!!Employed']
)


def _month_year(period):
    """Format a '-'-separated label (year first, month number second) as '<Month name> <year>'."""
    parts = period.split('-')
    return f"{calendar.month_name[int(parts[1])]} {parts[0]}"


def _size_and_direction(diff):
    """Return [abs(diff), 'higher' | 'lower']; a difference of zero is reported as 'lower'."""
    return [abs(diff), 'higher' if diff > 0 else 'lower']


# Walk the BLS dataframe column by column and record the unemployment
# datapoints for each one under keys suffixed with the column name.
for col in clean_bls_employment_df:
    rates = clean_bls_employment_df[col]

    # Current unemployment: the last non-missing entry of the column.
    # Stored as [month-year, unemployment_percentage].
    current_valid = rates.last_valid_index()
    current_rate = rates[current_valid]
    context_dict[f"current_unemployment:{col}"] = [_month_year(current_valid), current_rate]

    # Point in time with the highest unemployment.
    # Stored as [month-year, unemployment_percentage].
    max_idx = rates.idxmax()
    peak_rate = rates[max_idx]
    context_dict[f"max_unemployment:{col}"] = [_month_year(max_idx), peak_rate]

    # Current value compared with the 'Arizona' column at the same index
    # label. Stored as [unemployment_percentage, (higher|lower)].
    state_diff = round(current_rate - clean_bls_employment_df['Arizona'][current_valid], 2)
    context_dict[f"current_unemployment_vs_AZ:{col}"] = _size_and_direction(state_diff)

    # Current value compared with the column's own peak.
    # Stored as [unemployment_percentage, (higher|lower)].
    peak_diff = round(current_rate - peak_rate, 2)
    context_dict[f"current_unemployment_vs_peak:{col}"] = _size_and_direction(peak_diff)
# Per-industry employment data. Keep only the industry percent columns, strip
# the shared label prefix so just the industry name remains, convert to
# numbers, and flip the frame so each region becomes a column. The three
# largest percentages per region are then stored with their industry names.
industry_prefix = 'Percent!!INDUSTRY!!Civilian employed population 16 years and over!!'
industry_df = (
    clean_acs_df
    .filter(regex=r'^Percent!!INDUSTRY!!Civilian employed population 16 years and over!!.*')
    .rename(lambda label: label[len(industry_prefix):], axis=1)
    .apply(pd.to_numeric)
    .transpose()
)
context_dict.update({
    f"top-industries:{region_name}": dict(industry_df[region_name].nlargest(3))
    for region_name in industry_df.columns
})
#####
#### Income data ####
#####

# NOTE(review): the column labels below embed "2019"; presumably they have to
# change together with the ACS year requested above -- confirm.

# Per capita income for each region.
context_dict['per_capita_income'] = dict(
    clean_acs_df['Estimate!!INCOME AND BENEFITS (IN 2019 INFLATION-ADJUSTED DOLLARS)!!Per capita income (dollars)']
)

# Households making between $15k and $50k per year for each region: the sum
# of the three household-income percent brackets that cover that range.
pct_income_benefits = 'Percent!!INCOME AND BENEFITS (IN 2019 INFLATION-ADJUSTED DOLLARS)!!'
bracket_labels = ("$15,000 to $24,999", "$25,000 to $34,999", "$35,000 to $49,999")
low_pct, mid_pct, high_pct = (
    pd.to_numeric(clean_acs_df[f"{pct_income_benefits}Total households!!{label}"])
    for label in bracket_labels
)
context_dict['pct_hh_between_15_50'] = dict(round(low_pct + mid_pct + high_pct, 2))
#####
#### Population and household data ####
#####

# Total households per region.
total_households_df = clean_acs_df["Estimate!!INCOME AND BENEFITS (IN 2019 INFLATION-ADJUSTED DOLLARS)!!Total households"]
context_dict['total_households'] = dict(total_households_df)

# Total population per region.
population = clean_pop_df['Estimate!!SEX AND AGE!!Total population']
context_dict['population'] = dict(population)

# Population density: population divided by the area listed in area_dict,
# rounded to two decimals, stored under one key per region.
for region, area in area_dict.items():
    context_dict[f"pop_density:{region}"] = round(pd.to_numeric(population.loc[region]) / area, 2)

# Average household size: population divided by total households.
context_dict['avg_hh_size'] = dict(
    round(pd.to_numeric(population) / pd.to_numeric(total_households_df), 2)
)

# Poverty rate in the last 12 months (all people).
context_dict['poverty_rate'] = dict(
    clean_acs_df["Percent!!PERCENTAGE OF FAMILIES AND PEOPLE WHOSE INCOME IN THE PAST 12 MONTHS IS BELOW THE POVERTY LEVEL!!All people"]
)
# Prepare the Jinja environment: templates are loaded from the "templates"
# directory and the comma_separated filter is made available to them.
file_loader = FileSystemLoader('templates')
env = Environment(loader=file_loader)
env.filters['comma_separated'] = comma_separated
template = env.get_template("workforce-development.html.jinja")

# Render BEFORE opening the output file. Opening with mode 'w' truncates the
# file immediately, so rendering inside the `with` block would leave an empty
# HTML document behind whenever the template raises.
rendered_html = template.render(context_dict=context_dict)

# Write the rendered document.
with open("workforce-development.html", 'w', encoding='utf-8') as output_html:
    output_html.write(rendered_html)