# Examining Bloomington Census Populations

This file shows my work examining the Census Block Groups (CBGs) in Monroe County, IN.

It incorporates weekly place data from SafeGraph, which describes how many residents (i.e. devices) reside in a specific CBG at a given point in time.

I wanted to see the difference in population between a week in the summer and a week in the winter, when school is in session.  I arbitrarily chose the weeks of *07/22/2019* and *02/03/2020* for the comparison.

This also uses [GeoPandas](https://geopandas.org/) and census shape files to build the maps of the county and each CBG.

In [None]:
# EXAMPLES: http://andrewgaidus.com/Dot_Density_County_Maps/

import geojson
import pandas as pd
import altair as alt
import pprint
import json
import geopandas as gpd
import matplotlib.pyplot as plt

In [None]:
# Load the home-panel summary for the week of 07/22/2019, tag the device-count
# column with that week, and discard the date-range and state columns
devices_residing07222019 = (
    pd.read_csv('home_panel_data/2019-07-22-home-panel-summary.csv')
    .rename(columns={'number_devices_residing': 'devices07222019'})
    .drop(columns=['date_range_start', 'date_range_end', 'state'])
)

devices_residing07222019.tail()

In [None]:
# Load the home-panel summary for the week of 02/03/2020, tag the device-count
# column with that week, and discard the date-range and state columns
devices_residing02032020 = (
    pd.read_csv('home_panel_data/2020-02-03-home-panel-summary.csv')
    .rename(columns={'number_devices_residing': 'devices02032020'})
    .drop(columns=['date_range_start', 'date_range_end', 'state'])
)

devices_residing02032020.tail()

In [None]:
# Create a list of unique Census Block Groups for Bloomington, IN
bton_cbgs = pd.read_csv('cbgs_data/bloomington_cbgs.csv')
# keep='first' retains one row for each repeated CBG. keep=False would discard
# every CBG that appears more than once, silently removing it from the analysis.
bton_cbgs = bton_cbgs.drop_duplicates(subset='census_block_group', keep='first')
# Call count() so the cell shows per-column non-null counts instead of the
# repr of the bound method
bton_cbgs.count()

In [None]:
# Merge dataframes so we have one dataframe that includes summer and winter population,
# population change, and percent change.
# First restrict each week's counts to the Bloomington CBGs (inner joins).
filtered_2019_df = pd.merge(bton_cbgs, devices_residing07222019, on='census_block_group')
# Name the join key explicitly, like the other merges here, instead of letting
# pandas join on whatever column names the two frames happen to share.
filtered_2020_df = pd.merge(devices_residing02032020, bton_cbgs, how='inner', on='census_block_group')

# Left join: every CBG with a summer count is kept; a CBG with no winter row
# gets NaN for the winter count and therefore NaN for both change columns.
bton_devices_residing = pd.merge(filtered_2019_df, filtered_2020_df, how='left', on='census_block_group')

# Absolute change (winter minus summer) and the change as a percentage of the summer count
bton_devices_residing['pop_change'] = bton_devices_residing['devices02032020'] - bton_devices_residing['devices07222019']
bton_devices_residing['pop_pct_change'] = (bton_devices_residing['pop_change'] / bton_devices_residing['devices07222019'])*100

bton_devices_residing

In [None]:
# Chart the difference in population in an Altair scatter plot:
# summer device count on x, percent change to winter on y, absolute change as color
base_chart = alt.Chart(bton_devices_residing).mark_circle(size=60).encode(
    alt.X('devices07222019',
          axis=alt.Axis(title='Number of Resident Devices (Summer)')),
    alt.Y('pop_pct_change:Q',
          # The axis is fixed to 0-500%; clamp=True keeps points outside that
          # range on the chart by pinning them to the nearest edge
          scale=alt.Scale(
              domain=(0, 500),
              clamp=True
          ),
          axis=alt.Axis(
              title='Percent Population Change (Winter)'
          )
    ),
    # Color encodes the absolute change (pop_change), not the percentage,
    # so the legend title names that quantity
    color=alt.Color('pop_change:Q',
                    legend=alt.Legend(title='Population Change (Devices)')
                   ),
    tooltip=['pop_pct_change']
).interactive().properties(width=800)

base_chart

In [None]:
# One-row frame giving the corners of the highlighted rectangle:
# x from 0 to 260 devices, y from 0 to 100 percent
band_df = pd.DataFrame({'x_min': [0], 'x_max': [260], 'y_min': [0], 'y_max': [100]})

# Translucent rectangle drawn from those corners
band = (
    alt.Chart(band_df)
    .mark_rect(color='firebrick', opacity=0.3)
    .encode(x='x_min:Q', x2='x_max:Q', y='y_min:Q', y2='y_max:Q')
)

# Overlay the rectangle on the scatter plot
base_chart + band

In [None]:
# Bar chart with one bar per CBG showing its percent population change.
# The x axis is hidden, and each distinct percentage gets its own color (no legend).
alt.Chart(bton_devices_residing).mark_bar().encode(
    x=alt.X('census_block_group:N', axis=None),
    y=alt.Y(
        'pop_pct_change:Q',
        axis=alt.Axis(title='Percent Population Change (Summer to Winter)'),
    ),
    color=alt.Color('pop_pct_change:N', legend=None),
).properties(width=800)

In [None]:
# "Transient" CBGs: rows whose population grew by more than 100% from summer to winter
bton_transient_df = bton_devices_residing.loc[
    bton_devices_residing['pop_pct_change'].gt(100)
]
bton_transient_df

In [None]:
# Save the transient CBGs to CSV, then show the frame's (rows, columns) shape
bton_transient_df.to_csv('cbgs_data/bton_transient_cbgs.csv')
bton_transient_df.shape

In [None]:
# "Static" CBGs: rows whose percent change from summer to winter is 100% or less
# (this includes CBGs whose population shrank)
bton_static_df = bton_devices_residing.loc[
    bton_devices_residing['pop_pct_change'].le(100)
]
bton_static_df

In [None]:
# Save the static CBGs to CSV
bton_static_df.to_csv('cbgs_data/bton_static_cbgs.csv')

I now have two dataframes: one that includes the more transient census block groups (e.g. students) and one that is more static (e.g. year-round residents).

# Maps of Monroe County, IN
I'd like to create maps of Monroe County that show the percent change in population between summer (July 2019) and winter (February 2020).

In [None]:
# Choropleth tutorial followed here: https://medium.com/dataexplorations/creating-choropleth-maps-in-altair-eeb7085779a1
# Shapefile source: https://www.census.gov/cgi-bin/geo/shapefiles/index.php?year=2019&layergroup=Block+Groups
# Load the 2019 block-group shapefile into a GeoDataFrame and preview the first rows
gdf = gpd.read_file('census_indiana_shapefile/tl_2019_18_bg.shp')
gdf.head()

In [None]:
# Filter for only CBGs in Monroe County, IN (county FIPS code '105')
# .copy() makes the subset an independent GeoDataFrame, so the later column
# assignment on gdf does not act on a slice of the statewide frame
# (which can trigger pandas' SettingWithCopyWarning).
gdf = gdf[gdf.COUNTYFP == '105'].copy()
gdf.head()

In [None]:
# Show the (rows, columns) shape of the geo dataframe
gdf.shape

In [None]:
# Show each column's dtype (useful for checking GEOID's type before merging on it)
gdf.dtypes

In [None]:
# Quick outline plot of the geometries in the geo dataframe
gdf.plot()

In [None]:
# Need to convert the GEOID field in the geo dataframe to int64 so we can merge it with the other dataframe.
# Request 'int64' explicitly: plain `int` maps to a 32-bit integer on some
# platforms (e.g. Windows with NumPy < 2), which cannot hold a 12-digit
# block-group GEOID.
gdf['GEOID'] = gdf['GEOID'].astype('int64')

In [None]:
# Merge the bton_devices_residing dataframe with the geography dataframe.
# This is an inner join (the merge default), so only CBGs present in both frames remain.
gdf_merged = gdf.merge(bton_devices_residing, left_on='GEOID', right_on='census_block_group')
# Only the last expression of a cell is displayed, so show the frame once
gdf_merged

In [None]:
# Label each CBG: 'Transient' when its percent change exceeds 100, otherwise 'Static'
# (a missing percent change compares as False and is therefore labelled 'Static')
gdf_merged['cbg_type'] = (
    gdf_merged['pop_pct_change']
    .gt(100)
    .map({True: 'Transient', False: 'Static'})
)
gdf_merged.tail()

In [None]:
# Now build a choropleth of the geographic data colored by percentage of population change from summer to winter.
# legend=True adds the color bar; without it the colors cannot be read as values.
gdf_merged.plot(column='pop_pct_change', legend=True)

In [None]:
# And a choropleth of the different types of CBGs (transient vs. static),
# with a legend identifying which color is which type
gdf_merged.plot(column='cbg_type', figsize=(12, 15), legend=True)

In [None]:
# Prepare GeoJSON inputs for the Altair choropleth.
# Serialize both frames first: the base map uses every CBG in gdf (including
# those without device data), while the colored layer uses only the merged rows.
base_choro_json = json.loads(gdf.to_json())
data_choro_json = json.loads(gdf_merged.to_json())

# Wrap each feature list as inline Altair data
base_choro_data = alt.Data(values=base_choro_json['features'])
data_choro_data = alt.Data(values=data_choro_json['features'])

In [None]:
# Create Base Layer: black outlines of every CBG, with the GEOID shown on hover.
# The title spells out CBG as "Census Block Groups", matching the
# census_block_group field used throughout the data.
base = alt.Chart(base_choro_data, title='Monroe County Census Block Groups').mark_geoshape(
    stroke='black',
    strokeWidth=1
).encode(
    tooltip='properties.GEOID:N'
).properties(
    width=800,
    height=800
)

base

In [None]:
# Choropleth layer: fill each CBG by its percent population change (viridis scale)
# and show that value on hover
choro = alt.Chart(data_choro_data).mark_geoshape().encode(
    color=alt.Color(
        'properties.pop_pct_change:Q',
        scale=alt.Scale(scheme='viridis'),
        title="Percentage Population Change",
    ),
    tooltip='properties.pop_pct_change:Q',
)

# Draw the colored layer on top of the outlined base map
alt.layer(base, choro)

In [None]:
# The continuous choropleth is hard to read, so build one that only
# distinguishes transient from static CBGs
choro2 = alt.Chart(data_choro_data).mark_geoshape().encode(
    color=alt.Color('properties.cbg_type', type='nominal', title="CBG Type"),
)

choro2