In [1]:
#importing packages
import pandas as pd
import numpy as np
import geopandas as gpd
import matplotlib.pyplot as plt
import altair as alt
import seaborn as sns
from vega_datasets import data

In [2]:
# Load ERSAtlas_CensusData.csv (resolved relative to the working directory)
# into the DataFrame that the aggregation cells below group and summarise.
atlas_census_data = pd.read_csv(filepath_or_buffer="ERSAtlas_CensusData.csv")

In [3]:
# Show every column when a DataFrame is displayed (None removes the column limit).
pd.set_option("display.max_columns", None)

In [None]:
# #getting state level information into df
# state_level = atlas_census_data.groupby(["State","food_desert_label", "region"]).aggregate({"MedianIncome":"median", "Walk": "mean", "TotalPop": "sum", "ChildPoverty": "mean", "Service": "mean", "Construction":"mean", "Hispanic":"sum", "Asian":"sum", "White":"sum", "Black":"sum", "Native":"sum", "Pacific":"sum"})
# state_level = state_level.reset_index()

In [30]:
# State-level summary: one row per (State, region, food_desert_label) group.
# How each column is collapsed within a group.
state_aggregations = {
    "food_desert_label": "sum",
    "MedianIncome": "median",
    "Walk": "mean",
    "TotalPop": "sum",
    "ChildPoverty": "mean",
    "Service": "mean",
    "Construction": "mean",
    "Hispanic": "sum",
    "Asian": "sum",
    "White": "sum",
    "Black": "sum",
    "Native": "sum",
    "Pacific": "sum",
}

state_level = (
    atlas_census_data
    .groupby(["State", "region", "food_desert_label"])
    .aggregate(state_aggregations)
    # The summed label column shares its name with a group key, so it has to be
    # renamed before reset_index() turns the keys back into columns.
    .rename(columns={"food_desert_label": "FoodDesert_Totals"})
    .reset_index()
    .rename(columns={"region": "Region"})
)

In [32]:
# Region-level summary: one row per (region, food_desert_label) group.
# How each column is collapsed within a group.
region_aggregations = {
    "food_desert_label": "sum",
    "MedianIncome": "median",
    "Walk": "mean",
    "TotalPop": "sum",
    "ChildPoverty": "mean",
    "Service": "mean",
    "Construction": "mean",
    "Hispanic": "sum",
    "Asian": "sum",
    "White": "sum",
    "Black": "sum",
    "Native": "sum",
    "Pacific": "sum",
}

region_level = (
    atlas_census_data
    .groupby(["region", "food_desert_label"])
    .aggregate(region_aggregations)
    # Rename the summed label first so it does not clash with the group key of
    # the same name when reset_index() restores the keys as columns.
    .rename(columns={"food_desert_label": "FoodDesert_Totals"})
    .reset_index()
    # "region" only becomes a column after reset_index(), so it is renamed here.
    .rename(columns={"region": "Region"})
)

In [33]:
# The vega dataset is used only for the map: it supplies the numeric state "id"
# values, and us_10m supplies the state shapes.
state_map = alt.topo_feature(data.us_10m.url, "states")

state_pop = (
    data.population_engineers_hurricanes()
    .loc[:, ["state", "id", "population"]]
    .rename(columns={"state": "State"})
)

# Final state-level data: attach each state's id to the aggregated rows.
# The inner join keeps only states present in both tables.
final_state_level = state_pop.loc[:, ["State", "id"]].merge(
    state_level,
    on="State",
    how="inner",
)

In [129]:
# Click selection keyed on the "State" field.
# The scatter plots, the maps and the bar chart below all attach this same
# selection and use it in their opacity condition (1 when selected, 0.1 otherwise).
click = alt.selection_multi(fields=['State'])

In [153]:
#scatter plot #1: rows where food_desert_label == 1
# Shared base for the points and the trend line: same data, filter and size.
# The click selection is deliberately NOT attached here. Deriving the regression
# line from a chart that already carries the selection and the
# size/tooltip/opacity encodings makes the line layer reference fields
# (State, TotalPop, ...) that the regression output does not contain, and
# registers the selection on both layers.
yes_base = alt.Chart(final_state_level
).transform_filter(
    alt.datum.food_desert_label == 1
).properties(
    width = 400,
    height=400
)

# Axis encodings used by both layers so the line is drawn on the points' scales.
yes_x = alt.X("MedianIncome:Q",  scale=alt.Scale(domain=[30000, 60000]),  axis=alt.Axis(labelAngle=-45, title="Median Income", tickCount=7))
yes_y = alt.Y("Walk:Q", axis=alt.Axis(labelAngle=-45, title="Walk (mi)"))

# Points: sized by population, faded to 0.1 opacity when not in the click selection.
yes_food_desert = yes_base.mark_point(filled = True, color = "red").encode(
    x=yes_x,
    y=yes_y,
    size=alt.Size("TotalPop:Q"),
    tooltip =alt.Tooltip(["State:N", "MedianIncome:Q", "FoodDesert_Totals:Q", "Region:N"]),
    opacity = alt.condition(click, alt.value(1), alt.value(0.1))
).add_selection(click)

#reg line: Walk regressed on MedianIncome, computed after the same filter.
# It encodes only x and y, the two fields the regression transform outputs.
yes_food_desert_regline = yes_base.transform_regression(
    "MedianIncome", "Walk"
).mark_line(color="red").encode(
    x=yes_x,
    y=yes_y
)

#final yes food desert plot: interactive points with the trend line layered on top
final_yes_fooddesert = yes_food_desert + yes_food_desert_regline


In [152]:
#scatter plot #2: rows where food_desert_label == 0
# Shared base for the points and the trend line: same data, filter and size.
# The click selection is deliberately NOT attached here. Deriving the regression
# line from a chart that already carries the selection and the
# size/tooltip/opacity encodings makes the line layer reference fields
# (State, TotalPop, ...) that the regression output does not contain, and
# registers the selection on both layers.
no_base = alt.Chart(final_state_level
).transform_filter(
    alt.datum.food_desert_label == 0
).properties(
    width = 400,
    height=400
)

# Axis encodings used by both layers so the line is drawn on the points' scales.
no_x = alt.X("MedianIncome:Q", scale=alt.Scale(domain=[50000, 120000]),  axis=alt.Axis(labelAngle=-45, title="Median Income", tickCount=7))
no_y = alt.Y("Walk:Q", axis=alt.Axis(labelAngle=-45, title="Walk (mi)"))

# Points: sized by population, faded to 0.1 opacity when not in the click selection.
no_food_desert = no_base.mark_point(filled = True, color = "green").encode(
    x=no_x,
    y=no_y,
    size=alt.Size("TotalPop:Q"),
    tooltip =alt.Tooltip(["State:N", "MedianIncome:Q", "Region:N"]),
    opacity = alt.condition(click, alt.value(1), alt.value(0.1))
).add_selection(click)

#reg line: Walk regressed on MedianIncome, computed after the same filter.
# It encodes only x and y, the two fields the regression transform outputs.
no_food_desert_regline = no_base.transform_regression(
    "MedianIncome", "Walk"
).mark_line(color="green").encode(
    x=no_x,
    y=no_y
)

#final no food desert plot: interactive points with the trend line layered on top
final_no_fooddesert = no_food_desert + no_food_desert_regline

In [86]:
# Show the two layered scatter plots (points + regression line) side by side.
alt.hconcat(final_yes_fooddesert, final_no_fooddesert)

In [154]:
# Side-by-side scatter plots without the regression-line layers; this is the
# version placed in the combined dashboards.
combined_scatter = alt.hconcat(yes_food_desert, no_food_desert)

In [162]:
#creating map
# final_state_level has one row per (State, food_desert_label), so a lookup on
# "id" against it is ambiguous: each state shape would get TotalPop/ChildPoverty
# from only one of that state's label rows. Build a one-row-per-state table for
# the lookup instead, using the same aggregations as the state-level summary
# (TotalPop summed, ChildPoverty averaged) but without splitting by label.
state_totals = atlas_census_data.groupby("State").aggregate(
    {"TotalPop": "sum", "ChildPoverty": "mean"}
).reset_index()
state_map_lookup = final_state_level[["id", "State", "Region"]].drop_duplicates(
    subset="id"
).merge(state_totals, how="left", on="State")

# Neutral-coloured state map; clicking a state drives the shared `click` selection.
mini_map = (alt.Chart(state_map).mark_geoshape(color="#cbbeb5").transform_lookup(
    lookup = "id",
    from_=alt.LookupData(state_map_lookup, "id", ["State", "Region", "TotalPop", "ChildPoverty"])
).encode(
    opacity = alt.condition(click, alt.value(1), alt.value(0.1)),
    tooltip = alt.Tooltip(["State:N", "Region:N", "TotalPop:Q", "ChildPoverty:Q"])
).add_selection(click
).project(type='albersUsa')).properties(
    width = 250,
    height=250
)

In [126]:
# NOTE: the transform_filter(click) version of the bar chart below does not work yet; kept for reference.
#used as reference: https://towardsdatascience.com/how-to-create-bindings-and-conditions-between-multiple-plots-using-altair-4e4fe907de37
# #create bar graph
# mini_bar = alt.Chart(final_state_level).transform_fold(
#     ["Hispanic", "White", "Black", "Native", "Asian", "Pacific"],
#     as_=["Race", "values"]
# ).mark_bar().encode(
#     y = alt.Y("Race:N"),
#     x=alt.X("values:Q", axis=alt.Axis(title="Count of Population", labelAngle=-45, tickCount=5)),
#     color=alt.Color("food_desert_label:N", scale=alt.Scale(range=["green", "red"]))
# ).transform_filter(click)



In [156]:
# Click version of the race bar chart.
# The six race count columns are folded into long form ("Race" / "values") so
# each race gets its own bar, coloured by food_desert_label.
race_columns = ["Hispanic", "White", "Black", "Native", "Asian", "Pacific"]
race_count_axis = alt.Axis(title="Count of Population", labelAngle=-45, tickCount=5)
label_color_scale = alt.Scale(range=["green", "red"])

mini_bar_click = (
    alt.Chart(final_state_level)
    .transform_fold(race_columns, as_=["Race", "values"])
    .mark_bar()
    .encode(
        x=alt.X("values:Q", axis=race_count_axis),
        y=alt.Y("Race:N"),
        color=alt.Color("food_desert_label:N", scale=label_color_scale),
        # Fade everything that is not part of the current click selection.
        opacity=alt.condition(click, alt.value(1), alt.value(0.1)),
    )
    .properties(width=400, height=150)
    .add_selection(click)
)


In [163]:
# Dashboard: map and bar chart on the top row, the two scatter plots underneath.
# Known issue: the click selection does not work with the regression-line
# layers, so combined_scatter (plain scatters, no reg lines) is used here.
alt.vconcat(
    alt.hconcat(mini_map, mini_bar_click),
    combined_scatter,
)

In [None]:
# TODO:
# - add the charts to Streamlit
# - write the accompanying paragraphs
# - format population and other numeric columns in pandas first (format=",.2f")
# - figure out how to make the regression line work, and use a filter instead of the click opacity
# - fix the colors

In [158]:
#try the two variations (child poverty or no)
##minimap child poverty
# final_state_level has one row per (State, food_desert_label), so a lookup on
# "id" against it is ambiguous: each state's fill colour and tooltip would come
# from only one of that state's label rows. Build a one-row-per-state table for
# the lookup instead, using the same aggregations as the state-level summary
# (TotalPop summed, ChildPoverty averaged) but without splitting by label.
state_totals = atlas_census_data.groupby("State").aggregate(
    {"TotalPop": "sum", "ChildPoverty": "mean"}
).reset_index()
state_map_lookup = final_state_level[["id", "State", "Region"]].drop_duplicates(
    subset="id"
).merge(state_totals, how="left", on="State")

# Same interactive map as mini_map, but filled by the state's ChildPoverty value.
mini_map_cp = (alt.Chart(state_map).mark_geoshape().transform_lookup(
    lookup = "id",
    from_=alt.LookupData(state_map_lookup, "id", ["State", "Region", "TotalPop", "ChildPoverty"])
).encode(
    color=alt.Color("ChildPoverty:Q"),
    opacity = alt.condition(click, alt.value(1), alt.value(0.1)),
    tooltip = alt.Tooltip(["State:N", "Region:N", "TotalPop:Q", "ChildPoverty:Q"])
).add_selection(click
).project(type='albersUsa')).properties(
    width = 250,
    height=250
)

In [160]:
# Dashboard variant with the child-poverty map on the top row.
alt.vconcat(
    alt.hconcat(mini_map_cp, mini_bar_click),
    combined_scatter,
)