In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import altair as alt
import os
import sys

sys.path.append("website")

import website.altair_visualization as alt_vis
from website.field_confs import density_cuts, density_labels, fields_to_selectors
# from website.selectors import *

In [2]:
%load_ext autoreload
%autoreload 2

# Data Transformations
Data Transformations for Final Vis
 - convert temperatures from Celsius to Fahrenheit
 - add bins to density variable

In [4]:
!ls data_sources

FIPS 5 County Codes.twbx   combined_data.csv
README.md                  data_with_distance.csv
[1m[36madditional_sources[m[m         final_transformed_data.csv


In [5]:
# Load the combined per-county dataset from the local data_sources folder.
county = pd.read_csv("data_sources/combined_data.csv")
# Display the column index so the available fields are visible in the output.
county.columns

Index(['county', 'county_ascii', 'county_fips', 'state_id', 'state_name',
       'city_largest', 'city_largest_id', 'lat', 'lng', 'population',
       'density', 'timezone', 'timezone_all', 'age_median', 'age_under_10',
       'age_10_to_19', 'age_20s', 'age_30s', 'age_40s', 'age_50s', 'age_60s',
       'age_70s', 'age_over_80', 'male', 'female', 'married', 'divorced',
       'never_married', 'widowed', 'family_size', 'family_dual_income',
       'income_household_median', 'income_household_under_5',
       'income_household_5_to_10', 'income_household_10_to_15',
       'income_household_15_to_20', 'income_household_20_to_25',
       'income_household_25_to_35', 'income_household_35_to_50',
       'income_household_50_to_75', 'income_household_75_to_100',
       'income_household_100_to_150', 'income_household_150_over',
       'income_household_six_figure', 'income_individual_median',
       'home_ownership', 'home_value', 'rent_median', 'rent_burden',
       'education_less_highschoo

In [6]:
# Derive the columns used by the final visualization.
# `id` mirrors the county FIPS code.
county["id"] = county["county_fips"]

# Celsius -> Fahrenheit (x * 1.8 + 32) applied to the 0.05 / 0.95 percentile
# "high" columns; the results are exposed as low_temp / high_temp.
county["low_temp"] = county["0.05_percentile_high"].mul(1.8).add(32)
county["high_temp"] = county["0.95_percentile_high"].mul(1.8).add(32)

# Shorter alias for the precipitation-day count column.
county["precip_num_days"] = county["num_precip_days_greater_1mm"]

# NOTE(review): the saved output of this cell lists county_fips 56041 twice with
# different temperature values -- confirm whether `id` is expected to be unique.
county.sort_values("county_fips")

Unnamed: 0,county,county_ascii,county_fips,state_id,state_name,city_largest,city_largest_id,lat,lng,population,...,0.75_percentile_high,0.95_percentile_high,min_high,max_high,crime_rate_per_100000_x,crime_rate_per_100000_y,id,low_temp,high_temp,precip_num_days
1097,Baldwin,Baldwin,1003,AL,Alabama,Daphne,1.840006e+09,30.7275,-87.7226,212830.0,...,30.6,32.20,5.9,36.7,228.086325,228.086325,1003,55.94,89.96,312.0
1371,Barbour,Barbour,1005,AL,Alabama,Eufaula,1.840002e+09,31.8696,-85.3932,25361.0,...,27.2,29.40,3.3,36.1,177.278771,177.278771,1005,53.06,84.92,96.0
1138,Bibb,Bibb,1007,AL,Alabama,Brent,1.840014e+09,32.9986,-87.1265,22493.0,...,31.1,33.90,5.6,36.7,217.661691,217.661691,1007,53.96,93.02,366.0
1169,Blount,Blount,1009,AL,Alabama,Oneonta,1.840005e+09,33.9809,-86.5674,57681.0,...,27.8,30.00,0.6,33.3,210.810064,210.810064,1009,48.92,86.00,364.0
216,Bullock,Bullock,1011,AL,Alabama,Union Springs,1.840004e+09,32.1005,-85.7157,10248.0,...,30.6,33.30,3.3,36.1,526.365260,526.365260,1011,53.96,91.94,362.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1361,Teton,Teton,56039,WY,Wyoming,Jackson,1.840021e+09,43.9347,-110.5898,23280.0,...,20.3,25.35,-14.6,35.0,179.629962,179.629962,56039,21.92,77.63,258.0
2356,Uinta,Uinta,56041,WY,Wyoming,Evanston,1.840020e+09,41.2876,-110.5476,20479.0,...,25.0,28.60,-13.3,33.9,33.228900,33.228900,56041,25.61,83.48,292.0
2357,Uinta,Uinta,56041,WY,Wyoming,Evanston,1.840020e+09,41.2876,-110.5476,20479.0,...,25.0,30.60,-16.7,35.6,33.228900,33.228900,56041,21.02,87.08,236.0
2198,Washakie,Washakie,56043,WY,Wyoming,Worland,1.840021e+09,43.9050,-107.6827,8027.0,...,27.5,31.95,-13.4,40.0,59.080704,59.080704,56043,27.59,89.51,319.0


In [7]:
# Bucket population density using the cut points and labels imported from
# website.field_confs (original note: 0-20, 20-50, 50-100, 100-1000, 1000+).
# Quick check for the top bucket: county[county.density > 1000]
# The categorical result is stored as plain strings.
county["density_group"] = (
    pd.cut(county["density"], bins=density_cuts, labels=density_labels)
    .astype(str)
)

In [8]:
# Spot-check the raw density values next to their assigned bucket labels.
county[["density", "density_group"]]

Unnamed: 0,density,density_group
0,30.79,20-50
1,10.27,0-20
2,0.15,0-20
3,2904.98,1000+
4,473.49,100-1000
...,...,...
2556,3.94,0-20
2557,0.70,0-20
2558,14.27,0-20
2559,1.48,0-20


In [9]:
# Save the transformed frame, without the index column, under website/static.
# The same path is assigned to data_url in the visualization section below.
county.to_csv("website/static/data_sources/final_transformed_data.csv", index=False)

# visualization

In [10]:
# Path of the transformed CSV written above; passed to the alt_vis view builders.
data_url = "website/static/data_sources/final_transformed_data.csv"

In [61]:
# Build the country-level spec in this cell before displaying it. Previously the
# cell only echoed `d`, which is first assigned in a later cell, so a fresh
# Restart & Run All failed here with NameError.
# NOTE(review): arguments mirror the country_view call in the next cell; confirm
# they match the spec shown in the saved output.
d = alt_vis.country_view(data_url, reference_county="Fremont", reference_state="WY")
d

{'config': {'view': {'continuousWidth': 400, 'continuousHeight': 300}},
 'layer': [{'data': {'url': 'https://vega.github.io/vega-datasets/data/us-10m.json',
    'format': {'feature': 'counties', 'type': 'topojson'}},
   'mark': {'type': 'geoshape', 'stroke': 'black', 'strokeWidth': 0.1},
   'encoding': {'color': {'type': 'quantitative',
     'field': 'matchPct',
     'scale': {'domain': [0, 1]}},
    'tooltip': [{'type': 'nominal', 'field': 'county'},
     {'type': 'nominal', 'field': 'state_id'},
     {'type': 'quantitative', 'field': 'low_temp'},
     {'type': 'quantitative', 'field': 'high_temp'},
     {'type': 'quantitative', 'field': 'precip_num_days'},
     {'type': 'ordinal', 'field': 'density_group'},
     {'type': 'quantitative', 'field': 'income_individual_median'}]},
   'height': 500,
   'projection': {'type': 'albersUsa'},
   'selection': {'income': {'type': 'single',
     'fields': ['individual_median'],
     'bind': {'input': 'range', 'max': 100000, 'min': 5000, 'step': 5

In [68]:
# Build the country-level chart spec with Fremont, WY as the reference county,
# then turn the spec into an Altair chart so it renders as the cell output.
d = alt_vis.country_view(data_url, reference_county="Fremont", reference_state="WY")
alt.Chart.from_dict(d)

In [21]:
# Sanity check: which Altair chart class the spec currently held in `d` deserializes to.
type(alt.Chart.from_dict(d))

altair.vegalite.v4.api.LayerChart

In [80]:
# Build the state-level chart spec with San Diego, CA as the reference county
# and render it as an Altair chart.
# NOTE(review): this rebinds `d`, so earlier cells that display `d` will show the
# state view if they are re-run after this cell.
d = alt_vis.state_view(data_url, reference_county="San Diego", reference_state="CA")
alt.Chart.from_dict(d)

<class 'altair.vegalite.v4.api.Chart'>


## Sliders

In [None]:
# from vega_datasets import data

# counties_boundaries = alt.topo_feature(data.us_10m.url, 'counties')
# state_boundaries = alt.topo_feature(data.us_10m.url, 'states')

# def create_slider(name, min_val, max_val, step, field_inits):
#     slider = alt.binding_range(min=min_val, max=max_val, step=step)
#     return alt.selection_single(name=name, fields=[f[0] for f in field_inits],
#                                 bind=slider, init=dict(field_inits))

# def create_dropdown(name, options, field_inits):
#     dropdown = alt.binding_select(options=options)
#     return alt.selection_single(name=name, 
#                                 fields=[f[0] for f in field_inits], 
#                                 bind=dropdown, init=dict(field_inits))

# temp_selector = create_slider("temperature", -30, 60, 0.1, [("low_temp", 10), ("high_temp", 90)])
# precip_selector = create_slider("precipitation", 0, 365, 1, [("num_days", 100)])
# education_selector = create_slider("education", 0, 100, 1, [("college_or_above", 20)])
# income_selector = create_slider("income", 5000, 100000, 5000, [("individual_median", 25000)])
# density_selector = create_dropdown("density", 
#                                    density_labels, 
#                                    [("group", "100-1000")])



In [None]:
# temp_selector.name
# temp_selector.selection.fields
# str(temp_selector.high_temp)

## Outline States

In [None]:
# outline = alt.Chart(state_boundaries).mark_geoshape(stroke='black', fillOpacity=0).project(
#     type='albersUsa'
# ).properties(
#     width=1000,
#     height=600
# )

## heatmap version

In [None]:
# FIELD_TYPE = "field_type"
# COMPARATOR = "comparator"
# ALIAS = "alias"
# SELECTOR = "selector"
# SELECTOR_FIELD = "field"

# comparators = {
#     "g": ">",
#     "ge": ">=",
#     "l": "<",
#     "le": "<=",
#     "eq": "=="
# }

# # this contains a superset of fields to selectors, should only need to append, not delete
# fields_to_selectors = {
#     "low_temp": {SELECTOR: temp_selector, SELECTOR_FIELD: "low_temp"},
#     "high_temp": {SELECTOR: temp_selector, SELECTOR_FIELD: "high_temp"},
#     "num_precip_days": {SELECTOR: precip_selector, SELECTOR_FIELD: "num_days"},
#     "density_group": {SELECTOR: density_selector, SELECTOR_FIELD: "group"},
#     "income_individual_median": {SELECTOR: income_selector, SELECTOR_FIELD: "individual_median"}
# }

# # this conf determines which fields get added to the selection criteria and plotted in the detailed view
# # comment out the ones that you don't want to appear in the vis
# fields_conf = {
#     "low_temp": {
#         FIELD_TYPE: "Q",
#         COMPARATOR: "ge",
#         ALIAS: "Coldest Daily High Temperature"
#     },
#     "high_temp": {
#         FIELD_TYPE: "Q",
#         COMPARATOR: "le",
#         ALIAS: "Hotest Daily High Temperature"
#     },
#     "num_precip_days": {
#         FIELD_TYPE: "Q",
#         COMPARATOR: "le",
#         ALIAS: "Number of Days with Precipitation"
#     },
#     "density_group": { # we should probably change this to a drop down of low/medium/high
#         FIELD_TYPE: "O",
#         COMPARATOR: "eq",
#         ALIAS: "Minimum Population Density"
#     },
#     "income_individual_median": {
#         FIELD_TYPE: "Q",
#         COMPARATOR: "ge",
#         ALIAS: "Median Individual Income"
#     }
# }

In [None]:
# fields_to_selectors["density_group"]

In [None]:
# def create_match_pct(fields_conf, selectors):
#     n_fields = len(fields_conf)
#     formula = []
#     for f, conf in fields_conf.items():
#         selector = f"{selectors.get(f).get(SELECTOR).name}.{selectors.get(f).get(SELECTOR_FIELD)}"
#         formula.append("(datum.{f} {comp} {selector})".format(f=f, 
#                                                               comp=comparators.get(conf.get(COMPARATOR)), 
#                                                               selector=selector))
#     added = "(" + " + ".join(formula) + ")"
#     return added + f" / {n_fields}"

# def build_chart(fields_conf, selectors, width=800, height=500):
#     base_chart = alt.Chart(
#         counties_boundaries
#     ).mark_geoshape(
#         stroke='black',
#         strokeWidth=0.1
#     ).transform_lookup(
#         lookup='id',
#         from_=alt.LookupData("data_sources/final_transformed_data.csv", 'id', ['county', 'state_id'] + list(fields_conf.keys()))
#     ).project(
#         type='albersUsa'
#     ).encode(
#         color="matchPct:O",
#         tooltip=["county:N", "state_id:N"] + [f"{f}:{conf.get(FIELD_TYPE)}" for f, conf in fields_conf.items()],
#     )
    
#     selectors = set([selectors.get(f).get(SELECTOR) for f in fields_conf.keys()])
#     for s in selectors: 
#         base_chart = base_chart.add_selection(s)
    
#     base_chart = base_chart.transform_calculate(
#         matchPct = create_match_pct(fields_conf, fields_to_selectors)
#     ).properties(
#         width=width,
#         height=height
#     )

#     return base_chart

In [None]:
# create_match_pct(fields_conf, fields_to_selectors)

In [None]:
# country_view = alt.layer(build_chart(fields_conf, fields_to_selectors, 800, 500), outline)
# country_view

In [None]:
# (country_view.to_json(indent=2))

In [None]:
# # country_view.save('chart_assets/country_view.html', embed_options={'renderer':'svg'})
# country_view.save('chart_assets/country_view.json')

## Detailed State View

In [None]:
# from functools import reduce

# # add state selection 
# state_dropdown = alt.binding_select(options=sorted(county.state_id.dropna().unique().tolist()))
# state_selector = alt.selection_single(name="state", fields=['state_id'], bind=state_dropdown, init={"state_id": "CA"})

# state_specific = alt.layer(
#     build_chart(fields_conf, fields_to_selectors, 800, 500).add_selection(
#         state_selector
#     ).transform_filter(
#         state_selector
#     ), outline.transform_filter(
#         state_selector
#     ))

# bars = [alt.vconcat(), alt.vconcat()]
# n_fields = len(fields_conf)
# for i, field in enumerate(fields_conf.keys()):
#     field_bars = alt.Chart("data_sources/final_transformed_data.csv").mark_bar(tooltip=True).encode(
#         x=f"{field}:{fields_conf.get(field).get(FIELD_TYPE)}",
#         y="county:N",
#         color="county:N"
#     ).add_selection(
#         state_selector
#     ).transform_filter(
#         state_selector
#     ).transform_window(
#         rank='rank(county)',
#         sort=[alt.SortField("matchPct", order="descending"), alt.SortField('county', order='ascending')]
#     ).transform_filter(
#         alt.datum.rank <= 10
#     )
                  
#     bars[i%2] &= field_bars


# state_view = (state_specific & (bars[0] | bars[1])).resolve_scale(
#     color='independent'
# )
# state_view

In [None]:
# state_view.save('chart_assets/state_view.html')
# state_view.save('chart_assets/state_view.json')

In [None]:
# import altair as alt

# def create_slider(name, min_val, max_val, step, field_inits):
#     slider = alt.binding_range(min=min_val, max=max_val, step=step)
#     return alt.selection_single(name=name, fields=[f[0] for f in field_inits],
#                                 bind=slider, init=dict(field_inits))

# def create_dropdown(name, options, field_inits):
#     dropdown = alt.binding_select(options=options)
#     return alt.selection_single(name=name, 
#                                 fields=[f[0] for f in field_inits], 
#                                 bind=dropdown, init=dict(field_inits))

# class Selectors:
# 	temp_selector = create_slider("temperature", -30, 60, 0.1, [("low_temp", 10), ("high_temp", 90)])
# 	precip_selector = create_slider("precipitation", 0, 365, 1, [("num_days", 100)])
# 	education_selector = create_slider("education", 0, 100, 1, [("college_or_above", 20)])
# 	income_selector = create_slider("income", 5000, 100000, 5000, [("individual_median", 25000)])
# 	density_selector = create_dropdown("density", 
# 	                                   density_labels, 
# 	                                   [("group", "100-1000")])



## DEPRECATED Binary Version

In [None]:
# base_chart = alt.Chart(counties_boundaries
# ).mark_geoshape(
#     stroke='black',
#     strokeWidth=0.1
# ).transform_lookup(
#     lookup='id',
#     from_=alt.LookupData(county, 'id', ['county', 'state_id', 'population', 'low_temp', 'high_temp', 'crime_rate', "education_college_or_above", "income_household_median"])
# ).project(
#     type='albersUsa'
# ).encode(
#     color=alt.condition(
#         (alt.datum.low_temp >= temp_selector.low_temp) & (alt.datum.high_temp <= temp_selector.high_temp) &
#         (alt.datum.education_college_or_above >= education_selector.college_or_above) & (alt.datum.income_household_median >= income_selector.household_median),
#         alt.value('steelblue'), alt.value('lightgray')),
#     tooltip=["county:N", "low_temp:Q", "high_temp:Q", "education_college_or_above:Q", "income_household_median:Q"]
# ).add_selection(
#     temp_selector
# ).add_selection(
#     education_selector
# ).add_selection(
#    income_selector
# ).properties(
#     width=1000,
#     height=600
# )

# alt.layer(base_chart, outline)