In [None]:
import pandas as pd
import numpy as np
import plotly.express as px

Project Prompt:

In this module we’ll be looking at data from the New York City tree census:
https://data.cityofnewyork.us/Environment/2015-Street-Tree-Census-Tree-Data/uvpi-gqnh
This data is collected by volunteers across the city, and is meant to catalog information
about every single tree in the city.

Build a dash app for an arborist studying the health of various tree species (as defined by the
variable ‘spc_common’) across each borough (defined by the variable ‘borough’). This
arborist would like to answer the following two questions for each species and in each
borough:

1. What proportion of trees are in good, fair, or poor health according to the ‘health’ variable?

2. Are stewards (steward activity measured by the ‘steward’ variable) having an impact on the health of trees?

Please see the accompanying notebook for an introduction and some notes on the Socrata
API.

Using the instructor's base code for Socrata (including its approach to getting around the API limits), I pull in the data one borough at a time below, and then combine everything into a single data frame.

In [None]:
# Socrata endpoint for the 2015 Street Tree Census.
TREE_CENSUS_URL = 'https://data.cityofnewyork.us/resource/nwxe-4ae8.json'


def fetch_borough_counts(boroname, boro_label, limit=50000):
    """Download grouped tree counts for a single borough.

    The query counts trees per (steward, health, spc_common) combination,
    restricted to rows whose `boroname` equals the given borough.

    Parameters
    ----------
    boroname : str
        Borough name exactly as it is spelled in the census `boroname`
        column (e.g. 'Bronx', 'Staten Island').
    boro_label : str
        Value written to the added 'boro' column so rows can be told apart
        after the boroughs are concatenated.
    limit : int, optional
        Value sent as `$limit`. Without it Socrata only returns its default
        page of 1000 rows, which can silently truncate the grouped result.

    Returns
    -------
    pandas.DataFrame
        The rows returned by the API plus a constant 'boro' column.
    """
    soql_url = (TREE_CENSUS_URL + '?' +
        '$select=health, steward, spc_common, count(tree_id)' +
        '&$where=boroname=\'' + boroname + '\'' +
        '&$group=steward, health, spc_common' +
        '&$limit=' + str(limit)).replace(' ', '%20')
    counts = pd.read_json(soql_url)
    # label the rows before the merge so the borough survives the concat
    counts["boro"] = boro_label
    return counts


# One request per borough. Each call filters on its OWN borough name
# (previously every query was restricted to 'Bronx').
bronx = fetch_borough_counts('Bronx', 'bronx')
brooklyn = fetch_borough_counts('Brooklyn', 'brooklyn')
manhattan = fetch_borough_counts('Manhattan', 'manhattan')
staten = fetch_borough_counts('Staten Island', 'staten island')
queens = fetch_borough_counts('Queens', 'queens')

# merge all boroughs together
boroughs = [bronx, brooklyn, manhattan, staten, queens]
trees = pd.concat(boroughs)

The data below will allow us to plot any differences in the steward variable with regard to the trees' health, by borough and species. First, let's remove any rows that contain a NaN, as we won't be able to graph those.

In [122]:
# Discard, in place, every row that has a missing value in any column;
# such rows cannot be plotted.
trees.dropna(axis="index", how="any", inplace=True)

# Preview the first ten remaining rows.
trees.head(n=10)

Unnamed: 0,health,steward,spc_common,count_tree_id,boro
0,Poor,,white ash,20,bronx
2,Good,4orMore,flowering dogwood,2,bronx
3,Poor,3or4,littleleaf linden,3,bronx
4,Fair,,red horse chestnut,2,bronx
5,Fair,3or4,honeylocust,3,bronx
6,Good,,northern red oak,1000,bronx
7,Good,,quaking aspen,3,bronx
8,Fair,1or2,Persian ironwood,3,bronx
9,Good,,bald cypress,34,bronx
10,Good,1or2,southern red oak,4,bronx


Let's make a base graph of what we want before we work on getting it into Dash. This one shows tree health; eventually, when there are two dropdowns, the user will be able to filter by borough and species.

In [126]:
# Static preview of the health chart, drawn from the unfiltered `trees`
# frame. In the Dash app the user will filter by boro, and by species if
# they want.
graph_1 = px.bar(trees, x="health", y="count_tree_id")

graph_1.show()

And this second graph adds the stewards, to show whether their presence has made a difference in tree health. Again, the app version will have dropdowns for borough and tree species.

In [123]:
# Static preview of the steward chart, drawn from the unfiltered `trees`
# frame: tree counts per steward level, coloured by health. In the Dash app
# the user will filter by boro, and by species if they want.
graph_2 = px.bar(trees, x="steward", y="count_tree_id", color="health")

graph_2.show()


Time to try to recreate this in Dash. Based heavily on the code provided here: https://plotly.com/python/bar-charts/

In [116]:
import dash
import dash_core_components as dcc
import dash_html_components as html
from dash.dependencies import Input, Output
import plotly.express as px

# so it will run in jupyter notebook in VS Code for me
from jupyter_dash import JupyterDash

Dash app for tree health by borough and tree species.
TODO add the second dropdown for tree species

In [118]:
df = trees
# Distinct borough labels; these populate the dropdown options below.
boros = df.boro.unique()

# JupyterDash (rather than the usual dash.Dash) is required so the app
# renders inside this notebook.
app = JupyterDash(__name__)

# Layout: a borough selector stacked above the bar chart it controls.
boro_selector = dcc.Dropdown(
    id="boro_dropdown",
    options=[{"label": name, "value": name} for name in boros],
    # default to the first borough in the data
    value=boros[0],
    clearable=False,
)
app.layout = html.Div([
    boro_selector,
    dcc.Graph(id="bar-chart"),
])


@app.callback(
    Output("bar-chart", "figure"),
    [Input("boro_dropdown", "value")])
def update_bar_chart(boro):
    """Redraw the health bar chart using only rows of the selected borough."""
    in_selected_boro = df["boro"] == boro
    return px.bar(df[in_selected_boro], x="health", y="count_tree_id")


# mode='inline' is what makes the app display inside the notebook
app.run_server(debug=True, mode='inline')

Dash app for showing how the stewards may be affecting tree health, with dropdowns for borough and tree species.

TODO add second dropdown for tree species

In [128]:
df = trees
# make a list of each boro name to use in the 'options' line down below
boros = df.boro.unique()

# must use JupyterDash so it displays in this notebook instead of usual dash.Dash
app = JupyterDash(__name__)

app.layout = html.Div([
    dcc.Dropdown(
        id="boro_dropdown",
        options=[{"label": x, "value": x} for x in boros],
        # set default value
        value=boros[0],
        clearable=False,
    ),
    dcc.Graph(id="bar-chart"),
])

@app.callback(
    Output("bar-chart", "figure"),
    [Input("boro_dropdown", "value")])
def update_bar_chart(boro):
    """Redraw the steward chart for the selected borough.

    Filters `df` to the rows whose 'boro' equals `boro` and returns a bar
    chart of tree counts per steward level, coloured by health.
    """
    mask = df["boro"] == boro
    fig2 = px.bar(df[mask], x="steward", y="count_tree_id", color="health")
    return fig2

# mode='inline' is needed to run in the jupyter notebook.
# An explicit, non-default port keeps this app from taking over the port of
# the health-by-borough app started earlier in the notebook; JupyterDash
# shuts down whatever server is already running on the port it is given.
app.run_server(debug=True, mode='inline', port=8051)