In [103]:
import pandas as pd
import numpy as np

# Dash related libraries
import dash
import dash_core_components as dcc
import dash_html_components as html
from dash.dependencies import Input, Output
import plotly.graph_objs as go
import plotly as plotly
from jupyter_plotly_dash import JupyterDash



## NYC OpenData - 2015 Street Tree Census - Tree Data

Street tree data from the TreesCount! 2015 Street Tree Census, conducted by volunteers and staff organized by NYC Parks & Recreation and partner organizations. Tree data collected includes tree species, diameter and perception of health. Accompanying blockface data is available indicating status of data collection and data release citywide.

This data is collected by volunteers across the city, and is meant to catalog information about every single tree in the city. Data will be accessed via the socrata API. The main site for the data is [here](https://data.cityofnewyork.us/Environment/2015-Street-Tree-Census-Tree-Data/uvpi-gqnh), and on the upper right hand side the link to the API is available.

[Here](https://data.cityofnewyork.us/api/views/uvpi-gqnh/files/8705bfd6-993c-40c5-8620-0c81191c7e25?download=true&filename=StreetTreeCensus2015TreesDataDictionary20161102.pdf) is the link to the data dictionary.

### Assignment Description:
Build a Dash app for an arborist studying the health of various tree species (as defined by the variable ‘spc_common’) across each borough (defined by the variable ‘borough’). This arborist would like to answer the following two questions for each species and in each borough:
1. What proportion of trees are in good, fair, or poor health according to the ‘health’ variable?
2. Are stewards (steward activity measured by the ‘steward’ variable) having an impact on the health of trees?

In [104]:
# Pull a raw preview of the census records so the available columns can be inspected.
# NOTE(review): no $limit is passed, so this presumably returns only the API's
# default first page rather than the full dataset — confirm against the Socrata paging docs.
url = 'https://data.cityofnewyork.us/resource/uvpi-gqnh.json'
trees = pd.read_json(url)
# Last expression of the cell: display the first ten rows.
trees.head(10)

Unnamed: 0,tree_id,block_id,created_at,tree_dbh,stump_diam,curb_loc,status,health,spc_latin,spc_common,...,boro_ct,state,latitude,longitude,x_sp,y_sp,council_district,census_tract,bin,bbl
0,180683,348711,2015-08-27,3,0,OnCurb,Alive,Fair,Acer rubrum,red maple,...,4073900,New York,40.723092,-73.844215,1027431.0,202756.7687,29.0,739.0,4052307.0,4022210000.0
1,200540,315986,2015-09-03,21,0,OnCurb,Alive,Fair,Quercus palustris,pin oak,...,4097300,New York,40.794111,-73.818679,1034456.0,228644.8374,19.0,973.0,4101931.0,4044750000.0
2,204026,218365,2015-09-05,3,0,OnCurb,Alive,Good,Gleditsia triacanthos var. inermis,honeylocust,...,3044900,New York,40.717581,-73.936608,1001823.0,200716.8913,34.0,449.0,3338310.0,3028870000.0
3,204337,217969,2015-09-05,10,0,OnCurb,Alive,Good,Gleditsia triacanthos var. inermis,honeylocust,...,3044900,New York,40.713537,-73.934456,1002420.0,199244.2531,34.0,449.0,3338342.0,3029250000.0
4,189565,223043,2015-08-30,21,0,OnCurb,Alive,Good,Tilia americana,American linden,...,3016500,New York,40.666778,-73.975979,990913.8,182202.426,39.0,165.0,3025654.0,3010850000.0
5,190422,106099,2015-08-30,11,0,OnCurb,Alive,Good,Gleditsia triacanthos var. inermis,honeylocust,...,1014500,New York,40.770046,-73.98495,988418.7,219825.5227,3.0,145.0,1076229.0,1011310000.0
6,190426,106099,2015-08-30,11,0,OnCurb,Alive,Good,Gleditsia triacanthos var. inermis,honeylocust,...,1014500,New York,40.77021,-73.985338,988311.2,219885.2785,3.0,145.0,1076229.0,1011310000.0
7,208649,103940,2015-09-07,9,0,OnCurb,Alive,Good,Tilia americana,American linden,...,1012700,New York,40.762724,-73.987297,987769.1,217157.8561,3.0,133.0,1086093.0,1010410000.0
8,209610,407443,2015-09-08,6,0,OnCurb,Alive,Good,Gleditsia triacanthos var. inermis,honeylocust,...,5006400,New York,40.596579,-74.076255,963073.2,156635.5542,,,,
9,192755,207508,2015-08-31,21,0,OffsetFromCurb,Alive,Fair,Platanus x acerifolia,London planetree,...,3037402,New York,40.586357,-73.969744,992653.7,152903.6306,47.0,37402.0,3320727.0,3072350000.0


## Data Preparation

I used the Socrata API (paging documentation [here](https://dev.socrata.com/docs/paging.html)) and SoQL to select only the attributes required to answer the assignment questions, and to work around the API's paging limit described in that documentation.  

### Trees Health Proportion:
Using the query below, I aggregated the data by borough, species, steward, and health to analyze the proportion of trees in each 'health' category. As part of the data imputation process, I also replaced 'NaN' values in the health variable with 'Fair' and 'NaN' values in the steward variable with 'None'.

In [105]:
# Server-side aggregation: one row per (borough, species, health, steward)
# combination, carrying the number of trees in that combination.
# Adjacent string literals are concatenated by the parser, so no `+` is needed.
soql_url = (
    'https://data.cityofnewyork.us/resource/uvpi-gqnh.json?$limit=50000&'
    '$select=boroname,spc_common,health,steward,count(tree_id)'
    '&$group=boroname,spc_common,health,steward'
    '&$order=boroname,spc_common,health,steward'
).replace(' ', '%20')

# Load the aggregate and impute missing categories in a single chained step:
# missing health becomes 'Fair', missing steward becomes 'None'.
trees_summary = pd.read_json(soql_url).assign(
    health=lambda frame: frame['health'].fillna('Fair'),
    steward=lambda frame: frame['steward'].fillna('None'),
)
trees_summary


Unnamed: 0,boroname,spc_common,health,steward,count_tree_id
0,Bronx,American beech,Fair,1or2,3
1,Bronx,American beech,Fair,3or4,1
2,Bronx,American beech,Fair,,3
3,Bronx,American beech,Good,1or2,2
4,Bronx,American beech,Good,3or4,1
...,...,...,...,...,...
4560,Staten Island,willow oak,Good,3or4,4
4561,Staten Island,willow oak,Good,,177
4562,Staten Island,willow oak,Poor,1or2,2
4563,Staten Island,willow oak,Poor,,9


## Dash Interactive Application

I used the JupyterDash module from the jupyter_plotly_dash library to render the Dash application within the Jupyter notebook itself.

The code for the layout, the callbacks, and the associated function definitions is shown below:

In [111]:
app = JupyterDash('HealthProportion')

# Every selector is populated from `trees_summary`, the same frame the callbacks
# filter, so each offered option can actually match rows in that frame.
boroughs = trees_summary['boroname'].unique()
species = trees_summary['spc_common'].unique()
# Previously taken from the raw `trees` preview, which is not imputed and is not
# the frame the callbacks query; its steward values could include missing
# entries or omit levels, and need not contain the 'None' default used below.
stewards = trees_summary['steward'].unique()

app.layout = html.Div([
    html.H1(children='NYC Street Tree Health'),
    html.P('These graphics display the overall health of trees along city streets in NYC.'),

    # --- Section 1: health proportion for one borough/species pair ---
    html.H2(children='NYC Street Tree Health Proportion By Borough & Species'),
    html.P('Please select a Borough and a Species: '),
    html.Div([
        dcc.Dropdown(
            id='boro-selector',
            options=[{'label': i, 'value': i} for i in boroughs],
            value='Bronx'
        ),
        dcc.Dropdown(
            id='species-selector',
            options=[{'label': i, 'value': i} for i in species],
            value='American beech'
        )
    ], style={'width': '50%', 'display': 'inline-block', 'padding': 0}),
    # Echo of the current selection, filled in by the `update_output` callback.
    html.Div([html.P(id='param-select')]),
    html.Div([
        dcc.Graph(id='output-health-proportion')
    ]),

    # --- Section 2: health counts for the chosen stewardship level ---
    html.H2(children='NYC Street Tree Health By Stewardship'),
    html.Div([
        dcc.RadioItems(
            id='steward-selector',
            options=[{'label': i, 'value': i} for i in stewards],
            # 'None' is the fill value applied to missing steward entries in trees_summary.
            value='None'
        )]),
    html.Div([
        dcc.Graph(id='output-steward-hist')
    ])
])

@app.callback(
    dash.dependencies.Output('param-select', 'children'),
    [dash.dependencies.Input('boro-selector', 'value'),
    dash.dependencies.Input('species-selector', 'value')])
def update_output(boroname, species):
    """Return the text that echoes the current dropdown selection.

    Args:
        boroname: Value of the 'boro-selector' dropdown.
        species: Value of the 'species-selector' dropdown.

    Returns:
        A string shown as the children of the 'param-select' paragraph.
    """
    # Fixed the misspelled "Speceies" label shown to the user.
    return 'Borough :"{}", Species :"{}"'.format(boroname, species)

@app.callback(
    dash.dependencies.Output('output-health-proportion', 'figure'),
    [dash.dependencies.Input('boro-selector', 'value'),
    dash.dependencies.Input('species-selector', 'value')])
def update_graph(boroname, species):
    """Build the health-proportion pie chart for one borough/species pair.

    Args:
        boroname: Value of the 'boro-selector' dropdown.
        species: Value of the 'species-selector' dropdown.

    Returns:
        A figure dict with 'data' (a single pie trace) and 'layout' keys for
        the 'output-health-proportion' graph.
    """
    dff = trees_summary[(trees_summary['boroname'] == boroname) & (trees_summary['spc_common'] == species)]
    # `dff` can hold several rows per health value (one per steward level).
    # NOTE(review): this relies on Plotly's pie trace summing values that share
    # a label — confirm against the Pie `labels` documentation.
    pie = go.Pie(labels=dff['health'], values=dff['count_tree_id'], name='HealthProportion')
    # The layout must be a single Layout object, not a list containing one;
    # the original list wrapper is not a valid figure layout.
    layout = go.Layout(title='Health Proportion: "{}" Borough & "{}" Species'.format(boroname, species),
                       margin={'l': 100, 'b': 200, 't': 10, 'r': 10},
                       height=1000,
                       hovermode='closest')
    return {'data': [pie], 'layout': layout}


@app.callback(
    dash.dependencies.Output('output-steward-hist', 'figure'),
    [dash.dependencies.Input('boro-selector', 'value'),
     dash.dependencies.Input('species-selector', 'value'),
     dash.dependencies.Input('steward-selector', 'value')])
def steward_graph(boroname, species, steward):
    """Build the bar chart of tree counts per health value for one selection.

    Args:
        boroname: Value of the 'boro-selector' dropdown.
        species: Value of the 'species-selector' dropdown.
        steward: Value of the 'steward-selector' radio group.

    Returns:
        A figure dict with 'data' (a single bar trace) and 'layout' keys for
        the 'output-steward-hist' graph.
    """
    # One boolean mask per selector, combined to pick the matching summary rows.
    in_borough = trees_summary['boroname'] == boroname
    is_species = trees_summary['spc_common'] == species
    at_steward_level = trees_summary['steward'] == steward
    selection = trees_summary.loc[in_borough & is_species & at_steward_level]

    health_bars = go.Bar(name=steward,
                         x=selection['health'],
                         y=selection['count_tree_id'])
    return {'data': [health_bars], 'layout': {'title': "Health by Stewardship"}}
            
# Last expression of the cell: evaluating `app` displays it as the cell output.
app
