Build a Dash app for an arborist studying the health of various tree species (as defined by the variable ‘spc_common’) across each borough (defined by the variable ‘borough’; in the JSON API response used in this notebook the column is named ‘boroname’). This arborist would like to answer the following two questions for each species and in each borough:

1. What proportion of trees are in good, fair, or poor health according to the ‘health’ variable?

2. Are stewards (steward activity measured by the ‘steward’ variable) having an impact on the health of trees?

In [1]:
import pandas as pd

In [2]:
# Pull the tree dataset from the NYC Open Data JSON endpoint. No row limit is
# passed in the URL, and the response here holds only 1,000 rows (see the
# printed shape) -- presumably a default page rather than the full dataset.
url = 'https://data.cityofnewyork.us/resource/nwxe-4ae8.json'
df = pd.read_json(url)
# Report (rows, columns) so the size of the sample is visible in the output.
print(df.shape)

(1000, 45)


In [3]:
# Show every column name in the raw response to locate the fields needed later.
df.columns

Index(['address', 'bbl', 'bin', 'block_id', 'boro_ct', 'borocode', 'boroname',
       'brch_light', 'brch_other', 'brch_shoe', 'cb_num', 'census_tract',
       'cncldist', 'council_district', 'created_at', 'curb_loc', 'guards',
       'health', 'latitude', 'longitude', 'nta', 'nta_name', 'problems',
       'root_grate', 'root_other', 'root_stone', 'sidewalk', 'spc_common',
       'spc_latin', 'st_assem', 'st_senate', 'state', 'status', 'steward',
       'stump_diam', 'tree_dbh', 'tree_id', 'trnk_light', 'trnk_other',
       'trunk_wire', 'user_type', 'x_sp', 'y_sp', 'zip_city', 'zipcode'],
      dtype='object')

In [4]:
# List the distinct health ratings; unique() keeps NaN, so missing values show up too.
df['health'].unique()

array(['Fair', 'Good', 'Poor', nan], dtype=object)

In [5]:
# List the distinct steward categories. Note that 'None' is a literal string
# category and is distinct from a missing value (nan).
df['steward'].unique()

array(['None', '1or2', '3or4', nan, '4orMore'], dtype=object)

In [6]:
# List the distinct common species names present in this sample (NaN included).
df['spc_common'].unique()

array(['red maple', 'pin oak', 'honeylocust', 'American linden',
       'London planetree', 'ginkgo', 'willow oak', 'sycamore maple',
       'Amur maple', nan, 'hedge maple', 'American elm', 'ash',
       'crab apple', 'silver maple', 'Turkish hazelnut', 'black cherry',
       'eastern redcedar', 'Norway maple', 'tulip-poplar', 'sawtooth oak',
       'swamp white oak', 'Sophora', 'Chinese fringetree',
       'southern magnolia', 'sweetgum', 'Callery pear', 'scarlet oak',
       'Atlantic white cedar', 'black oak', 'Japanese zelkova',
       'white oak', 'Ohio buckeye', 'northern red oak', 'silver linden',
       'pignut hickory', 'Kentucky yellowwood', 'mulberry', 'Douglas-fir',
       'crepe myrtle'], dtype=object)

In [7]:
# Narrow the frame to the borough, tree id, stewardship, health and species
# columns; everything else in the raw response is discarded from `df`.
df = df.loc[
    :,
    [
        'borocode',
        'boroname',
        'tree_id',
        'steward',
        'health',
        'spc_common',
    ],
]

In [8]:
# Tally trees for every (species, health rating) combination by counting the
# non-null tree_id values in each group.
spc_health = (
    df.groupby(by=['spc_common', 'health'])[['tree_id']]
    .count()
)
# Preview the first ten (species, health) rows.
spc_health.head(n=10)

Unnamed: 0_level_0,Unnamed: 1_level_0,tree_id
spc_common,health,Unnamed: 2_level_1
American elm,Fair,3
American elm,Good,4
American elm,Poor,2
American linden,Fair,4
American linden,Good,30
American linden,Poor,5
Amur maple,Fair,3
Amur maple,Good,3
Atlantic white cedar,Good,1
Callery pear,Fair,7


In [9]:
# Tally trees for every (steward level, health rating) combination by counting
# the non-null tree_id values in each group.
steward_health = (
    df.groupby(by=['steward', 'health'])[['tree_id']]
    .count()
)
# Preview the first ten (steward, health) rows.
steward_health.head(n=10)

Unnamed: 0_level_0,Unnamed: 1_level_0,tree_id
steward,health,Unnamed: 2_level_1
1or2,Fair,50
1or2,Good,234
1or2,Poor,11
3or4,Fair,7
3or4,Good,46
3or4,Poor,1
4orMore,Good,2
,Fair,126
,Good,454
,Poor,42


---

In [None]:
from sodapy import Socrata

# Create a Socrata client for the NYC Open Data domain. The second argument is
# None -- presumably meaning no app token is supplied; TODO confirm against sodapy docs.
client = Socrata('data.cityofnewyork.us', None)
# Request dataset 'nwxe-4ae8' with a row limit of 683,788 -- presumably the size
# of the full dataset, versus the 1,000-row sample loaded earlier; TODO confirm.
results = client.get('nwxe-4ae8', limit=683788)



In [None]:
# Build a DataFrame from the fetched records. `results` is rebound from the raw
# API payload to the frame, so this cell expects the fetch cell to have been
# run just before it.
results = pd.DataFrame.from_records(data=results)
# Report (rows, columns) of the downloaded data.
print(results.shape)

In [None]:
# Release the client now that the data has been copied into a DataFrame.
client.close()