In [1]:
#read in our libraries
import requests

import pandas as pd
import json

In [2]:
#address of the NYC Parks tree-map feed we are going to download
#(the file name says it holds the eco-benefits data, in json format)
my_url = "https://www.nycgovparks.org/tree-map-feeds/eco-benefits.json"

In [3]:
#make our get request to our url
#this is a big file and may take a while
#timeout=(connect, read) in seconds: the read limit applies to each wait for
#more data, not to the whole download, so a slow-but-alive transfer still
#finishes while a stalled server no longer hangs the notebook forever
r = requests.get(my_url, timeout=(10, 300))
#stop right here on a 4xx/5xx answer instead of feeding an error page
#to the json parser a couple of cells further down
r.raise_for_status()

In [4]:
#take the content of the response we got to our request
#r.content is the raw body of the reply (bytes, not yet parsed)
#in this case, that body is text in the json format
response = r.content

In [5]:
#use Python's built-in json library
#json.loads parses json text into ordinary Python objects;
#the top level of this feed comes back as a Python dictionary
my_dict = json.loads(response)

In [6]:
#just look at the top-level keys of the dict (think of them as column headings)
#there are only two of them: 'meta' and 'data'
my_dict.keys()

[u'meta', u'data']

In [7]:
#The "meta" key doesn't contain much:
#just a copyright notice and the date the feed was last built
my_dict['meta']

{u'copyright': u'Copyright - NYC Parks',
 u'lastBuildDate': u'2017-12-13 05:12:01'}

In [8]:
#the bulk of our data is in the 'data' key of the dictionary
#keep a handle on just that part under its own name
my_trees = my_dict['data']

In [9]:
#pandas can build a dataframe straight from this structure, too!
#the keys turn into the column headings
#and the values fill in the rows underneath
#orient='columns' is the default; it is written out so the key -> column mapping is visible
cleanup = pd.DataFrame.from_dict(my_trees, orient='columns')

In [10]:
#let's just see the columns in our dataframe
#(.columns is an attribute, not a method, so there are no parentheses)
cleanup.columns

Index([u'airPollutantsRemovedInPounds', u'airPollutantsRemovedValueInDollars',
       u'co2ReducedInPounds', u'co2ReducedValueInDollars',
       u'energyConservedInKwH', u'energyConservedValueInDollars',
       u'stormwaterInterceptedInGallons',
       u'stormwaterInterceptedValueInDollars', u'treeId'],
      dtype='object')

In [11]:
#this tells us more about how many values are missing in each column
#info() prints the number of rows, then each column's non-null count and dtype;
#a non-null count lower than the number of rows means that column has gaps
cleanup.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 713101 entries, 0 to 713100
Data columns (total 9 columns):
airPollutantsRemovedInPounds           694743 non-null object
airPollutantsRemovedValueInDollars     713101 non-null object
co2ReducedInPounds                     694743 non-null object
co2ReducedValueInDollars               713101 non-null object
energyConservedInKwH                   694743 non-null object
energyConservedValueInDollars          713101 non-null object
stormwaterInterceptedInGallons         694743 non-null object
stormwaterInterceptedValueInDollars    713101 non-null object
treeId                                 713101 non-null int64
dtypes: int64(1), object(8)
memory usage: 49.0+ MB


In [12]:
#take 2 mins each
#what's the total dollar value of the CO2 reduced by these trees?
#how many gallons of stormwater were intercepted?
#what's the average air pollutants removed in pounds?

In [13]:
#the gallons arrive as strings, so the column has to become numeric before it can be totalled
cleanup['stormwaterInterceptedInGallons'] = pd.to_numeric(
    cleanup['stormwaterInterceptedInGallons']
)
#add up the whole column (sum() leaves out the missing values)
cleanup['stormwaterInterceptedInGallons'].sum()

1138310796.8256257

In [14]:
#now you try the other two

In [15]:
#same recipe for the dollar value of the CO2 reduced: make it numeric, then total it
cleanup['co2ReducedValueInDollars'] = pd.to_numeric(
    cleanup['co2ReducedValueInDollars']
)
cleanup['co2ReducedValueInDollars'].sum()

4363826.5399999991

In [16]:
#the mean (aka average) function works the same way as sum
#convert the pounds column to numbers first
cleanup['airPollutantsRemovedInPounds'] = pd.to_numeric(
    cleanup['airPollutantsRemovedInPounds']
)
#then average it (mean() also leaves out the missing values)
cleanup['airPollutantsRemovedInPounds'].mean()

1.9167700853437903

In [17]:
#peek at the first five rows of the dataframe to check how it looks
#after the conversions in the cells above
cleanup.head()

Unnamed: 0,airPollutantsRemovedInPounds,airPollutantsRemovedValueInDollars,co2ReducedInPounds,co2ReducedValueInDollars,energyConservedInKwH,energyConservedValueInDollars,stormwaterInterceptedInGallons,stormwaterInterceptedValueInDollars,treeId
0,0.766546,4.01,321.5,1.07,484.73993509,61.2,615.520881,6.09,2134468
1,3.943624,20.62,6841.3,22.85,2244.94094152,283.41,4924.167049,48.75,2405572
2,1.049399,5.49,354.8,1.19,623.13596225,78.67,948.377667,9.39,3366678
3,1.133836,5.93,411.8,1.38,533.84216917,67.39,808.366479,8.0,3727278
4,4.949813,25.88,5554.2,18.55,2279.52229978,287.78,4593.951984,45.48,228115


In [18]:
#the json has now been turned into a table that is much easier to analyze,
#so write the dataframe out as a csv file with pandas and keep it for later
#index=True is the pandas default, spelled out here: the row numbers
#get written as the first (unnamed) column of the file
cleanup.to_csv('trees.csv', index=True)