# Pulling SF Tree Data
----

In [48]:
# Endpoint for requesting SF tree data: https://data.sfgov.org/resource/tkzw-k3nq.json
# Sample URL format for including an app token: https://data.seattle.gov/resource/3k2p-39jp.json?$$app_token=APP_TOKEN

In [49]:
# Dependencies
import json
import requests
import pandas as pd
import pprint as pp
from config import api_key
from config import app_token

In [50]:
#Set the base URL
baseURL = "https://data.sfgov.org/resource/tkzw-k3nq.json"

In [51]:
# Pull data
URL = baseURL + "?$limit=193000&$offset=0" + "&$$app_token=" + app_token
response = requests.get(URL).json()
print("Pull Complete")

Pull Complete


In [52]:
len(response)

192502

In [53]:
# Convert JSON into DF
response_df = pd.DataFrame(response)
response_df.head()

Unnamed: 0,treeid,qlegalstatus,qspecies,qaddress,siteorder,qsiteinfo,planttype,qcaretaker,dbh,plotsize,xcoord,ycoord,latitude,longitude,location,plantdate,permitnotes,qcareassistant
0,196949,DPW Maintained,Pyrus calleryana :: Ornamental Pear,9 Young Ct,1,Sidewalk: Curb side : Cutout,Tree,Private,16,Width 3ft,6016267.25355,2096084.36716,37.7363616200932,-122.38620200123,"{'type': 'Point', 'coordinates': [-122.3862020...",,,
1,203422,DPW Maintained,Acer rubrum :: Red Maple,9 Yerba Buena Ave,1,Sidewalk: Curb side : Yard,Tree,Private,2,Width 4ft,5993354.86667,2097295.22775,37.738391538344,-122.465506999949,"{'type': 'Point', 'coordinates': [-122.4655069...",,,
2,115737,Significant Tree,Acer rubrum :: Red Maple,9x Yerba Buena Ave,1,Sidewalk: Curb side : Cutout,Tree,Private,3,,5993642.27748,2097056.19499,37.7377517864641,-122.46449593033,"{'type': 'Point', 'coordinates': [-122.4644959...",2016-02-24T00:00:00.000,Permit Number 776557,
3,16472,DPW Maintained,Eucalyptus nicholii :: Nichol's Willow-Leafed ...,9X Newhall St,6,Sidewalk: Curb side : Cutout,Tree,DPW,18,3X3,6018697.2048701,2097076.1109207,37.7392189485182,-122.377869364283,"{'type': 'Point', 'coordinates': [-122.3778693...",,,
4,16471,Permitted Site,Eucalyptus nicholii :: Nichol's Willow-Leafed ...,9X Newhall St,5,Sidewalk: Curb side : Cutout,Tree,Private,3,3X3,6018697.2048701,2097076.1109207,37.7392189485182,-122.377869364283,"{'type': 'Point', 'coordinates': [-122.3778693...",,,


In [54]:
# Create a new DF with just the relevant fields:
trees_df = {}
trees_df = pd.DataFrame(trees_df)
trees_df[['Tree ID', 'Species', 'Latitude', 'Longitude', 'Date Planted']] = response_df[['treeid', 'qspecies', 'latitude', 'longitude', 'plantdate']]
trees_df.head()

Unnamed: 0,Tree ID,Species,Latitude,Longitude,Date Planted
0,196949,Pyrus calleryana :: Ornamental Pear,37.7363616200932,-122.38620200123,
1,203422,Acer rubrum :: Red Maple,37.738391538344,-122.465506999949,
2,115737,Acer rubrum :: Red Maple,37.7377517864641,-122.46449593033,2016-02-24T00:00:00.000
3,16472,Eucalyptus nicholii :: Nichol's Willow-Leafed ...,37.7392189485182,-122.377869364283,
4,16471,Eucalyptus nicholii :: Nichol's Willow-Leafed ...,37.7392189485182,-122.377869364283,


# Data cleansing

In [55]:
# Drop any rows containing "NaN"
treesCleansed = trees_df.dropna(how='any')
treesCleansed

Unnamed: 0,Tree ID,Species,Latitude,Longitude,Date Planted
2,115737,Acer rubrum :: Red Maple,37.7377517864641,-122.46449593033,2016-02-24T00:00:00.000
5,16478,Melaleuca linariifolia :: Flaxleaf Paperbark,37.7392189485182,-122.377869364283,1997-05-16T00:00:00.000
14,102171,Prunus serrulata 'Kwanzan' :: Kwanzan Flowerin...,37.7615584024391,-122.440978567809,2012-12-31T00:00:00.000
15,102172,Prunus serrulata 'Kwanzan' :: Kwanzan Flowerin...,37.7615584024391,-122.440978567809,2012-12-31T00:00:00.000
16,12977,Maytenus boaria :: Mayten,37.7699053099025,-122.436179442539,1990-03-26T00:00:00.000
...,...,...,...,...,...
192195,65996,Tree(s) ::,37.7906402729338,-122.480532017343,1990-07-05T00:00:00.000
192290,84713,Washingtonia robusta :: Mexican Fan Palm,37.7690249659747,-122.437947368595,2007-05-17T00:00:00.000
192317,45386,Lophostemon confertus :: Brisbane Box,37.7928819736725,-122.480907237859,2001-07-06T00:00:00.000
192371,34309,Jacaranda mimosifolia :: Jacaranda,37.7500487659499,-122.402814623369,1997-07-14T00:00:00.000


In [56]:
# Drop any rows with an unknown species
treesCleansed = treesCleansed[treesCleansed.Species != "Tree(s) ::"]
treesCleansed

Unnamed: 0,Tree ID,Species,Latitude,Longitude,Date Planted
2,115737,Acer rubrum :: Red Maple,37.7377517864641,-122.46449593033,2016-02-24T00:00:00.000
5,16478,Melaleuca linariifolia :: Flaxleaf Paperbark,37.7392189485182,-122.377869364283,1997-05-16T00:00:00.000
14,102171,Prunus serrulata 'Kwanzan' :: Kwanzan Flowerin...,37.7615584024391,-122.440978567809,2012-12-31T00:00:00.000
15,102172,Prunus serrulata 'Kwanzan' :: Kwanzan Flowerin...,37.7615584024391,-122.440978567809,2012-12-31T00:00:00.000
16,12977,Maytenus boaria :: Mayten,37.7699053099025,-122.436179442539,1990-03-26T00:00:00.000
...,...,...,...,...,...
192194,34334,Jacaranda mimosifolia :: Jacaranda,37.7500487659499,-122.402814623369,1997-07-14T00:00:00.000
192290,84713,Washingtonia robusta :: Mexican Fan Palm,37.7690249659747,-122.437947368595,2007-05-17T00:00:00.000
192317,45386,Lophostemon confertus :: Brisbane Box,37.7928819736725,-122.480907237859,2001-07-06T00:00:00.000
192371,34309,Jacaranda mimosifolia :: Jacaranda,37.7500487659499,-122.402814623369,1997-07-14T00:00:00.000


In [57]:
#Remove the Time from the end of the Date column
cleanDate = treesCleansed 
cleanDate['Date Planted'] = treesCleansed['Date Planted'].map(lambda x: x.rstrip('T00:00:00.000'))
cleanDate

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until


Unnamed: 0,Tree ID,Species,Latitude,Longitude,Date Planted
2,115737,Acer rubrum :: Red Maple,37.7377517864641,-122.46449593033,2016-02-24
5,16478,Melaleuca linariifolia :: Flaxleaf Paperbark,37.7392189485182,-122.377869364283,1997-05-16
14,102171,Prunus serrulata 'Kwanzan' :: Kwanzan Flowerin...,37.7615584024391,-122.440978567809,2012-12-31
15,102172,Prunus serrulata 'Kwanzan' :: Kwanzan Flowerin...,37.7615584024391,-122.440978567809,2012-12-31
16,12977,Maytenus boaria :: Mayten,37.7699053099025,-122.436179442539,1990-03-26
...,...,...,...,...,...
192194,34334,Jacaranda mimosifolia :: Jacaranda,37.7500487659499,-122.402814623369,1997-07-14
192290,84713,Washingtonia robusta :: Mexican Fan Palm,37.7690249659747,-122.437947368595,2007-05-17
192317,45386,Lophostemon confertus :: Brisbane Box,37.7928819736725,-122.480907237859,2001-07-06
192371,34309,Jacaranda mimosifolia :: Jacaranda,37.7500487659499,-122.402814623369,1997-07-14


In [58]:
# Output the data to CSV
cleanDate.to_csv("Source CSVs/SF_Trees.csv")
print("Output Complete")

Output Complete
