## Tasks/Questions

- T: plot all the trees graphically, improving on our old Meteorite graphing code

In [10]:
import pandas as pd

import numpy as np
import geopandas
import folium
from folium import plugins
from folium.plugins import HeatMap
from folium.plugins import MarkerCluster
from tqdm import tqdm

## Data Ingestion

In [2]:
# Load the San Francisco street-tree data set.
# low_memory=False makes pandas infer each column's dtype from the whole file
# instead of chunk by chunk, which avoids the mixed-type DtypeWarning.
# NOTE(review): the truncated warning this cell used to emit looks like that
# DtypeWarning -- confirm against a fresh run.
df = pd.read_csv("sf_trees.csv", low_memory=False)

  interactivity=interactivity, compiler=compiler, result=result)


In [3]:
# Column dtypes and non-null counts: a quick way to see which columns have
# missing values.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 192987 entries, 0 to 192986
Data columns (total 12 columns):
tree_id         192987 non-null int64
legal_status    192933 non-null object
species         192987 non-null object
address         191500 non-null object
site_order      191353 non-null float64
site_info       192987 non-null object
caretaker       192987 non-null object
date            68377 non-null object
dbh             151168 non-null float64
plot_size       142974 non-null object
latitude        190155 non-null float64
longitude       190155 non-null float64
dtypes: float64(4), int64(1), object(7)
memory usage: 17.7+ MB


In [4]:
# Summary statistics of the numeric columns; useful for spotting outliers in
# dbh and in the coordinates.
df.describe()

Unnamed: 0,tree_id,site_order,dbh,latitude,longitude
count,192987.0,191353.0,151168.0,190155.0,190155.0
mean,126529.214071,4.579118,9.953767,37.76626,-122.445586
std,79317.040625,12.515736,29.364079,0.249752,0.415291
min,1.0,-50.0,0.0,37.509004,-138.283861
25%,52601.5,1.0,3.0,37.74032,-122.454299
50%,120862.0,2.0,7.0,37.760244,-122.431402
75%,202607.5,4.0,12.0,37.779639,-122.412955
max,261546.0,501.0,9999.0,47.270219,-122.366622


In [5]:
# Preview the first few rows to see what the raw records look like.
df.head()

Unnamed: 0,tree_id,legal_status,species,address,site_order,site_info,caretaker,date,dbh,plot_size,latitude,longitude
0,53719,Permitted Site,Tree(s) ::,2963 Webster St,1.0,Sidewalk: Curb side : Cutout,Private,1955-09-19,,,37.797869,-122.434054
1,30313,Permitted Site,Tree(s) ::,501 Arkansas St,3.0,Sidewalk: Curb side : Cutout,Private,1955-10-20,,,37.759838,-122.398119
2,30312,Permitted Site,Tree(s) ::,501 Arkansas St,2.0,Sidewalk: Curb side : Cutout,Private,1955-10-20,,,37.759838,-122.398119
3,30314,DPW Maintained,Pittosporum undulatum :: Victorian Box,501 Arkansas St,1.0,Sidewalk: Curb side : Cutout,Private,1955-10-20,16.0,,37.759772,-122.398109
4,30315,Permitted Site,Acacia melanoxylon :: Blackwood Acacia,1190 Sacramento St,5.0,Sidewalk: Curb side : Cutout,Private,1955-10-24,,,37.79265,-122.412449


## Geospatial

In [6]:
# Keep only the trees that have both a latitude and a longitude (a row missing
# either one cannot be placed on a map); every other column is retained.
# The frame is then indexed by tree_id so each row keeps the data set's own id.
geo_df = (
    df
    .dropna(subset=["latitude", "longitude"])
    .set_index("tree_id")
)

In [7]:
# Display the cleaned frame, now indexed by tree_id (the notebook shows a
# truncated view of the first and last rows).
geo_df

Unnamed: 0_level_0,legal_status,species,address,site_order,site_info,caretaker,date,dbh,plot_size,latitude,longitude
tree_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
53719,Permitted Site,Tree(s) ::,2963 Webster St,1.0,Sidewalk: Curb side : Cutout,Private,1955-09-19,,,37.797869,-122.434054
30313,Permitted Site,Tree(s) ::,501 Arkansas St,3.0,Sidewalk: Curb side : Cutout,Private,1955-10-20,,,37.759838,-122.398119
30312,Permitted Site,Tree(s) ::,501 Arkansas St,2.0,Sidewalk: Curb side : Cutout,Private,1955-10-20,,,37.759838,-122.398119
30314,DPW Maintained,Pittosporum undulatum :: Victorian Box,501 Arkansas St,1.0,Sidewalk: Curb side : Cutout,Private,1955-10-20,16.0,,37.759772,-122.398109
30315,Permitted Site,Acacia melanoxylon :: Blackwood Acacia,1190 Sacramento St,5.0,Sidewalk: Curb side : Cutout,Private,1955-10-24,,,37.792650,-122.412449
...,...,...,...,...,...,...,...,...,...,...,...
115063,DPW Maintained,::,70 Coral Rd,3.0,Sidewalk: Curb side : Cutout,DPW,,,,37.753839,-122.399069
234951,DPW Maintained,Pittosporum undulatum :: Victorian Box,4724 25th St,1.0,Sidewalk: Curb side : Cutout,Private,,8.0,Width 2ft,37.749267,-122.442046
17192,DPW Maintained,Magnolia grandiflora :: Southern Magnolia,2235X Octavia St,1.0,Median : Cutout,DPW,,12.0,20,37.792795,-122.427894
228173,DPW Maintained,Lophostemon confertus :: Brisbane Box,790 Florida St,2.0,Sidewalk: Curb side : Cutout,Private,,2.0,Width 3ft,37.759348,-122.410957


In [13]:
# Register `tqdm` with `pandas`, which makes the progress-bar variants of the
# pandas methods (e.g. `DataFrame.progress_apply`) available.
# (can use tqdm_gui, optional kwargs, etc.)
tqdm.pandas()

In [14]:
def visualize_geo_spatial_data(df, latitude_column_name, longitude_column_name, file_name_to_save_to="markers_map.html"):
    """Plot every row of ``df`` as a clustered marker on a folium map.

    Each row becomes one ``folium.Marker`` inside a ``MarkerCluster``. The
    marker's tooltip is the row's index label and its popup is the whole row
    rendered as an HTML table.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per point to plot.
    latitude_column_name, longitude_column_name : str
        Names of the columns in ``df`` that hold the coordinates.
    file_name_to_save_to : str, default "markers_map.html"
        Path the finished map is written to as HTML.

    Returns
    -------
    folium.Map
        The map, with its viewport fitted to the plotted markers.
    """
    our_map = folium.Map(tiles="CartoDB dark_matter")
    our_map_cluster = MarkerCluster().add_to(our_map)

    # A plain loop rather than ``DataFrame.progress_apply``: the markers are
    # added purely for their side effect, so there is no result worth
    # collecting, and the function no longer depends on ``tqdm.pandas()``
    # having been called beforehand.
    for _, row in tqdm(df.iterrows(), total=len(df)):
        # ``radius`` is intentionally not passed: it is a CircleMarker option
        # and has no meaning for a plain Marker.
        folium.Marker(
            location=[row[latitude_column_name], row[longitude_column_name]],
            tooltip=row.name,
            popup=row.to_frame().to_html(),
        ).add_to(our_map_cluster)

    # Fit the viewport BEFORE saving so that the HTML file on disk opens
    # zoomed to the data, just like the map object returned to the caller.
    # With no rows there are no bounds to fit, so the default view is kept.
    if not df.empty:
        our_map.fit_bounds(our_map.get_bounds())

    our_map.save(file_name_to_save_to)
    return our_map

In [None]:
# Build the clustered marker map for every geolocated tree. As a side effect
# this writes the default "markers_map.html". The recorded run below took
# roughly 4.5 minutes for 190,155 rows.
visualize_geo_spatial_data(geo_df, "latitude", "longitude")

100%|██████████| 190155/190155 [04:22<00:00, 724.33it/s]
