In [1]:
import pandas as pd

In [2]:
# Read every free-text column as a string so pandas does not try to infer a
# dtype for them; the remaining columns keep their inferred numeric dtypes.
dtypes_list = dict.fromkeys(
    [
        'qLegalStatus',
        'qSpecies',
        'qAddress',
        'qSiteInfo',
        'qCaretaker',
        'PlantDate',
        'PlotSize',
    ],
    'str',
)

# Both snapshots share the same column layout, so one dtype mapping serves both.
trees_filtered = pd.read_csv(
    'static/Street_Tree_List-2022-01-30_FILTERED.csv',
    dtype=dtypes_list,
)
trees_raw = pd.read_csv(
    'static/Street_Tree_List-2022-01-30_RAW.csv',
    dtype=dtypes_list,
)

In [3]:
# Preview the first five rows (the default for head()) of the filtered data.
trees_filtered.head()

Unnamed: 0,TreeID,qLegalStatus,qSpecies,qAddress,SiteOrder,qSiteInfo,qCaretaker,PlantDate,DBH,PlotSize,Latitude,Longitude
0,200311,DPW Maintained,Liquidambar styraciflua 'Rotundiloba' :: Round...,300 Spear St,9.0,Sidewalk: Curb side : Yard,Private,,5,,37.789193,-122.390006
1,10425,DPW Maintained,Schinus terebinthifolius :: Brazilian Pepper,1419 Haight St,2.0,Sidewalk: Curb side : Cutout,DPW,,90,3x3,37.770094,-122.445693
2,43163,DPW Maintained,Acacia melanoxylon :: Blackwood Acacia,222 Madison St,1.0,Sidewalk: Curb side : Cutout,Private,7/11/00 0:00,8,,37.726036,-122.42269
3,188921,DPW Maintained,Callistemon citrinus :: Lemon Bottlebrush,2001 25th St,16.0,Sidewalk: Property side : Yard,Private,,6,Width 8ft,37.751955,-122.399482
4,643,DPW Maintained,Ficus microcarpa :: Chinese Banyan,3224 24th St,1.0,Sidewalk: Curb side : Cutout,Private,,18,3x3,37.752398,-122.416772


In [4]:
# Check the dtypes: the columns listed in dtypes_list should show as object.
trees_filtered.dtypes

TreeID            int64
qLegalStatus     object
qSpecies         object
qAddress         object
SiteOrder       float64
qSiteInfo        object
qCaretaker       object
PlantDate        object
DBH               int64
PlotSize         object
Latitude        float64
Longitude       float64
dtype: object

In [5]:
# Headline numbers for the filtered dataset.
print(f'total trees: {len(trees_filtered)}')

# Trees per species, most common first (reused by the next cell).
species_counts = trees_filtered['qSpecies'].value_counts()

singleton_species = species_counts[species_counts == 1]
print(f'number of species with only 1 tree: {len(singleton_species)}')

distinct_species = trees_filtered['qSpecies'].nunique()
print(f'total number of species: {distinct_species}')

total trees: 37118
number of species with only 1 tree: 77
total number of species: 419


In [6]:
# The 15 most common species (value_counts already sorts descending).
species_counts.head(15)

Platanus x hispanica :: Sycamore: London Plane                               2633
Metrosideros excelsa :: New Zealand Xmas Tree                                1937
Lophostemon confertus :: Brisbane Box                                        1727
Pittosporum undulatum :: Victorian Box                                       1669
Tristaniopsis laurina :: Swamp Myrtle                                        1577
Ficus microcarpa nitida 'Green Gem' :: Indian Laurel Fig Tree 'Green Gem'    1425
Magnolia grandiflora :: Southern Magnolia                                    1377
Prunus cerasifera :: Cherry Plum                                             1306
Arbutus 'Marina' :: Hybrid Strawberry Tree                                   1063
Prunus serrulata 'Kwanzan' :: Kwanzan Flowering Cherry                        841
Acacia melanoxylon :: Blackwood Acacia                                        820
Olea europaea :: Olive Tree                                                   789
Corymbia ficifol

Create the CSV file that will be used for the visualization.
- Keep TreeID, Latitude, and Longitude
- Process qSpecies to only include the common name

In [7]:
# Keep only the columns the visualization needs; copy() so later edits to
# trees_subset never touch trees_filtered.
trees_subset = trees_filtered.loc[
    :, ['TreeID', 'qSpecies', 'Latitude', 'Longitude']
].copy()

In [8]:
# Preview the first five rows of the reduced frame.
trees_subset.head()

Unnamed: 0,TreeID,qSpecies,Latitude,Longitude
0,200311,Liquidambar styraciflua 'Rotundiloba' :: Round...,37.789193,-122.390006
1,10425,Schinus terebinthifolius :: Brazilian Pepper,37.770094,-122.445693
2,43163,Acacia melanoxylon :: Blackwood Acacia,37.726036,-122.42269
3,188921,Callistemon citrinus :: Lemon Bottlebrush,37.751955,-122.399482
4,643,Ficus microcarpa :: Chinese Banyan,37.752398,-122.416772


In [9]:
# Switch to snake_case column names for the exported file.
trees_subset = trees_subset.rename(
    columns={
        'TreeID': 'tree_id',
        'qSpecies': 'species',
        'Latitude': 'latitude',
        'Longitude': 'longitude',
    }
)
trees_subset.head()

Unnamed: 0,tree_id,species,latitude,longitude
0,200311,Liquidambar styraciflua 'Rotundiloba' :: Round...,37.789193,-122.390006
1,10425,Schinus terebinthifolius :: Brazilian Pepper,37.770094,-122.445693
2,43163,Acacia melanoxylon :: Blackwood Acacia,37.726036,-122.42269
3,188921,Callistemon citrinus :: Lemon Bottlebrush,37.751955,-122.399482
4,643,Ficus microcarpa :: Chinese Banyan,37.752398,-122.416772


In [10]:
# 'species' currently holds "<scientific name> :: <common name>"; keep only the
# common name.
#   * n=1 splits on the first '::' only, so a '::' inside the common name
#     cannot truncate it.
#   * .str.strip() removes the padding around the separator. Without it every
#     name keeps a leading space (' Brazilian Pepper'); the HTML table preview
#     hides that, but the exported CSV would contain it.
# Rows with nothing after '::' still come out as the empty string ''.
# NOTE: this overwrites 'species' in place, so re-running this cell on its own
# will generally fail (there is no '::' left to split on) -- re-run from the
# cell that creates trees_subset instead.
trees_subset['species'] = (
    trees_subset['species']
    .str.split('::', n=1, expand=True)[1]
    .str.strip()
)
trees_subset.head(5)

Unnamed: 0,tree_id,species,latitude,longitude
0,200311,Roundleaf sweetgum,37.789193,-122.390006
1,10425,Brazilian Pepper,37.770094,-122.445693
2,43163,Blackwood Acacia,37.726036,-122.42269
3,188921,Lemon Bottlebrush,37.751955,-122.399482
4,643,Chinese Banyan,37.752398,-122.416772


In [11]:
# Rows whose common name is the empty string have no known species.
unknown_species_total = int((trees_subset['species'] == '').sum())
print(f'{unknown_species_total} trees of unknown species')

377 trees of unknown species


Replace empty species values with "Unknown". (We want to include these data points in the visualization, but not use them in the dominant-species calculation.)

In [12]:
# Label trees whose common name is exactly the empty string as 'Unknown'.
# replace() matches whole values, so no other species names are touched.
trees_subset['species'] = trees_subset['species'].replace('', 'Unknown')

# Spot-check a few of the relabelled rows.
trees_subset.loc[trees_subset['species'].eq('Unknown')].head()

Unnamed: 0,tree_id,species,latitude,longitude
60,142726,Unknown,37.804452,-122.415768
156,7073,Unknown,37.773152,-122.445264
480,141550,Unknown,37.777344,-122.450421
534,17683,Unknown,37.77987,-122.435296
759,10366,Unknown,37.770755,-122.441632


In [14]:
# Write the visualization data; index = False leaves the DataFrame index out of
# the file so it contains only tree_id, species, latitude and longitude.
trees_subset.to_csv('static/trees_vis_data.csv', index = False)