# How Are Street Trees Distributed in Neighbourhoods?

Question 2: What is the most common type of tree the city has been planting? How are trees distributed in neighbourhoods?

```{note}
Please click "Click to show" to see the Python code.
```

In [1]:
import pandas as pd
import numpy as np
import altair as alt
from datetime import date
# Lift Altair's default cap on the number of rows a chart dataset may contain.
alt.data_transformers.disable_max_rows()
# Switch off pandas' chained-assignment (SettingWithCopy) warning for the whole session.
# NOTE(review): this hides the warning globally rather than fixing the assignments that raise it.
pd.options.mode.chained_assignment = None #suppress warning

In [2]:
# use small dataset (5000 rows)
# CSV hosted in the UBC-MDS/data_viz_wrangled repository on GitHub; fetched over the network.
url = 'https://raw.githubusercontent.com/UBC-MDS/data_viz_wrangled/main/data/Trees_data_sets/small_vancouver_trees.csv'

# make sure date_planted has a data type "datetime"
# NOTE(review): parse_dates=[5] selects the column by position, not by name --
# presumably index 5 is date_planted; confirm against the CSV header.
data = pd.read_csv(url, parse_dates=[5])

In [3]:
# FUNCTIONS
# calculate ages from date_planted
def calculate_age(date_planted):
    """Return the number of whole years between ``date_planted`` and today.

    ``date_planted`` must expose ``year``, ``month`` and ``day`` attributes.
    The result is the difference in calendar years, reduced by one when this
    year's anniversary of ``date_planted`` has not been reached yet.
    """
    now = date.today()
    years = now.year - date_planted.year
    # Anniversary still ahead in the current year: the last year is incomplete.
    if (now.month, now.day) < (date_planted.month, date_planted.day):
        years -= 1
    return years

# get height range description from height_range_id
def get_height_range_desc(height_range):
    """Translate a ``height_range`` id into its label in feet.

    Ids 0 through 9 map to consecutive ten-foot bands ('0-10 ft' up to
    '90-100 ft'), id 10 maps to '100+ ft', and any other value yields an
    empty string.
    """
    labels = (
        '0-10 ft',
        '10-20 ft',
        '20-30 ft',
        '30-40 ft',
        '40-50 ft',
        '50-60 ft',
        '60-70 ft',
        '70-80 ft',
        '80-90 ft',
        '90-100 ft',
        '100+ ft',
    )
    # Match by equality rather than by indexing, so any value that compares
    # equal to an id (e.g. 2.0) is accepted and everything else falls through.
    for range_id, label in enumerate(labels):
        if height_range == range_id:
            return label
    return ''

In [4]:
# remove columns that will not be used for visualization
unused_columns = [
    'Unnamed: 0', 'std_street', 'assigned', 'street_side_name', 'civic_number',
    'plant_area', 'curb', 'tree_id', 'cultivar_name', 'root_barrier',
]
df = data.drop(columns=unused_columns)

# rename columns to the shorter names used by the charts
df = df.rename(columns={
    'on_street': 'street',
    'species_name': 'species',
    'neighbourhood_name': 'neighbourhood',
    'genus_name': 'genus',
    'height_range_id': 'height_range',
})

# convert data type
df['diameter'] = df['diameter'].astype(int)
df['on_street_block'] = df['on_street_block'].astype(str)

# add calculated fields from other columns
allergen_genera = [
    'ALNUS', 'FAGUS', 'BETULA', 'TYPHA', 'CASTANEA', 'ULMUS',
    'CORYLUS', 'TSUGA', 'LARIX', 'ACER', 'QUERCUS', 'POPULUS',
]
# isin() already yields a boolean column, so no np.where(..., True, False) wrapper is needed.
df['allergen'] = df['genus'].isin(allergen_genera)
df['year'] = pd.DatetimeIndex(df['date_planted']).year
# Rows without a planting date get the sentinel year 0. The result is assigned
# back to the column: calling fillna(inplace=True) on df['year'] acts on an
# intermediate object and does not update df under pandas Copy-on-Write.
df['year'] = df['year'].fillna(0).astype(int)
df['height_range_desc'] = df['height_range'].apply(get_height_range_desc)
df['st_block'] = df['on_street_block'] + ' ' + df['street']

# Trees with a known planting date. .copy() makes year_df an independent frame,
# so adding columns to it is well defined and never touches df.
year_df = df[df['year'] != 0].copy()
year_df['age'] = year_df['date_planted'].apply(calculate_age)
# height_range_desc is carried over from df, so it is not recomputed here.

In [5]:
# Bar chart: number of trees planted in each year.
year_plot = (
    alt.Chart(year_df)
    .mark_bar()
    .encode(
        x=alt.X('year:O', title='Year'),
        y=alt.Y('count():Q', title='Number of trees planted'),
    )
    .properties(height=200, width=375, title='Number of planted trees by year')
)

# Genus names ordered by how often they occur; used to order the heatmap's x axis.
sort_y = year_df['genus'].value_counts().index.tolist()

# Heatmap: tree count for every neighbourhood / genus pair.
trends_plot = (
    alt.Chart(year_df)
    .mark_rect()
    .encode(
        x=alt.X('genus:N', title='Genus name', sort=sort_y),
        y=alt.Y('neighbourhood:N', title='Neighbourhood'),
        color=alt.Color('count():Q', title='Number of trees planted'),
    )
    .properties(width=375, title='Planted trees by neighbourhood and tree type')
)

# Click selection on the year bars: selected bars stay opaque, the rest are
# dimmed, and the heatmap is filtered to the selected year(s).
year_selection = alt.selection_multi(fields=['year'])
bar_opacity = alt.condition(year_selection, alt.value(1), alt.value(0.2))
year_bars = year_plot.encode(opacity=bar_opacity).add_selection(year_selection)
trends_plot = trends_plot.transform_filter(year_selection)

# Dropdown selection on neighbourhood; the None option is labelled 'All'.
neighbourhoods = sorted(year_df['neighbourhood'].unique())
dropdown = alt.binding_select(
    name='Neighbourhood: ',
    options=[None, *neighbourhoods],
    labels=['All', *neighbourhoods],
)
neighbourhood_selection = alt.selection_single(fields=['neighbourhood'], bind=dropdown)

# Both charts are filtered by the dropdown.
year_bars = (
    year_bars
    .add_selection(neighbourhood_selection)
    .transform_filter(neighbourhood_selection)
)
trends_plot = (
    trends_plot
    .add_selection(neighbourhood_selection)
    .transform_filter(neighbourhood_selection)
)

# Side-by-side layout with a shared figure title.
figure_title = alt.TitleParams(
    'Figure 2',
    subtitle='ACER (Maple) is the most common genus for Vancouver street trees. Oh, Canada!',
    subtitleColor='steelblue',
    anchor='middle',
)
plot_2 = (year_bars | trends_plot).properties(title=figure_title)
plot_2

These are interactive charts. You can filter data by year(s) and/or neighbourhood.

- To filter years, click on a year bar in the left chart. You can select a single year or multiple years.
- To filter neighbourhoods, select a neighbourhood in the dropdown menu.

It looks like ACER (maple trees) is the most common type/genus, and the city has been planting them a lot starting from 1993. Well, this makes sense to me. What country do we live in?

On the technical side, this might not be the best way to answer the questions. I find it a bit difficult to use.