## Facilities and Educational Data for Boston Public Schools
The data comes from https://data.boston.gov/dataset/buildbps-facilities-and-educational-data-for-boston-public-schools,
where the terms are explained in a PDF.

In [1]:
from bokeh.models import ColumnDataSource, Plot, LinearAxis, Grid, Circle, HoverTool, BoxSelectTool,\
    SaveTool, CustomJS, DatetimeAxis, LinearAxis, NumeralTickFormatter
from bokeh.models.widgets.tables import (
    DataTable, TableColumn, IntEditor
)
from bokeh.models.widgets import DataTable, TableColumn, StringFormatter,\
        NumberFormatter, StringEditor, IntEditor, NumberEditor, SelectEditor,\
        CheckboxButtonGroup, CheckboxGroup, MultiSelect, RadioButtonGroup,\
        Select, Slider, Panel, Tabs, TextInput, Paragraph, Div, Button, Dropdown

from bokeh.models.glyphs import Circle
from bokeh.io import curdoc, output_notebook, show as showio, output_file
from bokeh.plotting import show, figure
from bokeh.document import Document
from bokeh.models.layouts import Column, Row
from bokeh.embed import file_html
from bokeh.resources import INLINE
from bokeh.util.browser import view

import copy
import pandas as pd
import numpy as np

In [2]:
# Configure Bokeh so that subsequent show() calls render inline in the notebook.
output_notebook()

In [21]:
# Load the BuildBPS export; the path is relative to the notebook's working directory.
df = pd.read_csv(
    "../data/buildbps.csv",
    delimiter=",",
)

### Scatter plot

In [22]:
# First attempt: plot the gas bill against the water bill straight from the raw frame.
# NOTE: Bokeh reports BAD_COLUMN_NAME for this cell because the CSV headers carry
# surrounding spaces (" BPS_Water_Bill ", " BPS_Gas_Bill "), so the names used
# below do not exist yet.
data_source = ColumnDataSource(data=df)
p = figure(plot_width=400, plot_height=400)
p.scatter(x="BPS_Water_Bill", y="BPS_Gas_Bill", source=data_source)
show(p)

ERROR:bokeh.core.validation.check:E-1001 (BAD_COLUMN_NAME): Glyph refers to nonexistent column name. This could either be due to a misspelling or typo, or due to an expected column being missing. : key "x" value "BPS_Water_Bill" (closest match: " BPS_Water_Bill "), key "y" value "BPS_Gas_Bill" (closest match: " BPS_Gas_Bill ") [renderer: GlyphRenderer(id='1993', ...)]


In [23]:
# Inspect the raw water-bill column under its exact header, which includes the surrounding spaces.
df[' BPS_Water_Bill ']

0             $-   
1        $1,685.00 
2       $20,763.00 
3        $5,227.00 
4        $5,262.00 
           ...     
136     $36,183.00 
137     $37,653.00 
138      $5,394.00 
139      $7,457.00 
140     $11,457.00 
Name:  BPS_Water_Bill , Length: 141, dtype: object

In [24]:
def _currency_to_float(raw):
    """Convert a Series of accounting-style currency strings to floats.

    The substitutions are applied in this order:
      1. ``$-`` (the placeholder used for an empty amount) becomes ``0``;
      2. dollar signs, thousands separators and closing parentheses are removed;
      3. an opening parenthesis becomes a minus sign, so ``(123)`` parses as -123.

    Raw string literals are used for the patterns so that ``\\$`` is not treated
    as an (invalid) Python string escape; the regex patterns themselves are
    the same as before.

    Parameters
    ----------
    raw : pandas.Series
        Column of currency strings such as ``" $1,685.00 "`` or ``" $-   "``.

    Returns
    -------
    pandas.Series
        The same column converted with ``astype(float)``.
    """
    return (
        raw.replace(r'\$-', '0', regex=True)
        .replace(r'[\$,)]', '', regex=True)
        .replace(r'[(]', '-', regex=True)
        .astype(float)
    )


# The raw headers are padded with spaces; store the numeric versions under clean names.
df['BPS_Water_Bill'] = _currency_to_float(df[' BPS_Water_Bill '])
df['BPS_Gas_Bill'] = _currency_to_float(df[' BPS_Gas_Bill '])

In [25]:
data_source = ColumnDataSource(df)
p = figure(plot_width=400, plot_height=400)
p.scatter("BPS_Water_Bill", "BPS_Gas_Bill", source=data_source)
show(p)

## Circles
Let's add a 3rd dimension to the plot

In [26]:
# We can tie the size of the circles to a column of the data table, but we need to normalize it in some way
# size = 

In [27]:
# Convert the accounting-style strings of the raw electric-bill column to floats:
# "$-" (empty amount) -> "0", then strip "$", "," and ")", then "(" -> "-".
# Raw string literals keep "\$" from being read as an invalid Python string escape.
df['BPS_Electric_Bill'] = (
    df[' BPS_Electric_Bill ']
    .replace(r'\$-', '0', regex=True)
    .replace(r'[\$,)]', '', regex=True)
    .replace(r'[(]', '-', regex=True)
    .astype(float)
)
# Rebuild the source so that it also carries the newly added numeric column.
data_source = ColumnDataSource(df)

In [28]:

# Marker sizes: each electric bill as a fraction of the largest one, scaled up to 20.
size = df["BPS_Electric_Bill"].div(df["BPS_Electric_Bill"].max()).mul(20)

# The sizes are passed as a plain array next to the x/y columns, so the
# ColumnDataSource is not used for this figure.
p = figure(plot_width=400, plot_height=400)
p.circle(
    x=df["BPS_Water_Bill"],
    y=df["BPS_Gas_Bill"],
    size=size.to_numpy(),
    color="navy",
    alpha=0.5,
)
show(p)

In [29]:
# Same bubble plot as before, now with a title, axis labels and tick formatting.
# `size` is the array of normalized electric bills computed in the previous cell.
p = figure(plot_width=400, plot_height=400, title="Expenses of the buildings")

p.circle(
    x=df["BPS_Water_Bill"],
    y=df["BPS_Gas_Bill"],
    size=size.to_numpy(),
    color="navy",
    alpha=0.5,
)

# Both axes show dollar amounts, so both use the same currency tick format
# (the x axis previously fell back to the formatter's default, without "$").
p.xaxis.formatter = NumeralTickFormatter(format='$0,0')
p.xaxis.axis_label = "Water Bill"
p.yaxis.formatter = NumeralTickFormatter(format='$0,0')
p.yaxis.axis_label = "Gas Bill"
show(p)

### Colors
Let's add a 4th dimension to the plot with colors

In [30]:
# Marker sizes: each electric bill as a fraction of the largest one, scaled up to 20.
size = df["BPS_Electric_Bill"].div(df["BPS_Electric_Bill"].max()).mul(20)
# Marker colours: red where SMMA_FA_Existing_PV is exactly "No", green for any other value.
colors = df["SMMA_FA_Existing_PV"].map(lambda flag: 'green' if flag != "No" else 'red')

# Sizes and colours are passed directly to the glyph; no ColumnDataSource is involved here.
p = figure(plot_width=400, plot_height=400, title="Expenses of the buildings")
p.circle(
    x=df["BPS_Water_Bill"],
    y=df["BPS_Gas_Bill"],
    size=size.to_numpy(),
    color=colors,
    alpha=0.5,
)

p.xaxis.formatter = NumeralTickFormatter(format='$0,0')
p.xaxis.axis_label = "Water Bill"

p.yaxis.formatter = NumeralTickFormatter(format='$0,0')
p.yaxis.axis_label = "Gas Bill"

show(p)

In [31]:
# Collect every per-building value the glyph needs into a single source, so the
# plot can refer to the columns by name and build its legend from the "label" column.
# (df.SMMA_FA_space_Solar can be substituted for df.SMMA_FA_Existing_PV in the
# "colors" and "label" entries to group by that column instead.)
data_source = ColumnDataSource(data={
    "electricity": df["BPS_Electric_Bill"].div(df["BPS_Electric_Bill"].max()).mul(20),
    "colors": df["SMMA_FA_Existing_PV"].map(
        lambda flag: 'green' if flag != "No" else 'red'
    ),
    "label": df["SMMA_FA_Existing_PV"].map(
        lambda flag: 'With PV' if flag != "No" else 'Without PV'
    ),
    "water": df["BPS_Water_Bill"],
    "gas": df["BPS_Gas_Bill"],
})

# All visual properties are now looked up by column name in the source.
p = figure(plot_width=400, plot_height=400, title="Expenses of the buildings")
p.circle(
    x='water',
    y='gas',
    size='electricity',
    color='colors',
    alpha=0.5,
    legend_group='label',
    source=data_source,
)

p.xaxis.formatter = NumeralTickFormatter(format='$0,0')
p.xaxis.axis_label = "Water Bill"

p.yaxis.formatter = NumeralTickFormatter(format='$0,0')
p.yaxis.axis_label = "Gas Bill"

show(p)

### Add HTML tags

In [32]:
# Text widgets shown next to the plot: a short description and an HTML heading.
# (Fixes the user-facing typo "panles" -> "panels".)
desc = Div(text="Description of the plot: Not many buildings have Photovoltaic panels...")
largetitle = Div(text="<h2> Infographics of School Buildings</h2>")

In [33]:
# Page layout: the heading on top, then the plot and its description side by side.
show(
    Column(
        largetitle,
        Row(p, desc),
    )
)

In [34]:
from bokeh.palettes import Spectral5

# Number of rows with a non-null SMMA_Identifier per sprinkler category,
# computed once and reused for both the category names and the bar heights.
sprinkler_counts = df.groupby('SMMA_FA_FP_Sprinklers')['SMMA_Identifier'].count()
cat = list(sprinkler_counts.index)
values = list(sprinkler_counts)

# One colour per category. Spectral5 holds exactly five colours, so using it
# directly as a data column only lines up when there are exactly five
# categories; cycling through it keeps every column the same length.
bar_colors = [Spectral5[i % len(Spectral5)] for i in range(len(cat))]

b = figure(x_range=cat, plot_height=250, title="Type of fire fighting equipment",
           toolbar_location=None, tools="")

source = ColumnDataSource(data=dict(Equipment=cat, counts=values, color=bar_colors))
b.vbar(x='Equipment', top='counts', color='color', width=0.9, source=source)

# No vertical grid lines between the bars; bars start at zero.
b.xgrid.grid_line_color = None
b.y_range.start = 0

show(b)

### Now some interactivity

In [35]:
# Move the legend of the most recent figure `p` to the top-left corner.
p.legend.location = "top_left"
# Clicking a legend entry toggles the visibility of the glyphs it refers to.
p.legend.click_policy="hide"
show(p)