# Activity: Bokeh  


In [None]:
import pandas as pd
import numpy as np
import bokeh.io
import bokeh.plotting
import bokeh.models
from bokeh.transform import factor_cmap, factor_mark
from bokeh.models import (BooleanFilter, CDSView, ColumnDataSource, 
    Range1d, DataTable, TableColumn, FactorRange, BasicTickFormatter)
from bokeh.palettes import all_palettes
from bokeh.layouts import row, column

# Configure Bokeh so that plots passed to bokeh.io.show() render inline in
# the notebook's output cells.
bokeh.io.output_notebook()


# Activity 1: Scatterplot with hoverable points


- Load the `college-scorecard.csv` data 
- Subset to colleges with
- Create a scatterplot with the:
    - x axis: admissions rate (`ADM_RATE`)
    - y axis: median earnings 10 years after college entry (`MD_EARN_WNE_P10`)
- Include a hoverable tooltip with each institution's name and the above two values (admissions rate and median earnings)
    


In [None]:
# Load the College Scorecard extract from the working directory.
# low_memory=False makes pandas read the file in one pass instead of in
# chunks, so each column's dtype is inferred from all of its rows at once.
cs = pd.read_csv(filepath_or_buffer="college-scorecard.csv",
                 low_memory=False)


In [None]:
# Hover tooltip rows as (label, template) pairs. '@COLUMN' is replaced by the
# hovered point's value in that column of the glyph's data source; the
# '{0.00}' suffix formats the admissions rate with two decimal places.
tooltips_map = [
    ('Name', '@INSTNM'),
    ('Admissions rate', '@ADM_RATE{0.00}'), 
    ('Median earnings', '@MD_EARN_WNE_P10')
]

# A single ColumnDataSource wrapping the whole table. It is deliberately
# reused by every glyph that should share selections (see the a3 scatter).
cs_b = bokeh.models.ColumnDataSource(cs)

# Activity 1 figure: admissions rate (x) vs. median earnings (y).
# `width`/`height` are used instead of `plot_width`/`plot_height`: the latter
# were deprecated in Bokeh 2.4 and removed in Bokeh 3.0, while `width`/`height`
# are accepted by older releases as well.
a1 = bokeh.plotting.figure(width = 400, height = 300,
                         x_axis_label = 'University admissions rate',
                         y_axis_label = "Median earnings 10 years post-entry",
                         tools = "hover,pan,zoom_in,box_select",
                         tooltips = tooltips_map)

a1.scatter(x = 'ADM_RATE', y = 'MD_EARN_WNE_P10', 
          source = cs_b)

# Show earnings as plain numbers rather than scientific notation, and pin the
# y axis to a fixed 0-160,000 window.
a1.yaxis.formatter = BasicTickFormatter(use_scientific=False)
a1.y_range = Range1d(0, 160000)

In [None]:
# bokeh.io.show(a1)

# Activity 2: shade by group + create an interactive legend

- Create a categorical variable (loan_category) that uses the PCTFLOAN (percent of students receiving federal student loans) variable and codes it into the following buckets:
    - No federal loans: 0% receiving
    - Low federal loans: >0% & <30%
    - Middle federal loans: >=30% & <60%
    - High federal loans: >=60%
- Recreate the above scatterplot but shade for those 4 categories + create an interactive legend that allows you to select which categories to display on the plot



In [None]:
# Labels for the four federal-loan buckets, in the same order as the
# conditions below. `choices` is reused later when building the plot.
choices = [
    "0% federal loans",
    "Low federal loans",
    "Middle federal loans",
    "High federal loans",
]

# np.select assigns each row the label of the FIRST condition that is True,
# so the order matters: the "< 0.3" test only ever labels rows that were not
# already claimed by "== 0", and "< 0.6" only rows not claimed by "< 0.3".
conditions = [
    cs['PCTFLOAN'].eq(0),
    cs['PCTFLOAN'].lt(0.3),
    cs['PCTFLOAN'].lt(0.6),
    cs['PCTFLOAN'].ge(0.6),
]

# Rows for which no condition holds (for example a missing PCTFLOAN, where
# every comparison is False) receive the default, None.
cs['loan_category'] = np.select(conditions, choices, default = None)


In [None]:
# Activity 2 figure: same axes as the Activity 1 scatterplot, coloured by
# loan_category with a clickable legend.
# `width`/`height` are used instead of `plot_width`/`plot_height`: the latter
# were deprecated in Bokeh 2.4 and removed in Bokeh 3.0, while `width`/`height`
# are accepted by older releases as well.
p = bokeh.plotting.figure(width = 400, height = 300,
                         x_axis_label = 'University admissions rate',
                         y_axis_label = "Median earnings 10 years post-entry",
                         tools = "hover,pan,zoom_in",
                         tooltips = tooltips_map)

# Draw one scatter renderer per category. Each renderer gets its own legend
# entry, which is what lets the legend toggle categories independently.
for one_category in choices:
    # NOTE: despite the name, `df` is a ColumnDataSource holding only the rows
    # of the current category (rows whose loan_category is None are never
    # plotted because None is not in `choices`).
    df = bokeh.models.ColumnDataSource(cs[cs.loan_category == one_category])
    p.scatter(x = 'ADM_RATE', y = 'MD_EARN_WNE_P10', 
          source = df,
          legend_label = one_category,
          # Map the category name to a fixed palette slot so each category
          # keeps the same colour regardless of draw order.
          color = factor_cmap(field_name = 'loan_category', 
                             palette = bokeh.palettes.Spectral4,
                             factors = choices))

p.legend.location = "top_right"
# Clicking a legend entry hides/shows the corresponding renderer.
p.legend.click_policy = "hide"

# Show earnings as plain numbers rather than scientific notation, and pin the
# y axis to a fixed 0-160,000 window.
p.yaxis.formatter = BasicTickFormatter(use_scientific=False)
p.y_range = Range1d(0, 160000)

In [None]:
#bokeh.io.show(p)

# Activity 3: linked charts 

Create side-by-side scatterplots with the ability to select a region of the points:

- First plot: same as in activity one (x axis is the admissions rate versus y axis is median earnings)
- Second plot: x axis is the average SAT score (`SAT_AVG`) and y axis is the median earnings 

In [None]:
# Activity 3, second plot: average SAT score (x) vs. median earnings (y).
# `width`/`height` are used instead of `plot_width`/`plot_height`: the latter
# were deprecated in Bokeh 2.4 and removed in Bokeh 3.0, while `width`/`height`
# are accepted by older releases as well.
a3 = bokeh.plotting.figure(width = 400, height = 300,
                         x_axis_label = 'Average SAT score',
                         y_axis_label = "Median earnings 10 years post-entry",
                         tools = "hover,pan,zoom_in,box_select",
                         tooltips = tooltips_map)

# Uses cs_b, the same ColumnDataSource as the a1 scatter, so a box selection
# made in one plot is reflected in the other.
a3.scatter(x = 'SAT_AVG', y = 'MD_EARN_WNE_P10', 
          source = cs_b)

# Show earnings as plain numbers rather than scientific notation, and pin the
# y axis to the same fixed 0-160,000 window as a1.
a3.yaxis.formatter = BasicTickFormatter(use_scientific=False)
a3.y_range = Range1d(0, 160000)

In [None]:
#bokeh.io.show(row(a1, a3))