<a href="https://colab.research.google.com/github/statmike/bq-release-timeline/blob/main/notebook/BQ_Feature_Timeline.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Convert the release notes for BigQuery into an interactive timeline

TODO:
- [ ] Better Hover Tips - clickable links?


## Crawl Release Pages For Data

In [None]:
from urllib.request import urlopen
from bs4 import BeautifulSoup
import pandas as pd
import datetime as datetime

In [None]:
def crawler(product,url):
  """Scrape one release-notes page into a DataFrame with one row per release entry.

  The page is expected to contain a ``<section class="releases">`` in which each
  ``<h2>`` holds a date such as "March 27, 2023" and each following ``<div>``
  carries the release type as its first CSS class.

  Args:
    product: Short product key stored verbatim in the ``product`` column.
    url: Address of the release-notes page to download.

  Returns:
    DataFrame with columns ``date`` (datetime64), ``release_type``,
    ``description`` and ``product``.

  Raises:
    ValueError: If the page has no ``<section class="releases">`` or a date
      heading does not match the "%B %d, %Y" format.
  """
  header = ["date","release_type","description"]
  releasemap = {"release-fixed":"Fix","release-issue":"Issue","release-changed":"Change","release-feature":"Feature"}

  # Parse inside the context manager so the HTTP response is always closed.
  with urlopen(url) as html:
    pot = BeautifulSoup(html,'lxml')

  bowl = pot.find('section',attrs={"class":"releases"})
  if bowl is None:
    raise ValueError('no <section class="releases"> found at {}'.format(url))

  releases = []
  keep_date = None  # text of the most recent <h2>; None until the first heading is seen
  for spoon in bowl.find_all(['h2','div']):
    if spoon.name == 'h2':
      keep_date = spoon.text.strip()
      continue
    classes = spoon.get('class')
    first_p = spoon.find_next('p')
    # Skip divs that cannot form a complete row: no date heading seen yet,
    # no class to read the release type from, or no paragraph after the div.
    if keep_date is None or not classes or first_p is None:
      continue
    releases.append([keep_date,classes[0],first_p.text])

  df = pd.DataFrame(releases,columns=header)
  df['date'] = pd.to_datetime(df['date'], format="%B %d, %Y")
  df['product'] = product
  # Assign the result back: an in-place replace on the selected column is a
  # chained operation that does not update `df` under pandas Copy-on-Write.
  df['release_type'] = df['release_type'].replace(releasemap)
  return df

In [None]:
# Short product key -> display name used for the plot legend and hover text.
pdict = {
  "bq": "BigQuery",
  "bqml": "BigQuery ML",
  "bqbi": "BigQuery BI Engine",
  "bqdt": "BigQuery Data Transfer Service",
}

# Short product key -> release-notes page to crawl (crawled in this order).
release_urls = {
  "bq": 'https://cloud.google.com/bigquery/docs/release-notes',
  "bqml": 'https://cloud.google.com/bigquery-ml/docs/release-notes',
  "bqbi": 'https://cloud.google.com/bi-engine/docs/release-notes',
  "bqdt": 'https://cloud.google.com/bigquery-transfer/docs/release-notes',
}
crawled = {key: crawler(key, page) for key, page in release_urls.items()}

# Keep one named frame per product for the cells that follow.
bq, bqml, bqbi, bqdt = (crawled[key] for key in ("bq", "bqml", "bqbi", "bqdt"))

In [None]:
# Stack the four product frames under a fresh 0..n-1 index, then order the
# rows newest-first (sorting keeps the index labels assigned by concat).
df = (
  pd.concat([bq, bqml, bqbi, bqdt], ignore_index=True)
    .sort_values('date', ascending=False)
)

## Use Bokeh To Plot Timeline of Release Data

In [None]:
from bokeh.plotting import figure, show, output_file, save
from bokeh.io import show, output_notebook
from bokeh.layouts import column
from bokeh.models import CDSView, GroupFilter, ColumnDataSource, RangeTool, HoverTool


# Configure Bokeh to display plots inline in the notebook. This only needs to
# be called once, before the first show().
output_notebook()

In [None]:
# Fill color per product key; looked up for every row below.
colormap = {"bq":"#4285F4", "bqml":"#EA4335", "bqbi":"#FBBC04", "bqdt":"#34A853"}
# Single data source shared by the main plot and the range-selector plot.
source = ColumnDataSource(data=dict(date=df['date'], release=df['release_type'], tip=df['description'],
                                    product=df['product'], productname=[pdict[x] for x in df['product']],
                                    colors=[colormap[x] for x in df['product']]))
# Release types become the categorical y axis.
ycats = df.release_type.unique()

# Hover text: product display name, then the release description.
tooltips = [("","@productname"),("","@tip")]

# Initial x window: from the (initial_window+1)-th newest release to the newest.
# Positional .iloc on explicitly sorted dates is used because `df` keeps its
# pre-sort index labels, so label lookups such as df.date[100] would pick rows
# by concat position and raise KeyError on frames with fewer than 101 rows.
initial_window = 100
dates_desc = df['date'].sort_values(ascending=False)
x_start = dates_desc.iloc[min(initial_window, len(dates_desc) - 1)]
x_end = dates_desc.iloc[0]

# main plot
# NOTE(review): plot_height/plot_width and CDSView(source=..., filters=[...]) are
# Bokeh 2.x spellings; Bokeh 3 uses height/width and CDSView(filter=...) —
# confirm the installed Bokeh version before upgrading.
p = figure(title="Big Query Release Notes",
           plot_height=300, plot_width=800, tools="xpan", toolbar_location=None,
           x_axis_type="datetime", x_axis_location="above", tooltips=tooltips,
           background_fill_color="#F8F9FA", y_range=ycats, x_range=(x_start, x_end))
p.yaxis.axis_label = 'Release Type'

#loop over products and display glyphs (circles), use CDSView to create view of product subset from source
# One renderer per product gives each product its own legend entry to toggle.
for prod in df['product'].unique():
  view = CDSView(source=source, filters=[GroupFilter(column_name='product', group=prod)])
  p.circle('date','release',source=source, view=view, line_color=None, size=10, fill_color='colors', legend_label=pdict[prod])

p.legend.location='top_left'
p.legend.click_policy="hide"

# selection tool
# Overview of all releases; shares the main plot's y range, and the RangeTool
# overlay is bound to the main plot's x range.
select = figure(title="Drag the middle and edges of the selection box to change the range above",
                plot_height=130, plot_width=800, y_range=p.y_range,
                x_axis_type="datetime", y_axis_type=None,
                tools="", toolbar_location=None, background_fill_color="#F8F9FA")
range_tool = RangeTool(x_range=p.x_range)
range_tool.overlay.fill_color = "#5F6368"
range_tool.overlay.fill_alpha = 0.2
select.circle('date', 'release', source=source, line_color=None, fill_color='colors')
select.ygrid.grid_line_color = None
select.add_tools(range_tool)
select.toolbar.active_multi = range_tool

# Stack the main plot above the selector and render the pair.
full = column(p,select)

show(full)

In [None]:
# Set "bqplot.html" as the Bokeh output file used by the save() call that follows.
output_file("bqplot.html")

In [None]:
# Save the combined layout (`full`: main plot stacked above the range selector).
save(full)

'/content/bqplot.html'

Download this file.  Then, copy/paste the html to where you want to embed it.

---
I used these instructions to create a Cloud Function to run the code.  A Cloud Scheduler job invokes a Pub/Sub trigger for the Cloud Function every evening.

- https://cloud.google.com/scheduler/docs/tut-pub-sub
