In [2]:
#@markdown # CRUX History API
#@markdown Welcome to the CrUX History API. This Colab allows you to get time
#@markdown series data from the Chrome User Experience Report (CrUX)
#@markdown for specific origins and URLs via the CrUX History API, and also helps
#@markdown you get started making your own API requests. You can easily look under
#@markdown the hood by clicking "Show code" at the end of this section.
#@markdown
#@markdown Please also see
#@markdown * [the reference documentation](https://developer.chrome.com/docs/crux/history-api/)
#@markdown * [our blog post](https://developer.chrome.com/blog/chrome-ux-report-history-api/)
#@markdown * [the canonical version of this Colab](https://colab.research.google.com/github/GoogleChrome/CrUX/blob/main/colab/crux-history-api.ipynb)
#@markdown * [the canonical source of this Colab](https://github.com/GoogleChrome/CrUX/blob/main/colab/crux-history-api.ipynb)
#@markdown * [our discussion forum for praise, complaints, and questions](https://groups.google.com/a/chromium.org/g/chrome-ux-report)
#@markdown
#@markdown ## API key
#@markdown To run this Colab, you'll need an API key. You can create one in the
#@markdown [Credentials page](https://console.developers.google.com/apis/credentials)
#@markdown and provision it for Chrome UX Report API usage. If you're making
#@markdown your own requests, this key needs to be included in every request
#@markdown with the `?key=` parameter, but in this Colab you can simply
#@markdown enter it as CRUX_KEY in the next line. After that, press the play
#@markdown button and the magic begins (it's OK to use the default Colab
#@markdown runtime).
# Do not commit a real API key: leave this empty in shared copies and paste
# your own key into the form field before running the cell.
CRUX_KEY = "" #@param {type: "string"}

#@markdown Specify origin or url (not both).
ORIGIN = "https://www.royaltonresorts.com/" #@param {type: "string"}
URL = "" #@param {type: "string"}
#@markdown Specify a form factor (phone, desktop, tablet) or all.
FORM_FACTOR = "ALL" #@param ["ALL", "PHONE", "DESKTOP", "TABLET"]
#@markdown Specify the metrics; if left blank, we'll report all metrics.
METRICS = "" #@param {type:"string"}

#@markdown What to include in the rendered output below?
EMIT_GRAPHS = True #@param {type:"boolean"}
EMIT_TABULAR_OUTPUT = False #@param {type:"boolean"}
EMIT_REQUEST_RESPONSE = False #@param {type:"boolean"}


import html
import json
from typing import Any, Tuple, Dict, List

import altair as alt
import pandas
import requests
from google.colab import data_table
from IPython import display


def get_crux_api_response_from_form() -> Tuple[Dict[str, Any], Dict[str, Any]]:
  """Based on the form data, makes a CrUX history request.

  Reads the module-level form values CRUX_KEY, URL, ORIGIN, FORM_FACTOR and
  METRICS. METRICS is treated as a comma-separated list of metric names and
  is sent as a JSON list; blank entries are dropped, and an empty list is
  omitted from the request entirely.

  Returns:
    A tuple of the request body (as a dict) and the parsed JSON response.
  """
  endpoint = (
      'https://chromeuxreport.googleapis.com/v1/records:queryHistoryRecord')
  json_request = {}
  if URL:
    json_request['url'] = URL
  if ORIGIN:
    json_request['origin'] = ORIGIN
  if FORM_FACTOR != 'ALL':
    json_request['form_factor'] = FORM_FACTOR
  # The form delivers a single string, but the request field is a list.
  metric_names = [name.strip() for name in METRICS.split(',') if name.strip()]
  if metric_names:
    json_request['metrics'] = metric_names
  # `params` URL-encodes the key; `json=` serializes the body and sets the
  # JSON content type on the request.
  response = requests.post(
      endpoint, params={'key': CRUX_KEY}, json=json_request)
  return json_request, response.json()


def short_name(metric: str) -> str:
  """Abbreviates a snake case metric name to its upper-case initials.

  The name is split on underscores, the word 'experimental' is skipped, and
  the first letter of every remaining word is upper-cased and concatenated,
  e.g. first_contentful_paint becomes FCP. The abbreviation 'ITNP' is
  special-cased to 'INP'.
  """
  initials = []
  for word in metric.split('_'):
    if word == 'experimental':
      continue
    initials.append(word[0].upper())
  abbreviation = ''.join(initials)
  if abbreviation == 'ITNP':
    return 'INP'
  return abbreviation


def metrics_in(response: dict) -> List[str]:
  """Lists the metric names of a CrUX API history response, sorted."""
  return sorted(response['record']['metrics'].keys())


def thresholds_by_metric(response: dict) -> Dict[str, Tuple[float, float]]:
  """Maps each histogram metric to its (low, high) threshold pair.

  Only metrics carrying a 'histogramTimeseries' entry are included. The pair
  is taken from the second histogram bin: its 'start' is the low threshold
  (boundary between 'good' and 'needs improvement') and its 'end' is the high
  threshold (boundary between 'needs improvement' and 'poor'). Both values
  are converted to float.
  """

  def middle_bin_bounds(histogram) -> Tuple[float, float]:
    middle_bin = histogram[1]
    return float(middle_bin['start']), float(middle_bin['end'])

  return {
      name: middle_bin_bounds(entry['histogramTimeseries'])
      for name, entry in response['record']['metrics'].items()
      if 'histogramTimeseries' in entry
  }


def dataframe_for(metric, response) -> pandas.DataFrame:
  """Builds a per-collection-period DataFrame for one metric.

  The frame always has 'first_date' and 'last_date' columns built from the
  record's collection periods. Depending on what the metric's data contains,
  it also gets one column per labeled fraction, a 'p75' column, and the
  'good' / 'needs improvement' / 'poor' density columns taken from the first
  three histogram bins.
  """

  def to_timestamp(date):
    return pandas.Timestamp(date['year'], date['month'], date['day'])

  record = response['record']
  periods = record['collectionPeriods']
  columns = {}
  columns['first_date'] = [to_timestamp(p['firstDate']) for p in periods]
  columns['last_date'] = [to_timestamp(p['lastDate']) for p in periods]

  series = record['metrics'][metric]
  if 'fractionTimeseries' in series:
    columns.update({label: entry['fractions']
                    for label, entry in series['fractionTimeseries'].items()})
  if 'percentilesTimeseries' in series:
    columns['p75'] = series['percentilesTimeseries']['p75s']
  if 'histogramTimeseries' in series:
    bins = series['histogramTimeseries']
    for position, label in enumerate(('good', 'needs improvement', 'poor')):
      columns[label] = bins[position]['densities']
  return pandas.DataFrame(columns)


def url_normalization_details(response) -> str:
  """Describes the URL normalization reported in the response, if any.

  Returns an empty string when the response has no 'urlNormalizationDetails'
  entry; otherwise a sentence naming the original and the normalized URL.
  """
  if 'urlNormalizationDetails' in response:
    details = response['urlNormalizationDetails']
    template = 'URL was normalized from "{originalUrl}" to "{normalizedUrl}".'
    return template.format(**details)
  return ''


def display_header(label: str):
  """Renders `label` as an <h2> heading in the cell output."""
  heading = display.HTML(f'<h2>{label}</h2>')
  display.display(heading)


def make_p75_chart(stats: pandas.DataFrame, metric: str,
                   lo_threshold: float, hi_threshold: float) -> alt.Chart:
  """Creates a P75 chart, displaying p75 data points per collection period.

  The x axis is the last_date of each collection period and the y axis is the
  p75 value of the metric, drawn as a line. Behind the line, shaded bands mark
  the value ranges: green from 0 to lo_threshold, orange from lo_threshold to
  hi_threshold, and - only when the largest p75 value exceeds hi_threshold -
  red from hi_threshold up to 1.1 times that largest value.
  """
  label = short_name(metric)

  def shaded_band(bottom, top, color):
    # A single translucent rectangle spanning [bottom, top] on the y axis.
    bounds = pandas.DataFrame({'y': [bottom], 'y2': [top]})
    return alt.Chart(bounds).mark_rect(color=color, opacity=0.2).encode(
        alt.Y('y', axis=alt.Axis(title=None)), y2='y2')

  p75_stats = stats[['last_date', 'p75']].melt(
      'last_date', var_name='percentile', value_name=label)

  background = (shaded_band(0, lo_threshold, 'green')
                + shaded_band(lo_threshold, hi_threshold, 'orange'))
  highest_p75 = float(pandas.to_numeric(stats['p75']).max())
  if highest_p75 > hi_threshold:  # Only then is a 'poor' band needed.
    background += shaded_band(hi_threshold, highest_p75 * 1.1, 'red')

  p75_line = alt.Chart(p75_stats).mark_line().encode(
      alt.X('last_date:T', axis=alt.Axis(title=None)),
      y=label + ':Q', color='percentile:N')
  return background + p75_line


def make_tribin_chart(stats: pandas.DataFrame, metric: str,
                      lo_threshold: float, hi_threshold : float) -> alt.Chart:
  """A Tribin chart shows histogram bin density values in a stacked bar chart.

  The y axis is the percentage of page loads that fall into a user experience
  category ("good", "needs improvement", "poor"); the x axis is the last_date
  of each collection period. Densities that are not numeric (the original
  code targeted the string 'NaN' for missing data) are mapped to 0.0 so that
  they render as missing bars.

  Args:
    stats: frame with 'last_date', 'good', 'needs improvement' and 'poor'
      columns.
    metric: snake case metric name; its short name labels the legend.
    lo_threshold: boundary between 'good' and 'needs improvement'.
    hi_threshold: boundary between 'needs improvement' and 'poor'.

  Returns:
    The stacked bar chart.
  """
  name = short_name(metric)
  good_label = f'good ({name} <= {lo_threshold})'
  ni_label = ('needs improvement '
              f'({lo_threshold} < {name} <= {hi_threshold})')
  poor_label = f'poor ({name} > {hi_threshold})'

  tribin_stats = stats[['last_date',
                        'good',
                        'needs improvement',
                        'poor']].melt(
                            'last_date', var_name=name, value_name='pct')
  # Relabel only the category column (string -> string).
  tribin_stats[name] = tribin_stats[name].replace(
      {'good': good_label,
       'needs improvement': ni_label,
       'poor': poor_label})
  # Convert densities explicitly instead of relying on DataFrame.replace to
  # turn an object column into floats; anything non-numeric becomes 0.0.
  tribin_stats['pct'] = pandas.to_numeric(
      tribin_stats['pct'], errors='coerce').fillna(0.0)

  tribin_chart = alt.Chart(tribin_stats).mark_bar().encode(
      alt.X('last_date:T', axis=alt.Axis(title=None)),
      alt.Y('sum(pct)', axis=alt.Axis(title=None, format='%')),
      color=alt.Color(
          name, scale=alt.Scale(
              domain=[poor_label, ni_label, good_label],
              range=['red','orange', 'green'])),
      order=alt.Order(name, sort='ascending')).configure_legend(
          labelLimit=300)
  return tribin_chart


def make_fractions_chart(stats: pandas.DataFrame, metric: str) -> alt.Chart:
  """A chart showing fraction timeseries in a stacked bar chart.

  Every column of `stats` except 'first_date' and 'last_date' is treated as
  one labeled fraction series. The x axis is the last_date of each collection
  period, the y axis the stacked fractions. Fractions that are not numeric
  (the original code targeted the string 'NaN' for missing data) are mapped
  to 0.0 so that they render as missing bars.

  Args:
    stats: frame with a 'last_date' column plus one column per fraction label.
    metric: snake case metric name; its short name labels the legend.

  Returns:
    The stacked bar chart.
  """
  name = short_name(metric)
  fraction_stats = stats[[column for column in stats.columns
                          if column != 'first_date']].melt(
                              'last_date', var_name=name, value_name='pct')
  # Convert fractions explicitly instead of relying on DataFrame.replace to
  # turn an object column into floats; anything non-numeric becomes 0.0.
  fraction_stats['pct'] = pandas.to_numeric(
      fraction_stats['pct'], errors='coerce').fillna(0.0)

  fractions_chart = alt.Chart(fraction_stats).mark_bar().encode(
      alt.X('last_date:T', axis=alt.Axis(title=None)),
      alt.Y('sum(pct)', axis=alt.Axis(title=None, format='%')),
      color=alt.Color(name),
      order=alt.Order(name, sort='ascending')).configure_legend(
          labelLimit=300)
  return fractions_chart


def display_metric_stats(metric: str, stats: pandas.DataFrame,
                         lo_threshold:float, hi_threshold:float):
  """For a specific metric, displays the requested stats (graphs, tabular).

  Shows a header and a web.dev link for the metric, then - depending on the
  EMIT_GRAPHS and EMIT_TABULAR_OUTPUT form settings - the p75 and tribin
  charts and/or the data table.

  Args:
    metric: snake case metric name.
    stats: the metric's timeseries frame, as produced by dataframe_for. It is
      used as passed in, not recomputed from a global response.
    lo_threshold: boundary between 'good' and 'needs improvement'.
    hi_threshold: boundary between 'needs improvement' and 'poor'.
  """
  name = short_name(metric)
  display_header(f'{name} ({metric})')
  web_dev_path = f'web.dev/{name.lower()}/'
  display.display(display.HTML(
      'Learn more at '
      f'<a href="https://{web_dev_path}" target="_blank">{web_dev_path}</a>.'))

  if EMIT_GRAPHS:
    display.display(make_p75_chart(stats, metric, lo_threshold, hi_threshold))
    display.display(make_tribin_chart(stats, metric, lo_threshold, hi_threshold))

  if EMIT_TABULAR_OUTPUT:
    display.display(data_table.DataTable(stats.round(4),
                                         min_width='100', include_index=False))


def display_fraction_metric_stats(metric: str, stats: pandas.DataFrame):
  """For a specific metric, displays the requested stats (graphs, tabular).

  This routine is specialized for fraction metrics, which don't have thresholds,
  tribins (histograms), or p75 timeseries. Instead, these metrics have labeled
  fractions for each timeseries entry, which add up to 1.0 (100%).

  Args:
    metric: snake case metric name, shown verbatim as the header.
    stats: the metric's timeseries frame, as produced by dataframe_for. It is
      used as passed in, not recomputed from a global response.
  """
  display_header(f'{metric}')
  if EMIT_GRAPHS:
    display.display(make_fractions_chart(stats, metric))
  if EMIT_TABULAR_OUTPUT:
    display.display(data_table.DataTable(stats.round(4),
                                         min_width='100', include_index=False))


# Query the API once, then render the header, the per-metric output, and
# (optionally) the raw request and response.
request, response = get_crux_api_response_from_form()
if 'record' not in response:
  display_header('No record found in response!')
  # Force the raw dump below so that whatever the API returned is visible.
  EMIT_REQUEST_RESPONSE = True
else:
  key = response['record']['key']
  # URLs and origins may contain '&' or '<'; escape them before they are
  # embedded in HTML.
  identifier = (
      'url <code>{}</code>'.format(html.escape(key['url'])) if 'url' in key
      else 'origin <code>{}</code>'.format(html.escape(key['origin'])))
  form_factor = ' on {formFactor}'.format(**key) if 'formFactor' in key else ''
  display_header(f'Displaying CrUX data for {identifier}{form_factor}')
  display.display(display.HTML(url_normalization_details(response)))
  thresholds = thresholds_by_metric(response)

  if EMIT_GRAPHS:
    display.display(display.HTML("""
      For each metric with histograms and percentiles, we display two graphs:
      <ul>
        <li>The percentile graph shows the p75 values for the metric over time.
          The shaded areas indicate good (light green),
          needs improvement (light orange), and poor (light red).
        <li>The tribin graph shows the percentages of page loads with
           a good, needs improvement, or poor user experience over time.
        </ul>
      If the metric has labeled fractions, then we display these in a single
      stacked bar chart.
      In all cases, each point in time in the graph on the x axis
      represents a 28 day collection period ending in that date.
    """))

  # Metrics with thresholds get the p75/tribin treatment; all others are
  # rendered as fraction metrics.
  for metric in metrics_in(response):
    if metric in thresholds:
      lo_threshold, hi_threshold = thresholds[metric]
      display_metric_stats(metric, dataframe_for(metric, response),
                           lo_threshold, hi_threshold)
    else:
      display_fraction_metric_stats(metric, dataframe_for(metric, response))

if EMIT_REQUEST_RESPONSE:
  # The JSON is shown as text inside <pre>, so escape HTML metacharacters.
  display_header('CrUX History API Request')
  json_str = html.escape(json.dumps(request), quote=False)
  display.display(display.HTML(f'<pre>{json_str}</pre>'))

  display_header('CrUX History API Response')
  json_str = html.escape(json.dumps(response, indent=2), quote=False)
  display.display(display.HTML(f'<pre>{json_str}</pre>'))

  value_name='pct').replace(


  value_name='pct').replace(
