In [1]:
# Parameterization
#
# Notebooks often need to be executed with variables and values injected from outside the notebook. These might be service credentials, specific
# endpoints, or variations of the notebook's inputs (e.g. if a notebook is running a hyperparameter grid search, it may be
# executed many times in parallel with only the external variables changing). Today, there are two primary ways that notebooks are parameterized: via a new tool
# (like Papermill) or with code generation directly into the notebook (e.g. regex replacement against the fields
# below).
#
# Today: values are just hard-coded and require manual changes.

# Hard-coded value; changing it means editing this cell by hand.
epochs = 200
# Hard-coded URL of a CSV file.
data_source = "http://data.contoso.com/blob/important_data.csv"
# Left as an empty string in this cell; no credential value is set here.
postgresql_credentials = ""

In [2]:
# Lack of Environment Description
# 
# Import hygiene is usually poor. Most commonly, people import several packages at the top of a file. They are unlikely to include specific versions
# and may use structures which are hard to introspect (e.g. 'from foo import bar as qaz'). Moreover, the libraries may be installed outside the notebook itself, via
# inline bash commands or via the command line (Jupyter notebooks execute inside the command-line environment - so if packages were installed there, the notebook will
# run normally). This will often lead to mismatched environments when the notebook is deployed to another environment or containerized. Because
# deploying complex pipelines takes so long, it could be 10 minutes or more before anyone notices that something is wrong.

# Requires that the packages are already installed (no versions are specified)
import numpy
import matplotlib


# Inline bash (bad): installs the package into whatever environment the shell resolves, which may differ from the one the running kernel uses
# (the `%pip` magic targets the kernel's environment instead). No version is pinned, so the release that gets installed depends on when this cell runs.
!pip install tensorflow

# 
# Includes local bash
# No exception handling between cells
# Cannot be executed headlessly
# No caching for common functions
# No parallel execution
# Missed opportunity to run step statelessly
# High Mem
# High GPU
# Out of order execution (to prevent blocking)
# No retry automation for external service
# No parameterization


Collecting tensorflow
  Downloading tensorflow-2.5.0-cp38-cp38-manylinux2010_x86_64.whl (454.4 MB)
[K     |████████████████████████████████| 454.4 MB 50 kB/s 
[?25hCollecting keras-nightly~=2.5.0.dev
  Downloading keras_nightly-2.5.0.dev2021032900-py2.py3-none-any.whl (1.2 MB)
[K     |████████████████████████████████| 1.2 MB 67.2 MB/s 
[?25hCollecting gast==0.4.0
  Downloading gast-0.4.0-py3-none-any.whl (9.8 kB)
Collecting astunparse~=1.6.3
  Downloading astunparse-1.6.3-py2.py3-none-any.whl (12 kB)
Collecting h5py~=3.1.0
  Downloading h5py-3.1.0-cp38-cp38-manylinux1_x86_64.whl (4.4 MB)
[K     |████████████████████████████████| 4.4 MB 72.0 MB/s 
[?25hCollecting termcolor~=1.1.0
  Downloading termcolor-1.1.0.tar.gz (3.9 kB)
Collecting google-pasta~=0.2
  Downloading google_pasta-0.2.0-py3-none-any.whl (57 kB)
[K     |████████████████████████████████| 57 kB 2.6 MB/s 
[?25hCollecting typing-extensions~=3.7.4
  Downloading typing_extensions-3.7.4.3-py3-none-any.whl (22 kB)
Collect