In [18]:
import os
import ipywidgets as widgets
from ipywidgets import *
from bqplot import *
import numpy as np
import pandas as pd
import qgrid
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib notebook
import warnings
warnings.filterwarnings("ignore")

# 1. Plot a bar graph of the number of docstring errors

To generate the file in `json` format, run the following command in a clone of the pandas `master` branch,
after completing the development environment setup.

`./scripts/validate_docstrings.py --format=json > /path/to/json/pandas_docstring_errors.json`

This script currently supports pandas version >= 0.25.0

In [19]:
# Read the zipped JSON report, keep only the 'errors' row (a column after
# transposing), and explode it so that every row carries a single error entry.
path = '../json/pandas_docstring_errors.json.zip'
df = (
    pd.read_json(path, compression='zip')
    .T
    .filter(items=['errors'])
    .explode('errors')
    .dropna()
    .reset_index()
)
df.head(2)

Unnamed: 0,index,errors
0,pandas.Categorical,"[PR01, Parameters {fastpath} not documented]"
1,pandas.Categorical,"[PR09, Parameter ""dtype"" description should fi..."


## 1) Split the list column 'errors' into separate data columns 'error_code' and 'error_name'

In [20]:
# Spread the items of each 'errors' list over two new columns, then discard
# the original list column together with the former index column.
df[['error_code', 'error_name']] = pd.DataFrame(
    df['errors'].tolist(),
    index=df.index,
)
df = df.drop(columns=['errors', 'index'])
df.head(2)

Unnamed: 0,error_code,error_name
0,PR01,Parameters {fastpath} not documented
1,PR09,"Parameter ""dtype"" description should finish wi..."


## 2) Make a table to count the number of error_codes

In [21]:
# Count how often each error code occurs and expose the result as a
# two-column table.
df_code = (
    df['error_code']
    .value_counts()
    .reset_index()
)
df_code.columns = ['error_code', 'counts']
df_code.head(2)

Unnamed: 0,error_code,counts
0,GL08,517
1,PR09,459


# 2. Interactive controls 

In [22]:
# Three independent Output widgets: one for the bar plot, one for the count
# table and one for the error details grid.
plot_output, count_output, error_output = (widgets.Output() for _ in range(3))

In [23]:
ALL = 'ALL'


def unique_sorted_values_plus_ALL(array):
    """Return the distinct values of ``array`` in ascending order, preceded by ``ALL``.

    ``array`` must provide a ``unique()`` method whose result has ``tolist()``
    (for example a pandas Series). A new list is returned.
    """
    distinct_values = array.unique().tolist()
    return [ALL, *sorted(distinct_values)]

# 1.1) Dropdown whose options are ALL followed by the sorted error codes
dropdown_code = widgets.Dropdown(
    options=unique_sorted_values_plus_ALL(df_code['error_code']),
)

# 1.2) qgrid widget over the full error table: columns are not editable,
#      the toolbar is hidden and 'maxVisibleRows' is set to 10
col_opts = dict(editable=False)
qgrid.set_grid_option('maxVisibleRows', 10)
qgrid_widget = qgrid.show_grid(
    df,
    show_toolbar=False,
    column_options=col_opts,
)
qgrid_widget.layout = widgets.Layout(width='800px')


In [24]:
# 2) use widget output to update tables/plots
def data_filtering(code):
    """Refresh the plot, count table and error grid for the selected error code.

    Parameters
    ----------
    code : str
        Either ``ALL`` (no filtering) or a value to match against the
        ``error_code`` column of ``df_code`` and ``df``.

    Notes
    -----
    Nothing is returned. The three Output widgets ``count_output``,
    ``plot_output`` and ``error_output`` are cleared and then refilled.
    """
    for output in (count_output, plot_output, error_output):
        output.clear_output()

    # 1) select the rows to show: everything for ALL, otherwise one code
    if code == ALL:
        count_filter, error_filter = df_code, df
    else:
        count_filter = df_code[df_code.error_code == code]
        error_filter = df[df.error_code == code]

    # 2.1) bar plot of the counts
    with plot_output:
        sns.set(style='whitegrid')
        # Draw on a freshly created axes and hand it to seaborn explicitly,
        # so the bars never land on whatever figure pyplot considers current
        # (e.g. one left over from a previous selection).
        _, ax = plt.subplots()
        sns.barplot(x='error_code', y='counts', data=count_filter, ax=ax)
        ax.tick_params(axis='x', labelrotation=45)
        ax.set_xlabel('')
        ax.set_ylabel('Counts')
        plt.show()

    # 2.2) count table
    with count_output:
        display(count_filter)

    # 2.3) error details in a qgrid table
    with error_output:
        display(qgrid.show_grid(error_filter,
                                column_options=col_opts,
                                show_toolbar=False))

In [25]:
# 3) capture widget output
def dropdown_code_eventhandler(change):
    """Re-run the filtering with the value newly selected in the dropdown."""
    selected_code = change.new
    data_filtering(selected_code)


def qgrid_widget_eventhandler(change):
    """Re-run the filtering with the new value reported by the qgrid widget."""
    reported_value = change.new
    data_filtering(reported_value)


# Each handler is registered for changes of its widget's 'value' trait.
dropdown_code.observe(dropdown_code_eventhandler, names='value')

# NOTE(review): nothing here shows the qgrid widget exposing a 'value' trait;
# confirm this observer can actually fire.
qgrid_widget.observe(qgrid_widget_eventhandler, names='value')

In [26]:
# 4) Add widget in dashboard layout
input_widgets = widgets.HBox(children=[dropdown_code])

# 5) Create a container for the output: one titled tab per Output widget
tab = widgets.Tab(children=[plot_output, count_output, error_output])
for tab_index, tab_title in enumerate(('Bar Plot', 'Error code Count', 'Error details')):
    tab.set_title(tab_index, tab_title)

In [28]:
# 6) Stack a dashboard: the input row on top, the tabbed outputs below
dashboard = widgets.VBox(children=[input_widgets, tab])
display(dashboard)

# Select an error code from the dropdown then check the three tabs

VBox(children=(HBox(children=(Dropdown(options=('ALL', 'EX02', 'EX03', 'GL01', 'GL02', 'GL08', 'PR01', 'PR02',…

## 3. Deploy the notebook on Binder

For example, see this notebook deployed on Binder from my personal repo:

https://mybinder.org/v2/gh/dujm/test-pddocs-demo/master?filepath=docstring_error_interactive.ipynb

## 4. Turn the notebook into an app using voila
#### In your terminal, run
 * Create and activate a conda environment:  
`conda env create`  
`conda activate pandas-docs`

 * Render a notebook as an interactive notebook using voila
   * 1) Default: render a notebook as a standalone application without source code  
`voila voila/notebooks/basics.ipynb`  

   * 2) If you want to show the source code  
`voila voila/notebooks/basics.ipynb --strip_sources=False`
 * Open http://localhost:8866/