In [1]:
# import usual things
import pandas as pd
import bqplot
import numpy as np
import ipywidgets
import matplotlib.pyplot as plt

In [2]:
data_url = 'https://raw.githubusercontent.com/UIUC-iSchool-DataViz/is445_bcubcg_fall2022/main/data/licenses_fall2022.csv'

In [3]:
licenses = pd.read_csv(data_url, parse_dates = ['Original Issue Date', 'Effective Date', 'Expiration Date', 'LastModifiedDate', 'Discipline Start Date', 'Discipline End Date'])
licenses.columns

Index(['_id', 'License Type', 'Description', 'License Number',
       'License Status', 'Business', 'Title', 'First Name', 'Middle',
       'Last Name', 'Prefix', 'Suffix', 'Business Name', 'BusinessDBA',
       'Original Issue Date', 'Effective Date', 'Expiration Date', 'City',
       'State', 'Zip', 'County', 'Specialty/Qualifier',
       'Controlled Substance Schedule',
       'Delegated Controlled Substance Schedule', 'Ever Disciplined',
       'LastModifiedDate', 'Case Number', 'Action', 'Discipline Start Date',
       'Discipline End Date', 'Discipline Reason'],
      dtype='object')

In [4]:
licenses['Expiration Date'] = pd.to_datetime(licenses['Expiration Date'],errors='coerce',format='%m/%d/%Y')

In [5]:
licenses['Expiration-Effective'] = licenses['Expiration Date']-licenses['Effective Date']

In [6]:
licenses['day']=licenses['Expiration-Effective'].map(lambda x: x/np.timedelta64(1,'D'))

In [7]:
licenses['Original Issue Date'] = pd.to_datetime(licenses['Original Issue Date'],errors='coerce',format='%m/%d/%Y')
licenses['Original_Issue_Year'] = licenses['Original Issue Date'].dt.year

In [8]:
def generate_pivot_table_from_type_status(licenses, takeLog = True):
    pitable = pd.pivot_table(licenses, values='day', index=['License Status'], columns=['License Type'], 
                      aggfunc = np.mean, fill_value=0)
    license_status = pitable.index.values.astype('str')
    license_type = pitable.columns.values.astype('str')
    if takeLog:
        pitable[pitable <= 0] = np.nan # set zeros to NaNs
        # then take log
        pitable = np.log10(pitable)
    return pitable, license_status, license_type

In [9]:
pitable, license_status, license_type = generate_pivot_table_from_type_status(licenses)

In [10]:
region_mask = ((licenses['License Type'] == license_type[2]) & (licenses['License Status'] == license_status[0]))

In [11]:
year_group = licenses[region_mask].groupby("Original_Issue_Year")["day"].median()

In [12]:
year_group

Original_Issue_Year
1993.0    749.0
2005.0    794.0
2012.0    687.0
2017.0    730.0
2018.0    649.0
Name: day, dtype: float64

In [13]:
years = year_group.index
values = year_group.values

In [23]:
dur, dur_edges = np.histogram(years, weights=values, bins=5)

In [24]:
dur, dur_edges

(array([ 749.,    0.,  794.,  687., 1379.]),
 array([1993., 1998., 2003., 2008., 2013., 2018.]))

In [16]:
dur_centers = (dur_edges[:-1]+dur_edges[1:])/2 # fancy way to get bin centers for our histogram

In [17]:
x2_scl = bqplot.LinearScale()
y2_scl = bqplot.LinearScale()
x2_axs = bqplot.Axis(label='Year', scale=x2_scl)
y2_axs = bqplot.Axis(label='median number of days', scale=y2_scl, orientation='vertical', side='left')

In [18]:
region_mask = ((licenses['License Type'] == license_type[2]) & (licenses['License Status'] == license_status[0]))
year_group = licenses[region_mask].groupby("Original_Issue_Year")["day"].median()

In [19]:
hist_dur = bqplot.Bars(x=dur_centers, y=dur, scales={'x':x2_scl, 'y':y2_scl})

In [20]:
fig_right = bqplot.Figure(marks=[hist_dur], axes=[x2_axs, y2_axs])
fig_right

Figure(axes=[Axis(label='Year', scale=LinearScale()), Axis(label='median number of days', orientation='vertica…

In [21]:
year_group

Original_Issue_Year
1993.0    749.0
2005.0    794.0
2012.0    687.0
2017.0    730.0
2018.0    649.0
Name: day, dtype: float64

In [22]:
myLabel = ipywidgets.Label()
def on_selected(change):
    if len(change['owner'].selected) == 1: # only selecting one bin at a time
        i,j = change['owner'].selected[0]
        v = pitable.iloc[i,j]
        myLabel.value = 'mean day in log10 '+str(v)
        if str(v) == 'nan':     # if there is no data after mask, just shows the original bar plot
            year_group = licenses.groupby("Original_Issue_Year")["day"].median()
        else:
            region_mask = ((licenses['License Type'] == license_type[j]) & (licenses['License Status'] == license_status[i]))
            year_group = licenses[region_mask].groupby("Original_Issue_Year")["day"].median()
        years = year_group.index
        values = year_group.values
        hist_median.x = years
        hist_median.y = values


col_sc = bqplot.ColorScale(scheme="Blues", min=np.nanmin(pitable), max=np.nanmax(pitable))

x_sc = bqplot.OrdinalScale() # for categorical data
y_sc = bqplot.OrdinalScale()

c_ax = bqplot.ColorAxis(scale=col_sc, orientation='vertical', side='right')

x_ax = bqplot.Axis(scale=x_sc, label='License Type')
y_ax = bqplot.Axis(scale=y_sc, label='License Status', orientation='vertical')


heat_map = bqplot.GridHeatMap(color=pitable, 
                              row = license_status,
                              column=license_type,
                              scales={'color':col_sc, 'row':y_sc, 'column':x_sc},
                             interactions = {'click':'select'},
                             selected_style={'fill':'red'})
heat_map.observe(on_selected,'selected')

fig_left = bqplot.Figure(marks=[heat_map], axes=[c_ax, y_ax, x_ax])

fig_left.layout.min_width='500px'
fig_right.layout.min_width='500px'
figures = ipywidgets.HBox([fig_left,fig_right])
myDashboard = ipywidgets.VBox([myLabel,figures])
myDashboard

VBox(children=(Label(value=''), HBox(children=(Figure(axes=[ColorAxis(orientation='vertical', scale=ColorScale…

NameError: name 'hist_median' is not defined

NameError: name 'hist_median' is not defined

NameError: name 'hist_median' is not defined