### Plot arrest predictions by offender race and police agency demographics

In [1]:
import pandas as pd
import zipfile
from bokeh.charts import Bar, Scatter, output_notebook, show, vplot, hplot, output_file
from bokeh.charts.attributes import CatAttr
from bokeh.models import NumeralTickFormatter, HoverTool, Range1d, Span, LabelSet, ColumnDataSource
from bokeh.models.widgets import Panel, Tabs
from bokeh.models.glyphs import Text
from bokeh.plotting import figure
from bokeh.palettes import PuOr5
import statsmodels.api as sm
lowess = sm.nonparametric.lowess
from sklearn.linear_model import LinearRegression



### Read the zipped file

In [7]:
# Open the archive and its CSV member with context managers so both file
# handles are closed as soon as the frame has been read (the original left
# the ZipFile and the member stream open for the life of the kernel).
with zipfile.ZipFile('../data/offenders_2013_caplg_ucr_clean2.csv.zip') as zf:
    with zf.open('offenders_2013_caplg_ucr_clean2.csv') as csv_member:
        df = pd.read_csv(csv_member)

  interactivity=interactivity, compiler=compiler, result=result)


### Which offenders are arrested, compared to their police agencies' racial demographics?

In [None]:
# Names this cell reads that no earlier cell defines; on a fresh kernel
# (Restart & Run All) the original raised NameError on each of them.
# NOTE(review): the values below are inferred from the rest of the notebook --
# confirm they match what was originally intended.
TOOLS = "pan,wheel_zoom,box_zoom,reset,previewsave"  # same tool string the datashader cell assigns
white_color, black_color = PuOr5[0], PuOr5[-1]        # opposite ends of the imported PuOr5 palette
# One tab per column named 'offense_<name>', since the loop below looks up
# df['offense_' + offense].
offenses = [col[len('offense_'):] for col in df.columns if col.startswith('offense_')]


def _fit_lowess(frame):
    """Return the LOWESS fit of 'arrested' on 'w_officers_percent' for `frame`.

    The result is a DataFrame whose column 0 is passed as x and column 1 as y
    to the line glyphs below.
    """
    return pd.DataFrame(lowess(frame['arrested'], frame['w_officers_percent']))


tabs_list = []
for offense in offenses:
    offense_title = offense.replace('_', ' ').title()
    subset = df[df['offense_' + offense] == 1]
    subset_black = subset[subset['black_not_white'] == 1]
    subset_white = subset[subset['black_not_white'] == 0]

    # Nothing to fit or compare when one of the two groups has no rows.
    if subset_white.empty or subset_black.empty:
        continue

    predictions_white = _fit_lowess(subset_white)
    predictions_black = _fit_lowess(subset_black)

    p = figure(tools=TOOLS, title="Probability Arrested by Race and Percent Officers White")
    p.line(x=predictions_white[0], y=predictions_white[1], color=white_color, legend="White")
    p.line(x=predictions_black[0], y=predictions_black[1], color=black_color, legend="Black")
    p.xaxis.axis_label = "Percent Officers White"
    p.yaxis.axis_label = "Probability Arrested (Local Regression)"
    # Pad the 0-1 probability axis slightly so curves at the extremes stay visible.
    p.y_range = Range1d(-0.1, 1.1)
    tabs_list.append(Panel(child=p, title=offense_title))

tabs_object = Tabs(tabs=tabs_list)
show(tabs_object)

### Which offenders are arrested, compared to their police agencies' racial demographics?

#### Use datashader, which can handle many points

In [None]:
import datashader as ds
from datashader.bokeh_ext import InteractiveImage
from functools import partial
from datashader.utils import export_image
from datashader.colors import colormap_select, Greys9, Hot, viridis, inferno
from IPython.core.display import HTML, display
from datashader import transfer_functions as tf
from datashader.utils import export_image
from bokeh.plotting import output_notebook, figure
import statsmodels.api as sm
lowess = sm.nonparametric.lowess
from sklearn.linear_model import LinearRegression

In [None]:
# Offense whose offenders are drawn in the datashader plot below.
offense = 'shoplifting'
output_notebook()

# Plot size: the height is kept small because the empty space in the middle
# of the plot is not needed.
plot_width = 750
plot_height = int(plot_width // 2.2)

# Rendering settings shared by the helpers defined in this cell.
background = "white"
TOOLS = "pan,wheel_zoom,box_zoom,reset,previewsave"
cm = partial(colormap_select, reverse=(background == "white"))
export = partial(export_image, background=background, export_path="output")

# Viewport: x spans the observed 'w_officers_percent' values; y is a fixed
# band slightly wider than 0-1.
w_officers_percent = df['w_officers_percent']
x_min, x_max = w_officers_percent.min(), w_officers_percent.max()
x_range = (x_min, x_max)
y_range = (-0.1, 1.1)
area = (x_range, y_range)

def base_plot(prediction_white, predictions_black,
              plot_title, x_label, y_label, tools=TOOLS, plot_width=plot_width, plot_height=plot_height, **plot_args):
    """Build a bokeh figure with one fitted line per racial group.

    Parameters
    ----------
    prediction_white, predictions_black :
        Indexable objects whose item ``0`` holds the x values and item ``1``
        the y values of the line for white / black offenders. (The first
        parameter keeps its original singular spelling so existing calls
        continue to work.)
    plot_title, x_label, y_label : str
        Figure title and axis labels.
    tools : str
        Tool string passed to ``figure``.
    plot_width, plot_height : int
        Figure size passed to ``figure``.
    **plot_args
        Extra keyword arguments forwarded to ``figure``.

    Returns
    -------
    The configured figure. Its axis ranges come from the module-level
    ``x_range`` and ``y_range``.
    """
    p = figure(title=plot_title, tools=tools, plot_width=plot_width, plot_height=plot_height,
               x_range=x_range, y_range=y_range, outline_line_color=None,
               min_border=0, min_border_left=0, min_border_right=0,
               min_border_top=0, min_border_bottom=0, **plot_args)
    p.xaxis.axis_label = x_label
    p.yaxis.axis_label = y_label
    # Draw from the arguments. The original body read a global named
    # `predictions_white` here, silently ignoring the first parameter.
    p.line(x=prediction_white[0], y=prediction_white[1], color="green", legend="White")
    p.line(x=predictions_black[0], y=predictions_black[1], color="red", legend="Black")
    p.legend[0].location = "top_left"

    return p

def create_image(x_range, y_range, w=plot_width, h=plot_height):
    """Render the module-level ``subset`` frame as a datashader image.

    Points are placed at ('w_officers_percent', 'arrested') on a canvas of
    ``w`` x ``h`` pixels covering ``x_range`` / ``y_range``, aggregated as the
    mean of 'black_not_white', shaded with a green-yellow-red colormap using
    'eq_hist', and finally passed through ``dynspread``.
    """
    canvas = ds.Canvas(x_range=x_range, y_range=y_range, plot_width=w, plot_height=h)
    mean_black = canvas.points(subset, 'w_officers_percent', 'arrested', ds.mean('black_not_white'))
    shaded = tf.shade(mean_black, how='eq_hist', cmap=["green", "yellow", "red"])
    return tf.dynspread(shaded, max_px=4, threshold=0.5)

# Rows for the selected offense, then one frame per value of the
# 'black_not_white' indicator.
subset = df.loc[df['offense_' + offense] == 1]
subset_black = subset.loc[subset['black_not_white'] == 1]
subset_white = subset.loc[subset['black_not_white'] == 0]

# LOWESS fit of 'arrested' on 'w_officers_percent' for each group, wrapped in
# a DataFrame so columns 0 and 1 can be handed to the line glyphs.
predictions_white, predictions_black = (
    pd.DataFrame(lowess(group['arrested'], group['w_officers_percent']))
    for group in (subset_white, subset_black)
)

p = base_plot(
    predictions_white,
    predictions_black,
    plot_title="Offenders by Race and Agency Racial Demographics",
    x_label="Percent Officers White",
    y_label="Arrested",
    background_fill_color=background,
)

# Export the image for the full viewport, then display the interactive
# version, which calls create_image again with new ranges.
export(create_image(*area), offense + "_raw")
InteractiveImage(p, create_image)