In [1]:
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
import bokeh
from bokeh.plotting import figure, show
from bokeh.layouts import gridplot
from ipywidgets import interact, interact_manual

In [2]:
# Send Bokeh output to the notebook so later `show(...)` calls render inline.
from bokeh.io import output_notebook
output_notebook()

In [3]:
# Project root. Set the CR_EDA_CHICAGO_DIR environment variable to point at a
# different checkout; when it is unset the original local path is used, so
# existing behaviour is unchanged.
path = os.environ.get(
    'CR_EDA_CHICAGO_DIR',
    r'/Users/polinarozhkova/Desktop/GitHub/cr_eda_chicago/',
)
# Merged case-level table; later cells read its 'cleared', 'time_to_clear'
# and 'year' columns.
final_merge_df = pd.read_csv(os.path.join(path, 'clean_data/merge_all.csv'))
# Workbook from the inputs folder (not used by the cells shown here).
cr_reports = pd.read_excel(os.path.join(path, 'inputs/CR_from_CPD_Annual_Reports_copy.xlsx'))

In [4]:
# Keep only the rows whose 'cleared' flag equals 'Y'.
all_cleared_df = final_merge_df.loc[final_merge_df['cleared'].eq('Y')]

In [5]:
# Discard rows whose time_to_clear is zero, negative or missing.
all_cleared_df = all_cleared_df.loc[all_cleared_df['time_to_clear'].gt(0)]

In [6]:
# Summary statistics of the time_to_clear column.
all_cleared_df.loc[:, 'time_to_clear'].describe()

count    5300.000000
mean      422.502830
std       935.828131
min         1.000000
25%         5.000000
50%        60.000000
75%       323.250000
max      7476.000000
Name: time_to_clear, dtype: float64

In [7]:
# np.histogram receives only a bin count (400/5 = 80) and no `range`, so it
# returns 80 equal-width bins spanning the observed min..max of time_to_clear;
# the bin width therefore depends on the data rather than being fixed.
arr_hist, edges = np.histogram(all_cleared_df['time_to_clear'], bins=400 // 5)
# One row per bin: its count ('days') and its left/right edges.
days_to_clear = pd.DataFrame(
    {'days': arr_hist, 'left': edges[:-1], 'right': edges[1:]}
)

In [8]:
# Inspect the binned counts ('days') alongside each bin's 'left'/'right' edge.
days_to_clear

Unnamed: 0,days,left,right
0,2961,1.0000,94.4375
1,557,94.4375,187.8750
2,351,187.8750,281.3125
3,211,281.3125,374.7500
4,152,374.7500,468.1875
...,...,...,...
75,4,7008.8125,7102.2500
76,0,7102.2500,7195.6875
77,0,7195.6875,7289.1250
78,1,7289.1250,7382.5625


Interactive plot showing the distribution of time to clearance for the cases cleared in the selected year.

In [15]:
def plot_time_to_clear(df, year, n_bins=int(400/5)):
    """Build a Bokeh histogram of `time_to_clear` for the rows of one year.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a `year` column and a numeric `time_to_clear` column.
    year : scalar
        Value matched against `df['year']` to select the rows to plot.
    n_bins : int, optional
        Number of equal-width bins spanning the selected rows' observed
        min..max (default 80). Because no fixed `range` is passed to
        `np.histogram`, the bin width varies with the data.

    Returns
    -------
    The Bokeh figure holding one red `quad` bar per bin.
    """
    # NOTE(review): newer Bokeh releases spell this keyword `height`; rename it
    # if the environment is upgraded.
    plot = figure(x_axis_label='Time to Clearance in Days', y_axis_label='Count',
                  plot_height=500)
    # Filter on the `year` column. `df.loc[year]` would instead treat `year` as
    # a row label of the index and return a single unrelated row (or raise).
    data = df.loc[df['year'] == year, 'time_to_clear'].dropna()
    counts, edges = np.histogram(data, bins=n_bins)
    # One row per bin: its count ('days') and its left/right edges.
    days_to_clear = pd.DataFrame({'days': counts, 'left': edges[:-1],
                                  'right': edges[1:]})
    # Bar heights are the per-bin counts, not the raw time_to_clear values.
    plot.quad(bottom=0, top=days_to_clear['days'], left=days_to_clear['left'],
              right=days_to_clear['right'], fill_color='red')
    return plot

In [17]:
# Distinct years for the dropdown. `unique()` yields values in order of first
# appearance, so sort them to make the option order independent of row order;
# `tolist()` converts numpy scalars to plain Python numbers.
years = sorted(all_cleared_df['year'].dropna().unique().tolist())

In [18]:
@interact(year=years)
def make_plot_for(year=years[0]):
    """Render the time-to-clearance plot for the year picked in the dropdown."""
    show(plot_time_to_clear(all_cleared_df, year))

interactive(children=(Dropdown(description='year', options=(2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 20…