# import packages

In [1]:
%matplotlib inline 
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt 
import seaborn as sns
from scipy.stats import ttest_ind

# import bond data provided by Harris County

In [2]:
# Folder holding the Excel workbooks supplied by Harris County.
path = './fworrrevisedwheeler'


def read_workbook_sheets(file_list, n_sheets=5):
    """Read the leading sheets of each workbook and stack them row-wise.

    Parameters
    ----------
    file_list : list of str
        Workbook names (each starting with '/') that are appended to `path`.
    n_sheets : int, default 5
        Number of sheets, by position 0..n_sheets-1, read from every workbook.

    Returns
    -------
    pandas.DataFrame
        All sheets concatenated in file order, then sheet order. The original
        per-sheet row index is kept, so index labels may repeat.
    """
    frames = [
        pd.read_excel(path + file, sheet_name=i)
        for file in file_list
        for i in range(n_sheets)
    ]
    # Concatenate once, after every sheet has been read.
    return pd.concat(frames)


file_list = ['/DATA - Wheeler (2017 approvals - felony).xlsx',
             '/DATA - Wheeler (2018 approvals - felony).xlsx']
approvals = read_workbook_sheets(file_list)
# CaseNumber is the merge key used later; store it as text.
approvals['CaseNumber'] = approvals['CaseNumber'].astype(str)

file_list = ['/DATA - Wheeler (2017 forfeitures - felony).xlsx',
             '/DATA - Wheeler (2018 forfeitures - felony).xlsx']
forfeitures = read_workbook_sheets(file_list)
# Overwrite the last character of every forfeiture case number with '0'.
# NOTE(review): presumably this aligns the key with the approvals format - confirm.
forfeitures['CaseNumber'] = forfeitures['CaseNumber'].str[:-1] + '0'
# ForfDate is cast to int and then parsed as a YYYYMMDD date.
forfeitures['ForfDate'] = pd.to_datetime(forfeitures['ForfDate'].astype(int), format='%Y%m%d')


# determine who forfeited

In [3]:
# Left-join forfeiture records onto every approved bond, so bonds with no
# forfeiture are kept; columns present in both inputs get '_app' / '_for'
# suffixes. Rows with no bond amount are then discarded.
# NOTE(review): a CaseNumber that occurs more than once in `forfeitures` would
# repeat the matching bond row here - confirm the key is unique.
df = (
    approvals
    .merge(forfeitures, how='left', on='CaseNumber', suffixes=('_app', '_for'))
    .dropna(subset=['BondAmtMade'])
)

# forfeit = 1 when a forfeiture is dated after the bond approval, else 0.
# Bonds with no matching forfeiture have a missing ForfDate, which compares False.
forfeited_after_approval = df['ForfDate'] > df['DateApproved']
df['forfeit'] = np.where(forfeited_after_approval, 1, 0)

# bin CodeDesc into Code groups

In [4]:
# Raw CodeDesc values folded into each reporting group. Any CodeDesc not listed
# here keeps its original text in the new 'Code' column.
code_groups = {
    'Surety': ['SURETY', 'APPEAL'],
    'Cash': ['CASH', 'CASH APPEAL'],
    'Personal': ['PERSONAL'],
    'PTR': ['PERSONAL PTR'],
}
choices = list(code_groups)
conditions = [df['CodeDesc'].isin(raw_codes) for raw_codes in code_groups.values()]
df['Code'] = np.select(conditions, choices, default=df['CodeDesc'])

In [5]:
# Tally rows per bond group; CodeDesc values that were not binned show up under their original text.
df['Code'].value_counts()

Surety                         31406
PTR                             6044
EARLY PRESENTMENT - PTRL         809
Cash                             478
Personal                          24
SB7 - PTRL                        23
UNSECURED BAIL BOND-SHERIFF        3
Name: Code, dtype: int64

In [6]:
# Keep only the four binned bond groups; rows with any other Code are dropped.
main_codes = ['Surety', 'Cash', 'Personal', 'PTR']
df = df[df['Code'].isin(main_codes)]

# keep only bonds approved in 2017

In [12]:
# Restrict the analysis to bonds whose approval date falls in calendar year 2017.
approved_in_2017 = df['DateApproved'].dt.year == 2017
df = df[approved_in_2017]

# calculate bond amount summary statistics by Code and forfeit status

In [15]:
# Count, mean, spread and quartiles of BondAmtMade for every Code / forfeit combination.
df.groupby(['Code','forfeit'])['BondAmtMade'].describe()

Unnamed: 0_level_0,Unnamed: 1_level_0,count,mean,std,min,25%,50%,75%,max
Code,forfeit,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Cash,0,231.0,6247.402597,11042.326362,150.0,1500.0,2000.0,5000.0,70000.0
Cash,1,27.0,3277.777778,5613.057122,1000.0,1750.0,2000.0,2000.0,30000.0
PTR,0,2998.0,5139.926618,8230.578717,500.0,1500.0,2000.0,5000.0,150000.0
PTR,1,455.0,5576.923077,17948.857086,500.0,1500.0,2000.0,5000.0,300000.0
Personal,0,6.0,32333.333333,29104.409746,2000.0,9000.0,35000.0,40000.0,80000.0
Personal,1,2.0,5000.0,0.0,5000.0,5000.0,5000.0,5000.0,5000.0
Surety,0,16677.0,17533.624153,23672.09463,500.0,5000.0,10000.0,25000.0,500000.0
Surety,1,2717.0,15523.187339,19260.787189,500.0,5000.0,10000.0,20000.0,250000.0


In [80]:
# Median bond amount for every (Code, forfeit) pair, returned as a flat table.
# Note: despite its name, df_mean holds medians, not means.
df_mean = df.groupby(['Code', 'forfeit'])['BondAmtMade'].median().reset_index()

In [82]:
# Reshape to one row per Code with one column per forfeit value.
# pivot_table aggregates with its default (mean); df_mean holds a single row per
# (Code, forfeit) pair from the preceding groupby, so each value passes through unchanged.
df_mean = df_mean.pivot_table(index='Code', columns='forfeit', values='BondAmtMade')

In [83]:
# Relabel the pivoted forfeit columns by position: first -> 'No Forfeit', second -> 'Forfeit'.
# NOTE(review): assumes the columns are exactly forfeit == 0 followed by forfeit == 1 - confirm both values occur.
df_mean.columns=['No Forfeit', 'Forfeit']

# test whether bond amounts are significantly different

In [53]:
# For each bond group, compare bond amounts of forfeited vs. non-forfeited bonds
# with a two-sample t-test that does not assume equal variances (equal_var=False).
# One record per group is collected and the result table is built once at the
# end, rather than concatenating a one-row DataFrame per group.
test_rows = []
for code in df['Code'].unique().tolist():
    in_group = df['Code'] == code
    # Single .loc call with row mask and column label (no chained indexing).
    bond_forf = df.loc[in_group & (df['forfeit'] == 1), 'BondAmtMade']
    bond_no_forf = df.loc[in_group & (df['forfeit'] == 0), 'BondAmtMade']
    t, p = ttest_ind(bond_forf, bond_no_forf, equal_var=False)
    # Both statistics are rounded to two decimals, so very small p-values display as 0.0.
    test_rows.append({'code': code, 't-stat': round(t, 2), 'p-value': round(p, 2)})

# Passing `columns` fixes the column order and also yields a well-formed empty
# table when df has no rows.
tr = pd.DataFrame(test_rows, columns=['code', 't-stat', 'p-value']).set_index('code')
tr

Unnamed: 0_level_0,t-stat,p-value
code,Unnamed: 1_level_1,Unnamed: 2_level_1
Surety,-4.87,0.0
Cash,-2.28,0.03
Personal,-2.3,0.07
PTR,0.51,0.61


In [84]:
# Put the per-group medians and the t-test results side by side, matching rows on the index (bond group).
df_out = pd.merge(df_mean, tr, left_index=True, right_index=True)

In [85]:
# Number of non-null CaseNumber rows per bond group, appended as 'N Cases'.
n_cases = df.groupby('Code').agg({'CaseNumber': 'count'})
df_out = (
    df_out
    .merge(n_cases, left_index=True, right_index=True)
    .rename(columns={'CaseNumber': 'N Cases'})
)

In [90]:
# Summing the 0/1 forfeit flag gives the number of forfeited rows per bond group.
n_forfeits = df.groupby('Code').agg({'forfeit': 'sum'})
df_out = pd.merge(df_out, n_forfeits, left_index=True, right_index=True)

In [91]:
# Give the summed forfeit flag a readable column label.
df_out = df_out.rename(columns={'forfeit': 'N Forfeitures'})

In [92]:
# Display the assembled summary table.
df_out

Unnamed: 0,No Forfeit,Forfeit,t-stat,p-value,N Cases,N Forfeitures
Cash,2000.0,2000.0,-2.28,0.03,258,27
PTR,2000.0,2000.0,0.51,0.61,3453,455
Personal,35000.0,5000.0,-2.3,0.07,8,2
Surety,10000.0,10000.0,-4.87,0.0,19394,2717


In [88]:
# Write the summary table to an Excel workbook; the relative path resolves against the current working directory.
df_out.to_excel('median_bond_forfeit.xlsx')