# <center>Identifying Factors that Influence Bond Amount and Pre-Trial Release</center>  
## <center>A statistical analysis of Harris County felony defendant records</center>

In [1]:
# import packages
import numpy as np
import pandas as pd

from scipy import stats
stats.chisqprob = lambda chisq, df: stats.chi2.sf(chisq, df)
import statsmodels.formula.api as smf

%matplotlib inline

In [2]:
# read felony records dataset
df = pd.read_csv('https://raw.github.com/natethedrummer/pretrial-release/master/felony_offenses.csv')

# include disposed cases only
df = df[df['CASE DISPOSED STATUS'] == 'DISPOSED']

# made bail
df.rename(columns={'access': 'Made Bail'}, inplace=True)

# bin offense
series_offense = pd.Series({'ARSON': 'ARSON',
                          'SALE DRUG': 'DRUG',
                          'POSS DRUG': 'DRUG',
                          'FEL DWI': 'DWI',
                          'KIDNAPPING': 'KIDNAPPING',
                          'CAP MURDER': 'MURDER',
                          'CAPITAL MURDER': 'MURDER',
                          'ASLT-MURDR': 'MURDER',
                          'MURD/MANSL': 'MURDER',
                          'MURDER': 'MURDER',
                          'ROBBERY': 'ROBBERY',
                          'THEFT': 'ROBBERY',
                          'BURGLARY': 'ROBBERY',
                          'burglary': 'ROBBERY',
                          'AUTO THEFT': 'ROBBERY',
                          'RAPE': 'SEX ABUSE',
                          'SEX ABUSE': 'SEX ABUSE',
                          'OTHER FEL': 'OTHER',
                          'OTHERMISD': 'OTHER'})

df['offense_bin'] = df['Offense'].map(series_offense)

# binary offense variables
offense_list = df['offense_bin'].unique().tolist()
for offense in offense_list:
    series = pd.Series({offense: 1})
    df[offense] = df['offense_bin'].map(series)
    df[offense].fillna(value=0, inplace=True)

# Felony Class Offense
df['FC'] = np.where(df['OffenseClass']=='FC', 1, 0)    
df['F1'] = np.where(df['OffenseClass']=='F1', 1, 0)    
df['F2'] = np.where(df['OffenseClass']=='F2', 1, 0)    
df['F3'] = np.where(df['OffenseClass']=='F3', 1, 0)    
df['FS'] = np.where(df['OffenseClass']=='FS', 1, 0)    

# priors
df.rename(columns={'Misd priors': 'Prior Misdemeanor Count'}, inplace=True)
df['Prior Misdemeanor'] = np.where(df['Prior Misdemeanor Count']>=1, 1, 0)
df.rename(columns={'felony priors': 'Prior Felony Count'}, inplace=True)
df['Prior Felony'] = np.where(df['Prior Felony Count']>=1, 1, 0)

# dwi
series = pd.Series({'DWI': 1})
df['DWI'] = df['offense_bin'].map(series)
df['DWI'].fillna(value=0, inplace=True)

# family offense
df['Offense Against Family'] = df['OffenseDescription'].str.contains('fam|chil|kid', case=False, na=False)
df['Offense Against Family'] = df['Offense Against Family'].astype(int)

# race
series = pd.Series({'BLACK': 1})
df['Black'] = df['race'].map(series)
df['Black'].fillna(value=0, inplace=True)
series = pd.Series({'HISPANIC': 1})
df['Hispanic'] = df['race'].map(series)
df['Hispanic'].fillna(value=0, inplace=True)
series = pd.Series({'WHITE': 1})
df['White'] = df['race'].map(series)
df['White'].fillna(value=0, inplace=True)

# sex
series = pd.Series({'F': 1})
df['Female'] = df['gender'].map(series)
df['Female'].fillna(value=0, inplace=True)
series = pd.Series({'M': 1})
df['Male'] = df['gender'].map(series)
df['Male'].fillna(value=0, inplace=True)

# bond amount
df[~(df['BOND $'] == 'NO BOND')]    
df['Bond Amount'] = (df[~(df['BOND $'] == 'NO BOND')])['BOND $'].astype(float)
df = df[np.isfinite(df['Bond Amount'])]
df = df[df['Bond Amount'] > 0]

# log of bond amount
df['Bond_Amount_ln'] = np.log(df['Bond Amount'])

# counsel type
df['Hired Attorney'] = np.where(df['counsel_type']=='Hired Attorney', 1, 0)    
df['Appointed Attorney'] = np.where(df['counsel_type']=='Appointed Attorney', 1, 0)    
df['Public Defender'] = np.where(df['counsel_type']=='Public Defender', 1, 0)    
df['Unknown Counsel'] = np.where(df['counsel_type']=='Other/Unknown', 1, 0)    

# bond type
df = df.rename(columns={'bail type made simple': 'Bail Type'})
df['PTR'] = np.where(df['Bail Type']=='PTR', 1, 0)    
df['Surety'] = np.where(df['Bail Type']=='SURETY', 1, 0)    
df['Cash'] = np.where(df['Bail Type']=='CASH', 1, 0)    
df['None'] = np.where(df['Bail Type']=='NONE', 1, 0)  

# age
df = df[df['age'] != "#VALUE!"]
df['Age'] = df['age'].astype(float)

# descriptive stats
df_desc = df[['Made Bail', 'Surety', 'Cash', 'PTR', 'None', 'Bond Amount',
              'Hired Attorney', 'Appointed Attorney', 'Public Defender', 'Unknown Counsel',
             'Black', 'Hispanic', 'White', 'Male', 'Female',
             'F1', 'F2', 'F3', 'FC', 'FS',
             'Prior Felony', 'Prior Felony Count', 'Prior Misdemeanor', 'Prior Misdemeanor Count']].describe()
df_desc = df_desc.ix[['count','mean']]
df_desc = df_desc.transpose()
df_desc.index.name = 'Variable'
df_desc.rename(columns={'count': 'Sample Size',
    'mean': 'Mean Value'
    }, inplace=True)
df_desc = df_desc.sort_index()
df_desc['Sample Size'] = df_desc['Sample Size'].astype(int).apply(lambda x: '{:,}'.format(x))
df_desc['Mean Value'] = (df_desc['Mean Value'] * 100).astype(int).astype(str) + '%'

.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#ix-indexer-is-deprecated


# Describe Defendants: Made Bail  
Only 40% of defendants made bail and were released before trial.

In [3]:
df_desc.loc[['Made Bail']]

Unnamed: 0_level_0,Sample Size,Mean Value
Variable,Unnamed: 1_level_1,Unnamed: 2_level_1
Made Bail,2490,42%


# Describe Defendants: Bond Type  
Nearly all defendants who made bail used a surety bond to get out of jail.

In [6]:
df_desc.loc[['PTR', 'Surety', 'Cash', 'None']]

Unnamed: 0_level_0,Sample Size,Mean Value
Variable,Unnamed: 1_level_1,Unnamed: 2_level_1
PTR,2490,1%
Surety,2490,40%
Cash,2490,0%
,2490,57%


# Describe Defendants: Legal Representation  
Only 35% of defendants hired a private attorney.

In [7]:
df_desc.loc[['Hired Attorney', 'Appointed Attorney', 'Public Defender', 'Unknown Counsel']]

Unnamed: 0_level_0,Sample Size,Mean Value
Variable,Unnamed: 1_level_1,Unnamed: 2_level_1
Hired Attorney,2490,34%
Appointed Attorney,2490,58%
Public Defender,2490,5%
Unknown Counsel,2490,2%


# Describe Defendants: Demographics  
The majority of defendants (72%) were people of color.  
48% of defendants were Black and 24% were Hispanic.

In [8]:
df_desc.loc[['Black', 'Hispanic', 'White']]

Unnamed: 0_level_0,Sample Size,Mean Value
Variable,Unnamed: 1_level_1,Unnamed: 2_level_1
Black,2490,48%
Hispanic,2490,24%
White,2490,25%


The majority of defendants (83%) were Male.

In [9]:
df_desc.loc[['Male', 'Female']]

Unnamed: 0_level_0,Sample Size,Mean Value
Variable,Unnamed: 1_level_1,Unnamed: 2_level_1
Male,2490,82%
Female,2490,17%


# Describe Defendants: Felony Class  
The most frequent (45%) felony charge was a State Jail Felony (FS).  
Less than 1% of defendants were charged with a Capital Felony (FC).

In [10]:
df_desc.loc[['F1', 'F2', 'F3', 'FC', 'FS']]

Unnamed: 0_level_0,Sample Size,Mean Value
Variable,Unnamed: 1_level_1,Unnamed: 2_level_1
F1,2490,9%
F2,2490,17%
F3,2490,22%
FC,2490,0%
FS,2490,49%


# Describe Defendants: Prior Charges  
53% of defendants had a prior felony charge and 65% had a prior misdemeanor charge.

In [11]:
df_desc.loc[['Prior Felony', 'Prior Misdemeanor']]

Unnamed: 0_level_0,Sample Size,Mean Value
Variable,Unnamed: 1_level_1,Unnamed: 2_level_1
Prior Felony,2490,52%
Prior Misdemeanor,2490,65%


On average, a defendant had previously been charged with:   
* 1.9 felonies  
* 2.3 misdemeanors

In [12]:
df_desc['Mean Value'] = df_desc['Mean Value'].apply(lambda x: float(x.strip('%'))/100)
df_desc.loc[['Prior Felony Count', 'Prior Misdemeanor Count']]

Unnamed: 0_level_0,Sample Size,Mean Value
Variable,Unnamed: 1_level_1,Unnamed: 2_level_1
Prior Felony Count,2490,1.89
Prior Misdemeanor Count,2490,2.3


# Describe Defendants: Bond Amount  
On average, a defendant's Bond Amount was set at $22,369.

In [13]:
df_desc['Mean Value'] = df_desc['Mean Value'].apply(lambda x: '${:,.0f}'.format(x))
df_desc.loc[['Bond Amount']]

Unnamed: 0_level_0,Sample Size,Mean Value
Variable,Unnamed: 1_level_1,Unnamed: 2_level_1
Bond Amount,2490,"$23,424"


# Factors that Influence Bond Amount  
A linear regression model was used to identify factors that have a statistically significant relationship with bond amount.  
The model uncovers a few key findings, including:  
* Defendants tend to receive a higher bond amount when charged with a more severe felony or having a history of prior felony charges.  
* Hispanic, Male, and older defendants often receive a higher bond amount regardless of the crime's severity or their criminal history.  

In [14]:
df_bond_model = df.rename(columns={'Offense Against Family': 'Family_Offense',
                                 'DWI': 'DWI_Offense',
                                 'Prior Felony': 'Prior_Felony',
                                 'Hired Attorney': 'Hired_Attorney',
                                 'Appointed Attorney': 'Appointed_Attorney',
                                 'Public Defender': 'Public_Defender',
                                 'Made Bail': 'Made_Bail'})

results = smf.ols('Bond_Amount_ln ~ FC + F1 + F2 + F3 + Prior_Felony + Black + Hispanic + Male + Age', data=df_bond_model).fit()

In [15]:
print("""
The null hypothesis is rejected in the F-test of overall significance with a p-value of {0} and an F-statistic of {1}. The adjusted R squared statistic is {2}.
""".format(round(results.f_pvalue, 3), round(results.fvalue, 3), round(results.rsquared_adj, 3)))


The null hypothesis is rejected in the F-test of overall significance with a p-value of 0.0 and an F-statistic of 173.187. The adjusted R squared statistic is 0.384.



The following factors have a statistically significant (p < 0.05) and positive relationship with bond amount.  Factors are listed in order of importance. 

In [16]:
df_coef = pd.concat([results.params, results.pvalues], axis=1)
df_coef.columns=['Coefficient', 'P-Value']
df_coef = df_coef.drop(['Intercept'])
df_coef['P-Value'] = round(df_coef['P-Value'], 3)
df_coef['Coefficient'] = round(df_coef['Coefficient'], 3)
df_coef = df_coef.loc[df_coef['P-Value'] < 0.05]
df_coef

Unnamed: 0,Coefficient,P-Value
F1,1.871,0.0
F2,1.181,0.0
F3,0.379,0.0
Prior_Felony,0.554,0.0
Hispanic,0.134,0.006
Male,0.213,0.0
Age,0.012,0.0


Listed below are the full results from the linear regression model.

In [17]:
results.summary2()

0,1,2,3
Model:,OLS,Adj. R-squared:,0.384
Dependent Variable:,Bond_Amount_ln,AIC:,6259.8143
Date:,2018-04-21 22:04,BIC:,6318.0147
No. Observations:,2490,Log-Likelihood:,-3119.9
Df Model:,9,F-statistic:,173.2
Df Residuals:,2480,Prob (F-statistic):,5.03e-255
R-squared:,0.386,Scale:,0.72043

0,1,2,3,4,5,6
,Coef.,Std.Err.,t,P>|t|,[0.025,0.975]
Intercept,8.0400,0.0726,110.8117,0.0000,7.8977,8.1823
FC,0.1716,0.8496,0.2020,0.8400,-1.4944,1.8376
F1,1.8713,0.0611,30.6065,0.0000,1.7514,1.9912
F2,1.1810,0.0481,24.5758,0.0000,1.0868,1.2753
F3,0.3794,0.0439,8.6517,0.0000,0.2934,0.4655
Prior_Felony,0.5545,0.0370,14.9822,0.0000,0.4819,0.6270
Black,-0.0671,0.0419,-1.6012,0.1095,-0.1493,0.0151
Hispanic,0.1336,0.0482,2.7740,0.0056,0.0392,0.2281
Male,0.2132,0.0457,4.6665,0.0000,0.1236,0.3028

0,1,2,3
Omnibus:,244.913,Durbin-Watson:,1.982
Prob(Omnibus):,0.0,Jarque-Bera (JB):,422.908
Skew:,0.683,Prob(JB):,0.0
Kurtosis:,4.488,Condition No.:,1744.0


# Factors that Influence Pre-Trial Release  
A logistic regression model was used to identify factors that have a statistically significant relationship with making bail and getting released before trial.  
The model uncovers a few key findings, including:  
* Defendants are less likely to make bail when their bond amount is higher.  
* 
* Defendants tend to receive a higher bond amount when charged with a more severe felony or having a history of prior felony charges.  
* Hispanic, Male, and older defendants often receive a higher bond amount regardless of the crime's severity or their criminal history.  

In [18]:
df_ptr_model = df.rename(columns={'Offense Against Family': 'Family_Offense',
                                 'DWI': 'DWI_Offense',
                                 'Prior Felony': 'Prior_Felony',
                                 'Hired Attorney': 'Hired_Attorney',
                                 'Appointed Attorney': 'Appointed_Attorney',
                                 'Public Defender': 'Public_Defender',
                                 'Made Bail': 'Made_Bail'})

results = smf.logit('Made_Bail ~ Bond_Amount_ln + F1 + F2 + F3 + FS + Prior_Felony + Black + Hispanic + Male + Age + Hired_Attorney + Appointed_Attorney + Public_Defender', data=df_ptr_model).fit(full_output=False, disp=False)

In [19]:
print("""
The null hypothesis is rejected in the Log Likelihood Ratio Test of overall significance with a p-value of {0} and 
a Log Likelihood Ratio of {1}.  The pseudo R squared statistic is {2}.
""".format(round(results.llr_pvalue, 3), round(results.llr, 3), round(results.prsquared, 3)))


The null hypothesis is rejected in the Log Likelihood Ratio Test of overall significance with a p-value of 0.0 and 
a Log Likelihood Ratio of 1202.202.  The pseudo R squared statistic is 0.354.



The following factors have a statistically significant (p < 0.05) relationship with the likelihood of making bail.  
Factors are listed from positive to negative correlation with making bail.

In [20]:
df_coef = pd.concat([results.params, results.pvalues], axis=1)
df_coef.columns=['Coefficient', 'P-Value']
df_coef = df_coef.drop(['Intercept'])
df_coef['P-Value'] = round(df_coef['P-Value'], 3)
df_coef['Coefficient'] = round(df_coef['Coefficient'], 3)
df_coef = df_coef.loc[df_coef['P-Value'] < 0.05]
df_coef

Unnamed: 0,Coefficient,P-Value
Bond_Amount_ln,-0.785,0.0
Prior_Felony,-0.268,0.026
Hispanic,-0.535,0.001
Hired_Attorney,3.596,0.0
Public_Defender,0.919,0.015


Listed below is a summary of results from the logistic regression model.

In [21]:
results.summary2()

0,1,2,3
Model:,Logit,AIC:,2224.2682
Dependent Variable:,Made_Bail,BIC:,2305.7488
Date:,2018-04-21 22:05,Log-Likelihood:,-1098.1
No. Observations:,2490,LL-Null:,-1699.2
Df Model:,13,LLR p-value:,5.9437e-249
Df Residuals:,2476,Scale:,1.0
Pseudo R-squared:,0.354,,

0,1,2,3,4,5,6
,Coef.,Std.Err.,z,P>|z|,[0.025,0.975]
Intercept,6.4821,0.9043,7.1683,0.0000,4.7098,8.2545
Bond_Amount_ln,-0.7854,0.0683,-11.4979,0.0000,-0.9193,-0.6515
F1,0.4540,0.6326,0.7176,0.4730,-0.7860,1.6940
F2,0.1388,0.6171,0.2249,0.8221,-1.0708,1.3483
F3,-0.0095,0.6132,-0.0155,0.9876,-1.2114,1.1924
FS,-1.0001,0.6101,-1.6391,0.1012,-2.1960,0.1958
Prior_Felony,-0.2682,0.1201,-2.2325,0.0256,-0.5037,-0.0327
Black,0.1947,0.1325,1.4696,0.1417,-0.0650,0.4543
Hispanic,-0.5355,0.1576,-3.3986,0.0007,-0.8443,-0.2267
