Part 2 of the Civil Asset Forfeiture analysis. This script takes in the manually cleaned sheet and generates final summary tables. 

Last edited by Sophie Bair 10/26/22

In [None]:
import pandas as pd 
import numpy as np
import pandoc

# 1. import 

In [None]:
# Load the manually cleaned sheet; every table in this notebook is built from `caf`.
caf = pd.read_excel('caf_pt2_input.xlsx')
caf.head()  # preview the first rows as a quick sanity check on the load

# 2. Create crosstabs of interest 

2a. demographic breakdown + how demographic vars correlate with representation, understanding of case, and outcome 

In [None]:
# Litigant counts by race (rows) and gender (columns). `margins=True` appends
# the "All" totals. The table is kept in a variable because it is exported in
# section 3.
race_gender = pd.crosstab(
    index=caf['litigant_race'],
    columns=caf['litigant_gender'],
    margins=True,
)
race_gender

In [None]:
# Do race/gender correlate with representation in any way?
# Start with raw counts of attorney presence for each litigant race.
pd.crosstab(
    index=caf['litigant_race'],
    columns=caf['atty_present'],
)

In [None]:
# Drop observations where attorney presence was recorded as 'Not sure'.
only_yes_no = caf[caf['atty_present'] != 'Not sure']

race_col = only_yes_no['litigant_race']
atty_col = only_yes_no['atty_present']

# Same table twice: raw counts, then row-normalized shares.
atty_race_count = pd.crosstab(index=race_col, columns=atty_col, margins=True)
atty_race_pct = pd.crosstab(index=race_col, columns=atty_col, normalize='index', margins=True)

# Observation: with the unsure observations removed, Black defendants seem to
# have higher rates of pro se representation at first glance.
atty_race_pct

In [None]:
# Row-normalized attorney presence by litigant gender ('Not sure' rows excluded).
pd.crosstab(
    index=only_yes_no['litigant_gender'],
    columns=only_yes_no['atty_present'],
    normalize='index',
    margins=True,
)
# Observation: seems pretty consistent across males and females.

In [None]:
# Does race/gender correlate with outcomes?

# Keep only observations whose transcribed outcome is one of these categories.
resolved_outcomes = [
    'dismissed',
    'rule in favor of the claimant',
    'rule in favor of the state',
    'settlement',
    'state declined to pursue',
]
caf_settled_subset = caf[
    caf['case_settled_or_resolved_explanation_transcribed'].isin(resolved_outcomes)
]

# Take BOTH axes from the filtered subset so rows and columns are guaranteed to
# describe the same observations, rather than relying on index alignment
# between the subset and the full `caf` frame.
pd.crosstab(
    index=caf_settled_subset['litigant_race'],
    columns=caf_settled_subset['case_settled_or_resolved_explanation_transcribed'],
)
# Honestly, n sizes are probably going to be too small to say anything here.

In [None]:
# Counts of recorded litigant understanding by race, with "All" totals.
pd.crosstab(
    index=caf['litigant_race'],
    columns=caf['litigant_understanding'],
    margins=True,
)

In [None]:
# Row-normalized version of the understanding-by-race table.
pd.crosstab(
    index=caf['litigant_race'],
    columns=caf['litigant_understanding'],
    normalize='index',
    margins=True,
)
# Observation: white defendants had the highest rates of understanding.

In [None]:
# Row-normalized litigant understanding by gender.
pd.crosstab(
    index=caf['litigant_gender'],
    columns=caf['litigant_understanding'],
    normalize='index',
    margins=True,
)
# Observation: gender is pretty even.

2b. do different judges have different rates of litigant outcomes/understanding?

In [None]:
# Kind of tangential, but: are court-watchers with lots of observations mostly
# observing one judge (and thus possibly skewing the data a bit), or are their
# observations evenly distributed (which would make for a better comparison)?
# NOTE(review): `name_cleaned` is presumably the court-watcher's name - confirm.
pd.crosstab(
    index=caf['judge_name_cleaned'],
    columns=caf['name_cleaned'],
    normalize='index',
)

In [None]:
# Raw counts behind the judge x `name_cleaned` shares.
pd.crosstab(
    index=caf['judge_name_cleaned'],
    columns=caf['name_cleaned'],
)

In [None]:
# Some high-volume observers (such as Alessandra) overwhelmingly observe one judge, which could be skewing results for Judge Carroll.
# However, many observers do seem more balanced.

In [None]:
# Counts of case outcomes per judge, using only the outcome-filtered subset.
outcome = caf_settled_subset['case_settled_or_resolved_explanation_transcribed']
presiding_judge = caf_settled_subset['judge_name_cleaned']
pd.crosstab(index=presiding_judge, columns=outcome)

In [None]:
# Row-normalized case outcomes per judge (each judge's row sums to 1).
pd.crosstab(
    index=caf_settled_subset['judge_name_cleaned'],
    columns=caf_settled_subset['case_settled_or_resolved_explanation_transcribed'],
    normalize='index',
)
# Observation: generally Judge Carroll is more likely to rule in favor of the
# state, and Judge Patton is much more likely to get the state to decline to pursue.
# A significance test could be performed if this is of interest, but we almost
# certainly cannot say anything with these n-sizes.

In [None]:
# Counts of recorded litigant understanding for each judge.
pd.crosstab(
    index=caf['judge_name_cleaned'],
    columns=caf['litigant_understanding'],
)

In [None]:
# Row-normalized litigant understanding for each judge.
pd.crosstab(
    index=caf['judge_name_cleaned'],
    columns=caf['litigant_understanding'],
    normalize='index',
)
# Observation: despite the qualitative reports, Judge Carroll has a relatively
# high rate of litigant understanding.

2c. breakdown of seizing entities 

In [None]:
# Seizing entity broken down by each of the two cleaned district columns.
# Both tables are exported in section 3.
isp_seizing_entity = pd.crosstab(
    index=caf['isp_district_cleaned'],
    columns=caf['seizing_entity'],
)
suburban_seizing_entity = pd.crosstab(
    index=caf['suburban_district_cleaned'],
    columns=caf['seizing_entity'],
)

2d. summaries of categorical data relating to case

In [None]:
# Number of non-null entries in each property-type column, as a one-column frame.
# NOTE(review): `.count()` tallies non-null cells, whereas the seizure-reason
# table uses `.sum()`. Confirm these columns are blank (not 0) when the
# property type was not seized.
property_columns = [
    'vehicle_seized',
    'cash_seized',
    'drugs_seized',
    'real_estate_seized',
    'weapons_seized',
    'other_property_seized',
]
property_counts = caf[property_columns].count(axis=0)
property_seized = pd.DataFrame(property_counts)

In [None]:
# Total number of cases where property was identified, i.e. rows whose
# `na_seized` value is not 1.
property_identified = caf['na_seized'] != 1
len(caf[property_identified])

In [None]:
# Column totals for each seizure-reason column, largest first, as a one-column frame.
seizure_reason_columns = [
    'drugs',
    'aggravated_fleeing_eluding',
    'dui',
    'driving_without_license',
    'burglary',
    'robbery',
    'violence',
    'gun_possession',
    'involved_in_accident',
    'money_laundering',
    'speeding_or_traffic_violation',
    'traffic_stop',
    'violating_probation',
]
reason_totals = caf[seizure_reason_columns].sum(axis=0)
seizure_reason = pd.DataFrame(reason_totals.sort_values(ascending=False))

# 3. export

In [None]:
# Show the available column names (reference for the export cell that follows).
caf.columns

In [None]:
# Write every summary table to its own sheet of a single workbook.

# Excel limits sheet names to 31 characters, so longer column names are truncated.
MAX_SHEET_NAME_LEN = 31

# Columns that each get a simple frequency table.
single_table_columns = [
    'judge_name_cleaned',
    'atty_present',
    'party',
    'jointly_owned_property',
    'probable_cause_hearing',
    'probable_cause_established',
    'fee_waiver_filed',
    'fee_waiver_filed_explanation_transcribed',
    'hardship_exception_requested',
    'case_settled_or_resolved',
    'case_settled_or_resolved_explanation_transcribed',
    'fine_fee_issues_cleaned',
    'zoom_difficulties',
    'admin_difficulties',
    'litigant_understanding',
    'seizing_entity',
]

# Tables built earlier in the notebook, keyed by their sheet name.
cross_tables = {
    'race_gender': race_gender,
    'isp_seizing_entity': isp_seizing_entity,
    'suburban_seizing_entity': suburban_seizing_entity,
    'property_seized': property_seized,
    'seizure_reason': seizure_reason,
}

# The context manager saves and closes the workbook on exit, even if a write
# fails part-way. `ExcelWriter.save()` was deprecated in pandas 1.5 and removed
# in pandas 2.0, so it is no longer called explicitly.
with pd.ExcelWriter('tab_results.xlsx') as writer:
    # single tables: value counts of one column, most frequent first
    for column in single_table_columns:
        tab = pd.crosstab(index=caf[column], columns='count').sort_values('count', ascending=False)
        # Slicing leaves names of 31 characters or fewer unchanged.
        tab.to_excel(writer, sheet_name=column[:MAX_SHEET_NAME_LEN])

    # cross tables
    for sheet_name, table in cross_tables.items():
        table.to_excel(writer, sheet_name=sheet_name)