In [2]:
import pandas as pd

In [13]:
data = pd.read_csv('data/texas_jp_aoc_reports.csv')

# Convert 'added' to a numeric column. Thousands separators are stripped first:
# with errors='coerce' a value such as "1,234" would otherwise become NaN and
# then be zeroed by fillna(0), silently dropping it from later sums.
# Anything still unparseable (blank or non-numeric text) is counted as 0.
data['added'] = (
    pd.to_numeric(
        data['added'].astype(str).str.replace(',', '', regex=False),
        errors='coerce')
    .fillna(0))
data.head()

Unnamed: 0,county,JP_court,case_type,date_range,num_reports_submitted,active_pending_start_date,reactivated,added,disposed,placed_on_inactive_status,active_pending_end_date,source
0,Anderson,Precinct 1 Place 1,Civil,9/1/2014 - 8/31/2015,12,86,0,129.0,129,0,90,"AOC Statistical Report, FY2015"
1,Anderson,Precinct 2 Place 1,Civil,9/1/2014 - 8/31/2015,12,26,0,111.0,101,0,35,"AOC Statistical Report, FY2015"
2,Anderson,Precinct 3 Place 1,Civil,9/1/2014 - 8/31/2015,12,221,0,91.0,57,0,233,"AOC Statistical Report, FY2015"
3,Anderson,Precinct 4 Place 1,Civil,9/1/2014 - 8/31/2015,12,30,0,124.0,104,1,50,"AOC Statistical Report, FY2015"
4,Andrews,Precinct 1,Civil,9/1/2014 - 8/31/2015,12,84,0,46.0,50,0,83,"AOC Statistical Report, FY2015"


In [14]:
# Check the column dtypes after converting 'added' to numeric.
# NOTE(review): the other count columns still print as object below, so they
# would need the same conversion before being summed.
print(data.dtypes)

county                        object
JP_court                      object
case_type                     object
date_range                    object
num_reports_submitted          int64
active_pending_start_date     object
reactivated                   object
added                        float64
disposed                      object
placed_on_inactive_status     object
active_pending_end_date       object
source                        object
dtype: object


# The Plan
We have been focusing on nine Texas counties:
- El Paso
- Travis
- Harris
- Tarrant
- Denton
- Williamson
- Nueces
- Fort Bend
- Smith

For each county and each fiscal-year reporting period in the data (September 1 through August 31, covering FY2015 through FY2020), we want to sum the "added" column across all of that county's rows (each county is divided into several JP courts).

In [8]:
# The nine Texas counties this analysis focuses on.
counties = [
    'El Paso',
    'Travis',
    'Harris',
    'Tarrant',
    'Denton',
    'Williamson',
    'Nueces',
    'Fort Bend',
    'Smith',
]

# Distinct reporting periods, in order of first appearance in the data.
date_ranges = data['date_range']
timeframes = date_ranges.unique()
print(timeframes)

['9/1/2014 - 8/31/2015' '9/1/2015 - 8/31/2016' '9/1/2016 - 8/31/2017'
 '9/1/2017 - 8/31/2018' '9/1/2018 - 8/31/2019' '9/1/2019 - 8/31/2020']


In [21]:
# Total the "added" counts for every (county, reporting period) pair.
# A single groupby replaces the nested loops that re-filtered the whole frame
# for each pair and collected the output in three parallel lists.
added_totals = (
    data.loc[data['county'].isin(counties)]
    .groupby(['county', 'date_range'])['added']
    .sum()
)

# Reindex onto the full county x period grid so the row order follows
# `counties` then `timeframes`, and pairs with no matching rows still appear
# with a total of 0 (as the loop-based version produced).
county_periods = pd.MultiIndex.from_product(
    [counties, timeframes],
    names=['county', 'timeframe'])

# NOTE(review): the output column is called 'civil_cases', but nothing here
# filters on data['case_type'] -- confirm the input holds only civil rows.
results = (
    added_totals
    .reindex(county_periods, fill_value=0.0)
    .rename('civil_cases')
    .reset_index()
)

In [22]:
# Preview the first rows of the per-county, per-period totals.
results.head()

Unnamed: 0,county,timeframe,civil_cases
0,El Paso,9/1/2014 - 8/31/2015,7854.0
1,El Paso,9/1/2015 - 8/31/2016,8898.0
2,El Paso,9/1/2016 - 8/31/2017,11774.0
3,El Paso,9/1/2017 - 8/31/2018,15351.0
4,El Paso,9/1/2018 - 8/31/2019,19213.0


In [23]:
# Save the totals. index=False keeps the meaningless 0..n-1 row index out of
# the file, so it does not come back as an "Unnamed: 0" column when re-read.
results.to_csv('data/texas_counties_aoc_counts.csv', index=False)