In [None]:
import os.path
import pandas as pd
from pandas.api.types import CategoricalDtype
import numpy as np
import re

In [None]:
# TODO: add case type to criminal case reports in PDCMS
# Column labels applied to the case-report CSVs when they are loaded with
# `names=NAMES, header=0` (the file's own header row is replaced by these).
NAMES = [
    "Case#",
    "Open Date",
    "Closed Date",
    "Court",
    "Latest Event",
    "Intake Type",
    "TC C/L",
    "Top Charge",
    "Dispo",
    "Case Type",
]
# Ordered categorical dtype listing the three case-type labels.
# NOTE(review): these labels differ from the strings returned by classify_tc
# ("Homicide", "Other Felony", "Misdemeanor, Violation, or Other") -- confirm
# which set is the intended one before using this dtype on that column.
CASE_TYPES = CategoricalDtype(
    ordered=True,
    categories=[
        "Violent Felony",
        "Non-Violent Felony",
        "Misdemeanors, Violation, or Other",
    ],
)
# Absolute form of the relative data directory, resolved against the current
# working directory at the time this cell runs.
DATA_DIR = os.path.abspath('../../Data/')

In [None]:
def classify_tc(tc):
    """Map a top-charge class/level code onto an MOCJ-requested category.

    The value is converted with ``str()`` before any comparison, so
    non-string inputs (for example a missing value) are classified by
    their string form.

    Returns:
        "Homicide" when the code is exactly "H";
        "Other Felony" when the code ends in "F" or contains "FL" or "FV";
        "Misdemeanor, Violation, or Other" for everything else.
    """
    code = str(tc)
    if code == "H":
        return "Homicide"
    # A bare "F" is already covered by the endswith check.
    looks_like_felony = code.endswith("F") or any(
        marker in code for marker in ("FL", "FV")
    )
    if looks_like_felony:
        return "Other Felony"
    return "Misdemeanor, Violation, or Other"

## Opened Cases
Criminal, VOP, and VOCD cases opened in the reporting year.

In [None]:
# Load the 2018 opened-cases export. Row 0 of the file is consumed as a header
# and its labels are replaced with NAMES.
cases_opened = pd.read_csv(
    os.path.join(DATA_DIR, "ILS_cases_opened_2018.csv"),
    header=0,
    names=NAMES,
)

In [None]:
# Re-code TC C/L for extradition cases
cases_opened.loc[(cases_opened["Top Charge"].str.contains("FUG", na=False)), 'TC C/L'] = "M"
# AC Misdemeanors don't get counted correctly
cases_opened.loc[(cases_opened["Top Charge"].str.contains("AC - M", na=False)), 'TC C/L'] = "M"
# Re-code TC C/L for homicides
cases_opened.loc[(cases_opened["Top Charge"]).str.startswith("MUR", na=False), 'TC C/L'] = 'H'

In [None]:
# Derive the reporting category for every case from its (re-coded) top-charge class.
cases_opened["top_charge_type"] = cases_opened["TC C/L"].apply(classify_tc)

In [None]:
# Let pandas render up to 1000 rows before truncating displayed output.
pd.options.display.max_rows = 1000

In [None]:
# For each remaining column, count its non-null values within every top-charge category.
cases_opened.pivot_table(aggfunc="count", columns="top_charge_type")

In [None]:
# Getting rid of IDV/Visitation cases: drop rows whose top charge mentions
# VISITATION or FAMILY OFFENSE, and IDV-court rows whose intake type is
# "Assignment" or starts with "Trans".
cases_opened = cases_opened.loc[
    ~(
        cases_opened["Top Charge"].str.contains("VISITATION", na=False)
        | cases_opened["Top Charge"].str.contains("FAMILY OFFENSE", na=False)
        | ((cases_opened["Court"] == "IDV")
           & (cases_opened["Intake Type"] == "Assignment"))
        | ((cases_opened["Court"] == "IDV")
           & (cases_opened["Intake Type"].str.startswith("Trans")))
    )
]

In [None]:
# Reload the opened-cases export from disk. This replaces `cases_opened`, so the
# re-coding and IDV/visitation filtering applied in earlier cells are discarded.
# NOTE(review): confirm that the counts below are meant to cover the unfiltered file.
# The NAMES labels are applied exactly as in the first load of this file, so the
# "Top Charge" / "TC C/L" look-ups in the following cells do not depend on the
# wording of the CSV's own header row.
cases_opened = pd.read_csv(os.path.join(DATA_DIR, "ILS_cases_opened_2018.csv"),
                           names=NAMES,
                           header=0)

In [None]:
# Re-code TC C/L for extradition cases
cases_opened.loc[(cases_opened["Top Charge"].str.contains("FUG", na=False)), 'TC C/L'] = "M"
# AC Misdemeanors don't get counted correctly
cases_opened.loc[(cases_opened["Top Charge"].str.contains("AC - M", na=False)), 'TC C/L'] = "M"
# Re-code TC C/L for homicides
cases_opened.loc[(cases_opened["Top Charge"]).str.startswith("MUR", na=False), 'TC C/L'] = 'H'

In [None]:
# Derive the reporting category for every case from its (re-coded) top-charge class.
cases_opened["top_charge_type"] = cases_opened["TC C/L"].apply(classify_tc)

In [None]:
# Number of rows in each top-charge category.
cases_opened.groupby(by="top_charge_type").size()

## Closed Cases
Criminal, VOCD, and VOP cases closed in the reporting year.

In [None]:
# Load the 2018 closed-cases export. Row 0 of the file is consumed as a header
# and its labels are replaced with NAMES.
cases_closed = pd.read_csv(
    os.path.join(DATA_DIR, "ILS_cases_closed_2018.csv"),
    header=0,
    names=NAMES,
)

In [None]:
# Preview the first five rows of the closed-cases frame.
cases_closed.head(5)

In [None]:
# Re-code TC C/L for extradition cases
cases_closed.loc[(cases_closed["Top Charge"].str.contains("FUG", na=False)), 'TC C/L'] = "M"
# AC Misdemeanors don't get counted correctly
cases_closed.loc[(cases_closed["Top Charge"].str.contains("AC - M", na=False)), 'TC C/L'] = "M"
# Re-code TC C/L for homicides
cases_closed.loc[(cases_closed["Top Charge"]).str.startswith("MUR", na=False), 'TC C/L'] = 'H'

In [None]:
# Getting rid of IDV/Visitation cases
cases_closed = cases_closed[~cases_closed['Top Charge'].str.contains("VISITATION", na=False)]
cases_closed = cases_closed[~cases_closed['Top Charge'].str.contains('FAMILY OFFENSE', na=False)]
cases_closed = cases_closed.loc[~((cases_closed["Court"] == "IDV")
                              & (cases_closed["Intake Type"] == "Assignment"))]
cases_closed = cases_closed.loc[~((cases_closed["Court"] == "IDV")
                              & (cases_closed["Intake Type"].str.startswith("Trans")))]

In [None]:
# Derive the reporting category for every closed case from its (re-coded) top-charge class.
cases_closed["top_charge_type"] = cases_closed["TC C/L"].apply(classify_tc)

In [None]:
# Count of non-null Case# values within each top-charge category.
cases_closed.loc[:, ['top_charge_type', 'Case#']].pivot_table(aggfunc="count", columns="top_charge_type")

#### Non-Disposition Reasons

| Reason | PDCMS Code |
|--------------------------|------------|
| Absconded | |
| Not indigent | |
| Conflict of Interest | C/I |
| Trans. to Family Court | AFC |
| Mental Institution | 730 |
| Abated | ABAT |
| Retained private counsel | RPC |
| Lack of Cooperation | |
| Other (Specify) | 871 (Duplicate File and Defendant Pro se, PD relieved) |

In [None]:
# Disposition codes grouped by reporting category (category -> list of codes).
# NOTE(review): 'ACQ' is listed under both 'final' and 'trial'; confirm which
# category it belongs to, since a code in two lists maps to two categories.
DISPOS = {'final':      ['ACD', 'ACQ', 'CLOS', 'CONS', 'COV',
                         'DDP', 'DEND', 'DISM', 'DISS', 'DWOP',
                         'EXH', 'FACD', 'GJNB', 'MACD', 'PLEA',
                         'PLSE', 'ST', 'SAT', 'RES', 'SOR1',
                         'TERM', 'TOPV', 'WADI'],
          'trial':     ['DIAT', 'ACQ', 'FTTR', 'MLTR', 'MTTR', 'NGMD', 'TRVR'],
          'adjourned': ['ADBR', 'ADBX', 'ADQU', 'ADRI', 'ADSA'],
          # 'PRBX' and 'PROB' are separate codes; without the comma between them
          # Python joins the adjacent literals into the single string 'PRBXPROB'.
          'probation': ['PRBX', 'PROB', 'PROT', 'REV', 'VDCH'],
          'cond_dis':  ['VOCD'],
          'mental':    ['730'],
          'abated':    ['ABAT'],
          'family':    ['AFC'],
          'conflict':  ['C/I'],
          'other':     ['OTER', 'OTH', 'R18B', 'RELC', 'RLAS', 'RNDS'],
          'rpc':       ['RPC']}

In [None]:
# TODO fix this over-engineered code
# Flatten DISPOS into a lookup table with one (type, dispo_code) row per code.
dispo_cat_df = pd.DataFrame.from_records(
    data=[
        (category, code)
        for category, category_codes in DISPOS.items()
        for code in category_codes
    ],
    columns=['type', 'dispo_code'],
)

In [None]:
# Attach the disposition category ('type') to each closed case by matching the
# case's "Dispo" code against the lookup's `dispo_code` column. A left merge
# keeps cases whose code is not in the lookup; their 'type' is left missing.
# NOTE(review): a code that appears under more than one category in the lookup
# yields one output row per category for each matching case.
cases_closed.merge(dispo_cat_df, how="left", left_on="Dispo", right_on="dispo_code")

In [None]:
# Mental Institution
# Code: 730, NGRI

In [None]:
# Abated
# Codes: ABAT

In [None]:
# Retained Private Counsel
# Codes: RPC