In [1]:
import warnings
warnings.filterwarnings('ignore')

import pandas as pd
import numpy as np
pd.set_option("display.max_columns", None)
pd.set_option("display.max_rows", 500)
pd.set_option("display.min_rows", 200)
pd.set_option('display.max_colwidth', None)

pd.options.mode.chained_assignment = None
import re 
from datetime import timedelta, datetime
import sys
sys.path.append('../../..')

from src.box import read_box_df

Questions:

* Characteristics of courts and cases, such as which courts adjudicate guardianships, whether specialized judges are assigned these matters, characteristics of petitioners; characteristics of appointed guardians; the incidence and frequency of ordering imposition of guardianship, versus revocation or orders limiting, denying, or revoking plenary guardianship;
* Prevalence of legal representation for guardian, and type of civil legal representation for proposed protected persons;
* What can be understood about the guardianship process from a review of court proceedings, such as the evidence required to impose guardianship; level of involvement of the proposed protected person in the proceeding; alternatives to guardianship proposed or considered; frequency of cases involving revocation or limits to the guardian’s power; and, duration and extent of proceedings prior to imposing a guardianship;
* Guardianship monitoring instructions and/or oversight in the proceedings.


In [2]:
DATA_PATH='../data/indiana'

idf = pd.read_csv(f'{DATA_PATH}/indiana_guardianship.csv')

In [3]:
rdf = idf[[
    'case_number', 
    'court_ir', 
    'respondent_name_ir', 
    'respondent_type_ir', 
    'respondent_birth_year_ir', 
    'guardianship_type_ir',
    'guardian_name_ir',
    'guardianship_scope_ir', 
    'decision_date_granted_ir',
    'expiration_date_ir',
    'decision_date_granted_ir_min'
]].rename(columns={'decision_date_granted_ir': 'issue_date_ir', 'decision_date_granted_ir_min': 'issue_date_ir_min'})

In [4]:
ddf = pd.read_csv(f'{DATA_PATH}/indiana_guardianship_dockets.csv')
cdf = pd.read_csv(f'{DATA_PATH}/indiana_clean_agc_filings.csv')
pdf = pd.read_csv(f'{DATA_PATH}/indiana_guardianship_parties.csv')
# rdf = pd.read_csv(f'{DATA_PATH}/indiana_agc_results.csv')
# cdf_all =  pd.read_csv(f'{DATA_PATH}/indiana_guardianship_cases.csv')

# drop one duplicate case
cdf = cdf[cdf.case_number != '10C018001GU000007']
pdf = pdf[pdf.case_number != '10C018001GU000007']

cdf['case_number'] = cdf['case_number'].str.upper()

# reduce columns
cdf = cdf[['c2dp_case_key', 'fips', 'state', 'county', 'case_number', 'last_collected', 'clean_case_type', 'date_filed', 'raw_case_status', 'last_docket_date', 'last_docket']]
# cdf_all['case_number'] = cdf_all['case_number'].str.upper()
pdf['case_number'] = pdf['case_number'].str.upper()
ddf['case_number'] = ddf['case_number'].str.upper()
rdf['case_number'] = rdf['case_number'].str.upper()

# cleaning duplicate entries (TODO: remove this ugly logic!)
dedup_str = lambda x: '; '.join(list(set([c.strip() for c in x.split(';') if c and c.replace(' ', '')]))) if x and x==x else ''

# rdf['guardianship_scope'] = rdf['guardianship_scope'].str.strip().str.replace('\s+', ' ', regex=True).apply(dedup_str)
# rdf['issue_date'] = rdf['issue_date'].str.strip().str.replace('\s+', ' ', regex=True).apply(dedup_str)

# get_min_issue_date = lambda x: min([datetime.strptime(c, '%m/%d/%Y') for c in x.split('; ')]) if x else None
# rdf['issue_date_min'] = rdf['issue_date'].apply(get_min_issue_date)
# rdf['expiration_date'] = rdf['expiration_date'].str.strip().str.replace('\s+', ' ', regex=True).apply(dedup_str)

# rdf.columns = [
#     'case_number', 
#     'court_ir', 
#     'respondent_name_ir', 
#     'respondent_type_ir', 
#     'respondent_birth_year_ir', 
#     'guardianship_type_ir',
#     'guardian_name_ir',
#     'guardianship_scope_ir', 
#     'issue_date_ir',
#     'expiration_date_ir',
#     'issue_date_ir_min'
# ]
rdf_m = cdf.merge(rdf, on='case_number', how='left')


print('Number of cases in results scrape:', rdf.case_number.nunique())
print('Number of cases not in registry:', rdf.court_ir.isnull().sum())
print('Percentage of missing cases:', round(rdf.court_ir.isnull().sum()/rdf.shape[0]*100))

print('\nTypes of guardianship counts:\n', rdf.guardianship_type_ir.value_counts())

def clean_text(df, colname):
    space_replace_list = ['\n', '\/']
    for sp in space_replace_list:
        df[colname] = df[colname].str.replace(sp, ' ', regex=True)
    df[colname] = df[colname].str.replace("[^A-Za-z\. ]", '', regex=True)
    df[colname] = df[colname].str.replace("\s+", ' ', regex=True)
    df[colname] = df[colname].str.replace(" co\s+", ' co', regex=True)
    df[colname] = df[colname].str.replace("co gu", 'cogu')
    return df[colname].str.strip().str.lower()

ddf['combined_docket_event_description'] = ddf['docket_event_description'].fillna('') + ' ' + ddf['docket_event_comment'].fillna('')
ddf['combined_docket_event_description'] = clean_text(ddf, 'combined_docket_event_description')
ddf['docket_event_date'] = pd.to_datetime(ddf.docket_event_date)

# fix out of bounds date on subdates and convert to datetime
ddf.loc[ddf.docket_event_subdate == '06/22/2916', 'docket_event_subdate'] = '06/22/2016'
ddf['docket_event_subdate'] = pd.to_datetime(ddf['docket_event_subdate'])

# judge
# NOTE THAT WE ARE REMOVING SUFFIX WHICH MAY IDENTIFY TYPE OF JUDGE
ddf_judge = ddf.copy()
ddf_judge['docket_event_judge'] = ddf_judge['docket_event_judge'].str.replace('( -|- ).*', '', regex=True).str.replace('.', '')
judge_df = ddf_judge[~ddf_judge.docket_event_judge.isnull()].drop_duplicates(['case_number', 'docket_event_judge']).groupby('case_number')['docket_event_judge'].apply(lambda x: '; '.join(x)).reset_index()
judge_df.rename(columns={'docket_event_judge': 'judge'}, inplace=True)
rdf_m = rdf_m.merge(judge_df, on='case_number', how='left')

# FILTER DDF TO ONLY INCLUDE CASES MISSING REGISTRY
cases_not_found_in_registry = rdf[rdf.court_ir.isnull()].case_number.unique().tolist()
ddf_m = ddf.copy()

# cleaning parties
pdf.rename(columns={'party_extconncodedesc': 'party_type'}, inplace=True)
pdf['party_registry_id'] = pdf['party_oans'].str.extract('((?<=Registry\, number\=)\d+)')
pdf['party_race'] = pdf.party_description.str.extract('(Multiracial|Asian|White|Indian|Black|Native Hawaiian or Other Pacific Islander|Other)')
pdf['party_gender'] = pdf.party_description.str.extract('(Male|Female)')

# to add respondent year of birth
pdf_w_yob = pdf[['case_number', 'party_partykey', 'party_type']].merge(rdf[['case_number', 'respondent_birth_year_ir']], on='case_number', how='left')
pdf_w_yob = pdf_w_yob[pdf_w_yob.party_type.isin(['Incapacitated Adult/Child', 'Respondent'])]
pdf_w_yob = pdf_w_yob[~pdf_w_yob.respondent_birth_year_ir.isnull()]
pdf = pdf.merge(pdf_w_yob[['party_partykey', 'respondent_birth_year_ir']], on='party_partykey', how='left')

# for labeling docket entries
vardf = pd.read_csv(f'{DATA_PATH}/indiana_2024_variables.csv')
vardf['value'] = clean_text(vardf, 'value')
var_dicts = vardf.to_dict('records')
var_cols = vardf[~vardf.value.isnull()].varname.unique().tolist()

Number of cases in results scrape: 9855
Number of cases not in registry: 1947
Percentage of missing cases: 20

Types of guardianship counts:
 guardianship_type_ir
Permanent    7101
Temporary     822
Name: count, dtype: int64


In [5]:
def concat(x):
    nonnull = [c for c in x if c and c == c]
    return '; '.join(nonnull) if nonnull else None

regex_or_concat = lambda lst: "(" + "|".join(lst) + ")"


## Minor Guardianship Cases

In [6]:
# for when explicitly labeled in registry as minor
minor_registry_cases = rdf[rdf.respondent_type_ir == 'Incapacitated  Minor'].case_number.unique().tolist()
print('Number of minor cases (by registry type):', len(minor_registry_cases))

# looking at approx age of respondent for minor guardianships
rdf_m['file_year'] = pd.to_datetime(rdf_m['date_filed']).dt.year
has_birth_year = ~rdf_m.respondent_birth_year_ir.isnull() & (rdf_m.respondent_birth_year_ir != 'nown')
rdf_m.loc[has_birth_year, 'approx_age_at_filing'] = rdf_m.loc[has_birth_year]['file_year'] - rdf_m[has_birth_year]['respondent_birth_year_ir'].astype(int)
minor_dob_cases = rdf_m[rdf_m.approx_age_at_filing < 17].case_number.unique().tolist()
print('Number of minor cases (by registry DOB):', len(minor_dob_cases))

# docket parsing for minor guardianships
ddf_m['minor_guardian'] = (
    ddf_m.combined_docket_event_description.str.contains('(minor guardian|guardian.* (^adult\/)minor|(person|estate) of a minor)', regex=True) 
    & ~ddf_m.combined_docket_event_description.str.contains('expiration', regex=True)
)
ddf_m['adult_guardian'] = (
    ddf_m.combined_docket_event_description.str.contains('(adult guardian|guardian.* adult)', regex=True) 
)
minor_docket_cases = ddf_m[ddf_m.minor_guardian].case_number.unique().tolist()
adult_docket_cases = ddf_m[ddf_m.adult_guardian].case_number.unique().tolist()
print('Number of minor cases (parsing docket text):', ddf_m[ddf_m.minor_guardian].case_number.nunique())

minor_cases = list(set(minor_registry_cases + minor_docket_cases + minor_dob_cases))
print('Total distinct minor guardianship cases:', len(minor_cases))

rdf_m['adult_guardianship_docket'] = rdf_m.case_number.isin(adult_docket_cases)
rdf_m['minor_guardianship_docket'] = rdf_m.case_number.isin(minor_docket_cases)
rdf_m['minor_guardian_registry'] = rdf_m.case_number.isin(minor_registry_cases)
rdf_m['minor_guardian_dob'] = rdf_m.case_number.isin(minor_dob_cases)
rdf_m['maybe_minor_guardian_case'] = rdf_m.case_number.isin(minor_cases)

# writing to CSV for sending to Punya
# ddf_m[ddf_m.minor_guardian][['case_number', 'raw_case_type', 'date_filed', 'date_closed', 'docket_event_date', 'docket_event_description', 'docket_event_comment', 'combined_docket_event_description']].to_csv(f'{DATA_PATH}/intermediate_datasets/docket_minor_guardianship.csv', index=False)
# rdf[rdf.respondent_type_ir == 'Incapacitated  Minor'].to_csv(f'{DATA_PATH}/intermediate_datasets/registry_minor_guardianship.csv', index=False)
# rdf_m[rdf_m.approx_age_at_filing < 17].to_csv(f'{DATA_PATH}/intermediate_datasets/dob_minor_guardianship.csv', index=False)

Number of minor cases (by registry type): 27
Number of minor cases (by registry DOB): 30
Number of minor cases (parsing docket text): 85
Total distinct minor guardianship cases: 103


## Basic indiana_guardianship_filings Parsing

In [7]:
output_cdf = rdf_m.rename(columns={
    'guardian(s)': 'guardian_names', 
    'issue_date_ir': 'guardianship_issue_date',
    'expiration_date': 'guardianship_status',
    'clean_case_type': 'casetype',
    'raw_case_status': 'casestatus',
    'court_ir': 'court',
    'c2dp_case_key': 'case_key',
    'date_filed': 'datefiled',
    'judge': 'casejudge',
    'respondent_birth_year_ir': 'r_birth_year'
}).copy()

# output_cdf['guardianship_status'] = output_cdf['guardianship_status'].fillna('Active')
output_cdf = output_cdf[~output_cdf.case_number.isin(minor_cases)]
output_cdf = output_cdf[[
    'case_key',
    'datefiled',
    'court',
    'county',
    'casejudge',
    'casetype',
    # 'casestatus',
    'r_birth_year'
    
]]
output_cdf

Unnamed: 0,case_key,datefiled,court,county,casejudge,casetype,r_birth_year
0,indiana___53C01-1901-GU-000008,2019-01-18,"Monroe Circuit Court 1, Monroe County, Indiana",monroe,"Cure, Elizabeth; Bradley, Geoffrey J",gu - guardianship - guardian: adult,2000
1,indiana___03D01-1707-GU-004116,2017-07-27,"Bartholomew Superior Court 1, Bartholomew County, Indiana",bartholomew,"Worton, James D",gu - guardianship - guardian: adult,1946
2,indiana___02D03-2208-GU-000228,2022-08-12,"Allen Superior Court 3, Allen County, Indiana",allen,"Houk, Phillip E",gu - guardianship - guardian: adult,1931
3,indiana___49D08-2205-GU-016956,2022-05-20,"Marion Superior Court 8, Marion County, Indiana",marion,"Eichholtz, Steven R; Kendrick, Melanie",gu - guardianship - guardian: adult,1958
4,indiana___18C05-2309-GU-000117,2023-09-12,,delaware,"Cannon, Thomas A, Jr",gu - guardianship - guardian: adult,
5,indiana___49D08-1604-GU-014381,2016-04-22,"Marion Superior Court 8, Marion County, Indiana",marion,"Eichholtz, Steven R; Batties, Mark D; Kendrick, Melanie PT",gu - guardianship - guardian: adult,1992
7,indiana___28C01-1903-GU-000010,2019-03-27,"Greene Circuit Court, Greene County, Indiana",greene,"Allen, Erik",gu - guardianship - guardian: adult,1995
8,indiana___71D04-1608-GU-000178,2016-08-26,"St. Joseph Superior Court 4, St. Joseph County, Indiana",st. joseph,"Reagan, Margot F",gu - guardianship - guardian: adult,1998
9,indiana___73C01-2004-GU-000011,2020-04-17,"Shelby Circuit Court, Shelby County, Indiana",shelby,"Meltzer, Trent E",gu - guardianship - guardian: adult,1989
10,indiana___49D08-1812-GU-049692,2018-12-18,"Marion Superior Court 8, Marion County, Indiana",marion,"Scanlan, Kelly; Scanlan, Kelly M; Kendrick, Melanie",gu - guardianship - guardian: adult,1946


In [8]:
output_cdf.case_key.nunique()

9752

In [9]:
# output_cdf.to_csv(f'{DATA_PATH}/deliverables/indiana_guardianship_filings.csv', index=False)

## Basic indiana_guardianship_parties Parsing

In [10]:
# output_pdf = pdf.rename(columns={
#     'respondent_birth_year_ir': 'party_birth_year',
# })
# output_pdf = output_pdf[~output_pdf.case_number.isin(minor_cases)]
# output_pdf = output_pdf[[
#     'case_key', 
#     'party_type', 
#     'party_name', 
#     'party_gender',
#     'party_race',
#     'party_zip',
#     'party_attorneys',
#     'party_birth_year'
# ]]
# print(output_pdf.case_key.nunique())
# output_pdf.to_csv(f'{DATA_PATH}/deliverables/indiana_guardianship_parties.csv', index=False)

## Merging Party Information onto Case

### Respondent

In [11]:
pdf_respondent = pdf[pdf.party_type.isin(['Incapacitated Adult/Child'])].sort_values(['case_number', 'party_attorneys']).drop_duplicates(['case_number', 'party_name'], keep='first')
pdf_respondent['count'] = pdf_respondent.groupby('case_number')['case_number'].transform('count')
pdf_respondent['concat_names'] = pdf_respondent.groupby('case_number')['party_name'].transform(concat)
pdf_respondent = pdf_respondent.drop_duplicates(['case_number'], keep='first')[['case_number', 'party_name', 'party_attorneys', 'party_gender', 'party_race', 'count', 'concat_names']]
pdf_respondent.columns = ['case_number', 'respondent_name', 'respondent_attorney', 'respondent_gender', 'respondent_race', 'respondent_count', 'respondent_concat_names']
print("Number of cases that have more than 1 incapacitated person:", pdf_respondent[pdf_respondent['respondent_count'] > 1].shape[0])
rdf_m = rdf_m.merge(pdf_respondent, on='case_number', how='left')

Number of cases that have more than 1 incapacitated person: 44


### Petitioner

In [12]:
pdf_petitioner = pdf[pdf.party_type.isin(['Petitioner'])].sort_values(['case_number', 'party_attorneys']).drop_duplicates(['case_number', 'party_name'], keep='first')
pdf_petitioner['count'] = pdf_petitioner.groupby('case_number')['case_number'].transform('count')
pdf_petitioner['concat_names'] = pdf_petitioner.groupby('case_number')['party_name'].transform(concat).apply(dedup_str)
pdf_petitioner['concat_gender'] = pdf_petitioner.groupby('case_number')['party_gender'].transform(concat).apply(dedup_str)
pdf_petitioner['concat_race'] = pdf_petitioner.groupby('case_number')['party_race'].transform(concat).apply(dedup_str)
pdf_petitioner['concat_attorneys'] = pdf_petitioner.groupby('case_number')['party_attorneys'].transform(concat).apply(dedup_str)

pdf_petitioner = pdf_petitioner.drop_duplicates(['case_number'], keep='first')[['case_number', 'concat_attorneys', 'concat_gender', 'concat_race', 'count', 'concat_names']]
pdf_petitioner.columns = ['case_number', 'petitioner_attorney', 'petitioner_gender', 'petitioner_race', 'petitioner_count', 'petitioner_names']
rdf_m = rdf_m.merge(pdf_petitioner, on='case_number', how='left')

## Searching Docket Text

### Phrases from CSV

In [13]:
check_values = lambda val: (ddf_m.docket_event_description.str.lower() == val) | ddf_m.docket_event_comment.str.lower().str.contains(val)
check_partial_values = lambda val: ddf_m.combined_docket_event_description.str.lower().str.contains(val, na=False)
ddf_m['docket_event_date'] = pd.to_datetime(ddf_m.docket_event_date)
for var in var_dicts:
    if var['varname'] and (var['value'] == var['value']):
        try:
            varname = var['varname']
            varname_date = varname + '_date'
            val = var['value'].lower().strip()
            ddf_m[varname] = check_partial_values(val)
            ddf_m[varname_date] = None
            ddf_m.loc[ddf_m[varname], varname_date] = ddf_m[ddf_m[varname]].docket_event_date
            print(var['varname'], var['value'], ddf_m[var['varname']].sum())
        except Exception as e:
            print('ERROR', varname, e)

agg_var_cols = {c: [max] for c in var_cols}
agg_var_cols.update({c + '_date': [min, max] for c in var_cols})

summed_cases = ddf_m.groupby('case_number').agg(agg_var_cols)
summed_cases.columns = [p[0] + '_' + p[1] if 'date' in p[0] else p[0] for p in summed_cases.columns.tolist()]
summed_cases = summed_cases[sorted(summed_cases.columns.tolist())]

keyword_cols = lambda val: [c for c in summed_cases.columns if c.startswith(val) and val + '_' not in c and 'date' not in c]

summed_cases['gc_denied'] = summed_cases['long_denied'] > 0
print('Permanent guardianships denied:', summed_cases['gc_denied'].sum())

summed_cases['perm_gc_approved'] = summed_cases[keyword_cols('long')].sum(axis='columns') > 0
print('Permanent guardianships granted:', summed_cases['perm_gc_approved'].sum())

summed_cases['temp_gc_approved'] = summed_cases[keyword_cols('temp_order')].sum(axis='columns') > 0
print('Temporary guardianships granted:', summed_cases['temp_gc_approved'].sum())

summed_cases['limited_gc_approved'] = summed_cases[keyword_cols('limited_order')].sum(axis='columns') > 0
print('Limited guardianships granted:', summed_cases['limited_gc_approved'].sum())

summed_cases['gc_dismissed'] = summed_cases[keyword_cols('dismiss')].sum(axis='columns') > 0
print('Petition guardianship dismissed:', summed_cases['gc_dismissed'].sum())
 
print('Overlap between granted/denied:', (summed_cases['gc_denied'] & summed_cases['perm_gc_approved']).sum())
print('Overlap between temp and perm AGC orders:', (summed_cases['temp_gc_approved'] & summed_cases['perm_gc_approved']).sum())

# how many neither approved or denied
outcome_cols = ['perm_gc_approved', 'temp_gc_approved', 'limited_gc_approved', 'gc_denied', 'gc_dismissed']
cdf_merge = rdf_m.merge(summed_cases, on='case_number', how='right')

missing_df = cdf_merge.copy()
for c in outcome_cols:
    missing_df = missing_df[~missing_df[c]]

print('Missing disposition count:', missing_df.shape[0])

gal_or_casa gal casa appointed 3600
medical_ physicians report filed 4040
medical1 dr. evaluation 2
medical2 psycho evaluation 1
accounting_1 order approving biennial accounting 2353
p1 verified petition for appointment of adult guardian efiled 1
p2 petition for appointment of a guardian conservator 0
p3 petition for appointment of coguardians over person and estate 88
p4 petition to establish guardianship 47115
p5 petitioning court for permanent guardianship 1
pcon conservatorship order for hearing 1
p_emerg petition for emergency temporary guardianship filed 2194
p_emerg1 motion for interim order 3
p_emerg2 petition for emergency appointment of temporary guardian 43
p_emerg3 request for initial guardianship hearing 1
temp_order appointing temporary guardian over person estate 3
temp_order1 order granting temporary guardianship 4557
temp_order2 acceptance of appointment as temporary guardian 0
temp_order3 order appointing temporary guardian of the person and conservator 1
temp_order4 

In [14]:
guardian_no_longer_needed = ['no longer incapac', 'guardian(ship)? (is|was) no longer needed']

ddf_m['no_longer_incapacitated'] = ddf_m.combined_docket_event_description.str.contains(regex_or_concat(guardian_no_longer_needed), regex=True)

In [15]:
# terminate guardianship mentioned
terminate_guardianship_phrase_list = [
    "gu terminated",
    "terminate gu",
    "terminat(e|ing).*guardianship", 
    "guardianship.* terminat"
]
ddf_m['terminate_guardianship'] = (
    ddf_m.combined_docket_event_description.str.contains(regex_or_concat(terminate_guardianship_phrase_list), regex=True) 
    & ~ddf_m.combined_docket_event_description.str.contains('power')
)

In [16]:
ddf_m['dismiss_with_prejudice'] = ddf_m.combined_docket_event_description.str.contains("dismiss.*with prejudice", regex=True)

In [17]:
withdraw_petition_list = [
    "order withdrawing emergency petition",
    "withdraw emergency petition",
    "withdraw.*petition.*guardian",
    "petition.*withdraw.*guardian",
    "withdraw petition for appointment of guradian",
    "withdrawal petition for guarrdianship",
    "motion to withdraw guardianship petition",
    "motion to withdraw emergency verified petition",
    "withdrawing petition for appointment of temporary coguradians",
    "withdrawing guardianship petition",
    "dismissal of guardianship entered. petition withdrawn",
    "motion to withdraw emergency temporary guardianship petition",
    "requesting the guardianship petition be withdrawn",
    "granting motion to withdraw petition and dismissing case",
    "filed withdrawal of guardianship petition",
    "order issued petition withdrawn. cause dismissed without prejudice"
]
exclude_withdraw_petition_list = [
    'petition for authority t transfer',
    'substitute petition for appointment of guardian over person and estate',
    'withdraw as guardian|third party independent guardian to be appointed',
    'funds',
    'appearance',
    'change residence',
    'protective order',
    'leave',
    'terminate',
    'counsel',
    'ad litem',
    'gal'
]
ddf_m['withdraw_petition'] = ~ddf_m.combined_docket_event_description.str.contains(regex_or_concat(exclude_withdraw_petition_list), regex=True) & ddf_m.combined_docket_event_description.str.contains(regex_or_concat(withdraw_petition_list), regex=True)

### Emergency petition parsing

In [18]:


emergency_petition_phrases = [
    'emergency petition for appointment of guardian',
    'emergency petition for temporary guardianship',
    'emergency petition for guardianship', 
    'emergency petition for guardian',
    'emergency petition for temporary appointment of guardian',
    'emergency petition for appointment of temporary guardian',
    'petition for appointment of emergency guardianship',
    'petition for appointment of emergency temporary guardian',
    'petition for the emergency appointment of guardian',
    'petition for emergency temporary guardianship',
    'petition for emergency appointment of temporary guardian',
    'petition for emergency appointment of guardian',
    'petition for emergency temporary appointment of guardian',
    'petition for emergency guardianship',
    'petition for guardianship emergency',
    'petition for temporary emergency guardianship',
    'petition for the appointment of temporaray emergency guardian',
    'petition to establish guardianship filed emergency',
    'petition for emergency gu',
    'emergency temporary gu',
    'petition for appointment of emergency and ongoing guardian',
    'appointment of an emergency temporary guardian',
    'emergency petition for appointment of coguardians',
    'petition for appointment of emergency temporary guardina',
    'emergency petition for appointment of temporary coguardians',
    'temporary appointment of an emergency guardian',
    'petition for appointment of temporary emergency guardian',
    'petition for emergency temporary appointment of coguardians',
    'petition for appointment of emergency coguardians',
    'petition for temporary appointment of emergency coguardians',
    'petition for appointment of emergency temp guardian',
    'petition for appointment of emergency guardian',
    'petition for emergency appointment of coguardians',
    'petition ofr emergency appointment of coguardian',
    'appointment of emergency temporary and permanent guardianship',
    'petition for emergency appoinment of temporary guardian',
    'petition for the appointment of emergency guardianship'
]
ddf_m['emergency_petition_phrase'] = ddf_m.combined_docket_event_description.str.contains('(' + '|'.join(emergency_petition_phrases) + ')', regex=True)
cases_w_emergency_petition = ddf_m[ddf_m['emergency_petition_phrase']].case_number.unique().tolist()

# this displays the distinct variants of form "petition.... emergency..." that are *excluded* 
# from being considered an emergency petitoin
emerg_variants = ddf_m[
    ddf_m.combined_docket_event_description.str.contains('petition [\w ]*emergency', regex=True)
    & ~ddf_m.combined_docket_event_description.str.contains(regex_or_concat(emergency_petition_phrases), regex=True)

].drop_duplicates('combined_docket_event_description') 

print('Distinct variants excluding phrases above:', emerg_variants.shape[0])


Distinct variants excluding phrases above: 126


### Regular petition for guardianship

In [19]:
petition_filed_phrases = [
    'petition for appointment of coguardians',
    'petition to establish guardianship',
    'petition for guardianship',
    'petition for appointment of guardian',
    'petition to appoint cogu',
    'petition for the appointment of guardian',
    'petition to appoint coguardians'
]
ddf_m['nonemergency_petition_phrase'] = (
    ddf_m.combined_docket_event_description.str.contains('(' + '|'.join(petition_filed_phrases) + ')', regex=True)
    & ~ddf_m.combined_docket_event_description.str.contains('(emergency|temp|ad litem)', regex=True)
)

cases_that_have_reg_petition_filed = ddf_m[ddf_m['nonemergency_petition_phrase']].case_number.unique().tolist()

### Death

In [20]:
death_inclusion_phrases = [
    'death of ward',
    'death of protected person',
    'death of the protected person',
    'protected persons death',
    'wards death',
    'incapacitated adult has deceased',
    'death of incapacitated person',
    'death certificate',
    'death certificate filed',
    'incapacitated adult is now deceased',
    'incapacitated adult minor is now deceased',
    'ward is now deceased',
    'death of the ward',
    'terminate guardianship.*(death|deceased)',
    '(death|deceased).*terminate guardianship',
    'protected party passed away',
    'ward passed away',
    'dismiss guardianship.*(death|deceased)',
    '(death|deceased).*dismiss guardianship',
    'affidavit of death',
    'obituary'
]

death_exclusion_phrases = [
    'deceased guardian',
    'deceased coguardian',
    'guardians death', 
    'coguardians death',
    'death of coguardian',
    'death of interested person',
]
ddf_m['contains_death_or_deceased'] = ddf_m.combined_docket_event_description.str.contains('(death|deceased)')

death_inclusion_phrases_concat = '(' + '|'.join(death_inclusion_phrases) + ')'
ddf_m['death_inclusion_method'] = ddf_m.contains_death_or_deceased & ddf_m.combined_docket_event_description.str.contains(death_inclusion_phrases_concat, regex=True)

death_exclusion_phrases_concat = '(' + '|'.join(death_exclusion_phrases) + ')'
ddf_m['death_exclusion_method'] = ddf_m.contains_death_or_deceased & ~ddf_m.combined_docket_event_description.str.contains(death_exclusion_phrases_concat, regex=True)

### Guardianship Scope

#### Person Only

In [21]:
ddf_m['reference_to_guardian_of_person_only'] = (
    ddf_m.combined_docket_event_description.str.contains("guardians?(hip)? of person", regex=True)
    & ~ddf_m.combined_docket_event_description.str.contains("estate")
    & ~ddf_m.combined_docket_event_description.str.contains("petition")
)


#### Estate Only

In [22]:
ddf_m['reference_to_guardian_of_estate_only'] = (
    ddf_m.combined_docket_event_description.str.contains("guardians?(hip)? of estate(?! and person)", regex=True)
    & ~ddf_m.combined_docket_event_description.str.contains("petition")
)

#### Person and Estate

In [23]:
ddf_m['reference_to_guardian_of_person_and_estate'] = (
    ddf_m.combined_docket_event_description.str.contains("(guardians?(hip)? of person and estate|guardians?(hip)? of estate and person)", regex=True)
    & ~ddf_m.combined_docket_event_description.str.contains("petition")
)

### CHINS

In [24]:
ddf_m['chins_case'] = ddf_m.combined_docket_event_description.str.contains("chins yes", regex=True)

### Transmit to GU Registry

In [25]:
transmit_to_gu_registry = ddf_m.combined_docket_event_description.str.startswith("added to transmit to gu registry")
ddf_m['gu_registry_docket_event'] = transmit_to_gu_registry
ddf_m['gu_registry_date'] = ddf_m.docket_event_comment.str.extract('Date of Order\: ([\d\-\/]+)')
ddf_m['gu_registry_type_permanent'] = transmit_to_gu_registry & ddf_m.docket_event_comment.str.contains('Permanent')
ddf_m['gu_registry_type_temporary'] = transmit_to_gu_registry & ddf_m.docket_event_comment.str.contains('Temporary')
ddf_m['gu_registry_scope_person_person_and_estate'] = (
    transmit_to_gu_registry 
    & ddf_m.docket_event_comment.str.contains('Guardian of Person and Estate')
)
ddf_m['gu_registry_scope_person_only'] = (
    transmit_to_gu_registry 
    & ddf_m.docket_event_comment.str.contains('Guardian of Person')
    & (ddf_m.docket_event_comment.str.contains('Estate') == False)
)
ddf_m['gu_registry_scope_estate_only'] = (
    transmit_to_gu_registry 
    & ddf_m.docket_event_comment.str.contains('Guardian of Estate')
    & (ddf_m.docket_event_comment.str.contains('Person') == False)
)


# transmit_to_gu_registry_df = ddf_m[
#     & ddf_m.case_number.isin(cdfm[cdfm.guardian_name_ir.isnull()].case_number.unique().tolist())
# ]
# print(transmit_to_gu_registry_df.shape)
# transmit_to_gu_registry_cases = transmit_to_gu_registry_df.case_number.unique().tolist()
# transmit_to_gu_registry_df

### Flagging Temporary/Permanent Guardian + "Granted Phrase" Approach

In [26]:
# flag explicit mention of temporary or permanent guardian
ddf_m['temp_guardian'] = ddf_m.combined_docket_event_description.str.contains('temp(orary)?[\w ]* (co)?guardian', regex=True)
ddf_m['perm_guardian'] = ~ddf_m['temp_guardian'] & ddf_m.combined_docket_event_description.str.contains('(permanent[\w ]* (co)?guardian)', regex=True)
ddf_m['guardian'] = ddf_m.combined_docket_event_description.str.contains('(co)?guardians?(^hip)', regex=True)

# dataframe with concatenated 
granted_list = [
    ("(?:(?:temporary|permanent) )?(?:co)?guardianship (?:(?:is|was) )?granted", False),
    ("order (?:issued )?(?:for|of|on)? ?(?:appoint|granting|establishing|approving)(?! (?:motion|order|continuance|hearing|agreement|setting)).*?guardians?(hip)?", True), 
    # ("guardian appointed", True),
    ("grant(s|ed) (?:(?:permanent|temporary) )?(?:co)?guardians?(hip)?", False),
    ("letters of (?:(?:permanent|temporary) )?guardianship (?:issued|sent|filed|ordered)", False),
    # ("(?:remove|terminate) guardian", False),
    # ("successor guardian", False),
    ("order granting petition to establish guardianship", False),
    ("order granting temporary guardianship", False),
    ("guardian status report filed", False),
    ("order of appointment[\w ]*guardian", False),
    ("(?:is|are) (?:hereby )?appoint(?:s|ed|ing)? (?:as )?(?:the )?(?:said )?(?:temporary|permanent)? ?(?:co)?guardian", False),
    ("(?:petition for|appointment of) guardian[\w ]+granted", True),
    ("appoint[\w ]* as (?:temporary|permanent) (?:co)?guardian", True),
    ("court grants guardianship", True)
]

def turn_regex_to_file_name(text):
    return re.sub('[^\w ]', '', text).replace(' ', '_')
    
# look at "guardian physician, ________"
ddf_m['granted'] = False
ddf_m['granted_text'] = None
for g in granted_list:
    if g[1]:
        query = (
            ddf_m.combined_docket_event_description.str.contains(g[0], regex=True)
            & ~ddf_m.combined_docket_event_description.str.contains("(not granted|reject|expenditure|accounting|terminat|fees|proposed|assets|schedul|real estate|deny|denie|withdraw|dismiss|ad litem| gal |oath of.*guardian appointed|setting hearing|for hearing|final accounting)", regex=True)
            & ~ddf_m.combined_docket_event_description.str.startswith("petition to")
            & ~ddf_m.combined_docket_event_description.str.startswith("oath filed")
        )
    else:
        query = (
            ddf_m.combined_docket_event_description.str.contains(g[0], regex=True)
            & ~ddf_m.combined_docket_event_description.str.contains("(ad litem| gal |oath of.*guardian appointed|dismiss|terminat)", regex=True)
            & ~ddf_m.combined_docket_event_description.str.startswith("oath filed")
        )
    print('\nPattern:',  g)
    print('Results:', query.sum())
    ddf_m.loc[query, 'granted'] = True
    
    query_matches = ddf_m.loc[query].combined_docket_event_description.str.extract("(" + g[0] + ")")
    
    print('Unique matches:', query_matches[0].unique().tolist()[:10])
    ddf_m.loc[query, 'granted_text'] = query_matches[0]
    ddf_m.drop_duplicates('granted_text')[['case_number', 'combined_docket_event_description', 'granted_text']].to_csv(f'{DATA_PATH}/intermediate_datasets/{turn_regex_to_file_name(g[0])}.csv', index=False)

ddf_m[['granted_phrase_date', 'perm_granted_phrase_date', 'temp_granted_phrase_date', 'nontemp_granted_phrase_date']] = None

# sometimes "order filed" or "file stamp" on event differ from the date associated to the event, so we replace
# the docket date with these alternate (more accurate) 
ddf_m['adjusted_event_date'] = ddf_m['docket_event_date']
should_overwrite_date = ~ddf_m.docket_event_subdate.isnull() & ddf_m.granted & (ddf_m['docket_event_date'] != ddf_m['docket_event_subdate'])
ddf_m.loc[should_overwrite_date, 'adjusted_event_date'] = ddf_m[should_overwrite_date].docket_event_subdate

ddf_m.loc[ddf_m.granted, 'granted_phrase_date'] = ddf_m[ddf_m.granted].adjusted_event_date
ddf_m.loc[ddf_m.granted & ddf_m.temp_guardian, 'temp_granted_phrase_date'] = ddf_m[ddf_m.granted & ddf_m.temp_guardian].adjusted_event_date
ddf_m['min_temp_granted_date'] = ddf_m.groupby('case_number')['temp_granted_phrase_date'].transform('min')

# if temp order is granted, then ambiguous orders after that date should be considered permanent if also
# none are explicitly permanent
perm_granted_events = ddf_m.granted & ddf_m.perm_guardian
ddf_m.loc[perm_granted_events, 'perm_granted_phrase_date'] = ddf_m[perm_granted_events].adjusted_event_date
ddf_m['min_perm_granted_date'] = ddf_m.groupby('case_number')['perm_granted_phrase_date'].transform('min')
ddf_m['perm_guardian_inferred'] = False

# SETTING PERM GRANTED DATE
no_temp_petition_filed = ~ddf_m.case_number.isin(cases_w_emergency_petition)
no_temp_appt_date = ddf_m.min_temp_granted_date.isnull()
not_a_temp_appt_order = ~ddf_m['temp_guardian'] & ddf_m.combined_docket_event_description.str.contains('(order granting petition to establish guardianship|letters of guardianship)', regex=True)
event_after_one_week_post_temp_appt_date = ddf_m.adjusted_event_date > (ddf_m.min_temp_granted_date + timedelta(days=7))

# if no indication of temp petition/order, then generic petition granted is permanent
ddf_m.loc[no_temp_petition_filed & no_temp_appt_date & not_a_temp_appt_order, ['perm_guardian', 'perm_guardian_inferred']] = True
ddf_m.loc[~no_temp_appt_date & event_after_one_week_post_temp_appt_date & not_a_temp_appt_order, ['perm_guardian', 'perm_guardian_inferred']] = True
perm_granted_events = ddf_m.granted & ddf_m.perm_guardian_inferred
ddf_m.loc[perm_granted_events, 'perm_granted_phrase_date'] = ddf_m[perm_granted_events].adjusted_event_date


# no explicit mention of perm/temp but more than X days after temp order granted
not_temp = ddf_m.granted & ~ddf_m.temp_guardian & (ddf_m.adjusted_event_date > (ddf_m.min_temp_granted_date + timedelta(days=14)))
ddf_m.loc[ddf_m.granted & ~ddf_m.temp_guardian, 'nontemp_granted_phrase_date'] = ddf_m[not_temp].adjusted_event_date

# combine granted, perm/temp flags to a case level (from docket entry level)
granted_guard_df = ddf_m.groupby('case_number').agg(
    {
        'granted': ['sum'], 
        'guardian': ['sum'],
        'perm_guardian': ['sum'], 
        'temp_guardian': ['sum'], 
        'granted_text': [concat],
        'granted_phrase_date': ['min'],
        'perm_granted_phrase_date': ['min'],
        'perm_guardian_inferred': ['max'],
        'temp_granted_phrase_date': ['min'],
        'nontemp_granted_phrase_date': ['min'],
        'emergency_petition_phrase': ['max'],
        'nonemergency_petition_phrase': ['max'],
        'death_inclusion_method': ['max'],
        'death_exclusion_method': ['max'],
        'contains_death_or_deceased': ['max'],
        'no_longer_incapacitated': ['max'],
        'terminate_guardianship': ['max'],
        'dismiss_with_prejudice': ['max'],
        'withdraw_petition': ['max'],
        'reference_to_guardian_of_estate_only': ['max'],
        'reference_to_guardian_of_person_only': ['max'],
        'reference_to_guardian_of_person_and_estate': ['max'],
        'chins_case': ['max'],
        'gu_registry_date': [concat],
        'gu_registry_type_permanent': ['max'],
        'gu_registry_type_permanent': ['max'],
        'gu_registry_scope_person_person_and_estate': ['max'],
        'gu_registry_scope_person_only': ['max'],
        'gu_registry_scope_estate_only': ['max'],
        'gu_registry_docket_event': ['max']

    }
).reset_index()
granted_guard_df.columns = ['case_number'] + [c[0] + '_' + c[1] if 'date' in c[0] else c[0] for c in granted_guard_df.columns if c[0] != 'case_number']
granted_guard_df['has_granted_phrase_on_case'] = (granted_guard_df.granted > 0)
granted_guard_df['guardian_mentioned_on_case'] = (granted_guard_df.guardian > 0)
granted_guard_df['perm_guardian_mentioned_on_case'] = (granted_guard_df.perm_guardian > 0)
granted_guard_df['temp_guardian_mentioned_on_case'] = (granted_guard_df.temp_guardian > 0)
granted_guard_df['emergency_petition_mentioned_on_case'] = (granted_guard_df.emergency_petition_phrase > 0)
granted_guard_df['nonemergency_petition_mentioned_on_case'] = (granted_guard_df.nonemergency_petition_phrase > 0)


print('\n\nNumber of cases with `granted`:', granted_guard_df[granted_guard_df.granted > 0].case_number.nunique())
print('Number of cases with `granted` + permanent guardianship mentioned:', 
    granted_guard_df[
        (granted_guard_df.granted > 0)
        & (granted_guard_df.perm_guardian > 0)
    ].case_number.nunique()
)
print('Number of cases with `granted` + temporary guardianship mentioned:', 
    granted_guard_df[
        (granted_guard_df.granted > 0)
        & (granted_guard_df.temp_guardian > 0)
    ].case_number.nunique()
)

# PARSING GUARDIANSHIP with GUARDIAN as PARTY on profile
cases_w_guardian_party = pdf[pdf.party_type == 'Guardian'].case_number.unique().tolist()
cdf_merge['guardian_in_party_info'] = cdf_merge.case_number.isin(cases_w_guardian_party)
cases_w_guardian_party_but_not_in_registry = rdf[rdf.case_number.isin(cases_w_guardian_party) & rdf.court_ir.isnull()]

print('Cases with Guardian as party in docket:', len(cases_w_guardian_party))
print('Cases with Guardian as party but not in registry:', cases_w_guardian_party_but_not_in_registry.shape[0])

cdfm = cdf_merge.merge(granted_guard_df, on='case_number', how='left')
cdfm['petitioner_is_institution'] = cdfm['petitioner_names'].str.lower().str.contains('(center|hospital|elder|adult guardianship program|veterans administration|health|healthcare|care|community|home|services|charities|office|dhss|state|department|dept|institute|foundation|counseling|behavioral|facility|associate|mentor|llc|village|program| inc|county|corp)', regex=True)
cdfm['has_revoked_guardian_ir'] = cdfm.guardian_name_ir.str.contains('Revoke', na=False)

cdfm = cdfm.rename(columns={
    'issue_date_ir': 'decision_date_granted_ir',
    'issue_date_ir_min': 'decision_date_granted_ir_min',
    'raw_case_status': 'case_status_raw',
    'court': 'court_name',
    'c2dp_case_key': 'case_key',
    'clean_case_type': 'case_type',
    'long_denied_date_min': 'decision_date_deny'
})

print('Number of cases without granted phrase/denied docket entry/dismissed docket entry:', cdfm[~cdfm['has_granted_phrase_on_case'] & ~cdfm['gc_denied'] & ~cdfm['gc_dismissed']].shape[0])
print('Number of cases without docket entry/denied docket entry/dismissed docket entry:', cdfm[~cdfm['perm_gc_approved'] & ~cdfm['temp_gc_approved'] & ~cdfm['gc_denied'] & ~cdfm['gc_dismissed']].shape[0])
print('Number of cases that have "Guardian" in party but not in registry or granted phrase:', cdfm[cdfm.guardian_in_party_info & ~cdfm.has_granted_phrase_on_case & cdfm.respondent_name_ir.isnull()].shape[0])

cdfm_types = cdfm.dtypes.to_dict()
for k, v in cdfm_types.items():
    if v.type == np.bool_:
        cdfm[k] = cdfm[k].astype(int)

cdfm.drop(columns=['granted', 'perm_guardian', 'temp_guardian']).to_csv(f'{DATA_PATH}/deliverables/indiana_guardianship.csv', index=False)


Pattern: ('(?:(?:temporary|permanent) )?(?:co)?guardianship (?:(?:is|was) )?granted', False)
Results: 415
Unique matches: ['guardianship granted', 'temporary guardianship is granted', 'guardianship is granted', 'permanent guardianship granted', 'temporary guardianship granted', 'permanent guardianship is granted', 'coguardianship granted', 'guardianship was granted']

Pattern: ('order (?:issued )?(?:for|of|on)? ?(?:appoint|granting|establishing|approving)(?! (?:motion|order|continuance|hearing|agreement|setting)).*?guardians?(hip)?', True)
Results: 24094
Unique matches: ['order granting petition to establish guardianship', 'order appointing emergency temporary guardian', 'order appointing guardian', 'order approving status report guardians', 'order appointing coguardians', 'order granting petition for appointment of temporary guardian', 'order granting petition for appointment of permanent guardian', 'order granting petition for appointment of guardian', 'order granting temporary guar

In [27]:
print('Number of rows in final df: ', cdfm.shape[0])

for p in cdfm.columns:
    print(p)

Number of rows in final df:  9872
case_key
fips
state
county
case_number
last_collected
case_type
date_filed
case_status_raw
last_docket_date
last_docket
court_ir
respondent_name_ir
respondent_type_ir
respondent_birth_year_ir
guardianship_type_ir
guardian_name_ir
guardianship_scope_ir
decision_date_granted_ir
expiration_date_ir
decision_date_granted_ir_min
judge
file_year
approx_age_at_filing
adult_guardianship_docket
minor_guardianship_docket
minor_guardian_registry
minor_guardian_dob
maybe_minor_guardian_case
respondent_name
respondent_attorney
respondent_gender
respondent_race
respondent_count
respondent_concat_names
petitioner_attorney
petitioner_gender
petitioner_race
petitioner_count
petitioner_names
accounting_1
accounting_1_date_max
accounting_1_date_min
death
death1
death1_date_max
death1_date_min
death2
death2_date_max
death2_date_min
death3
death3_date_max
death3_date_min
death4
death4_date_max
death4_date_min
death5
death5_date_max
death5_date_min
death6
death6_date_max
dea

In [28]:
cdfm[cdfm.guardian_name_ir.isnull() & (cdfm.gu_registry_docket_event == 1)].shape

(195, 251)

## Pulling Guardianship Parsing Together

In [29]:
dpdf = cdfm[[
    'case_number', 
    'decision_date_granted_ir', 
    'decision_date_granted_ir_min', 
    'guardianship_type_ir', 
    'temp_granted_phrase_date_min', 
    'nontemp_granted_phrase_date_min', 
    'perm_granted_phrase_date_min', 
    'granted_phrase_date_min', 
    'perm_guardian_inferred', 
    'granted_text', 
    'emergency_petition_mentioned_on_case',
    'nonemergency_petition_mentioned_on_case',
]]

Plan:

temporary petition was filed
non-temp petition was filed
temporary guardianship granted

is non-temp petition ever followed by temp guardianship?
what does temp then permanent look like?

### Perm Guardianship Date Parsing

In [30]:
perm_guardianship = dpdf[(dpdf.guardianship_type_ir == 'Permanent')]

threshold_days = 14
perm_guardianship[perm_guardianship.issue_date_ir_min.between(perm_guardianship.perm_granted_phrase_date_min - timedelta(days=threshold_days), perm_guardianship.perm_granted_phrase_date_min + timedelta(days=threshold_days))].shape

AttributeError: 'DataFrame' object has no attribute 'issue_date_ir_min'

In [None]:
perm_guardianship[~perm_guardianship.perm_granted_phrase_date_min.isnull() | ~perm_guardianship.granted_phrase_date_min.isnull()].shape

In [None]:
perm_guardianship.shape

In [None]:
# why are temp_granted_phrase_date the same as perm date sometimes? B
# Answer: Because retroactively change the date
dpdf[(dpdf.temp_granted_phrase_date_min == dpdf.perm_granted_phrase_date_min)].shape

### Temp Guardianship Date Parsing

In [None]:
temp_guardianship = dpdf[(dpdf.guardianship_type_ir == 'Temporary')]

threshold_days = 14
temp_guardianship[temp_guardianship.issue_date_ir_min.between(
        temp_guardianship.temp_granted_phrase_date_min - timedelta(days=threshold_days), 
        temp_guardianship.temp_granted_phrase_date_min + timedelta(days=threshold_days)
    )
].shape

In [None]:
temp_guardianship[
    temp_guardianship.temp_granted_phrase_date_min.isnull() 
    # & temp_guardianship.nonemergency_petition_mentioned_on_case
]

In [None]:
temp_guardianship.shape

In [None]:
ddf_m[ddf_m.case_number == '05C01-1812-GU-000018']

## Non-Granted Outcomes

## What happens if not granted?

* 03D01-1706-GU-003466: Order Denying ORDER ON PLAINTIFF'S PETITION FOR GUARDIANSHIP OF BURTON FRANKLIN WAS DENIED

* 02D01-1606-GU-000140: foreign guardianship
* 02D01-1612-GU-000291: the Court denies the pending Petition For Appointment of Guardian.
* 02D02-1604-GU-000094: "Motion To Withdraw Petition Of Guardianship" OR "Cause dismissed"
* 02D02-1609-GU-000211: Mentions "petition to terminate guardianship"
* 02D02-1611-GU-000267Petitioner granted 30 days to show cause why this matter should not be dismissed under TR 41(e).


# Important: Quality Assurance

Use each method (registry, exact phrases, and granted/flag) to validate each other.

Registry is definitely correct, but may only cover the most recent verdict.

Questions to answer:

* Are registry cases missing at random? Meaning that we can use ratios of permanent/temporary, phrases, and other insights from registry labeled cases to label missing ones?
* How often do methods agree? When do they disagree? What are the phrases that are wrong?

* In the final dataframe, how often are contradictory verdicts? Dismissed and granted? Denied and granted? Temporary and permanent?

* 

## Are registry cases missing at random?

In [None]:
# cdfm[cdfm.court.isnull()].county.value_counts()/cdfm.county.value_counts()

## Validating w/ Registry Outcomes

In [None]:
perm_registry = cdfm.guardianship_type_ir == 'Permanent'
temp_registry = cdfm.guardianship_type_ir == 'Temporary'

in_registry = perm_registry | temp_registry
has_docket_entry_with_guardian_granted_phrase = cdfm.has_granted_phrase_on_case
guardianship_phrase_in_registry = (in_registry & has_docket_entry_with_guardian_granted_phrase)
guardianship_phrase_but_not_in_registry = (~in_registry & has_docket_entry_with_guardian_granted_phrase)

has_perm_docket_desc = cdfm.perm_gc_approved
has_temp_docket_desc = cdfm.temp_gc_approved
has_granted_docket_desc = (has_perm_docket_desc | has_temp_docket_desc)


print('\nGranted Phrases vs Registry:')
print(pd.crosstab(has_docket_entry_with_guardian_granted_phrase, in_registry))

print('\nGranted Docket Entry vs Registry:')
print(pd.crosstab(has_granted_docket_desc, in_registry))

print('\nGranted Docket Entry vs Granted Phrase:')
print(pd.crosstab(has_granted_docket_desc, has_docket_entry_with_guardian_granted_phrase))

print('\nGranted Temp Docket Entry vs Registry Temp:')
print(pd.crosstab(has_temp_docket_desc, temp_registry))

print('\nGranted Perm Docket Entry vs Registry Perm:')
print(pd.crosstab(has_perm_docket_desc, perm_registry))

print('\nAre petition denials all missing registry entries?')

### Registry says true but no granted phrase on case

In [None]:
cdfm[~cdfm.has_granted_phrase_on_case & in_registry].to_csv(f'{DATA_PATH}/intermediate_datasets/registry_but_no_granted_phrase.csv')

In [None]:
cdfm[~cdfm.has_granted_phrase_on_case & in_registry]

In [None]:
registry_but_no_granted_phrase = cdfm[~cdfm.has_granted_phrase_on_case & in_registry].case_number.unique().tolist()
ddf_m[ddf_m.case_number.isin(registry_but_no_granted_phrase)]

In [None]:
registry_but_no_granted_phrase

In [None]:
ddf_m[ddf_m.combined_docket_event_description.str.startswith('order of appointment')]

### Missing Petition Phrase

In [None]:
# what about the cases that don't have emergency/non-emergency petition filed
ddf_m[
    ~ddf_m.case_number.isin(cases_that_have_reg_petition_filed) 
    & ~ddf_m.case_number.isin(cases_w_emergency_petition) 
    & ddf_m.combined_docket_event_description.str.contains('petition filed')
].drop_duplicates('combined_docket_event_description').shape