In [1]:
import warnings
warnings.filterwarnings('ignore')

import pandas as pd
import numpy as np
pd.set_option("display.max_columns", None)
pd.set_option("display.max_rows", 500)
pd.set_option("display.min_rows", 200)
pd.set_option('display.max_colwidth', None)

pd.options.mode.chained_assignment = None
import re 
from datetime import timedelta, datetime
import sys
sys.path.append('../../..')

from src.box import read_box_df

ACL Questions:

* Characteristics of courts and cases, such as which courts adjudicate guardianships, whether specialized judges are assigned these matters, characteristics of petitioners; characteristics of appointed guardians; the incidence and frequency of ordering imposition of guardianship, versus revocation or orders limiting, denying, or revoking plenary guardianship;
* Prevalence of legal representation for guardian, and type of civil legal representation for proposed protected persons;
* What can be understood about the guardianship process from a review of court proceedings, such as the evidence required to impose guardianship; level of involvement of the proposed protected person in the proceeding; alternatives to guardianship proposed or considered; frequency of cases involving revocation or limits to the guardian’s power; and, duration and extent of proceedings prior to imposing a guardianship;
* Guardianship monitoring instructions and/or oversight in the proceedings.


Data Documentation Questions:

* For each state, an inventory and summary of what court docket information and/or data the state produces/releases on guardianship.
* For each state, an inventory and summary of what court docket information and/or data was extracted for this project.
* For each state, a summary of variables that were constructed and how, validation processes completed, and considerations for using/not using variable for subsequent analyses.

In [2]:
DATA_PATH='../data/indiana'
ddf = pd.read_csv(f'{DATA_PATH}/indiana_guardianship_dockets.csv')
cdf = pd.read_csv(f'{DATA_PATH}/indiana_clean_agc_filings.csv')
pdf = pd.read_csv(f'{DATA_PATH}/indiana_guardianship_parties.csv')
rdf = pd.read_csv(f'{DATA_PATH}/indiana_registry_scrape_total.csv')
rdf = rdf.drop(columns=['Unnamed: 0.1', 'Unnamed: 0'])
# cdf_all =  pd.read_csv(f'{DATA_PATH}/indiana_guardianship_cases.csv')

# drop one duplicate case
cdf = cdf[cdf.case_number != '10C018001GU000007']
pdf = pdf[pdf.case_number != '10C018001GU000007']

cdf['case_number'] = cdf['case_number'].str.upper()

# reduce columns
cdf = cdf[['c2dp_case_key', 'fips', 'state', 'county', 'case_number', 'last_collected', 'clean_case_type', 'date_filed', 'raw_case_status', 'last_docket_date', 'last_docket']]
# cdf_all['case_number'] = cdf_all['case_number'].str.upper()
pdf['case_number'] = pdf['case_number'].str.upper()
ddf['case_number'] = ddf['case_number'].str.upper()
rdf['case_number'] = rdf['case_number'].str.upper()

# cleaning duplicate entries (TODO: remove this ugly logic!)
dedup_str = lambda x: '; '.join(list(set([c.strip() for c in x.split(';') if c and c.replace(' ', '')]))) if x and x==x else ''

rdf['guardianship_scope'] = rdf['guardianship_scope'].str.strip().str.replace('\s+', ' ', regex=True).apply(dedup_str)
rdf['issue_date'] = rdf['issue_date'].str.strip().str.replace('\s+', ' ', regex=True).apply(dedup_str)

get_min_issue_date = lambda x: min([datetime.strptime(c, '%m/%d/%Y') for c in x.split('; ')]) if x else None
rdf['issue_date_min'] = rdf['issue_date'].apply(get_min_issue_date)
rdf['expiration_date'] = rdf['expiration_date'].str.strip().str.replace('\s+', ' ', regex=True).apply(dedup_str)

rdf.columns = [
    'case_number', 
    'court_ir', 
    'respondent_name_ir', 
    'respondent_type_ir', 
    'respondent_birth_year_ir', 
    'guardianship_type_ir',
    'guardian_name_ir',
    'guardianship_scope_ir', 
    'issue_date_ir',
    'expiration_date_ir',
    'issue_date_ir_min'
]
rdf.loc[rdf.respondent_birth_year_ir == 'nown', 'respondent_birth_year_ir'] = None
rdf['respondent_birth_year_ir'] = rdf.respondent_birth_year_ir.str.replace('.0', '', regex=False)
rdf.loc[~rdf.respondent_birth_year_ir.isnull(), 'respondent_birth_year_ir'] = rdf[~rdf.respondent_birth_year_ir.isnull()]['respondent_birth_year_ir'].astype(int)

rdf_m = cdf.merge(rdf, on='case_number', how='left')

print('Number of cases in database:', cdf.case_number.nunique())
print('Number of cases in results scrape:', rdf.case_number.nunique())
print('Number of cases not in registry:', rdf.court_ir.isnull().sum())
print('Percentage of missing cases:', round(rdf.court_ir.isnull().sum()/rdf.shape[0]*100))

print('\nTypes of guardianship counts:\n', rdf.guardianship_type_ir.value_counts())

def clean_text(df, colname):
    df[colname] = df[colname].str.lower()
    space_replace_list = ['\n', '\/']
    for sp in space_replace_list:
        df[colname] = df[colname].str.replace(sp, ' ', regex=True)
    df[colname] = df[colname].str.replace("[^A-Za-z\. ]", '', regex=True)
    df[colname] = df[colname].str.replace("\s+", ' ', regex=True)
    df[colname] = df[colname].str.replace(" co\s+gu", ' cogu', regex=True)
    return df[colname].str.strip()

ddf['combined_docket_event_description'] = ddf['docket_event_description'].fillna('') + ' ' + ddf['docket_event_comment'].fillna('')
ddf['combined_docket_event_description'] = clean_text(ddf, 'combined_docket_event_description')
ddf['docket_event_date'] = pd.to_datetime(ddf.docket_event_date)

# fix out of bounds date on subdates and convert to datetime
ddf.loc[ddf.docket_event_subdate == '06/22/2916', 'docket_event_subdate'] = '06/22/2016'
ddf['docket_event_subdate'] = pd.to_datetime(ddf['docket_event_subdate'])

# judge
# NOTE THAT WE ARE REMOVING SUFFIX WHICH MAY IDENTIFY TYPE OF JUDGE
ddf_judge = ddf.copy()
ddf_judge['docket_event_judge'] = ddf_judge['docket_event_judge'].str.replace('( -|- ).*', '', regex=True).str.replace('.', '')
judge_df = ddf_judge[~ddf_judge.docket_event_judge.isnull()].drop_duplicates(['case_number', 'docket_event_judge']).groupby('case_number')['docket_event_judge'].apply(lambda x: '; '.join(x)).reset_index()
judge_df.rename(columns={'docket_event_judge': 'judge'}, inplace=True)
rdf_m = rdf_m.merge(judge_df, on='case_number', how='left')

# FILTER DDF TO ONLY INCLUDE CASES MISSING REGISTRY
cases_not_found_in_registry = rdf[rdf.court_ir.isnull()].case_number.unique().tolist()
ddf_m = ddf[[
    'case_number', 
    'docket_event_date', 
    'docket_event_description', 
    'docket_event_comment', 
    'docket_event_judge', 
    'docket_event_subdate_label', 
    'docket_event_subdate', 
    'combined_docket_event_description'
]].copy()

# cleaning parties
pdf.rename(columns={'party_extconncodedesc': 'party_type'}, inplace=True)
pdf['party_registry_id'] = pdf['party_oans'].str.extract('((?<=Registry\, number\=)\d+)')
pdf['party_race'] = pdf.party_description.str.extract('(Multiracial|Asian|White|Indian|Black|Native Hawaiian or Other Pacific Islander|Other)')
pdf['party_gender'] = pdf.party_description.str.extract('(Male|Female)')

# to add respondent year of birth
pdf_w_yob = pdf[['case_number', 'party_partykey', 'party_type']].merge(rdf[['case_number', 'respondent_birth_year_ir']], on='case_number', how='left')
pdf_w_yob = pdf_w_yob[pdf_w_yob.party_type.isin(['Incapacitated Adult/Child', 'Respondent'])]
pdf_w_yob = pdf_w_yob[~pdf_w_yob.respondent_birth_year_ir.isnull()]
pdf = pdf.merge(pdf_w_yob[['party_partykey', 'respondent_birth_year_ir']], on='party_partykey', how='left')

# for labeling docket entries
vardf = pd.read_csv(f'{DATA_PATH}/indiana_2024_variables.csv')
vardf['value'] = clean_text(vardf, 'value')
var_dicts = vardf.to_dict('records')
var_cols = vardf[~vardf.value.isnull()].varname.unique().tolist()

Number of cases in database: 9855
Number of cases in results scrape: 9861
Number of cases not in registry: 1831
Percentage of missing cases: 19

Types of guardianship counts:
 guardianship_type_ir
Permanent    7208
Temporary     836
Name: count, dtype: int64


In [3]:
def concat(x):
    nonnull = list(set([c for c in x if c and c == c]))
    return '; '.join(nonnull) if nonnull else None

regex_or_concat = lambda lst: "(" + "|".join(lst) + ")"


## Minor Guardianship Cases

In [4]:
# for when explicitly labeled in registry as minor
minor_registry_cases = rdf[rdf.respondent_type_ir == 'Incapacitated  Minor'].case_number.unique().tolist()
print('Number of minor cases (by registry type):', len(minor_registry_cases))

# looking at approx age of respondent for minor guardianships
rdf_m['file_year'] = pd.to_datetime(rdf_m['date_filed']).dt.year
has_birth_year = ~rdf_m.respondent_birth_year_ir.isnull() & (rdf_m.respondent_birth_year_ir != 'nown')
rdf_m.loc[has_birth_year, 'approx_age_at_filing'] = rdf_m.loc[has_birth_year]['file_year'] - rdf_m[has_birth_year]['respondent_birth_year_ir'].astype(int)
minor_dob_cases = rdf_m[rdf_m.approx_age_at_filing < 17].case_number.unique().tolist()
print('Number of minor cases (by registry DOB):', len(minor_dob_cases))

# docket parsing for minor guardianships
ddf_m['minor_guardian'] = (
    ddf_m.combined_docket_event_description.str.contains('(minor guardian|guardian.*minor|(person|estate) of a minor)', regex=True) 
    & ~ddf_m.combined_docket_event_description.str.contains('(expiration|adult)', regex=True)
)
ddf_m['adult_guardian'] = (
    ddf_m.combined_docket_event_description.str.contains('(adult guardian|guardian.* adult)', regex=True) 
)
minor_docket_cases = ddf_m[ddf_m.minor_guardian].case_number.unique().tolist()
adult_docket_cases = ddf_m[ddf_m.adult_guardian].case_number.unique().tolist()
print('Number of minor cases (parsing docket text):', ddf_m[ddf_m.minor_guardian].case_number.nunique())

minor_cases = list(set(minor_registry_cases + minor_docket_cases + minor_dob_cases))
print('Total distinct minor guardianship cases:', len(minor_cases))

rdf_m['adult_guardianship_docket'] = rdf_m.case_number.isin(adult_docket_cases)
rdf_m['minor_guardianship_docket'] = rdf_m.case_number.isin(minor_docket_cases)
rdf_m['minor_guardian_registry'] = rdf_m.case_number.isin(minor_registry_cases)
rdf_m['minor_guardian_dob'] = rdf_m.case_number.isin(minor_dob_cases)
rdf_m['maybe_minor_guardian_case'] = rdf_m.case_number.isin(minor_cases)

# writing to CSV for sending to Punya
# ddf_m[ddf_m.minor_guardian][['case_number', 'raw_case_type', 'date_filed', 'date_closed', 'docket_event_date', 'docket_event_description', 'docket_event_comment', 'combined_docket_event_description']].to_csv(f'{DATA_PATH}/intermediate_datasets/docket_minor_guardianship.csv', index=False)
# rdf[rdf.respondent_type_ir == 'Incapacitated  Minor'].to_csv(f'{DATA_PATH}/intermediate_datasets/registry_minor_guardianship.csv', index=False)
# rdf_m[rdf_m.approx_age_at_filing < 17].to_csv(f'{DATA_PATH}/intermediate_datasets/dob_minor_guardianship.csv', index=False)

Number of minor cases (by registry type): 29
Number of minor cases (by registry DOB): 32
Number of minor cases (parsing docket text): 193
Total distinct minor guardianship cases: 202


## Basic indiana_guardianship_filings Parsing

In [5]:
# output_cdf = rdf_m.rename(columns={
#     'guardian(s)': 'guardian_names', 
#     'issue_date_ir': 'guardianship_issue_date',
#     'expiration_date': 'guardianship_status',
#     'clean_case_type': 'casetype',
#     'raw_case_status': 'casestatus',
#     'court_ir': 'court',
#     'c2dp_case_key': 'case_key',
#     'date_filed': 'datefiled',
#     'judge': 'casejudge',
#     'respondent_birth_year_ir': 'r_birth_year'
# }).copy()

# # output_cdf['guardianship_status'] = output_cdf['guardianship_status'].fillna('Active')
# output_cdf = output_cdf[~output_cdf.case_number.isin(minor_cases)]
# output_cdf = output_cdf[[
#     'case_key',
#     'datefiled',
#     'court',
#     'county',
#     'casejudge',
#     'casetype',
#     # 'casestatus',
#     'r_birth_year'
    
# ]]
# output_cdf

In [6]:
# output_cdf.to_csv(f'{DATA_PATH}/deliverables/indiana_guardianship_filings.csv', index=False)

## Basic indiana_guardianship_parties Parsing

In [7]:
# output_pdf = pdf.rename(columns={
#     'respondent_birth_year_ir': 'party_birth_year',
# })
# output_pdf = output_pdf[~output_pdf.case_number.isin(minor_cases)]
# output_pdf = output_pdf[[
#     'case_key', 
#     'party_type', 
#     'party_name', 
#     'party_gender',
#     'party_race',
#     'party_zip',
#     'party_attorneys',
#     'party_birth_year'
# ]]
# print(output_pdf.case_key.nunique())
# output_pdf.to_csv(f'{DATA_PATH}/deliverables/indiana_guardianship_parties.csv', index=False)

## Merging Party Information onto Case

### Respondent

In [8]:
pdf_respondent = pdf[pdf.party_type.isin(['Incapacitated Adult/Child'])].sort_values(['case_number', 'party_attorneys']).drop_duplicates(['case_number', 'party_name'], keep='first')
pdf_respondent['count'] = pdf_respondent.groupby('case_number')['case_number'].transform('count')
pdf_respondent['concat_names'] = pdf_respondent.groupby('case_number')['party_name'].transform(concat)
pdf_respondent = pdf_respondent.drop_duplicates(['case_number'], keep='first')[['case_number', 'party_name', 'party_attorneys', 'party_gender', 'party_race', 'count', 'concat_names']]
pdf_respondent.columns = ['case_number', 'respondent_name', 'respondent_attorney', 'respondent_gender', 'respondent_race', 'respondent_count', 'respondent_concat_names']
print("Number of cases that have more than 1 incapacitated person:", pdf_respondent[pdf_respondent['respondent_count'] > 1].shape[0])
rdf_m = rdf_m.merge(pdf_respondent, on='case_number', how='left')

Number of cases that have more than 1 incapacitated person: 44


### Petitioner

In [9]:
pdf_petitioner = pdf[pdf.party_type.isin(['Petitioner'])].sort_values(['case_number', 'party_attorneys']).drop_duplicates(['case_number', 'party_name'], keep='first')
pdf_petitioner['count'] = pdf_petitioner.groupby('case_number')['case_number'].transform('count')
pdf_petitioner['concat_names'] = pdf_petitioner.groupby('case_number')['party_name'].transform(concat).apply(dedup_str)
pdf_petitioner['concat_gender'] = pdf_petitioner.groupby('case_number')['party_gender'].transform(concat).apply(dedup_str)
pdf_petitioner['concat_race'] = pdf_petitioner.groupby('case_number')['party_race'].transform(concat).apply(dedup_str)
pdf_petitioner['concat_attorneys'] = pdf_petitioner.groupby('case_number')['party_attorneys'].transform(concat).apply(dedup_str)

pdf_petitioner = pdf_petitioner.drop_duplicates(['case_number'], keep='first')[['case_number', 'concat_attorneys', 'concat_gender', 'concat_race', 'count', 'concat_names']]
pdf_petitioner.columns = ['case_number', 'petitioner_attorney', 'petitioner_gender', 'petitioner_race', 'petitioner_count', 'petitioner_names']
rdf_m = rdf_m.merge(pdf_petitioner, on='case_number', how='left')

## Searching Docket Text

### Phrases from CSV

In [10]:
check_values = lambda val: (ddf_m.docket_event_description.str.lower() == val) | ddf_m.docket_event_comment.str.lower().str.contains(val)
check_partial_values = lambda val: ddf_m.combined_docket_event_description.str.lower().str.contains(val, na=False)
ddf_m['docket_event_date'] = pd.to_datetime(ddf_m.docket_event_date)
for var in var_dicts:
    if var['varname'] and (var['value'] == var['value']):
        try:
            varname = var['varname']
            varname_date = varname + '_date'
            val = var['value'].lower().strip()
            ddf_m[varname] = check_partial_values(val)
            ddf_m[varname_date] = None
            ddf_m.loc[ddf_m[varname], varname_date] = ddf_m[ddf_m[varname]].docket_event_date
            print(var['varname'], var['value'], ddf_m[var['varname']].sum())
        except Exception as e:
            print('ERROR', varname, e)

agg_var_cols = {c: [max] for c in var_cols}
agg_var_cols.update({c + '_date': [min, max] for c in var_cols})

summed_cases = ddf_m.groupby('case_number').agg(agg_var_cols)
summed_cases.columns = [p[0] + '_' + p[1] if 'date' in p[0] else p[0] for p in summed_cases.columns.tolist()]
summed_cases = summed_cases[sorted(summed_cases.columns.tolist())]

keyword_cols = lambda val: [c for c in summed_cases.columns if c.startswith(val) and val + '_' not in c and 'date' not in c]

summed_cases['gc_denied'] = summed_cases['long_denied'] > 0
print('Permanent guardianships denied:', summed_cases['gc_denied'].sum())

summed_cases['perm_gc_approved'] = summed_cases[keyword_cols('long')].sum(axis='columns') > 0
print('Permanent guardianships granted:', summed_cases['perm_gc_approved'].sum())

summed_cases['temp_gc_approved'] = summed_cases[keyword_cols('temp_order')].sum(axis='columns') > 0
print('Temporary guardianships granted:', summed_cases['temp_gc_approved'].sum())

summed_cases['limited_gc_approved'] = summed_cases[keyword_cols('limited_order')].sum(axis='columns') > 0
print('Limited guardianships granted:', summed_cases['limited_gc_approved'].sum())

summed_cases['gc_dismissed'] = summed_cases[keyword_cols('dismiss')].sum(axis='columns') > 0
print('Petition guardianship dismissed:', summed_cases['gc_dismissed'].sum())
 
print('Overlap between granted/denied:', (summed_cases['gc_denied'] & summed_cases['perm_gc_approved']).sum())
print('Overlap between temp and perm AGC orders:', (summed_cases['temp_gc_approved'] & summed_cases['perm_gc_approved']).sum())

# how many neither approved or denied
outcome_cols = ['perm_gc_approved', 'temp_gc_approved', 'limited_gc_approved', 'gc_denied', 'gc_dismissed']
cdf_merge = rdf_m.merge(summed_cases, on='case_number', how='right')

missing_df = cdf_merge.copy()
for c in outcome_cols:
    missing_df = missing_df[~missing_df[c]]

print('Missing disposition count:', missing_df.shape[0])

gal_or_casa gal casa appointed 3600
medical_ physicians report filed 4040
medical1 dr. evaluation 2
medical2 psycho evaluation 1
accounting_1 order approving biennial accounting 2353
p1 verified petition for appointment of adult guardian efiled 1
p2 petition for appointment of a guardian conservator 0
p3 petition for appointment of coguardians over person and estate 89
p4 petition to establish guardianship 47115
p5 petitioning court for permanent guardianship 1
pcon conservatorship order for hearing 1
p_emerg petition for emergency temporary guardianship filed 2194
p_emerg1 motion for interim order 3
p_emerg2 petition for emergency appointment of temporary guardian 43
p_emerg3 request for initial guardianship hearing 1
temp_order appointing temporary guardian over person estate 3
temp_order1 order granting temporary guardianship 4557
temp_order2 acceptance of appointment as temporary guardian 0
temp_order3 order appointing temporary guardian of the person and conservator 1
temp_order4 

### No Longer Incapacitated

In [11]:
guardian_no_longer_needed = ['no longer incapac', 'guardian(ship)? (is|was) no longer needed']

ddf_m['no_longer_incapacitated'] = ddf_m.combined_docket_event_description.str.contains(regex_or_concat(guardian_no_longer_needed), regex=True)

### Terminated Guardianship

In [12]:
# terminate guardianship mentioned
terminate_guardianship_phrase_list = [
    "gu terminated",
    "terminate gu",
    "terminat(e|ing).*guardianship", 
    "guardianship.* terminat"
]
exclude_terminate_guardianship_phrase_list = [
    'order terminating prior guardianships'
]
ddf_m['terminate_phrase_on_case'] = (
    ddf_m.combined_docket_event_description.str.contains(regex_or_concat(terminate_guardianship_phrase_list), regex=True) 
    & ~ddf_m.combined_docket_event_description.str.contains(regex_or_concat(exclude_terminate_guardianship_phrase_list), regex=True) 
    & ~ddf_m.combined_docket_event_description.str.contains('power')
)



In [13]:
regex_or_concat(terminate_guardianship_phrase_list)

'(gu terminated|terminate gu|terminat(e|ing).*guardianship|guardianship.* terminat)'

### Case Created in Error

In [14]:
ddf_m['error'] = ddf_m.combined_docket_event_description.str.contains('case created in error')

### Dismissed Petitions

In [15]:
dismiss_list = [
    "death.*(withdraw|close|end|final|dismiss)",
    "(withdraw|close|end|final|dismiss).*death",
    "issues order on lack of jurisdiction",
    "order issued guardianship is dismissed",
    "order granting motion to withdraw petition for coguardians",
    "granting motoin to withdraw petition",
    "dismisses this cause",
    "order of e dismissal",
    "petition is dismissed",
    "order withdrawing petition",
    "petition withdrawn",
    "order to dismiss petition",
    "dismiss(ed)? the case",
    "order granting motion to withdraw petition",
    "order granting motion to withdraw petition for appointment of a guardian",
    "granting petitioners motion to dismiss",
    "approving withdrawal of petitions",
    "granting (verifi?ed )?(petition|motion) to withdraw (verified )?petition for (the )?(appointment )?(of )?(a )?guardian",
    "order of dismissal",
    "approv(e|ed|ing) withdraw(al)? of petition",
    "(dismiss|withdraw)[\w ]*petition for (appointment of )?guardianship",
    "petition for (appointment of )?guardianship[\w ]*(dismiss|withdraw)",
    "petition to appoint guardian[\w ]*(dismiss|withdraw)",
    "(dismiss|withdraw)[\w ]*petition to appoint guardian",
    "dismissed without prejudice",
    "order dismissing petition and closing case",
    "dismiss[\w ]*with prejudice",
    "motion to withdraw emergency petition",
    "petitioner s motion to dismiss has been filed and accepted by the court",
    "order to dismiss filed on behalf of petitioner",
    "dismiss cause of action",
    "cause is dismissed",
    "withdraw petition to appoint guardian",
    "motion to dismiss per indiana trial rule e",
    "motion to dismiss guardianship",
    "motion for e dismissal",
    "motion for dismissal of guardianship",
    "motion to dismiss petition for appointment of guardian",
    "motion to dismiss petition for appointment of guardian",
    "cause ordered dismissed",
    "cause dismissed",
    "dismissing cause of action",
    "dismisses this cause of action",
    "dismiss(ing)? cause",
    "dismiss(ing)? case",
    "case dismissed",
    "order dismissing guardianship",
    "court removes this (cause|matter) from the active docket",
    "cause removed from active docket",
    "guardianship dismissed",
    "reason dismissal judge?ment",
    "dismiss guardianship",
    "petition for appointment of guardianship and objection to dismissal",
    "order dismissing guardianship petition",
    "motion to dismiss petition for appointment of coguardians ",
    "prospective guardian wishes to dismiss the case",
    "dismiss the appointment of temporary guardian",
    "voluntary dismissal of his petition for guardianship",
    "order issued on dismissal of guardianship petition",
    "order of dismissal of guardianship petition",
    "order issued dismissing guardianship",
    "petition to dismiss petition for guardianship granted",
    "voluntary dismissal of petition for appointment of guardian",
    "order granting motion to dismiss and vacating hearing on petition to appoint guardian",
    "dismiss(ing)? petition to appoint guardian",
    "motion to dismiss petition for guardianship",
    "order granting motion to dismiss",
    "petition to dismiss petition for appointment of guardian",
    "order granting motion to dismiss hearing and guardianship",
    "motion to dismiss verified petition for appointment of guardian",
    "dismiss petition for appointment of guardian",
    "dismissing petition for appointment of guardian",
    "grants dismissal of guardianship",
    "order granting motion to strike",
    "motion for voluntary dismissal",
    "order granting motion to dismiss petition to appoint guardian",  
    "order granting withdraw of pettion",
    "for the purpose of dismissing case",
    "motion to dismiss petition to appoint guardian",
    "dismissing petition for guardianship",
    "dismiss her petition for guardianship",
    "dismisses petition for guardianship",
    "motion to dismiss the petition for appointment of permanent coguardians",
    "motion to dismiss filed withdrawal of guardianship petition",
    "order dismissing emergency petition for guardian",
    "guardianship petition dismissed",
    "order dismissing petition to establish guardianship",
    "dismiss petition for apptmt of a guardian",
    "motion to dismiss and withdraw petition for appointment of guardian",
    "motion to dismiss petition for appointment of permanent guardian",
    "petition for coguardians over an adult filed is dismissed",
    "motion to dismiss the guardianship without prejudice",
    "motion to dismiss petition to establish guardianship",
    "motion to dismiss petition for appointment of (temporary|permanent)? guardian",
    "order of dismissal another court currently has a pending guardianship proceeding",
    "motion for voluntary withdrawal and dismissal of guardianship",
    "order granting petitioners voluntary motion to dismiss",
    "motion to dismiss verified petition for temporary appointment of guardianship",
    "order of dismissal ward passed away",
    "order dismissing petitioners petition for appointment of guardian",
    "motion to dismiss petition for temporary and permanent guardianship",
    "motion to dismiss emergency guardianship",
    "matter (is )?ordered closed",
    "dismissal under trial rule",
    "for the purpose of dismissing case",
    "order granting petition to withdraw guardianship petition",
    "order granting motion to withdraw motion for coguardianship",
    "order granting motion to dimiss",
    "court now dismisses the emergency petition to appoint temporary and permanent guardian"
]
exclude_dismiss_list = [
    "dismiss hearing",    
]
ddf_m['dismissed_phrase_on_case'] = (
    ddf_m.combined_docket_event_description.str.contains(regex_or_concat(dismiss_list), regex=True)
    & ~ddf_m.combined_docket_event_description.str.contains(regex_or_concat(exclude_dismiss_list), regex=True) 
)


GENERAL_PHRASE= regex_or_concat(['guardian[\w ]*dismiss', 'dismiss[\w ]*guardian'])
dismiss_variants = ddf_m[
    ddf_m.combined_docket_event_description.str.contains(GENERAL_PHRASE, regex=True)
    & ~ddf_m.combined_docket_event_description.str.contains(regex_or_concat(dismiss_list), regex=True)
].drop_duplicates('combined_docket_event_description') 

print('Dismiss variants excluding dismiss_list matches:', dismiss_variants.case_number.nunique())

# dismiss_variants[['case_number', 'docket_event_description', 'docket_event_comment', 'combined_docket_event_description', 'docket_event_date',  'docket_event_judge', 'docket_event_subdate_label', 'docket_event_subdate']]
ddf_m[ddf_m.dismissed_phrase_on_case][[
    'case_number', 
    'docket_event_description', 
    'docket_event_comment', 
    'combined_docket_event_description', 
    'docket_event_date'
]].to_csv('dismissal_docket_entries.csv')

Dismiss variants excluding dismiss_list matches: 175


In [16]:
ddf_m[ddf_m.dismissed_phrase_on_case][[
    'case_number', 
    'docket_event_description', 
    'docket_event_comment', 
    'combined_docket_event_description', 
    'docket_event_date'
]].case_number.nunique()

1399

### Foreign Guardianship

In [17]:
ddf_m['foreign_guardianship'] = ddf_m.combined_docket_event_description.str.contains('foreign (co)?guardian', regex=True)

### Ex Parte Petitions

In [18]:
ddf_m[ddf_m.combined_docket_event_description.str.contains('ex ?parte', regex=True)]

Unnamed: 0,case_number,docket_event_date,docket_event_description,docket_event_comment,docket_event_judge,docket_event_subdate_label,docket_event_subdate,combined_docket_event_description,minor_guardian,adult_guardian,gal_or_casa,gal_or_casa_date,medical_,medical__date,medical1,medical1_date,medical2,medical2_date,accounting_1,accounting_1_date,p1,p1_date,p2,p2_date,p3,p3_date,p4,p4_date,p5,p5_date,pcon,pcon_date,p_emerg,p_emerg_date,p_emerg1,p_emerg1_date,p_emerg2,p_emerg2_date,p_emerg3,p_emerg3_date,temp_order,temp_order_date,temp_order1,temp_order1_date,temp_order2,temp_order2_date,temp_order3,temp_order3_date,temp_order4,temp_order4_date,temp_order5,temp_order5_date,temp_order6,temp_order6_date,temp_order7,temp_order7_date,temp_order8,temp_order8_date,limited_order1,limited_order1_date,limited_order2,limited_order2_date,limited_order3,limited_order3_date,dismiss_temp,dismiss_temp_date,long1,long1_date,long2,long2_date,long3,long3_date,long5,long5_date,long6,long6_date,long7,long7_date,long8,long8_date,long9,long9_date,long10,long10_date,long11,long11_date,long12,long12_date,long13,long13_date,long15,long15_date,longcons1,longcons1_date,long_denied,long_denied_date,dismiss1,dismiss1_date,dismiss2,dismiss2_date,dismiss3,dismiss3_date,dismiss4,dismiss4_date,dismiss5,dismiss5_date,death,death_date,death1,death1_date,death2,death2_date,death3,death3_date,death4,death4_date,death5,death5_date,death6,death6_date,error,error_date,no_longer_incapacitated,terminate_phrase_on_case,dismissed_phrase_on_case,foreign_guardianship
8563,49D08-2112-GU-042095,2022-01-18,Notice Issued to Parties,Notice of Ex Parte Correspondence,,Date 2,NaT,notice issued to parties notice of ex parte correspondence,False,False,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,False,False,False
9854,85C01-2205-GU-000017,2019-04-30,Notice Issued,The Court has received Correspondence from Pro Se Co-Guardian Tammy Bilby. The Court cannot take action in response to this correspondence because it is not in a proper format and it has been transmitted ex parte.,,Date 2,NaT,notice issued the court has received correspondence from pro se coguardian tammy bilby. the court cannot take action in response to this correspondence because it is not in a proper format and it has been transmitted ex parte.,False,False,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,False,False,False
14153,10C01-1602-GU-000032,2016-03-03,Order Granting Temporary Guardianship,Order GRANTING Petition For Appointment of Ex Parte Temporary Guardian and directing Notice to Alleged Incompetent and Other Interested Persons. M. Lorch to distribute copies.,"Adams, Andrew",Order Signed,2016-03-01,order granting temporary guardianship order granting petition for appointment of ex parte temporary guardian and directing notice to alleged incompetent and other interested persons. m. lorch to distribute copies.,False,False,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,True,2016-03-03 00:00:00,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,False,False,False
14293,22C01-1705-GU-000045,2017-05-23,Petition for Emergency/Temporary Guardianship Filed,Verified Ex Parte Petition for Appointment of Temporary Guardian for Incapacitated Person on Finding of Emergency,,File Stamp,2017-05-22,petition for emergency temporary guardianship filed verified ex parte petition for appointment of temporary guardian for incapacitated person on finding of emergency,False,False,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,True,2017-05-23 00:00:00,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,False,False,False
14305,22C01-1705-GU-000045,2017-12-01,Petition Filed,Verified Ex Parte Petition for Reappointment of Temporary Guardian for Incapacitated Person on Finding of Emergency,,File Stamp,2017-12-01,petition filed verified ex parte petition for reappointment of temporary guardian for incapacitated person on finding of emergency,False,False,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,False,False,False
17750,49D08-2101-GU-001389,2021-05-06,Notice Issued to Parties,Notice of Ex Parte Correspondence,,Date 2,NaT,notice issued to parties notice of ex parte correspondence,False,False,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,False,False,False
17762,49D08-2101-GU-001389,2021-05-19,Notice Issued to Parties,Notice Regarding Ex Parte Correspondence,,Date 2,NaT,notice issued to parties notice regarding ex parte correspondence,False,False,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,False,False,False
17766,49D08-2101-GU-001389,2021-05-21,Notice Issued to Parties,Notice of Ex Parte Correspondence,,Date 2,NaT,notice issued to parties notice of ex parte correspondence,False,False,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,False,False,False
17797,49D08-2101-GU-001389,2023-06-05,Notice Issued to Parties,Notice of Ex parte Correspondence,,Date 2,NaT,notice issued to parties notice of ex parte correspondence,False,False,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,False,False,False
17798,49D08-2101-GU-001389,2023-06-05,Order Issued,Regarding Ex Parte Correspondence,"Kendrick, Melanie - M",Order Signed,2023-06-05,order issued regarding ex parte correspondence,False,False,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,,False,False,False,False


### Revoked Guardianship

In [19]:
keep_cols = [
    'case_number', 
    'docket_event_description', 
    'docket_event_comment', 
    'combined_docket_event_description', 
    'docket_event_date'
]

In [20]:
exclude_revoke_list = [
    "powers? of attorney",
    "poa",
    "attorneyinfact",
    'revoke drivers license',
    "p.o.a."
]

revoke_list = [
    'letters of guardianship are revoked',
    'guardianship letters are revoked',
    'guardianship letters revoked',
    'terminating temporary guardianship and revoking letters',
    'revoking letters of guardianship',
    'revokes letters of guardianship',
    'guardianship over person and estate letters revoked',
    'granting petition to revoke coguardians letters',
    'guardianship is terminated and letters are revoked',
    'guardian released letters oath revoked',
    'letters revoked',
    'revoking letters',
    'letters oath revoked',
    # '',
    # '',
    # '',
    # '',
    # '',
    # '',
    # '',
]

GENERAL_PHRASE= regex_or_concat(['revoke', 'revoking'])
revoke_variants = ddf_m[
    ddf_m.combined_docket_event_description.str.contains(GENERAL_PHRASE, regex=True)
    & ~ddf_m.combined_docket_event_description.str.contains(regex_or_concat(revoke_list), regex=True)
    & ~ddf_m.combined_docket_event_description.str.contains(regex_or_concat(exclude_revoke_list), regex=True)
].drop_duplicates('combined_docket_event_description') 

print('Revoke variants excluding dismiss_list matches:', revoke_variants.case_number.nunique())

revoke_variants[keep_cols]

ddf_m['revoked_phrase_on_case'] = (
    ddf_m.combined_docket_event_description.str.contains(regex_or_concat(revoke_list), regex=True)
    & ~ddf_m.combined_docket_event_description.str.contains(regex_or_concat(exclude_revoke_list), regex=True) 
)
ddf_m[ddf_m.revoked_phrase_on_case][[
    'case_number', 
    'docket_event_description', 
    'docket_event_comment', 
    'combined_docket_event_description', 
    'docket_event_date'
]].to_csv('revoked_docket_entries.csv')

Revoke variants excluding dismiss_list matches: 23


### Petition Denials

In [21]:

denial_list = [
    "order denying petition to establish guardianship",
    "court denies the pending petition for appointment of guardian",
    "order denying motion for appointment",
    "guardianship is denied",
    "no need for appt. of gdn",
    "petition to appoint guardian denied",
    "court finds that the petition does not meet the criteria for a guardian",
    "petition for guardianship[\w ]*denied",
    "denying petition for appointment of guardian",
    "order denying petition for emergency temporary guardianship",
    "order denying petition for[\w ]*guardianship",
    "order on petition for appointment of guardian entered as per signed entry. the petition is denied",
    "denying petition to appoint (temporary|permanent)? (co)?guardian",
    "order denying petition for appointment of (temporary|permanent)? (co)?guardian",
    "the court denied the petition to be appointed guardians",
    "order denying petition for appointment of temporary guardian",
    "order denying guardianship",
    "court denied petition for guardianship",
    "order denying the petition for appointment of guardian",
    "order denying petition for guardian",
    "order denying verified petition for appointment of guardian",
    "denies petitioners request for appointment of guardian",
    "order denying guardianship issued",
    "order issued denying petition for appointment of guardian",
    "guardianship is not established",
    "matter is denied"

    # "",
    # "",
    
]
exclude_denial_list = [
    "ad litem",
    "terminate"
]

ddf_m['denied_phrase_on_case'] = (
    ddf_m.combined_docket_event_description.str.contains(regex_or_concat(denial_list), regex=True)
    & ~ddf_m.combined_docket_event_description.str.contains(regex_or_concat(exclude_denial_list), regex=True) 
)

GENERAL_PHRASE= regex_or_concat(['den(y|ied).*(appoint)?.*guardian'])
deny_variants = ddf_m[
    ddf_m.combined_docket_event_description.str.contains(GENERAL_PHRASE, regex=True)
    & ~ddf_m.combined_docket_event_description.str.contains(regex_or_concat(denial_list), regex=True)
].drop_duplicates('combined_docket_event_description') 

print('Unique variations remaining:', deny_variants.shape[0])
# deny_variants[['case_number', 'docket_event_description', 'docket_event_comment', 'combined_docket_event_description', 'docket_event_date',  'docket_event_judge', 'docket_event_subdate_label', 'docket_event_subdate']]
ddf_m[ddf_m.denied_phrase_on_case][[
    'case_number', 
    'docket_event_description', 
    'docket_event_comment', 
    'combined_docket_event_description', 
    'docket_event_date'
]].to_csv('denied_docket_entries.csv')

Unique variations remaining: 105


In [22]:
regex_or_concat(denial_list)

'(order denying petition to establish guardianship|court denies the pending petition for appointment of guardian|order denying motion for appointment|guardianship is denied|no need for appt. of gdn|petition to appoint guardian denied|court finds that the petition does not meet the criteria for a guardian|petition for guardianship[\\w ]*denied|denying petition for appointment of guardian|order denying petition for emergency temporary guardianship|order denying petition for[\\w ]*guardianship|order on petition for appointment of guardian entered as per signed entry. the petition is denied|denying petition to appoint (temporary|permanent)? (co)?guardian|order denying petition for appointment of (temporary|permanent)? (co)?guardian|the court denied the petition to be appointed guardians|order denying petition for appointment of temporary guardian|order denying guardianship|court denied petition for guardianship|order denying the petition for appointment of guardian|order denying petition f

### Withdrawn Petitions

In [23]:
withdraw_petition_list = [
    "order withdrawing emergency petition",
    "withdraw emergency petition",
    "withdraw.*petition.*guardian",
    "petition.*withdraw.*guardian",
    "withdraw petition for appointment of guradian",
    "withdrawal petition for guarrdianship",
    "motion to withdraw guardianship petition",
    "motion to withdraw emergency verified petition",
    "withdrawing petition for appointment of temporary coguradians",
    "withdrawing guardianship petition",
    "dismissal of guardianship entered. petition withdrawn",
    "motion to withdraw emergency temporary guardianship petition",
    "requesting the guardianship petition be withdrawn",
    "granting motion to withdraw petition and dismissing case",
    "filed withdrawal of guardianship petition",
    "order issued petition withdrawn. cause dismissed without prejudice"
]
exclude_withdraw_petition_list = [
    'petition for authority t transfer',
    'substitute petition for appointment of guardian over person and estate',
    'withdraw as guardian|third party independent guardian to be appointed',
    'funds',
    'appearance',
    'change residence',
    'protective order',
    'leave',
    'terminate',
    'counsel',
    'ad litem',
    'gal'
]
ddf_m['withdraw_petition'] = ~ddf_m.combined_docket_event_description.str.contains(regex_or_concat(exclude_withdraw_petition_list), regex=True) & ddf_m.combined_docket_event_description.str.contains(regex_or_concat(withdraw_petition_list), regex=True)

### Emergency Petitions

In [24]:
emergency_petition_phrases = [
    
    # "EMERGENCY"
    'emergency petition for appointment of guardian',
    'emergency petition for temporary guardianship',
    'emergency petition to appoint temporary guardian',
    'emergency petition for guardianship', 
    'emergency petition for guardian',
    'emergency petition for temporary appointment of guardian',
    'emergency petition for appointment of temporary guardian',
    'petition for appointment of emergency guardianship',
    'petition for appointment of emergency temporary guardian',
    'petition for the emergency appointment of guardian',
    'petition for emergency temporary guardianship',
    'petition for emergency appointment of temporary guardian',
    'petition for emergency appointment of guardian',
    'petition for immediate tempory guardianship',
    'petition for emergency temporary appointment of guardian',
    'petition for emergency guardianship',
    'petition for guardianship emergency',
    'petition for temporary emergency guardianship',
    'petition for the appointment of temporaray emergency guardian',
    'petition to establish guardianship filed emergency',
    'petition for emergency gu',
    'emergency temporary gu',
    'emergency petition for appointment of successor guardian',
    'petition for appointment of emergency and ongoing guardian',
    'appointment of an emergency temporary guardian',
    'emergency petition for appointment of coguardians',
    'petition for appointment of emergency temporary guardina',
    'emergency petition for appointment of temporary coguardians',
    'temporary appointment of an emergency guardian',
    'petition for appointment of temporary emergency guardian',
    'petition for emergency temporary appointment of coguardians',
    'petition for appointment of emergency coguardians',
    'petition for temporary appointment of emergency coguardians',
    'petition for appointment of emergency temp guardian',
    'petition for appointment of emergency guardian',
    'petition for emergency appointment of coguardians',
    'petition ofr emergency appointment of coguardian',
    'petition for emergency appoinment of temporary guardian',
    'petition for the appointment of emergency guardianship',
    'petition for the emergency appointment of a temporary guardian',
    'emergency petition for temp gu',
    'petition for the appointment of temporary emergency guardian',
    'petition for appointment of emergency temporary limited guardian',

    # TEMPORARY (w/o "emergency")
    'petition for immediate temporary guardianship',
    'petition for appointment of temporary guardian',
    'petition for temporary guardian',
    'petition for appointment of immediate temporary guardian',
    'petition for the appointment of temporary guardian',
    'petition for extension of temporary guardian',
    'petition for temp guardianship',
    'petition for immediate appointment of temporary guardian',
    'petition for appointment of temp guardian',
    'petition filed for temporary appointment of guardian',
    'petition for appointment of ex parte temporary guardian'
    'petition for appointment of temporary gu',
    'petition for reappointment of temporary guardian',
    'petition for appointment of temporary coguardians',
    'petition to appoint temp. coguardians',
    'petition to appoint temporary coguardians',
    'petition to establish guardianship temporary guardianship',
    'temporary petition for guardianship',
    'petition for temporary legal guardianship',
    'pet for appointment of temporary guardian',
    'petition for extension of temporary emergency guardianship',
    'emergency petition for appointment of temp coguardians',
    'petition to appoint temporary guardian',
    'petition to establish temporary guardianship',
    'petition to appoint temporary guardian',
    'petition filed for appointment of temporary guardian ',
    'petition for appointment of temporary coguardian',
    'temporary guardianship petition',
    'petition for ex parte order appointing temporary guardian',    
    'petition to establish guardianship temporary',
    'petition for appointment of guardian ad litem and temporary guardian of person',
    'petition for appointment of temporary gu',
    'petition for appointment of temporary appointment of coguardians',
    'petition and for appointment of temporary guardian',
    'petition for appointment of limited and temporary guardian',
    'petition appointing temporary coguardianship',
    'petition on temporary guardianship',
    'petition to establish guardianship filed for appointment of temporary guardianship',
    'temporary guardian of estate and person filed',
    'petition for health care order',
    'motion for appt of temporary guardian',
    
    # combo petition for perm and temp
    'petition for appointment of permanent temporary guardianship',
    'petition for emergency appointment of temporary and permanent coguardians',
    'petition for temporary and permanent guardian',
    'appointment of emergency temporary and permanent guardianship',
    'petition for appointment of temporary and permanent coguardians',     
    'petition for appointment of temporary and permanent guardian',
    'petition filed appointment of temporary and permanent guardian',
    'petition for appointment of termporary and permanent guardian',
    'petition for emergency temporary and permanent guardian',   
    'petition for appointment of temporary and permanent gu',
    'petition for appointment of temporary guardian and petition for appointment of permanent guardian',
    'petition to appoint temporary and permanent guardian'

]
ddf_m['emergency_petition_phrase'] = ddf_m.combined_docket_event_description.str.contains('(' + '|'.join(emergency_petition_phrases) + ')', regex=True)
cases_w_emergency_petition = ddf_m[ddf_m['emergency_petition_phrase']].case_number.unique().tolist()

# this displays the distinct variants of form "petition.... emergency..." that are *excluded* 
# from being considered an emergency petitoin
emerg_variants = ddf_m[
    ddf_m.combined_docket_event_description.str.contains('petition [\w ]*temp', regex=True)
    & ~ddf_m.combined_docket_event_description.str.contains(regex_or_concat(emergency_petition_phrases), regex=True)
].drop_duplicates('combined_docket_event_description') 

print('Distinct variants excluding phrases above:', emerg_variants.shape[0])

Distinct variants excluding phrases above: 389


### Permanent Petitions

In [25]:
petition_filed_phrases = [
    'petition for appointment of coguardians',
    'petition to establish guardianship',
    'petition for guardianship',
    'petition for appointment of (a )?(permanent )?guardian',
    'petition to appoint cogu',
    'petition for the appointment of guardian',
    'petition for the appointment of a guardian',
    'petition to appoint coguardians',
    'petition for permanent guardian',
    'petition to appoint guardian',
    'petition for order appointing guardian',
    'petitionf or appointment of guardian',
    'petition for appointment of permanent coguardians',
    'petition filed for appointment of guardian',
    'petition for permanent guardian of person filed',
    'petition for permanent guardian of estate and person filed',
    'petition to convert temporary guardianship to permanent guardianship',

    # combo petition for perm and temp
    'petition for appointment of permanent temporary guardianship',
    'petition for emergency appointment of temporary and permanent coguardians',
    'petition for temporary and permanent guardian',
    'appointment of emergency temporary and permanent guardianship',
    'petition for appointment of temporary and permanent coguardians',     
    'petition for appointment of temporary and permanent guardian',
    'petition filed appointment of temporary and permanent guardian',
    'petition for emergency temporary and permanent guardianship',
    'petition for appointment of temporary and permanent gu',
    'petition for appointment of temporary guardian and petition for appointment of permanent guardian',
    'petition for appointment of termporary and permanent guardian',
    'petition to appoint temporary and permanent guardian'

]
ddf_m['nonemergency_petition_phrase'] = (
    ddf_m.combined_docket_event_description.str.contains('(' + '|'.join(petition_filed_phrases) + ')', regex=True)
    & (~ddf_m.combined_docket_event_description.str.contains('(emergency|temp)', regex=True) | ddf_m.combined_docket_event_description.str.contains('(permanent)', regex=True))
    & ~ddf_m.combined_docket_event_description.str.contains('(ad litem|health care order)', regex=True)
)

cases_that_have_reg_petition_filed = ddf_m[ddf_m['nonemergency_petition_phrase']].case_number.unique().tolist()

### Transfer

In [26]:
transfer_phrases = ['transfer of case', 'case transfer', 'case venued out', 'cause transfer']
ddf_m['transfer_phrase_on_case'] = ddf_m.combined_docket_event_description.str.contains(regex_or_concat(transfer_phrases), regex=True)

### Death

In [27]:
death_inclusion_phrases = [
    'death of ward',
    'death of protected person',
    'death of the protected person',
    'protected persons death',
    'wards death',
    'incapacitated adult has deceased',
    'death of incapacitated person',
    'death certificate',
    'death certificate filed',
    'incapacitated adult is now deceased',
    'incapacitated adult minor is now deceased',
    'ward is now deceased',
    'death of the ward',
    'terminate guardianship.*(death|deceased)',
    '(death|deceased).*terminate guardianship',
    'protected party passed away',
    'ward passed away',
    'dismiss guardianship.*(death|deceased)',
    '(death|deceased).*dismiss guardianship',
    'affidavit of death',
    'obituary'
]

death_exclusion_phrases = [
    'deceased guardian',
    'deceased coguardian',
    'guardians death', 
    'coguardians death',
    'death of coguardian',
    'death of interested person',
]
ddf_m['contains_death_or_deceased'] = ddf_m.combined_docket_event_description.str.contains('(death|deceased|passed away|funeral|obituary)')

death_inclusion_phrases_concat = '(' + '|'.join(death_inclusion_phrases) + ')'
ddf_m['death_inclusion_method'] = ddf_m.contains_death_or_deceased & ddf_m.combined_docket_event_description.str.contains(death_inclusion_phrases_concat, regex=True)

death_exclusion_phrases_concat = '(' + '|'.join(death_exclusion_phrases) + ')'
ddf_m['death_phrase_on_case'] = ddf_m.contains_death_or_deceased & ~ddf_m.combined_docket_event_description.str.contains(death_exclusion_phrases_concat, regex=True)

ddf_m[ddf_m.contains_death_or_deceased][[
    'case_number', 
    'docket_event_description', 
    'docket_event_comment', 
    'combined_docket_event_description', 
    'docket_event_date'
]].to_csv('death_docket_entries.csv')

### Guardianship Scope

#### Person Only

In [28]:
ddf_m['reference_to_guardian_of_person_only'] = (
    ddf_m.combined_docket_event_description.str.contains("guardians?(hip)? of (the )?person", regex=True)
    & ~ddf_m.combined_docket_event_description.str.contains("estate")
    & ~ddf_m.combined_docket_event_description.str.contains("petition")
)


#### Estate Only

In [29]:
ddf_m['reference_to_guardian_of_estate_only'] = (
    ddf_m.combined_docket_event_description.str.contains("guardians?(hip)? of (the )?estate", regex=True)
    & ~ddf_m.combined_docket_event_description.str.contains("(petition|person)")
)

#### Person and Estate

In [30]:
ddf_m['reference_to_guardian_of_person_and_estate'] = (
    ddf_m.combined_docket_event_description.str.contains("(guardians?(hip)? of (the )?person and estate|guardians?(hip)? of estate and person)", regex=True)
    & ~ddf_m.combined_docket_event_description.str.contains("petition")
)

### CHINS

In [31]:
ddf_m['chins_case'] = ddf_m.combined_docket_event_description.str.contains("chins yes", regex=True)

### Transmit to GU Registry

In [32]:
transmit_to_gu_registry = ddf_m.combined_docket_event_description.str.startswith("added to transmit to gu registry")
ddf_m['gu_registry_docket_event'] = transmit_to_gu_registry

ddf_m['gu_registry_docket_event_date'] = None
ddf_m.loc[transmit_to_gu_registry, 'gu_registry_docket_event_date'] = ddf_m[transmit_to_gu_registry].docket_event_date
ddf_m['gu_registry_date'] = ddf_m.docket_event_comment.str.extract('Date of Order\: ([\d\-\/]+)')
ddf_m['gu_registry_type_permanent'] = transmit_to_gu_registry & ddf_m.docket_event_comment.str.contains('Permanent')
ddf_m['gu_registry_type_temporary'] = transmit_to_gu_registry & ddf_m.docket_event_comment.str.contains('Temporary')
ddf_m['gu_registry_scope_person_person_and_estate'] = (
    transmit_to_gu_registry 
    & ddf_m.docket_event_comment.str.contains('Guardian of Person and Estate')
)
ddf_m['gu_registry_scope_person_only'] = (
    transmit_to_gu_registry 
    & ddf_m.docket_event_comment.str.contains('Guardian of Person')
    & (ddf_m.docket_event_comment.str.contains('Estate') == False)
)
ddf_m['gu_registry_scope_estate_only'] = (
    transmit_to_gu_registry 
    & ddf_m.docket_event_comment.str.contains('Guardian of Estate')
    & (ddf_m.docket_event_comment.str.contains('Person') == False)
)


# transmit_to_gu_registry_df = ddf_m[
#     & ddf_m.case_number.isin(cdfm[cdfm.guardian_name_ir.isnull()].case_number.unique().tolist())
# ]
# print(transmit_to_gu_registry_df.shape)
# transmit_to_gu_registry_cases = transmit_to_gu_registry_df.case_number.unique().tolist()
# transmit_to_gu_registry_df

### Flagging Temporary/Permanent Guardian + "Granted Phrase" Approach

In [80]:
# flag explicit mention of temporary or permanent guardian
ddf_m['temp_guardian'] = ddf_m.combined_docket_event_description.str.contains('(health ?care representative appointed|temp(orary)?[\w ]* (co)?guardian|extension of guardianship)', regex=True)
ddf_m['perm_guardian'] = ddf_m.combined_docket_event_description.str.contains('perm(anent)?[\w ]* (co)?guardian', regex=True)
ddf_m['guardian'] = ddf_m.combined_docket_event_description.str.contains('(co)?guardians? ', regex=True)

explicit_granted_list = [
    ("(?:(?:temporary|permanent) )?(?:co)?guardianship (?:(?:is|was) )?granted", False),
    ("grant(?:s|ed|ing) (?:(?:permanent|temporary) )?(?:co)?guardians?hip", False),
    ("letters of (?:(?:permanent|temporary) )?guardianship (?:issued|sent|filed|ordered)", False),
    ("letters of (?:permanent|temporary)? ?gaurdianship", False),
    ("order granting petition to establish guardianship", False),
    ("order granting temporary guardianship", False),
    ("order approving order of (?:temporary|permanent) ?guardianship", False),
    ("order appointing (?:temporary|permanent)? ?guardian", False),
    ("petition for guardianship. evidence heard. granted", False),
    ("(?:is|are) (?:hereby )?appoint(?:s|ed|ing)? (?:as )?(?:the )?(?:said )?(?:temporary|permanent)? ?(?:co)?guardian", False),
    ("court grants guardianship", True),
    ('order granting extension of guardianship', False),
    ("grants the petition to establish guardianship", False),
    ("ct\.? appts\.?[\w ]*gdn", False),
]
implied_granted_list = [ 
    ("order (?:issued )?(?:for|of|on)? ?(?:appoint|granting|establishing|approving)(?! (?:motion|order|continuance|hearing|agreement|setting|transfer)).*?guardians?(hip)?", True), 
    ("(?:petition for|appointment of) guardian[\w ]+granted", True),
    ("appoint[\w ]* as (?:temporary|permanent)? ?(?:co)?guardian", False),
    ('added to transmit to gu registry.*(temporary|permanent)', False),
    ("(health ?care representative appointed|order appointing healthcare rep)", False),
    ("order of appointment[\w ]*guardian", False),
    ("order granting motion for appointment", False),
    
    # maybe should remove
    ("successor guardian", True),
    ("(?:guardian status report filed|status report filed by guardian|order approving status report)", False),
]
granted_list = explicit_granted_list + implied_granted_list

def turn_regex_to_file_name(text):
    return re.sub('[^\w ]', '', text).replace(' ', '_')
    
# look at "guardian physician, ________"
ddf_m['granted'] = False
ddf_m['granted_text'] = None

exclude_tighter_constraints = "(^petition to|^oath filed|expedited hearing|not granted|reject|release|expenditure|accounting|terminat|fees|proposed|assets|schedul|real estate|deny|denie|withdraw|dismiss|ad litem| gal |oath of.*guardian appointed|setting hearing|for hearing|hear\.|final accounting|subject to being)"
exclude_looser_constraints = "(^petition to|^oath filed|revised|ad litem| gal |oath of.*guardian appointed|dismiss|terminat|denie|deny)"
for g in granted_list:
    if g[1]:
        query = (
            ddf_m.combined_docket_event_description.str.contains(g[0], regex=True)
            & ~ddf_m.combined_docket_event_description.str.contains(exclude_tighter_constraints, regex=True)
        )
    else:
        query = (
            ddf_m.combined_docket_event_description.str.contains(g[0], regex=True)
            & ~ddf_m.combined_docket_event_description.str.contains(exclude_looser_constraints, regex=True)
        )
    print('\nPattern:',  g)
    print('Results:', query.sum())
    ddf_m.loc[query, 'granted'] = True
    
    query_matches = ddf_m.loc[query].combined_docket_event_description.str.extract("(" + g[0] + ")")
    
    print('Unique matches:', query_matches[0].unique().tolist()[:10])
    ddf_m.loc[query, 'granted_text'] = query_matches[0]
    ddf_m.drop_duplicates('granted_text')[['case_number', 'combined_docket_event_description', 'granted_text']].to_csv(f'{DATA_PATH}/intermediate_datasets/{turn_regex_to_file_name(g[0])}.csv', index=False)

ddf_m[['granted_phrase_date', 'perm_granted_phrase_date', 'temp_granted_phrase_date', 'nontemp_granted_phrase_date']] = None

# sometimes "order filed" or "file stamp" on event differ from the date associated to the event, so we replace
# the docket date with these alternate (more accurate) 
ddf_m['adjusted_event_date'] = ddf_m['docket_event_date']
should_overwrite_date = ~ddf_m.docket_event_subdate.isnull() & ddf_m.granted & (ddf_m['docket_event_date'] != ddf_m['docket_event_subdate'])
ddf_m.loc[should_overwrite_date, 'adjusted_event_date'] = ddf_m[should_overwrite_date].docket_event_subdate

ddf_m.loc[ddf_m.granted, 'granted_phrase_date'] = ddf_m[ddf_m.granted].adjusted_event_date
ddf_m.loc[ddf_m.granted & ddf_m.temp_guardian, 'temp_granted_phrase_date'] = ddf_m[ddf_m.granted & ddf_m.temp_guardian].adjusted_event_date
ddf_m['max_temp_granted_date'] = ddf_m.groupby('case_number')['temp_granted_phrase_date'].transform('max')

# if temp order is granted, then ambiguous orders after that date should be considered permanent if also
# none are explicitly permanent
perm_granted_events = ddf_m.granted & ddf_m.combined_docket_event_description.str.contains('permanent')

ddf_m.loc[perm_granted_events, 'perm_granted_phrase_date'] = ddf_m[perm_granted_events].adjusted_event_date
ddf_m['min_perm_granted_date'] = ddf_m.groupby('case_number')['perm_granted_phrase_date'].transform('min')
ddf_m['perm_guardian_inferred'] = False

# SETTING PERM GRANTED DATE
has_perm_petition = ~ddf_m.groupby('case_number')['nonemergency_petition_phrase'].transform('min').isnull()
no_temp_petition_filed = ~ddf_m.case_number.isin(cases_w_emergency_petition)
no_temp_appt_date = ddf_m.max_temp_granted_date.isnull()
not_a_temp_appt_order = (
    ~ddf_m['temp_guardian'] 
    & ddf_m.combined_docket_event_description.str.contains('(order granting petition to establish guardianship|letters of guardianship)', regex=True) 
    & ~ddf_m.combined_docket_event_description.str.contains('(ad litem)', regex=True)
)
    
WAIT_DAYS_AFTER_GRANTING_TEMP = 14
event_after_X_days_post_temp_appt_date = ddf_m.adjusted_event_date > (ddf_m.max_temp_granted_date + timedelta(days=WAIT_DAYS_AFTER_GRANTING_TEMP))
no_explicit_permanent_granted_date = ddf_m['min_perm_granted_date'].isnull()
no_temp_mentioned = ~ddf_m.combined_docket_event_description.str.contains('(temp|emergency)', regex=True)

# if no indication of temp petition/order, then generic petition granted is permanent
ddf_m.loc[no_temp_petition_filed & no_temp_appt_date & not_a_temp_appt_order & no_explicit_permanent_granted_date & no_temp_mentioned, ['perm_guardian', 'perm_guardian_inferred']] = True
ddf_m.loc[has_perm_petition & ~no_temp_appt_date & event_after_X_days_post_temp_appt_date & not_a_temp_appt_order & no_explicit_permanent_granted_date & no_temp_mentioned, ['perm_guardian', 'perm_guardian_inferred']] = True
perm_granted_events = ddf_m.perm_guardian_inferred
ddf_m.loc[perm_granted_events, 'perm_granted_phrase_date'] = ddf_m[perm_granted_events].adjusted_event_date


# no explicit mention of perm/temp but more than X days after temp order granted
not_temp = ddf_m.granted & ~ddf_m.temp_guardian & (ddf_m.adjusted_event_date > (ddf_m.max_temp_granted_date + timedelta(days=WAIT_DAYS_AFTER_GRANTING_TEMP)))
ddf_m.loc[ddf_m.granted & ~ddf_m.temp_guardian, 'nontemp_granted_phrase_date'] = ddf_m[not_temp].adjusted_event_date

# TODO: CLEAN THIS UP!!!!
# DENIAL DATE
denied_guardianship = ddf_m['denied_phrase_on_case'] 
ddf_m['denied_phrase_date'] = None
ddf_m.loc[denied_guardianship, 'denied_phrase_date'] = ddf_m[denied_guardianship].adjusted_event_date

# DISMISS DATE
dismiss_guardianship = ddf_m['dismissed_phrase_on_case'] 
ddf_m['dismissed_phrase_date'] = None
ddf_m.loc[dismiss_guardianship, 'dismissed_phrase_date'] = ddf_m[dismiss_guardianship].adjusted_event_date

# TERMINATION DATE
terminate_guardianship = ddf_m['terminate_phrase_on_case'] 
ddf_m['terminate_phrase_date'] = None
ddf_m.loc[terminate_guardianship, 'terminate_phrase_date'] = ddf_m[terminate_guardianship].adjusted_event_date

# DEATH DATE
death_event = ddf_m['death_phrase_on_case'] 
ddf_m['death_phrase_date'] = None
ddf_m.loc[death_event, 'death_phrase_date'] = ddf_m[death_event].adjusted_event_date

# TRANSFER
transfer_event = ddf_m['transfer_phrase_on_case'] 
ddf_m['transfer_phrase_date'] = None
ddf_m.loc[death_event, 'transfer_phrase_date'] = ddf_m[transfer_event].adjusted_event_date

# TEMP PETITION DATE
temp_pet_event = ddf_m['emergency_petition_phrase'] 
ddf_m['emergency_petition_phrase_date'] = None
ddf_m.loc[temp_pet_event, 'emergency_petition_phrase_date'] = ddf_m[temp_pet_event].adjusted_event_date

# PERM PETITION DATE
perm_pet_event = ddf_m['nonemergency_petition_phrase'] 
ddf_m['nonemergency_petition_phrase_date'] = None
ddf_m.loc[perm_pet_event, 'nonemergency_petition_phrase_date'] = ddf_m[perm_pet_event].adjusted_event_date

# combine granted, perm/temp flags to a case level (from docket entry level)
granted_guard_df = ddf_m.groupby('case_number').agg(
    {
        'granted': ['sum'], 
        'guardian': ['sum'],
        'perm_guardian': ['sum'], 
        'temp_guardian': ['sum'], 
        'granted_text': [concat],
        'granted_phrase_date': ['min'],
        'perm_granted_phrase_date': ['min'],
        'perm_guardian_inferred': ['max'],
        'temp_granted_phrase_date': ['min'],
        'nontemp_granted_phrase_date': ['min'],
        'nonemergency_petition_phrase': ['max'],
        'nonemergency_petition_phrase_date': ['min'],
        'emergency_petition_phrase': ['max'],
        'emergency_petition_phrase_date': ['min'],
        'death_inclusion_method': ['max'],
        'contains_death_or_deceased': ['max'],
        'transfer_phrase_on_case': ['max'],
        'death_phrase_on_case': ['max'],
        'death_phrase_date': ['min'],
        'no_longer_incapacitated': ['max'],
        'terminate_phrase_on_case': ['max'],
        'terminate_phrase_date': ['max'],
        'dismissed_phrase_on_case': ['max'],
        'dismissed_phrase_date': ['max'],
        'denied_phrase_on_case': ['max'],
        'denied_phrase_date': ['max'],
        'foreign_guardianship': ['max'],
        'withdraw_petition': ['max'],
        'reference_to_guardian_of_estate_only': ['max'],
        'reference_to_guardian_of_person_only': ['max'],
        'reference_to_guardian_of_person_and_estate': ['max'],
        'chins_case': ['max'],
        'gu_registry_date': [concat],
        'gu_registry_type_permanent': ['max'],
        'gu_registry_type_permanent': ['max'],
        'gu_registry_scope_person_person_and_estate': ['max'],
        'gu_registry_scope_person_only': ['max'],
        'gu_registry_scope_estate_only': ['max'],
        'gu_registry_docket_event': ['max'],
        'revoked_phrase_on_case': ['max'],
        'error': ['max']

    }
).reset_index()
granted_guard_df.columns = ['case_number'] + [c[0] + '_' + c[1] if 'date' in c[0] else c[0] for c in granted_guard_df.columns if c[0] != 'case_number']
granted_guard_df['has_granted_phrase_on_case'] = (granted_guard_df.granted > 0)
granted_guard_df['guardian_mentioned_on_case'] = (granted_guard_df.guardian > 0)
granted_guard_df['perm_guardian_mentioned_on_case'] = (granted_guard_df.perm_guardian > 0)
granted_guard_df['temp_guardian_mentioned_on_case'] = (granted_guard_df.temp_guardian > 0)
granted_guard_df['emergency_petition_mentioned_on_case'] = (granted_guard_df.emergency_petition_phrase > 0)
granted_guard_df['nonemergency_petition_mentioned_on_case'] = (granted_guard_df.nonemergency_petition_phrase > 0)

# assign generic granted phrase to either temp or perm depending on which petitions were filed (only if one or the other)
unlabeled_but_granted = (
    granted_guard_df.has_granted_phrase_on_case 
    & granted_guard_df.perm_granted_phrase_date_min.isnull()
    & granted_guard_df.temp_granted_phrase_date_min.isnull()
) # these cases have a granted phrase, but parser isn't sure whether it is perm or temp
perm_granted = (
    unlabeled_but_granted 
    & granted_guard_df.nonemergency_petition_mentioned_on_case 
    & ~granted_guard_df.emergency_petition_mentioned_on_case
) # if perm petition but no temp petition, then unlabeled case is probably perm guardianship
granted_guard_df.loc[perm_granted, 'perm_granted_phrase_date_min'] = granted_guard_df[perm_granted].granted_phrase_date_min
temp_granted = (
    unlabeled_but_granted
    & ~granted_guard_df.nonemergency_petition_mentioned_on_case 
    & granted_guard_df.emergency_petition_mentioned_on_case
) # if temp petition but no perm petition, then unlabeled case is probably perm guardianship
granted_guard_df.loc[temp_granted, 'temp_granted_phrase_date_min'] = granted_guard_df[temp_granted].granted_phrase_date_min

# adjust permanent guardianship if both types of petitions are filed but no sign of explicit temp guardianship granted
implied_perm_guardianship_granted = (
    granted_guard_df.nonemergency_petition_mentioned_on_case
    & granted_guard_df.perm_granted_phrase_date_min.isnull()
    & granted_guard_df.temp_granted_phrase_date_min.isnull()
    & ~granted_guard_df.granted_phrase_date_min.isnull()
)
granted_guard_df.loc[implied_perm_guardianship_granted, 'perm_granted_phrase_date_min'] = granted_guard_df[implied_perm_guardianship_granted].granted_phrase_date_min

print('\n\nNumber of cases with `granted`:', granted_guard_df[granted_guard_df.granted > 0].case_number.nunique())
print('Number of cases with `granted` + permanent guardianship mentioned:', 
    granted_guard_df[
        (granted_guard_df.granted > 0)
        & (granted_guard_df.perm_guardian > 0)
    ].case_number.nunique()
)
print('Number of cases with `granted` + temporary guardianship mentioned:', 
    granted_guard_df[
        (granted_guard_df.granted > 0)
        & (granted_guard_df.temp_guardian > 0)
    ].case_number.nunique()
)

# PARSING GUARDIANSHIP with GUARDIAN as PARTY on profile
cases_w_guardian_party = pdf[pdf.party_type == 'Guardian'].case_number.unique().tolist()
cdf_merge['guardian_in_party_info'] = cdf_merge.case_number.isin(cases_w_guardian_party)
cases_w_guardian_party_but_not_in_registry = rdf[rdf.case_number.isin(cases_w_guardian_party) & rdf.court_ir.isnull()]

print('Cases with Guardian as party in docket:', len(cases_w_guardian_party))
print('Cases with Guardian as party but not in registry:', cases_w_guardian_party_but_not_in_registry.shape[0])

cdfm = cdf_merge.merge(granted_guard_df, on='case_number', how='left')
cdfm['petitioner_is_institution'] = cdfm['petitioner_names'].str.lower().str.contains('(center|hospital|elder|adult guardianship program|veterans administration|health|healthcare|care|community|home|services|charities|office|dhss|state|department|dept|institute|foundation|counseling|behavioral|facility|associate|mentor|llc|village|program| inc|county|corp)', regex=True)
cdfm['has_revoked_guardian_ir'] = cdfm.guardian_name_ir.str.contains('Revoke', na=False)



# RENAME COLUMNS
cdfm = cdfm.rename(columns={
    'issue_date_ir': 'decision_date_granted_ir',
    'issue_date_ir_min': 'decision_date_granted_ir_min',
    'raw_case_status': 'case_status_raw',
    'court': 'court_name',
    'c2dp_case_key': 'case_key',
    'clean_case_type': 'case_type',
    'long_denied_date_min': 'decision_date_deny'
})

print('Number of cases without granted phrase/denied docket entry/dismissed docket entry:', cdfm[~cdfm['has_granted_phrase_on_case'] & ~cdfm['denied_phrase_on_case'] & ~cdfm['dismissed_phrase_on_case']].shape[0])
print('Number of cases that have "Guardian" in party but not in registry or granted phrase:', cdfm[cdfm.guardian_in_party_info & ~cdfm.has_granted_phrase_on_case & cdfm.respondent_name_ir.isnull()].shape[0])

cdfm_types = cdfm.dtypes.to_dict()
for k, v in cdfm_types.items():
    if v.type == np.bool_:
        cdfm[k] = cdfm[k].astype(int)

odf = pd.read_csv(f'{DATA_PATH}/deliverables/indiana_overwrite.csv', parse_dates=['temp_granted_phrase_date_min', 'perm_granted_phrase_date_min'])
columns_to_overwrite = [c for c in odf.columns if 'comments' not in c and c != 'case_number']
odf = odf.add_suffix('_overwrite')

cdfm['case_undisposed'] = 0
cdfm = cdfm.merge(odf, left_on='case_number', right_on='case_number_overwrite', how='left')
for c in columns_to_overwrite:
    overwrite_col = c + '_overwrite'
    to_be_replaced = ~cdfm[overwrite_col].isnull() # if overwrite_col is not null, then want it to replace cdfm value
    cdfm.loc[to_be_replaced, c] = cdfm[to_be_replaced][overwrite_col]

cdfm = cdfm.drop(columns=[c for c in cdfm.columns if '_overwrite' in c])

cdfm.drop(columns=['granted', 'perm_guardian', 'temp_guardian']).to_csv(f'{DATA_PATH}/deliverables/indiana_guardianship.csv', index=False)


Pattern: ('(?:(?:temporary|permanent) )?(?:co)?guardianship (?:(?:is|was) )?granted', False)
Results: 414
Unique matches: ['guardianship granted', 'temporary guardianship is granted', 'guardianship is granted', 'permanent guardianship granted', 'temporary guardianship granted', 'permanent guardianship is granted', 'coguardianship granted', 'guardianship was granted']

Pattern: ('grant(?:s|ed|ing) (?:(?:permanent|temporary) )?(?:co)?guardians?hip', False)
Results: 5064
Unique matches: ['grants guardianship', 'granting temporary guardianship', 'grants temporary guardianship', 'grants permanent guardianship', 'granting guardianship', 'grants guardianhip', 'granting coguardianship', 'granting permanent guardianship', 'grants coguardianship', 'granted guardianship']

Pattern: ('letters of (?:(?:permanent|temporary) )?guardianship (?:issued|sent|filed|ordered)', False)
Results: 19496
Unique matches: ['letters of guardianship issued', 'letters of temporary guardianship issued', 'letters of g

In [None]:
# FOR CHECKING OVERWRITES
# odf = pd.read_csv(f'{DATA_PATH}/deliverables/indiana_overwrite.csv')

# odf = odf.merge(cdfm[[
#     'case_number', 
#     'has_granted_phrase_on_case', 
#     'emergency_petition_mentioned_on_case', 
#     'nonemergency_petition_mentioned_on_case', 
#     'granted_phrase_date_min',
#     'temp_granted_phrase_date_min', 
#     'perm_granted_phrase_date_min', 
#     'dismissed_phrase_on_case', 
#     'dismissed_phrase_date_max',
#     'denied_phrase_on_case',
#     'denied_phrase_date_max',
#     'terminate_phrase_on_case',
#     'terminate_phrase_date_max',
#     'death_phrase_date_min',
#     'transfer_phrase_on_case'
# ]], on='case_number', how='left')

# set_col = 'dismissed_phrase_on_case_x'
# data_col = 'dismissed_phrase_on_case_y'
# odf['manual'] = odf['Comments'].str.contains('manual', regex=False) == True

# print('Remaining:', odf[~odf[set_col].isnull() & (odf[set_col] != odf[data_col]) & ~odf.manual].shape[0])
# odf[~odf[set_col].isnull() & (odf[set_col] != odf[data_col]) & ~odf.manual & odf.perm_granted_phrase_date_min_y.isnull()][['case_number', set_col, data_col, 'emergency_petition_mentioned_on_case_y', 'nonemergency_petition_mentioned_on_case', 'granted_phrase_date_min', 'temp_granted_phrase_date_min_y', 'perm_granted_phrase_date_min_y']]

In [84]:
cdfm[cdfm.case_number == '73D01-1605-GU-000036'][[
    'case_number', 
    'guardianship_type_ir',
    'has_granted_phrase_on_case', 
    'emergency_petition_mentioned_on_case', 
    'nonemergency_petition_mentioned_on_case', 
    'granted_phrase_date_min',
    'temp_granted_phrase_date_min', 
    'perm_granted_phrase_date_min', 
    'perm_guardian_inferred',
    'dismissed_phrase_on_case', 
    'dismissed_phrase_date_max',
    'denied_phrase_on_case',
    'denied_phrase_date_max',
    'terminate_phrase_on_case',
    'terminate_phrase_date_max',
    'death_phrase_date_min',
    'transfer_phrase_on_case',
    'granted_text'
]]

Unnamed: 0,case_number,guardianship_type_ir,has_granted_phrase_on_case,emergency_petition_mentioned_on_case,nonemergency_petition_mentioned_on_case,granted_phrase_date_min,temp_granted_phrase_date_min,perm_granted_phrase_date_min,perm_guardian_inferred,dismissed_phrase_on_case,dismissed_phrase_date_max,denied_phrase_on_case,denied_phrase_date_max,terminate_phrase_on_case,terminate_phrase_date_max,death_phrase_date_min,transfer_phrase_on_case,granted_text
9107,73D01-1605-GU-000036,Permanent,1,1,1,2016-06-03 00:00:00,NaT,2016-06-03 00:00:00,0,1,2019-06-26 00:00:00,0,NaT,1,2019-02-19 00:00:00,NaT,0,letters of guardianship issued; order granting petition to establish guardianship; appointment of scout guardianship services as guardian; order appointing guardian


In [83]:
cdfm[
    (cdfm.guardianship_type_ir == 'Temporary') 
    & ~cdfm.perm_granted_phrase_date_min.isnull()
    # & cdfm.perm_guardian_inferred
][[
    'case_number', 
    'perm_guardian_inferred',
    'perm_granted_phrase_date_min', 
    'temp_granted_phrase_date_min', 
    'emergency_petition_mentioned_on_case', 
    'nonemergency_petition_mentioned_on_case', 
    'granted_text'
]].shape

(159, 7)

In [70]:
# PERMANENT IN REGISTRY BUT NOT PERM GRANTED PHRASE

cdfm[
    (cdfm.guardianship_type_ir == 'Permanent') 
    & cdfm.perm_granted_phrase_date_min.isnull()
    & (cdfm.decision_date_granted_ir_min < datetime(2024, 4, 30))
    & (cdfm.decision_date_granted_ir_min > datetime(2016, 5, 2))
][[
    'case_number', 
    'perm_guardian_inferred',
    'perm_granted_phrase_date_min', 
    'temp_granted_phrase_date_min', 
    'granted_phrase_date_min',
    'emergency_petition_mentioned_on_case', 
    'nonemergency_petition_mentioned_on_case', 
    'granted_text',
    'dismissed_phrase_on_case',
    'decision_date_granted_ir_min',
    'guardianship_type_ir'

]].shape

(58, 11)

In [79]:
ddf_m[ddf_m.case_number == '24C01-1603-GU-000157'][['docket_event_date', 'combined_docket_event_description', 'perm_guardian_inferred', 'granted_text', 'temp_guardian', 'max_temp_granted_date']]

Unnamed: 0,docket_event_date,combined_docket_event_description,perm_guardian_inferred,granted_text,temp_guardian,max_temp_granted_date
340989,2016-03-14,case opened as a new filing,False,,False,2016-05-10
340990,2016-03-14,petition for emergency temporary guardianship filed gr filed electronically,False,,True,2016-05-10
340991,2016-03-14,appearance filed pro se,False,,False,2016-05-10
340992,2016-03-14,document filed physicians report,False,,False,2016-05-10
340993,2016-03-22,hearing scheduling activity hearing scheduled for at pm.,False,,False,2016-05-10
340994,2016-03-23,order set for hearing received in clerks office on .,False,,False,2016-05-10
340995,2016-03-23,notice issued to parties along with a copy of the order.,False,,False,2016-05-10
340996,2016-04-13,hearing scheduling activity hearing originally scheduled on at pm was rescheduled to at am. reason courts own motion.,False,,False,2016-05-10
340997,2016-04-14,order of continuance by court received in clerks office on .,False,,False,2016-05-10
340998,2016-04-14,notice issued to parties along with a copy of the order.,False,,False,2016-05-10


In [None]:
print('Number of rows in final df: ', cdfm.shape[0])

for p in cdfm.columns:
    print(p)

## Pulling Guardianship Parsing Together

# Important: Quality Assurance

Use each method (registry, exact phrases, and granted/flag) to validate each other.

Registry is definitely correct, but may only cover the most recent verdict.

Questions to answer:

* Are registry cases missing at random? Meaning that we can use ratios of permanent/temporary, phrases, and other insights from registry labeled cases to label missing ones?
* How often do methods agree? When do they disagree? What are the phrases that are wrong?

* In the final dataframe, how often are contradictory verdicts? Dismissed and granted? Denied and granted? Temporary and permanent?

* 

## Validating w/ Registry Outcomes

In [45]:
perm_registry = cdfm.guardianship_type_ir == 'Permanent'
temp_registry = cdfm.guardianship_type_ir == 'Temporary'

in_registry = perm_registry | temp_registry
has_docket_entry_with_guardian_granted_phrase = cdfm.has_granted_phrase_on_case
guardianship_phrase_in_registry = (in_registry & has_docket_entry_with_guardian_granted_phrase)
guardianship_phrase_but_not_in_registry = (~in_registry & has_docket_entry_with_guardian_granted_phrase)

has_perm_docket_desc = ~cdfm.perm_granted_phrase_date_min.isnull()
has_temp_docket_desc = ~cdfm.temp_granted_phrase_date_min.isnull()
has_granted_docket_desc = (has_perm_docket_desc | has_temp_docket_desc)


print('\nGranted Phrases vs Registry:')
print(pd.crosstab(has_docket_entry_with_guardian_granted_phrase, in_registry))

print('\nGranted Docket Entry vs Registry:')
print(pd.crosstab(has_granted_docket_desc, in_registry))

print('\nGranted Docket Entry vs Granted Phrase:')
print(pd.crosstab(has_granted_docket_desc, has_docket_entry_with_guardian_granted_phrase))

print('\nGranted Temp Docket Entry vs Registry Temp:')
print(pd.crosstab(has_temp_docket_desc, temp_registry))

print('\nGranted Perm Docket Entry vs Registry Perm:')
print(pd.crosstab(has_perm_docket_desc, perm_registry))

print('\nAre petition denials all missing registry entries?')


Granted Phrases vs Registry:
guardianship_type_ir        False  True 
has_granted_phrase_on_case              
0                            1223    129
1                             608   7910

Granted Docket Entry vs Registry:
guardianship_type_ir  False  True 
row_0                             
False                  1218    125
True                    613   7914

Granted Docket Entry vs Granted Phrase:
has_granted_phrase_on_case     0     1
row_0                                 
False                       1330    13
True                          22  8505

Granted Temp Docket Entry vs Registry Temp:
guardianship_type_ir          False  True 
temp_granted_phrase_date_min              
False                          7594     30
True                           1440    806

Granted Perm Docket Entry vs Registry Perm:
guardianship_type_ir          False  True 
perm_granted_phrase_date_min              
False                          2023    153
True                            644   7050
