# Example: Running the Algorithm

* Show how to run the regular expressions
* Show how to post-process results using pandas (requires install: `pip install pandas`)

In [None]:
import pandas as pd
import re

import sys
sys.path.append(r'C:\wksp\maltreatment_nlp\src')

from maltreatment_nlp import run

# Run Algorithm

In [None]:
# Placeholder: substitute your own loader for `get_my_text_notes`.
# It is assumed to return an iterable of `(note_id, note_text)` tuples, which is
# the shape that `run_maltreatment` unpacks.
notes = get_my_text_notes()

In [None]:
def run_maltreatment(notes):
    """Lazily apply the maltreatment algorithm to every note.

    Args:
        notes: iterable of `(note_id, note_text)` pairs.

    Yields:
        Whatever `run.run` returns for each note, in input order.
    """
    yield from (
        run.run(text, note_id=identifier)
        for identifier, text in notes
    )

In [None]:
# Gather everything the generator yields into a single table.
records = run_maltreatment(notes)
df = pd.DataFrame.from_records(records)
# Frequency of each pattern name among the results (what patterns were found?)
df.pattern.value_counts()

# Clean Algorithm Results/Post-processing

In [None]:
# prepare versions of text to write post-processing rules against
# Fill missing text BEFORE lowercasing/concatenating: in pandas, NaN + str is NaN,
# so a single missing context would otherwise wipe out the combined column
# (term included) once the final `fillna('')` turns that NaN into ''.
text_cols = ['pre_context', 'term', 'post_context']
df[text_cols] = df[text_cols].fillna('')
# contexts are lowercased in place; the term keeps its original casing in `term`
# and gets a lowercased copy in `termlc`
df['pre_context'] = df.pre_context.str.lower()
df['termlc'] = df.term.str.lower()
df['post_context'] = df.post_context.str.lower()
# term joined with the text on either side, for rules that span the boundary
df['pre_term_lc'] = df.pre_context + ' ' + df.termlc
df['post_term_lc'] = df.termlc + ' ' + df.post_context
# any remaining missing values (in other columns) become empty strings
df = df.fillna('')

In [None]:
# some helper filters
# `family` matches a family/partner mention, optionally preceded by a possessive
# ("his", "mom's"), an age qualifier ("older") and/or "step".
# All groups are non-capturing: the match set is unchanged, but pandas'
# `str.contains` no longer warns that the pattern "has match groups".
family = (
    r'(?:his|her|their|mom\W?s|dad\W?s)?'
    r'\W*(?:older|younger|elder)?'
    r'\W*(?:step\W*)?'
    r'(?:'
    r'father|dad|brother|bro|mom|mother|sis|sister|aunt|uncle|relative|parents?'
    r'|bf|boy\W?friend|girl\W?friend|gf|husband|wife|partner|family'
    r')'
)
# preposition + family mention; `from_family` is not compiled or used below and is
# left available for custom rules
by_family = fr'by\W*{family}'
from_family = fr'(?:by|from)\W*{family}'
to_family = fr'(?:to|with)\W*{family}'
by_family_rx = re.compile(by_family, re.I)
to_family_rx = re.compile(to_family, re.I)
# boolean masks over the current rows of `df`: does the text following the term
# contain "by <family>" / "to|with <family>"?
by_family_filter = df.post_context.str.contains(by_family_rx)
to_family_filter = df.post_context.str.contains(to_family_rx)

In [None]:
# Baseline row count, for comparison with the sizes printed after each cleaning step.
row_count = df.shape[0]
print(f'Size before filtering: {row_count}')

## All Patterns

In [None]:
# Whole-word terms that, when found in or just before the matched term, cause the hit
# to be dropped (presumably non-maltreatment senses such as "heart attack" or
# "drug abuse" -- confirm against your data).
# Groups are non-capturing so `str.contains` does not warn about match groups.
exclude_terms = (r'\b(?:'
                 r'cardiac|activity|heart|opioid|drug|alcohol|cannabis|marijuana|etoh|asthma|(?:poly)?substance|panic'
                 r'|ocd'
                 r')\b')

# first 30 characters of the text following the term (leading/trailing whitespace removed)
post_start = df.post_context.str.strip().str[:30]

# "abused"/"sexually abused" preceded by the word "if" (template/conditional wording).
# `if` is matched as a whole word: a bare substring test also fires on
# "wife", "life", "specific", etc. and would drop genuine mentions.
exclude_abuse_template = (
    df.termlc.isin({'sexually abused', 'abused'})
    & df.pre_context.str.contains(r'\bif\b')
)
# term is immediately followed by a negative / empty answer
not_on_file = post_start.str.contains(r'\b(?:not on file|negative|none|denies)\b')
# an excluded word occurs inside the matched term itself ...
exclude_attack = df.termlc.str.contains(exclude_terms)
# ... or within the 20 characters right before it
exclude_pre_attack = df.pre_context.str[-20:].str.contains(exclude_terms)
# term is followed by a reference to a past age/time.
# NOTE(review): this mask is computed but not applied in the filter below -- confirm
# whether that is intentional.
past_age = post_start.str.contains(
    r'\b(?:'
    r'when (?:(?:he|she|they) )?was \d+'
    r'|\d+ years ago'
    r'|at age \d+'
    r')\b'
)

# keep only rows that trip none of the general exclusion rules
df = df[
    ~(exclude_abuse_template
      | not_on_file
      | exclude_attack
      | exclude_pre_attack
      | to_family_filter
      )
]
print(f'Size after general cleaning: {df.shape[0]}')

## Individual Patterns

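* Example template: replace `EXAMPLE_PAT` with a real pattern name, and define the boolean masks `exclude_pattern` and `another_exclude_pattern` before running the cell.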

In [None]:
# Rows to discard: hits of this one pattern that also meet either exclusion mask.
is_example_pat = df.pattern == 'EXAMPLE_PAT'
matches_exclusion = exclude_pattern | another_exclude_pattern
df = df[~(is_example_pat & matches_exclusion)]

print(f'Size after EXAMPLE_PAT: {df.shape[0]}')


# Exporting Data

In [None]:
# One indicator per surviving hit. `assign` returns a new frame, so the column is not
# written into a filtered slice (avoids pandas' SettingWithCopyWarning).
df = df.assign(value=1)
# Wide table: one row per note, one column per pattern, cell = number of hits
# (0 where the pattern never fired for that note).
result_df = df[['note_id', 'pattern', 'value']].pivot_table(
    index='note_id', columns='pattern', values='value', fill_value=0,
    aggfunc='sum',  # string name: pandas 2.1+ warns when the builtin `sum` is passed
).reset_index()
result_df.head()

## Define Case-ness

In [None]:
def is_case(row):
    """Return 1 if the note counts as a case, otherwise 0.

    Args:
        row: mapping-like row of the pivoted results (e.g. the Series passed by
            `DataFrame.apply(..., axis=1)`), holding a hit count per pattern.

    Returns:
        1 when `EXAMPLE_PAT` fired at least once for the note, else 0.

    Raises:
        KeyError: if the row has no `EXAMPLE_PAT` entry.
    """
    # The pivot sums hits per note, so a pattern found several times has a value > 1;
    # test for "at least one hit" rather than "exactly one".
    return 1 if row['EXAMPLE_PAT'] > 0 else 0

In [None]:
# Evaluate the case definition once per note (row-wise) and store the flag.
case_flags = result_df.apply(is_case, axis=1)
result_df['case'] = case_flags

In [None]:
# Write the per-note table to disk, leaving out the DataFrame index column.
output_path = 'output_data.csv'
result_df.to_csv(output_path, index=False)
