In [1]:
import re

import pandas as pd
import geopandas as gpd

from IPython.display import HTML, display

import matplotlib.pyplot as plt

This notebook joins and exports DFPS datasets so that violations can be manually reviewed for potential injuries and other characteristics. It uses data exported from the state data portal on Feb. 1, 2018, and searches the violation narrative field for keywords that suggest possible injuries.

- [non-compliance](https://data.texas.gov/Social-Services/DFPS-CCL-Non-Compliance-Data/tqgd-mf4x)
- [operations](https://data.texas.gov/Social-Services/DFPS-CCL-Daycare-and-Residential-Operations-Data/bc5r-88dy)
- [inspections](https://data.texas.gov/Social-Services/DFPS-CCL-Inspection-Investigation-Assessment-Data/m5q4-3y3d)

In [2]:
# Read the three DFPS CSV exports from the 2018-02-01 snapshot directory.
# Each file carries an 'Unnamed: 0' column, which is discarded on load.
non_compliance = pd.read_csv(
    '../src/dfps-2018-02-01/non_compliance.csv'
).drop(columns='Unnamed: 0')

operations = pd.read_csv(
    '../src/dfps-2018-02-01/operations.csv'
).drop(columns='Unnamed: 0')

# The inspections table is stored in the file named assessment.csv.
inspections = pd.read_csv(
    '../src/dfps-2018-02-01/assessment.csv'
).drop(columns='Unnamed: 0')

In [3]:
# Get potential injury violations
#
# A violation is flagged when ANY of the following holds:
#   1. its standard description mentions "injur", or its narrative matches one
#      of `keywords` (regardless of risk level);
#   2. its narrative matches one of `keywords_high` AND the standard's risk
#      level is "High" or "Medium High";
#   3. its narrative matches "brok" / "break" (but not "breakfast") AND the
#      standard's risk level is "High".
# All matching is case-insensitive.

# Stems that flag a narrative at any risk level.
keywords = [
    'hurt',
    'bruis',
    'injur',
    'fractur',
    'conscious',
    'lacerat'
]

# Terms that only count on higher-risk standards. The negative lookaheads
# skip "burn cream", "tooth brush" and "tooth paste".
keywords_high = [
    'burn(?! cream)',
    'sprain',
    'concuss',
    'seizure',
    'vomit',
    'tooth (?!brush|paste)',
    'teeth ',
    'responsiv',
]

# Regex alternations built from the lists above.
POTENTIAL_INJURY_KEYWORDS = '|'.join(keywords)

POTENTIAL_INJURY_KEYWORDS_HIGH = '|'.join(keywords_high)


def _contains(text, pattern):
    """Return a boolean Series: True where `text` matches regex `pattern`.

    The search is case-insensitive. Missing values are treated as
    non-matches (`na=False`) so the result is always a clean boolean mask
    that can be combined with `|` / `&` and used for row selection.
    """
    return text.str.contains(
        pattern,
        regex=True,
        flags=re.IGNORECASE,
        na=False
    )


# A single OR-ed mask (rather than concatenating three filtered frames) keeps
# each violation at most once, in its original order, even when it satisfies
# more than one rule.
potential_injury_violations = non_compliance[
    _contains(non_compliance.STANDARD_NUMBER_DESCRIPTION, 'injur')
    | _contains(non_compliance.NARRATIVE, POTENTIAL_INJURY_KEYWORDS)
    | (
        _contains(non_compliance.NARRATIVE, POTENTIAL_INJURY_KEYWORDS_HIGH)
        & non_compliance.STANDARD_RISK_LEVEL.isin(['High', 'Medium High'])
    )
    | (
        _contains(non_compliance.NARRATIVE, 'brok|break(?!fast)')
        & (non_compliance.STANDARD_RISK_LEVEL == 'High')
    )
]

In [4]:
# Widen the selection from individual violations to whole inspections:
# keep every non-compliance row whose ACTIVITY_ID also appears among the
# potential-injury violations.

potential_injury_violations_and_related = non_compliance.loc[
    non_compliance['ACTIVITY_ID'].isin(
        potential_injury_violations['ACTIVITY_ID'].drop_duplicates()
    )
]

In [5]:
# Build the table handed to manual review: every violation from an inspection
# with a potential injury, enriched with operation and inspection details.
records_for_manual_review = potential_injury_violations_and_related.merge(
    operations,
    on='OPERATION_ID'
).merge(
    # OPERATION_ID is dropped from inspections before the ACTIVITY_ID join;
    # the column is already present from the violations/operations side.
    inspections.drop(
        'OPERATION_ID',
        axis=1
    ),
    on='ACTIVITY_ID'
).assign(
    # add a column to signify if violation has a potential injury
    # `na=False` makes a missing description/narrative count as "no match",
    # so the flag is strictly True/False instead of containing NaN.
    # NOTE(review): unlike the selection filter, this flag applies the
    # "high" keywords without a risk-level condition and does not include
    # the 'brok|break' pattern -- confirm that is intended.
    potential_injury = lambda x: x.STANDARD_NUMBER_DESCRIPTION.str.contains(
        'injur',
        regex=True,
        flags=re.IGNORECASE,
        na=False
    ) | x.NARRATIVE.str.contains(
        POTENTIAL_INJURY_KEYWORDS,
        regex=True,
        flags=re.IGNORECASE,
        na=False
    ) | x.NARRATIVE.str.contains(
        POTENTIAL_INJURY_KEYWORDS_HIGH,
        regex=True,
        flags=re.IGNORECASE,
        na=False
    )
).query(
    'OPERATION_TYPE != "Child Placing Agency" & OPERATION_TYPE != "General Residential Operation"'
).loc[
    # Only take 2016-02-01 to 2018-02-01 (only the lower bound is enforced
    # here; the data itself was exported on 2018-02-01).
    # `.loc` with a boolean mask actually drops the older rows. `.where`
    # would keep them as all-NaN rows and upcast the column dtypes.
    lambda x: pd.to_datetime(x.ACTIVITY_DATE) >= pd.to_datetime('2016-02-01')
]

The code below analyzes day cares and inspections with violations that were manually determined to have involved injuries.

The `dat_injure` dataframe has a row for every injury detail and related violation for every operation. Because of this, day cares are counted by unique `operation_id` rather than by number of rows.

In [6]:
# Load the manually reviewed injury records used by the summary cells below.
dat_injure = pd.read_csv(
    filepath_or_buffer='../src/manual-injuries-mapped.csv'
)

In [7]:
# Distinct inspections (activity_id values) present in the injury data.
# dropna=False counts a missing id as its own value, matching len(unique()).
HTML(
    '<p><strong>{:,}</strong> inspections with an injury</p>'
    .format(dat_injure['activity_id'].nunique(dropna=False))
)

In [8]:
# Distinct inspections having a row whose related violation is
# "Failure To Report Incident".
HTML(
    '<p><strong>{:,}</strong> inspections with an injury not reported to parents or licensing</p>'
    .format(
        dat_injure
        .query('related_violation == "Failure To Report Incident"')['activity_id']
        .nunique(dropna=False)
    )
)

In [9]:
# Distinct inspections having a row whose injury detail is "Medical care".
HTML(
    '<p><strong>{:,}</strong> inspections with an injury that required medical care</p>'
    .format(
        dat_injure
        .query('injury_detail == "Medical care"')['activity_id']
        .nunique(dropna=False)
    )
)

In [10]:
# Distinct inspections having a row whose related violation is "Supervision".
HTML(
    '<p><strong>{:,}</strong> inspections with an injury and failure to supervise properly</p>'
    .format(
        dat_injure
        .query('related_violation == "Supervision"')['activity_id']
        .nunique(dropna=False)
    )
)

In [11]:
# Distinct inspections having a row whose injury detail is "Intentional".
HTML(
    '<p><strong>{:,}</strong> inspections with an injury intentionally caused by daycare caregivers</p>'
    .format(
        dat_injure
        .query('injury_detail == "Intentional"')['activity_id']
        .nunique(dropna=False)
    )
)