In [2]:
import pandas as pd
import numpy as np
import re

# Overheating Flag Creation


### Flag Logic Overview: 
- Specific keywords around “heat” (not just “temperature” or “heat”): heat stress, hypothermia, hot weather, cold weather, climatic, etc.
- Regex pattern for detecting recorded temperatures (80.5 degrees, 52 F, 25 Celsius) but not “degrees of separation”
        - Excluding mentions of “180 degrees,” the water temperature for proper sanitization

In [3]:
combined_dir = './aphis-inspection-reports/data/combined/'

In [4]:
# Upload most recent aphis inspection-citations.csv
citations = pd.read_csv(combined_dir + 'inspections-citations.csv')
citations.shape

(38749, 6)

In [5]:
# Overheating and excessive cooling
overheating_keywords = [
    'overheating',
    'heat stress', 
    'f',
    'fahrenheit',
    'celsius',
    'hot weather', 
    'cold weather', 
    'heat stroke', 
    'hypothermia', 
    'frostbite', 
    'low body temperature', 
    'high body temperature', 
    'cold stress', 
    'deg f', 
    'hypothermic',
    'climatic', 
    'extreme heat', 
    'heat index', 
]

In [6]:
# Keyword search
citations['overheating_flag_1'] = citations['narrative'].apply(lambda x: any(word in [word for word in x.lower().split()] for word in overheating_keywords))
citations['overheating_flag_1'].value_counts()

overheating_flag_1
False    38077
True       672
Name: count, dtype: int64

In [7]:
# Search for use of 'degrees' in a temperature context vs. 'degrees of success' etc.
temperature_pattern = re.compile(r'\b\d+(\.\d+)? degrees\b')

citations['overheating_flag_2'] = citations['narrative'].apply(lambda x: bool(temperature_pattern.search(x.lower())))
citations['overheating_flag_2'].value_counts()

overheating_flag_2
False    38190
True       559
Name: count, dtype: int64

In [8]:
# Remove sanitization
overheating_negative_keywords = [
    '180 f', 
    '180 degrees'
]

citations['overheating_neg_flag_1'] = citations['narrative'].apply(lambda x: any(keyword in x.lower() for keyword in overheating_negative_keywords))
citations['overheating_neg_flag_1'].value_counts()

overheating_neg_flag_1
False    38722
True        27
Name: count, dtype: int64

In [9]:
# Combined overheating flag
citations['overheating_flag'] = (citations['overheating_flag_1'] & citations['overheating_flag_1'] & ~citations['overheating_neg_flag_1'])
citations['overheating_flag'].value_counts()

overheating_flag
False    38088
True       661
Name: count, dtype: int64

In [10]:
# Spot-check
citations[citations['overheating_flag'] == True]['narrative'].tolist()

['Two marmosets (male 601 858 271, female 844 827 014), were found dead entrapped in their nest box on August 12,\n2020 at 8:04 am during the morning observation . At some point after the afternoon observation at 2:38 pm on August 11,\nthe 2 animals entered their nest box in their home cage and the door to the nest box closed so they could not get out.\nThe animals were entrapped in the box for ~18 hours. The cause of death was overheating and distress.\nPrimary enclosures must be constructed and maintained so that they protect the nonhuman primate from injury and harm.\nAll the doors on the nest boxes in marmoset caging were removed prior to inspection. Corrected prior to inspection.',
 'During the inspection a room containing bats in 1 cubic foot butterfly transports was noted. The number of bats in\neach enclosure ranged from one to five. The bats were quietly hanging in their enclosures at the time of inspection\nand appeared to be normal. The protocol covering these bats did not d

In [14]:
len(citations[citations['narrative'].str.contains('euthanasia')]['narrative'].tolist())


445

In [16]:
citations['narrative'].notnull().sum()

38749

In [13]:
# Save citations with new flag column
#citations.to_csv('inspections-citations-with-overheating-flag.csv')