In [1]:
import pandas as pd
import numpy as np
import re

# Overheating Flag Creation


### Flag Logic Overview: 
- Specific keywords around “heat” (not just “temperature” or “heat”): heat stress, hypothermia, hot weather, cold weather, climatic, etc.
- Regex pattern for detecting recorded temperatures (80.5 degrees, 52 F, 25 Celsius) but not “degrees of separation”
        - Excluding mentions of “180 degrees,” the water temperature for proper sanitization

In [2]:
combined_dir = './aphis-inspection-reports/data/combined/'

In [3]:
# Upload most recent aphis inspection-citations.csv
citations = pd.read_csv(combined_dir + 'inspections-citations.csv')
citations.shape

(38749, 6)

In [4]:
# Overheating and excessive cooling
overheating_keywords = [
    'overheating',
    'heat stress', 
    'f',
    'fahrenheit',
    'celsius',
    'hot weather', 
    'cold weather', 
    'heat stroke', 
    'hypothermia', 
    'frostbite', 
    'low body temperature', 
    'high body temperature', 
    'cold stress', 
    'deg f', 
    'hypothermic',
    'climatic', 
    'extreme heat', 
    'heat index', 
]

In [5]:
# Keyword search
citations['overheating_flag_1'] = citations['narrative'].apply(lambda x: any(word in [word for word in x.lower().split()] for word in overheating_keywords))
citations['overheating_flag_1'].value_counts()

overheating_flag_1
False    38077
True       672
Name: count, dtype: int64

In [6]:
# Search for use of 'degrees' in a temperature context vs. 'degrees of success' etc.
temperature_pattern = re.compile(r'\b\d+(\.\d+)? degrees\b')

citations['overheating_flag_2'] = citations['narrative'].apply(lambda x: bool(temperature_pattern.search(x.lower())))
citations['overheating_flag_2'].value_counts()

overheating_flag_2
False    38190
True       559
Name: count, dtype: int64

In [7]:
# Remove sanitization
overheating_negative_keywords = [
    '180 f', 
    '180 degrees'
]

citations['overheating_neg_flag_1'] = citations['narrative'].apply(lambda x: any(keyword in x.lower() for keyword in overheating_negative_keywords))
citations['overheating_neg_flag_1'].value_counts()

overheating_neg_flag_1
False    38722
True        27
Name: count, dtype: int64

In [8]:
# Combined overheating flag
citations['overheating_flag'] = (citations['overheating_flag_1'] & citations['overheating_flag_1'] & ~citations['overheating_neg_flag_1'])
citations['overheating_flag'].value_counts()

overheating_flag
False    38088
True       661
Name: count, dtype: int64

In [9]:
# Spot-check for positives
citations[citations['overheating_flag'] == True]['narrative'].sample(100).tolist()

['Transport enclosures housing chinchillas did have adequate rims or other devices on the exterior of the outside\nwalls to provide a minimum air circulation space of .75 inches. The lid of the transport enclosures only provided .25\ninches of ventilation space on the sides. Transport enclosures for chinchillas must have projecting rims or other\ndevices on the exterior of the outside walls which provide a minimum air circulation space of .75 inches between the\ntransport enclosure and any adjacent cargo wall.\nFailure to provide a circulation space of at least .75 inches on the sides of transport enclosures could put animals at\nrisk of overheating or having reduced air circulation. Carriers and intermediate handlers must not transport animals\nunless there are adequate projecting rims or other devices that provide at least .75 inches of air circulation on the\nexterior of the outside walls of the animal transport container (see air waybill # 006 LAX 78985222).\nThis inspection and ex

In [10]:
# Spot-check for negatives
citations[citations['overheating_flag'] == False]['narrative'].sample(100).tolist()

['The facility does not have a written contingency plan that contains situations that could adversely impact animals at the\nfacility. A complete written contingency plan must include scenarios in which animals would be adversely affected, in\norder to allow for planning and preparation for those scenarios. Correct by 12/27/22 by documenting a contingency plan\nthat contains all of the required elements in 2.38(l).\nThis inspection and exit interview were conducted with the IACUC chair and the Director of Research Compliance.\nn',
 'The licensee does not have a program of veterinary care on hand. The licensee has acquired a non-human\nprimate, guinea pigs, Arctic fox, Prairie Dogs, a raccoon, hedge hogs and an African Crested porcupine.\nEach exhibitor shall employ and attending veterinarian under formal arrangements. In case of a part-time attending\nveterinarian, the formal arrangements shall include a written program of veterinary care and regularly scheduled\nvisits to the premises

In [11]:
# Filtering for flag
flagged_citations = citations[citations['overheating_flag'] == True]
flagged_citations.shape

(661, 10)

In [12]:
# Save citations with new flag column
flagged_citations.to_csv('./flagged_citations/overheating.csv')