# Fire Bucket Distribution

We want to merge the fire risk scores with the fire incidents file, and see how many properties actually had an incident after. 

## Loading the Data

In [2]:
import pandas as pd
import datetime
from dateutil.relativedelta import relativedelta
import numpy as np

In [3]:
fire_new = pd.read_csv("datasets/Fire_Incidents_New.csv", low_memory=False, encoding = 'utf-8',dtype={'street':'str','number':'str', 'CALL_CREATED_DATE': 'str'})
fire_inspections = pd.read_csv("datasets/Fire_Inspections.csv", low_memory=False, dtype={'parcel':'str'})
results = pd.read_csv("datasets/Results_21518.csv", low_memory=False, dtype={'PROPERTYADDRESS':'str','PROPERTYHOUSENUM':'str'})
parcels = pd.read_csv("datasets/parcels.csv", low_memory=False, dtype={'PIN':'str'})
pittdata = pd.read_csv("datasets/pittdata.csv", low_memory=False, encoding = "ISO-8859-1", dtype={'PROPERTYADDRESS':'str','PROPERTYHOUSENUM':'str','CLASSDESC':'str'})

## Merging Parcels with Addresses in order to merge Fire Inspections data at Parcel Level

In [4]:
pitt_risk_parcels = pd.merge(left=pittdata,right=parcels, how='left', left_on='PARID', right_on='PIN')
parcel_address = pitt_risk_parcels[['PROPERTYHOUSENUM', 'PROPERTYADDRESS', 'PARID', 'MUNIDESC', 'CLASSDESC']]
parcel_address.head()

Unnamed: 0,PROPERTYHOUSENUM,PROPERTYADDRESS,PARID,MUNIDESC,CLASSDESC
0,564,FORBES AVE,0002K00173000000,1st Ward - PITTSBURGH,COMMERCIAL
1,952,VICKROY ST,0002K00190000000,1st Ward - PITTSBURGH,GOVERNMENT
2,1001,BLUFF ST,0002K00192000000,1st Ward - PITTSBURGH,GOVERNMENT
3,900,LOCUST ST,0002K00194000000,1st Ward - PITTSBURGH,GOVERNMENT
4,0,LOCUST ST,0002K00201000000,1st Ward - PITTSBURGH,GOVERNMENT


## Fire Incidents DataFrame Cleaning

In [5]:
fire_new['descript'] = fire_new['descript'].str.strip()
remove_descript = ['System malfunction, Other',
                   # 'Smoke detector activation, no fire - unintentional']
                   # 'Alarm system activation, no fire - unintentional']
                   'Detector activation, no fire - unintentional', 'Smoke detector activation due to malfunction',
                   'Dispatched & cancelled en route', 'Dispatched & cancelled on arrival',
                   'EMS call, excluding vehicle accident with injury', 'Medical assist, assist EMS crew',
                   'Emergency medical service, other', 'Good intent call, Other', 'Rescue, EMS incident, other',
                   'Medical Alarm Activation (No Medical Service Req)', 'Motor Vehicle Accident with no injuries',
                   'No Incident found on arrival at dispatch address', 'Unintentional transmission of alarm, Other',
                   'Motor vehicle accident with injuries', 'Vehicle accident, general cleanup', 'Power line down',
                   'Person in distress, Other', 'Cable/Telco Wires Down', 'Service Call, other',
                   'Vehicle Accident canceled en route', 'Lock-out', 'False alarm or false call, Other',
                   'Assist police or other governmental agency', 'Special type of incident, Other',
                   'Alarm system sounded due to malfunction', 'Motor vehicle/pedestrian accident (MV Ped)',
                   'Assist invalid ', 'Malicious, mischievous false call, Other', 'Accident, potential accident, Other',
                   'Assist invalid', 'EMS call, party transported by non-fire agency', 'Rescue or EMS standby',
                   'Public service assistance, Other', 'Police matter', 'Lock-in (if lock out , use 511 )',
                   'Sprinkler activation, no fire - unintentional', 'Wrong location',
                   'Local alarm system, malicious false alarm', 'Authorized controlled burning',
                   'Water problem, Other',
                   # 'Smoke or odor removal']
                   'Passenger vehicle fire', 'CO detector activation due to malfunction',
                   'Authorized controlled burning', 'Steam, vapor, fog or dust thought to be smoke', 'Overheated motor',
                   'Local alarm system, malicious false alarm', 'Central station, malicious false alarm',
                   'Public service',
                   # 'Building or structure weakened or collapsed'
                   'Heat detector activation due to malfunction', 'Citizen complaint',
                   'Municipal alarm system, malicious false alarm', 'Sprinkler activation due to malfunction',
                   'Severe weather or natural disaster, Other', 'Water evacuation', 'Breakdown of light ballast',
                   'Extrication of victim(s) from vehicle', 'Flood assessment', 'Telephone, malicious false alarm',
                   'Cover assignment, standby, moveup', 'Road freight or transport vehicle fire']

In [6]:
for descript in remove_descript:
    fire_new = fire_new[fire_new.descript != descript]
fire_new = fire_new[fire_new['full.code'].str.strip()  != '540 - Animal problem, Other']
fire_new = fire_new[fire_new['full.code'].str.strip()  != '5532 - Public Education (Station Visit)']
fire_new = fire_new[fire_new['full.code'].str.strip()  != '353 - Removal of victim(s) from stalled elevator']

In [7]:
#correcting problems with the street column
fire_new['street'] = fire_new['street'].replace(to_replace=', PGH', value='', regex=True)
fire_new['street'] = fire_new['street'].replace(to_replace=', P', value='', regex=True)
fire_new['street'] = fire_new['street'].replace(to_replace=',', value='', regex=True)
fire_new['street'] = fire_new['street'].replace(to_replace='#.*', value='', regex=True)
fire_new['street'] = fire_new['street'].str.strip()
fire_new['number'] = fire_new['number'].str.strip()

In [8]:
# drop duplicates
fire_new = fire_new.drop_duplicates()

In [9]:
#convert it to datetime and then check how many properties had a call
fire_new = fire_new[['CALL_CREATED_DATE', 'number', 'street', 'CALL_TYPE_FINAL', 'full.code']]
fire_new['CALL_CREATED_DATE'] = pd.to_datetime(fire_new['CALL_CREATED_DATE'])

#make full.code into different fire columns
fire_new['111 - Building Fire'] = np.where(fire_new['full.code'].astype(str).str[:3] == '111', '1', '0')
fire_new['Building Fire'] = np.where((fire_new['full.code'].astype(str).str[:2] == '11') & (fire_new['full.code'].astype(str).str[:3] != '112'), '1', '0')
fire_new['Code 100s'] = np.where(fire_new['full.code'].astype(str).str[0] == '1', '1', '0')
fire_new['Any Fire Code'] = '1'

Now, we want to merge it with address.

In [10]:
pitt_fire = pd.merge(left=parcel_address,right=fire_new, how='left', left_on =['PROPERTYADDRESS','PROPERTYHOUSENUM'],
        right_on = ['street','number'])

In [11]:
del pitt_fire['street']
del pitt_fire['number']

## Fire Inspections Cleaning

In [12]:
fire_inspections['parcel'] = fire_inspections['parcel'].str.strip()
fire_inspections = fire_inspections[['parcel', 'reported', 'codeset', 'code', 'descript', 'completed']]
fire_inspections['reported'] = pd.to_datetime(fire_inspections['reported'])

In [13]:
pitt_inspect = pd.merge(left=parcel_address, right=fire_inspections, how='left', left_on=['PARID'], right_on=['parcel'])
del pitt_inspect['PARID']
pitt_inspect.head()

Unnamed: 0,PROPERTYHOUSENUM,PROPERTYADDRESS,MUNIDESC,CLASSDESC,parcel,reported,codeset,code,descript,completed
0,564,FORBES AVE,1st Ward - PITTSBURGH,COMMERCIAL,0002K00173000000,2018-01-18,VIOLATION,1003.6,Means of Egress Continuity ...,2018-05-15 00:00:00
1,564,FORBES AVE,1st Ward - PITTSBURGH,COMMERCIAL,0002K00173000000,2018-01-18,VIOLATION,1006.3,Illumination Emergency Power ...,2018-05-15 00:00:00
2,564,FORBES AVE,1st Ward - PITTSBURGH,COMMERCIAL,0002K00173000000,2018-01-18,VIOLATION,1011.5.3,Power Source - Exit Signs ...,2018-05-15 00:00:00
3,564,FORBES AVE,1st Ward - PITTSBURGH,COMMERCIAL,0002K00173000000,2018-01-18,VIOLATION,1022.8,Floor numbers in stairwell ...,2018-05-15 00:00:00
4,564,FORBES AVE,1st Ward - PITTSBURGH,COMMERCIAL,0002K00173000000,2018-01-18,VIOLATION,107.1,Maintenance of Safeguards ...,2018-05-15 00:00:00


## Filtering out Properties not in Pittsburgh

Getting rid of outliers

In [14]:
pitt_fire = pitt_fire[pitt_fire['MUNIDESC'].str.contains("Ward|Ingram|Wilkinsburg",na=False)]
pitt_inspect = pitt_inspect[pitt_inspect['MUNIDESC'].str.contains("Ward|Ingram|Wilkinsburg",na=False)]
# get rid of residential properteis in inspections
pitt_inspect = pitt_inspect[pitt_inspect.CLASSDESC!='RESIDENTIAL']

pitt_fire = pitt_fire[pitt_fire.PROPERTYHOUSENUM!= '0']
pitt_fire = pitt_fire[pitt_fire.PROPERTYHOUSENUM!= 'NaN']
pitt_fire = pitt_fire[pitt_fire.PROPERTYADDRESS!= '']
pitt_fire = pitt_fire[pitt_fire.PROPERTYADDRESS!= 'NaN']
pitt_fire.loc[pitt_fire['111 - Building Fire'] != '1', '111 - Building Fire'] = '0'
pitt_fire.loc[pitt_fire['Building Fire'] != '1', 'Building Fire'] = '0'
pitt_fire.loc[pitt_fire['Code 100s'] != '1', 'Code 100s'] = '0'
pitt_fire.loc[pitt_fire['Any Fire Code'] != '1', 'Any Fire Code'] = '0'

pitt_inspect = pitt_inspect[pitt_inspect.PROPERTYADDRESS!= '']
pitt_inspect = pitt_inspect[pitt_inspect.PROPERTYHOUSENUM!= '0']
pitt_inspect['codeset'] = pitt_inspect['codeset'].str.strip()

## Making Commercial Fire Dataset

In [15]:
pitt_fire_c = pitt_fire[pitt_fire.CLASSDESC!='RESIDENTIAL']
pitt_fire_c = pitt_fire_c.groupby(['PROPERTYADDRESS', 'PROPERTYHOUSENUM'], as_index=False).max()

## Making Residential Fire Dataset

In [16]:
pitt_fire_r = pitt_fire[pitt_fire.CLASSDESC =='RESIDENTIAL']
pitt_fire_r.head()

Unnamed: 0,PROPERTYHOUSENUM,PROPERTYADDRESS,PARID,MUNIDESC,CLASSDESC,CALL_CREATED_DATE,CALL_TYPE_FINAL,full.code,111 - Building Fire,Building Fire,Code 100s,Any Fire Code
228,300,4TH AVE,0001H00327020100,1st Ward - PITTSBURGH,RESIDENTIAL,NaT,,,0,0,0,0
229,306,4TH AVE,0001H00327020200,1st Ward - PITTSBURGH,RESIDENTIAL,2015-08-26,False Alarm,"743 - Smoke detector activation, no fire - uni...",0,0,0,1
230,306,4TH AVE,0001H00327020200,1st Ward - PITTSBURGH,RESIDENTIAL,2015-10-09,False Alarm,"712 - Direct tie to FD, malicious false alarm ...",0,0,0,1
231,306,4TH AVE,0001H00327020200,1st Ward - PITTSBURGH,RESIDENTIAL,2016-05-25,False Alarm,"5001 - Smoke Detector Activation, No Fire ...",0,0,0,1
232,300,4TH AVE,0001H00327020300,1st Ward - PITTSBURGH,RESIDENTIAL,NaT,,,0,0,0,0


## Merge with Results of Commerical Model 

In [17]:
results = results[['PROPERTYADDRESS', 'PROPERTYHOUSENUM', 'RiskScore']]

In [18]:
pitt_fire_c = pd.merge(left=results,right=pitt_fire_c, how='left', left_on =['PROPERTYADDRESS','PROPERTYHOUSENUM'],
        right_on = ['PROPERTYADDRESS','PROPERTYHOUSENUM'])

In [19]:
pitt_inspect =  pd.merge(left=results, right=pitt_inspect, how='right', left_on=['PROPERTYADDRESS','PROPERTYHOUSENUM'],
        right_on = ['PROPERTYADDRESS','PROPERTYHOUSENUM'])
pitt_fire_c.head()

Unnamed: 0,PROPERTYADDRESS,PROPERTYHOUSENUM,RiskScore,PARID,MUNIDESC,CLASSDESC,CALL_CREATED_DATE,CALL_TYPE_FINAL,full.code,111 - Building Fire,Building Fire,Code 100s,Any Fire Code
0,ALGER ST,1,0.002491,0054K00230000000,15th Ward - PITTSBURGH,GOVERNMENT,2017-06-12,Hazardous Conditions,412 - Gas leak (natural gas or LPG) ...,0,0,0,1
1,ALLEGHENY AVE,1,0.001635,0007M00500000000,21st Ward - PITTSBURGH,GOVERNMENT,2018-07-12,Service Call,"745 - Alarm system activation, no fire - unint...",1,1,1,1
2,BIGELOW SQ,1,0.000686,0002B00096000000,2nd Ward - PITTSBURGH,COMMERCIAL,2018-07-03,Service Call,"745 - Alarm system activation, no fire - unint...",0,0,1,1
3,BIGELOW SQ,1,0.154457,0002B00096000000,2nd Ward - PITTSBURGH,COMMERCIAL,2018-07-03,Service Call,"745 - Alarm system activation, no fire - unint...",0,0,1,1
4,BIGELOW SQ,1,0.000686,0002B00096000000,2nd Ward - PITTSBURGH,COMMERCIAL,2018-07-03,Service Call,"745 - Alarm system activation, no fire - unint...",0,0,1,1


## Merge with Results of Residential Model

In [20]:
## get the residential model csv and we can do this part very easily

## Commerical Fire Distribution

In [34]:
cutoff = datetime.datetime.strptime("2/15/18", '%m/%d/%y')
#cutoff = day_ran - relativedelta(months=6)
cutoffdate = cutoff.strftime("%m/%d/%Y")

high = pitt_fire_c[(pitt_fire_c['RiskScore'] >= 0.70) & (pitt_fire_c['CALL_CREATED_DATE'] > cutoff)]
medium = pitt_fire_c[(pitt_fire_c['RiskScore'] < 0.70) & (pitt_fire_c['RiskScore'] >= 0.40)& (pitt_fire_c['CALL_CREATED_DATE'] > cutoff)]
low = pitt_fire_c[(pitt_fire_c['RiskScore'] >= 0) & (pitt_fire_c['RiskScore'] < 0.40) & (pitt_fire_c['CALL_CREATED_DATE'] > cutoff)]

In [35]:
high = high.drop_duplicates(subset=['PROPERTYADDRESS', 'PROPERTYHOUSENUM'])
medium = medium.drop_duplicates(subset=['PROPERTYADDRESS', 'PROPERTYHOUSENUM'])
low = low.drop_duplicates(subset=['PROPERTYADDRESS', 'PROPERTYHOUSENUM'])

In [36]:
building_sum_h = len(high[high['111 - Building Fire'] == '1'])
building_fire_h = len(high[high['Building Fire'] == '1'])
code_100_h = len(high[high['Code 100s']== '1'])
fire_h = len(high[high['Any Fire Code']=='1'])
total_num = len(high)
print("High Risk Properties Building Fires (111): %d" % building_sum_h)
print("High Risk Properties Building Fires (all): %d" % building_fire_h)
print("High Risk Properties Code 100's: %d" % code_100_h)
print("High Risk Properties Any Fire Code: %d" % fire_h)
print("Total Number of High Risk Properties: %d" % total_num)



High Risk Properties Building Fires (111): 8
High Risk Properties Building Fires (all): 26
High Risk Properties Code 100's: 34
High Risk Properties Any Fire Code: 41
Total Number of High Risk Properties: 41


In [37]:
building_sum_m = len(medium[medium['111 - Building Fire'] == '1'])
building_fire_m = len(medium[medium['Building Fire'] == '1'])
code_100_m = len(medium[medium['Code 100s']== '1'])
fire_m = len(medium[medium['Any Fire Code']=='1'])
print("Medium Risk Properties Building Fires (111): %d" % building_sum_m)
print("Medium Risk Properties Building Fires (all): %d" % building_fire_m)
print("Medium Risk Properties Code 100's: %d" % code_100_m)
print("Medium Risk Properties Any Fire Code: %d" % fire_m)

Medium Risk Properties Building Fires (111): 5
Medium Risk Properties Building Fires (all): 22
Medium Risk Properties Code 100's: 33
Medium Risk Properties Any Fire Code: 56


In [38]:
building_sum_l = len(low[low['111 - Building Fire'] == '1'])
building_fire_l = len(low[low['Building Fire'] == '1'])
code_100_l = len(low[low['Code 100s']== '1'])
fire_l = len(low[low['Any Fire Code']=='1'])
print("Low Risk Properties Building Fires (111): %d" % building_sum_l)
print("Low Risk Properties Building Fires (all): %d" % building_fire_l)
print("Low Risk Properties Code 100's: %d" % code_100_l)
print("Low Risk Properties Any Fire Code: %d" % fire_l)

Low Risk Properties Building Fires (111): 38
Low Risk Properties Building Fires (all): 103
Low Risk Properties Code 100's: 143
Low Risk Properties Any Fire Code: 401


## Out of those that had a high risk score, how many of them actually had inspection violations? 

In [39]:
pitt_inspect['viol_cutoff'] = np.where(pitt_inspect['RiskScore']>0.70, 1, 0)
pitt_inspect['risk_high'] = np.where((pitt_inspect['RiskScore'] >=0.70) & (pitt_inspect['reported'] > cutoff), 1, 0)
pitt_inspect.drop_duplicates(subset=['PROPERTYADDRESS', 'PROPERTYHOUSENUM'])

Unnamed: 0,PROPERTYADDRESS,PROPERTYHOUSENUM,RiskScore,MUNIDESC,CLASSDESC,parcel,reported,codeset,code,descript,completed,viol_cutoff,risk_high
0,ALGER ST,1,0.002491,15th Ward - PITTSBURGH,GOVERNMENT,0054K00230000000,2016-07-18,VIOLATION,703.1,Maintenance ...,,0,0
9,ALLEGHENY AVE,1,0.001635,21st Ward - PITTSBURGH,GOVERNMENT,,NaT,,,,,0,0
10,BIGELOW SQ,1,0.000686,2nd Ward - PITTSBURGH,COMMERCIAL,0002B00096000000,2016-05-24,VIOLATION,605.6,Open junction boxes & wire splices ...,,0,0
22,HEINZ ST,1,0.000315,23rd Ward - PITTSBURGH,OTHER,,NaT,,,,,0,0
23,TECUMSEH ST,1,0.112297,15th Ward - PITTSBURGH,COMMERCIAL,,NaT,,,,,0,0
24,TRIMONT LN,1,0.039673,19th Ward - PITTSBURGH,COMMERCIAL,,NaT,,,,,0,0
35,5TH AVE,100,0.000169,1st Ward - PITTSBURGH,COMMERCIAL,0001D00125000000,2017-02-06,VIOLATION,1003.6,Means of Egress Continuity ...,,0,0
41,7TH ST,100,0.007241,2nd Ward - PITTSBURGH,COMMERCIAL,0008S00122000000,1900-01-01,,,NA ...,2016-12-01 00:00:00,0,0
44,HYMAN PL,100,0.001138,5th Ward - PITTSBURGH,COMMERCIAL,,NaT,,,,,0,0
45,ROSS ST,100,0.000154,1st Ward - PITTSBURGH,COMMERCIAL,,NaT,,,,,0,0


In [40]:
high_risk = pitt_inspect[pitt_inspect['viol_cutoff']==1]
pitt_inspect_viol =  high_risk[high_risk['codeset']=='VIOLATION']
pitt_inspect_NA =  high_risk[high_risk['codeset']=='NA']

print("High Risk Properties: %d" % len(high_risk)) # pitt_inspect_Nan.count(), pitt_inspect_viol.count(), pitt_inspect_NA.count()
len(pitt_inspect_NA)
len(pitt_inspect_viol)
print("High Risk Properties with Fire Inspection Violation: %d" % len(pitt_inspect_viol))
print("High Risk Properties with Fire Inspection, no Violation: %d" % len(pitt_inspect_NA))

High Risk Properties: 206
High Risk Properties with Fire Inspection Violation: 99
High Risk Properties with Fire Inspection, no Violation: 6
