This notebook generates a fire risk score that the Chattanooga Fire Department uses as a decision support tool to help prioritize inspections. For more information on the model, please visit https://www.chattadata.org/stories/s/jxzy-f5id

# Training data

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor

In [2]:
# Load the training set from CSV, then preview its first ten rows,
# showing only the columns from position 3 onward.
df_training = pd.read_csv(r'...\training.csv')
df_training.head(10).iloc[:, 3:]

Unnamed: 0,Fire Indicator,Code743Count,Code745Count,Code651Count,Code522Count,Code531Count,Code412Count,LANDVALUE,CALCACRES,Building Age,Storys,Exterior Wall Desc,Year Built
0,0,6,11,0,1,1,1,1084400,12.43,35.0,2.0,BRICK VENEER,1985.0
1,0,0,0,0,0,0,1,171700,1.0,56.0,1.0,BRICK VENEER,1964.0
2,0,0,1,0,0,1,1,69400,1.0,30.0,1.0,CORRUGATED M,1990.0
3,0,0,3,0,0,0,1,604400,1.5,65.0,4.0,BRICK,1955.0
4,0,0,0,0,0,0,1,151500,3.03,75.0,1.71,BRICK,1945.0
5,0,0,0,0,0,0,1,720400,1.8,30.0,1.0,PLATE GLASS,1990.0
6,0,0,0,0,0,0,1,623800,1.0,55.0,1.0,CONC BLK PLA,1965.0
7,0,0,1,0,0,0,1,224000,1.0,13.0,1.0,,2007.0
8,0,1,0,0,0,0,1,769000,1.07,13.0,1.0,BRICK VENEER,2007.0
9,0,0,0,0,0,0,1,252800,1.0,38.0,1.0,BRICK VENEER,1982.0


In [3]:
# Replace missing 'Storys' and 'Building Age' values with each column's mean.
# The filled frame is assigned back instead of calling
# `df[col].fillna(..., inplace=True)`: that form mutates an intermediate
# Series, raises a FutureWarning in pandas 2.x, and leaves the frame unchanged
# once Copy-on-Write is enabled. Passing a Series of means fills only the
# columns named in its index; every other column is left as-is.
df_training = df_training.fillna(df_training[['Storys', 'Building Age']].mean())

In [4]:
# Build the model inputs by column position:
#   y -> column 3 only, kept two-dimensional with shape (n_rows, 1)
#   X -> the ten columns at positions 4 through 13
# NOTE(review): per the preview above these appear to be 'Fire Indicator'
# (target) and 'Code743Count' .. 'Storys' (features) - confirm against the CSV.
y = df_training.iloc[:, 3:4].to_numpy()
X = df_training.iloc[:, 4:14].to_numpy()

In [5]:
# Peek at the first five rows of the training feature array.
X[:5]

array([[6.0000e+00, 1.1000e+01, 0.0000e+00, 1.0000e+00, 1.0000e+00,
        1.0000e+00, 1.0844e+06, 1.2430e+01, 3.5000e+01, 2.0000e+00],
       [0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        1.0000e+00, 1.7170e+05, 1.0000e+00, 5.6000e+01, 1.0000e+00],
       [0.0000e+00, 1.0000e+00, 0.0000e+00, 0.0000e+00, 1.0000e+00,
        1.0000e+00, 6.9400e+04, 1.0000e+00, 3.0000e+01, 1.0000e+00],
       [0.0000e+00, 3.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        1.0000e+00, 6.0440e+05, 1.5000e+00, 6.5000e+01, 4.0000e+00],
       [0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        1.0000e+00, 1.5150e+05, 3.0300e+00, 7.5000e+01, 1.7100e+00]])

In [6]:
# Fit a 100-tree random forest regressor on the training features.
# `random_state` is pinned so that re-running the notebook rebuilds the same
# forest and therefore reproduces the same risk scores; without it each run
# draws different bootstrap samples and candidate splits.
# `y` is two-dimensional (n_rows, 1); `ravel()` flattens it to the 1-D target
# passed to `fit`.
regressor = RandomForestRegressor(n_estimators=100, random_state=42)
regressor.fit(X, y.ravel())

RandomForestRegressor()

# Testing data

In [7]:
# Load the past two years of fire data; the feature matrix to be scored is
# built from this frame further down.
df_testing = pd.read_csv(filepath_or_buffer=r'...\past_two_years.csv')

In [8]:
# Replace missing 'Storys' and 'Building Age' values with each column's mean,
# computed on this scoring frame itself.
# The filled frame is assigned back instead of calling
# `df[col].fillna(..., inplace=True)`: that form mutates an intermediate
# Series, raises a FutureWarning in pandas 2.x, and leaves the frame unchanged
# once Copy-on-Write is enabled. Passing a Series of means fills only the
# columns named in its index; every other column is left as-is.
df_testing = df_testing.fillna(df_testing[['Storys', 'Building Age']].mean())

In [9]:
# Preview the first ten rows of the scoring data, all columns included.
df_testing.head(10)

Unnamed: 0,ADDRESS,Code743Count,Code745Count,Code651Count,Code522Count,Code531Count,Code412Count,LANDVALUE,CALCACRES,Building Age,Storys,Exterior Wall Desc,Year Built
0,2216 JENKINS RD,0,0,0,0,0,0,1215300,56.5,33.0,1.0,STONE,1987.0
1,1808 CITICO AVE,0,0,0,0,0,0,4300,1.0,80.0,1.0,COMPOSITION,1940.0
2,1001 BROAD ST,0,0,0,0,0,0,1426000,1.06,43.0,4.0,CONC/TILT-UP,1977.0
3,4007 13TH AVE,0,0,0,0,0,0,14600,1.0,100.0,1.0,WOOD FR W SH,1900.0
4,1611 S WATKINS ST,0,0,0,0,0,0,5400,1.0,16.0,1.0,VINYL,2004.0
5,5326 SLAYTON AVE,0,0,0,0,0,0,4500,1.0,57.0,1.0,BRICK,1963.0
6,3601 ROGERS RD,0,0,0,0,0,0,11400,1.0,61.0,1.0,BRICK,1959.0
7,4118 FAGAN ST,0,0,0,0,0,0,3500,1.0,62.0,1.0,BRICK,1958.0
8,1700 STANFIEL ST,0,0,0,0,0,0,3800,1.0,17.0,1.0,VINYL,2003.0
9,1439 IVY ST,0,0,0,0,0,0,3900,1.0,62.0,1.0,BRICK,1958.0


In [10]:
# Build the scoring feature matrix from the same columns the model was trained
# on. The feature names are taken from the training frame (positions 4-13, the
# slice used to build X) and looked up by name in `df_testing`, instead of
# relying on a separate positional slice of the scoring file. This keeps the
# feature order identical to training and raises a KeyError if the scoring
# file is missing a feature or names it differently.
feature_columns = df_training.columns[4:14]
X_testing = df_testing[feature_columns].values

In [11]:
# Peek at the first five rows of the scoring feature array.
X_testing[:5]

array([[0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 1.2153e+06, 5.6500e+01, 3.3000e+01, 1.0000e+00],
       [0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 4.3000e+03, 1.0000e+00, 8.0000e+01, 1.0000e+00],
       [0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 1.4260e+06, 1.0600e+00, 4.3000e+01, 4.0000e+00],
       [0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 1.4600e+04, 1.0000e+00, 1.0000e+02, 1.0000e+00],
       [0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 5.4000e+03, 1.0000e+00, 1.6000e+01, 1.0000e+00]])

In [12]:
# Score every row of the scoring matrix with the fitted forest.
fire_risk_score_pred = regressor.predict(X=X_testing)

In [13]:
# Attach the predictions to the scoring frame as a "Fire Risk Index" column.
# The predictions come back in the same row order as `df_testing`, so they are
# assigned positionally. This replaces an index-based merge with a temporary
# frame (which would silently drop rows if `df_testing` ever had a non-default
# index) and the follow-up in-place rename of the integer-named column 0.
final_fr_score = df_testing.assign(**{"Fire Risk Index": fire_risk_score_pred})

In [15]:
# Inspect the first ten scored rows before writing the file out.
final_fr_score.iloc[:10]

Unnamed: 0,ADDRESS,Code743Count,Code745Count,Code651Count,Code522Count,Code531Count,Code412Count,LANDVALUE,CALCACRES,Building Age,Storys,Exterior Wall Desc,Year Built,Fire Risk Index
0,2216 JENKINS RD,0,0,0,0,0,0,1215300,56.5,33.0,1.0,STONE,1987.0,0.68
1,1808 CITICO AVE,0,0,0,0,0,0,4300,1.0,80.0,1.0,COMPOSITION,1940.0,0.424333
2,1001 BROAD ST,0,0,0,0,0,0,1426000,1.06,43.0,4.0,CONC/TILT-UP,1977.0,0.86
3,4007 13TH AVE,0,0,0,0,0,0,14600,1.0,100.0,1.0,WOOD FR W SH,1900.0,0.424254
4,1611 S WATKINS ST,0,0,0,0,0,0,5400,1.0,16.0,1.0,VINYL,2004.0,0.83
5,5326 SLAYTON AVE,0,0,0,0,0,0,4500,1.0,57.0,1.0,BRICK,1963.0,0.284738
6,3601 ROGERS RD,0,0,0,0,0,0,11400,1.0,61.0,1.0,BRICK,1959.0,0.63
7,4118 FAGAN ST,0,0,0,0,0,0,3500,1.0,62.0,1.0,BRICK,1958.0,0.130874
8,1700 STANFIEL ST,0,0,0,0,0,0,3800,1.0,17.0,1.0,VINYL,2003.0,0.376024
9,1439 IVY ST,0,0,0,0,0,0,3900,1.0,62.0,1.0,BRICK,1958.0,0.328663


In [18]:
# Export the scored frame to CSV, leaving the row index out of the file.
final_fr_score.to_csv(
    path_or_buf=r'Z:\Workflows\Fire\Fire Risk Scores\data\final_fr_score.csv',
    index=False,
)