This notebook sets up the default feature values and the user-input options for the bridge rating Flask app.

In [1]:
import numpy as np
import pandas as pd
from sklearn.externals import joblib

In [2]:
# Load the three prepared tables; in each file the first CSV column is used
# as the row index.
df_num, df_cat, feat_df = (
    pd.read_csv(csv_path, index_col=0)
    for csv_path in (
        '../data/bridges_num.csv',
        '../data/bridges_cat.csv',
        '../data/feat_df.csv',
    )
)

In [3]:
# preview the first five rows of the model feature table
feat_df.head(n=5)

Unnamed: 0,MIN_VERT_CLR_010,DETOUR_KILOS_019,AGE,TRAFFIC_LANES_ON_028A,TRAFFIC_LANES_UND_028B,ADT_029,APPR_WIDTH_MT_032,DEGREES_SKEW_034,NAV_VERT_CLR_MT_039,NAV_HORR_CLR_MT_040,...,SUPERSTRUCTURE_COND_059_6,SUPERSTRUCTURE_COND_059_7,SUPERSTRUCTURE_COND_059_8,SUPERSTRUCTURE_COND_059_9,SUPERSTRUCTURE_COND_059_N,SUBSTRUCTURE_COND_060_4,SUBSTRUCTURE_COND_060_5,SUBSTRUCTURE_COND_060_6,SUBSTRUCTURE_COND_060_7,SUBSTRUCTURE_COND_060_8
51-1VA3701,99.99,2.0,53.0,2.0,0,2000.0,7.9,45.0,0.0,0.0,...,0,0,0,0,0,0,1,0,0,0
51-1VA591A,99.99,4.0,42.0,2.0,0,50.0,6.5,0.0,0.0,0.0,...,0,0,0,0,0,0,0,1,0,0
51-2VA00AA,99.99,22.0,60.0,2.0,0,2000.0,6.1,0.0,0.0,0.0,...,0,1,0,0,0,0,1,0,0,0
51-2VA00CC,99.99,6.0,60.0,2.0,0,1000.0,7.3,0.0,0.0,0.0,...,1,0,0,0,0,0,0,1,0,0
51-2VA00DD,99.99,12.0,60.0,2.0,0,500.0,6.1,0.0,0.0,0.0,...,1,0,0,0,0,0,1,0,0,0


In [4]:
# Numerical columns whose value is supplied as input rather than defaulted.
num_items = ['SUFFICIENCY_RATING_feat_yr']

In [5]:
# Categorical columns whose value is supplied as app input rather than
# defaulted (trailing comment = item number used in the section headers below).
cat_items = [
    'SUPERSTRUCTURE_COND_059',  # item 59
    'DECK_STRUCTURE_TYPE_107',  # item 107
    'STRUCTURE_KIND_043A',      # item 43A
    'SURFACE_TYPE_108A',        # item 108A
    'HISTORY_037',              # item 37
    'FUNCTIONAL_CLASS_026',     # item 26
    'OPEN_CLOSED_POSTED_041',   # item 41
    'SERVICE_UND_042B',         # item 42B
    'DECK_PROTECTION_108C',     # item 108C
    'MEMBRANE_TYPE_108B',       # item 108B
]

# Default Values

In [6]:
# initiate default series data: one zero per model feature column
# Created as float zeros because the numerical defaults assigned later are
# float means; starting from an int Series would force an implicit
# int -> float upcast on that assignment (deprecated in recent pandas).
defaults = pd.Series(data=0.0, index=feat_df.columns)
defaults

MIN_VERT_CLR_010              0
DETOUR_KILOS_019              0
AGE                           0
TRAFFIC_LANES_ON_028A         0
TRAFFIC_LANES_UND_028B        0
ADT_029                       0
APPR_WIDTH_MT_032             0
DEGREES_SKEW_034              0
NAV_VERT_CLR_MT_039           0
NAV_HORR_CLR_MT_040           0
MAIN_UNIT_SPANS_045           0
APPR_SPANS_046                0
HORR_CLR_MT_047               0
MAX_SPAN_LEN_MT_048           0
STRUCTURE_LEN_MT_049          0
LEFT_CURB_MT_050A             0
RIGHT_CURB_MT_050B            0
ROADWAY_WIDTH_MT_051          0
DECK_WIDTH_MT_052             0
VERT_CLR_OVER_MT_053          0
VERT_CLR_UND_054B             0
LAT_UND_MT_055B               0
LEFT_LAT_UND_MT_056           0
RECON_AGE                     0
PERCENT_ADT_TRUCK_109         0
SUFFICIENCY_RATING_feat_yr    0
TOLL_020_1.0                  0
TOLL_020_2.0                  0
TOLL_020_3.0                  0
MAINTENANCE_021_1.0           0
                             ..
MEMBRANE

## Numerical

In [7]:
# boolean mask over df_num's columns: True for every column that is NOT
# reserved for input in num_items
num_col_def = np.logical_not(df_num.columns.isin(num_items))
# default for each remaining column is its column mean
num_def = df_num.loc[:, num_col_def].mean()
num_def

MIN_VERT_CLR_010            98.647907
DETOUR_KILOS_019            27.702257
AGE                         52.727883
TRAFFIC_LANES_ON_028A        2.178157
TRAFFIC_LANES_UND_028B       0.782184
ADT_029                   8298.545455
APPR_WIDTH_MT_032            8.782001
DEGREES_SKEW_034            13.001220
NAV_VERT_CLR_MT_039          0.156681
NAV_HORR_CLR_MT_040          0.437157
MAIN_UNIT_SPANS_045          2.597926
APPR_SPANS_046               0.342892
HORR_CLR_MT_047              8.366626
MAX_SPAN_LEN_MT_048         15.430506
STRUCTURE_LEN_MT_049        51.870897
LEFT_CURB_MT_050A            0.335937
RIGHT_CURB_MT_050B           0.345149
ROADWAY_WIDTH_MT_051         8.642099
DECK_WIDTH_MT_052            9.885906
VERT_CLR_OVER_MT_053        98.916571
VERT_CLR_UND_054B            1.377322
LAT_UND_MT_055B              1.128920
LEFT_LAT_UND_MT_056          2.551556
RECON_AGE                   49.061623
PERCENT_ADT_TRUCK_109        4.114598
dtype: float64

In [8]:
# write the numerical means into the matching entries of the default series
defaults.loc[num_def.index] = num_def

## Categorical

In [9]:
# specify columns needed for default categorical values
# (every df_cat column that is not reserved for app input in cat_items)
cat_col_def = ~df_cat.columns.isin(cat_items)
# use most frequent categorical values as default input.
# DataFrame.mode() returns one row per tied value, NaN-padded for the other
# columns, so keep only the first row before encoding; otherwise a tie in any
# column would produce spurious '<column>_nan' dummy columns that do not
# exist in the default series.
cat_def = df_cat.loc[:, cat_col_def].mode().iloc[[0]]
# one-hot encode categorical data as strings
cat_def = pd.get_dummies(cat_def.astype(str))
# convert the single encoded row to a series indexed by dummy column name
cat_def = pd.Series(cat_def.values[0], index=cat_def.columns)
cat_def

TOLL_020_3.0                1
MAINTENANCE_021_1.0         1
DESIGN_LOAD_031_6.0         1
MEDIAN_CODE_033_0.0         1
STRUCTURE_FLARED_035_0.0    1
RAILINGS_036A_0             1
TRANSITIONS_036B_0          1
APPR_RAIL_036C_1            1
APPR_RAIL_END_036D_0        1
NAVIGATION_038_0            1
SERVICE_ON_042A_1           1
STRUCTURE_TYPE_043B_2       1
APPR_KIND_044A_0.0          1
APPR_TYPE_044B_0.0          1
DECK_COND_058_6             1
SUBSTRUCTURE_COND_060_6     1
dtype: uint8

In [10]:
# switch on the one-hot entries of the most frequent categories in the
# default series
defaults.loc[cat_def.index] = cat_def

In [11]:
# export default series
# Writes the index labels and values of `defaults` to a CSV file in the
# current working directory.
# NOTE(review): the default of the `header` argument of Series.to_csv differs
# between pandas releases -- confirm the reader of this file expects the
# layout produced by the installed version.
defaults.to_csv('default_feats.csv')

# Input Value Options

In [12]:
# container for the input choices; filled section by section below
input_options = dict()

## Sufficiency Rating

In [13]:
# lowest and highest sufficiency rating found in df_num
suff_rtg = dict(
    min=df_num['SUFFICIENCY_RATING_feat_yr'].min(),
    max=df_num['SUFFICIENCY_RATING_feat_yr'].max(),
)

In [14]:
# register the sufficiency-rating bounds in the options dictionary
input_options.update({'Sufficiency Rating': suff_rtg})

## 59 - Superstructure Condition Rating

In [15]:
# distinct superstructure condition codes present in the data, sorted
sorted(pd.unique(df_cat['SUPERSTRUCTURE_COND_059']))

['4', '5', '6', '7', '8', '9', 'N']

In [16]:
# Item 59 choices: display label -> code, built from a (code, label) table.
sup_rtg = {
    label: code
    for code, label in [
        (4, '4 - Poor'),
        (5, '5 - Fair'),
        (6, '6 - Satisfactory'),
        (7, '7 - Good'),
        (8, '8 - Very Good'),
        (9, '9 - Excellent'),
        ('N', 'Not Applicable'),
    ]
}

In [17]:
# register the Item 59 choices in the options dictionary
input_options.update({'Item 59: Superstructure Condition Rating': sup_rtg})

## 107 - Deck Structure Type

In [18]:
# distinct deck structure type codes present in the data, sorted
sorted(pd.unique(df_cat['DECK_STRUCTURE_TYPE_107']))

['1', '2', '3', '5', '6', '8', '9', 'N']

In [19]:
# Item 107 choices: display label -> code, built from a (code, label) table.
deck_type = {
    label: code
    for code, label in [
        (1, 'Concrete Cast-in-Place'),
        (2, 'Concrete Precast Panels'),
        (3, 'Open Grating'),
        (5, 'Steel plate'),
        (6, 'Corrugated Steel'),
        (8, 'Wood or Timber'),
        (9, 'Other'),
        ('N', 'Not applicable'),
    ]
}

In [20]:
# register the Item 107 choices in the options dictionary
input_options.update({'Item 107: Deck Type': deck_type})

## 43A - Structure Type

In [21]:
# distinct structure kind codes present in the data, sorted
sorted(pd.unique(df_cat['STRUCTURE_KIND_043A']))

[1, 2, 3, 4, 5, 6, 7, 8, 9]

In [22]:
# Item 43A choices: display label -> code, built from a (code, label) table.
struct_type = {
    label: code
    for code, label in [
        (1, 'Concrete'),
        (2, 'Concrete continuous'),
        (3, 'Steel'),
        (4, 'Steel continuous'),
        (5, 'Prestressed concrete'),
        (6, 'Prestressed concrete continuous'),
        (7, 'Wood or Timber'),
        (8, 'Masonry'),
        (9, 'Aluminum or Iron'),
    ]
}

In [23]:
# register the Item 43A choices in the options dictionary
input_options.update({'Item 43A: Structure Type': struct_type})

## 108A - Surface Type

In [24]:
# distinct surface type codes present in the data, sorted
sorted(pd.unique(df_cat['SURFACE_TYPE_108A']))

['0', '1', '2', '3', '5', '6', '7', '8', '9', 'N']

In [25]:
# Item 108A choices: display label -> code, built from a (code, label) table.
surf_type = {
    label: code
    for code, label in [
        (0, 'None'),
        (1, 'Monolithic Concrete'),
        (2, 'Integral Concrete'),
        (3, 'Latex Concrete'),
        (5, 'Epoxy Overlay'),
        (6, 'Bituminous'),
        (7, 'Wood or Timber'),
        (8, 'Gravel'),
        (9, 'Other'),
        ('N', 'Not applicable'),
    ]
}

In [26]:
# register the Item 108A choices in the options dictionary
input_options.update({'Item 108A: Surface Type': surf_type})

## 37 - History

In [27]:
# distinct history codes present in the data, sorted
sorted(pd.unique(df_cat['HISTORY_037']))

[1.0, 2.0, 3.0, 4.0, 5.0]

In [28]:
# Item 37 choices: display label -> code, built from a (code, label) table.
# NOTE(review): df_cat holds these codes as floats (1.0 ... 5.0) and the
# one-hot column built from them in this notebook is e.g. 'HISTORY_037_2.0';
# str() of the ints below yields '2', not '2.0' -- confirm the app formats
# the selected code so that it matches the feature column name.
hist = {
    label: code
    for code, label in [
        (1, 'Registered as historic'),
        (2, 'Eligible to register'),
        (3, 'Possibly eligible'),
        (4, 'Significance indeterminable'),
        (5, 'Not eligible'),
    ]
}

In [29]:
# register the Item 37 choices in the options dictionary
input_options.update({'Item 37: History': hist})

## 26 - Functional Class

In [30]:
# distinct functional class codes present in the data, sorted
sorted(pd.unique(df_cat['FUNCTIONAL_CLASS_026']))

[1.0, 2.0, 6.0, 7.0, 8.0, 9.0, 11.0, 12.0, 14.0, 16.0, 17.0, 19.0]

In [31]:
# Item 26 choices: display label -> code, built from a (code, label) table.
# NOTE(review): df_cat holds these codes as floats (1.0, 2.0, ...) and the
# one-hot column built from them in this notebook is e.g.
# 'FUNCTIONAL_CLASS_026_8.0'; str() of the ints below yields '8', not '8.0'
# -- confirm the app formats the selected code to match the column name.
fun_class = {
    label: code
    for code, label in [
        (1, 'Rural Intrst. Arterial'),
        (2, 'Rural Other Arterial'),
        (6, 'Rural Minor Arterial'),
        (7, 'Rural Major Collector'),
        (8, 'Rural Minor Collector'),
        (9, 'Rural Local'),
        (11, 'Urban Intrst. Arterial'),
        (12, 'Urban Exprswy. Arterial'),
        (14, 'Urban Other Arterial'),
        (16, 'Urban Minor Arterial'),
        (17, 'Urban Collector'),
        (19, 'Urban Local'),
    ]
}

In [32]:
# register the Item 26 choices in the options dictionary
input_options.update({'Item 26: Functional Class': fun_class})

## 41 - Load Posting

In [33]:
# distinct open/closed/posted codes present in the data, sorted
sorted(pd.unique(df_cat['OPEN_CLOSED_POSTED_041']))

['A', 'D', 'P', 'R']

In [34]:
# Item 41 choices: display label -> code, built from a (code, label) table.
post = {
    label: code
    for code, label in [
        ('A', 'Open'),
        ('D', 'Open, with shoring'),
        ('P', 'Posted for load'),
        ('R', 'Posted for other'),
    ]
}

In [35]:
# register the Item 41 choices in the options dictionary
input_options.update({'Item 41: Load Posting': post})

## 42B - Service under Bridge

In [36]:
# distinct service-under-bridge codes present in the data, sorted
sorted(pd.unique(df_cat['SERVICE_UND_042B']))

[0, 1, 2, 3, 4, 5, 6, 7, 8]

In [37]:
# Item 42B choices: display label -> code, built from a (code, label) table.
serv_und = {
    label: code
    for code, label in [
        (0, 'Other'),
        (1, 'Highway'),
        (2, 'Railroad'),
        (3, 'Pedestrian-bicycle'),
        (4, 'Highway-railroad'),
        (5, 'Waterway'),
        (6, 'Highway-waterway'),
        (7, 'Railroad-waterway'),
        (8, 'Highway-waterway-railroad'),
    ]
}

In [38]:
# register the Item 42B choices in the options dictionary
input_options.update({'Item 42B: Service under Bridge': serv_und})

## 108B - Membrane Type

In [39]:
# distinct membrane type codes present in the data, sorted
sorted(pd.unique(df_cat['MEMBRANE_TYPE_108B']))

['0', '1', '2', '3', '8', '9', 'N']

In [40]:
# Item 108B choices: display label -> code, built from a (code, label) table.
mem_type = {
    label: code
    for code, label in [
        (0, 'None'),
        (1, 'Built-up'),
        (2, 'Preformed Fabric'),
        (3, 'Epoxy'),
        (8, 'Unknown'),
        (9, 'Other'),
        ('N', 'Not Applicable'),
    ]
}

In [41]:
# register the Item 108B choices in the options dictionary
input_options.update({'Item 108B: Membrane Type': mem_type})

## 108C - Deck Protection

In [42]:
# distinct deck protection codes present in the data, sorted
sorted(pd.unique(df_cat['DECK_PROTECTION_108C']))

['0', '1', '3', '8', '9', 'N']

In [43]:
# Item 108C choices: display label -> code, built from a (code, label) table.
deck_prot = {
    label: code
    for code, label in [
        (0, 'None'),
        (1, 'Epoxy Coated Reinforcing'),
        (3, 'Other Coated Reinforcing'),
        (8, 'Unknown'),
        (9, 'Other'),
        ('N', 'Not Applicable'),
    ]
}

In [44]:
# register the Item 108C choices in the options dictionary
input_options.update({'Item 108C: Deck Protection': deck_prot})

## Export Input Dictionary

In [46]:
# serialize the options dictionary to the working directory
joblib.dump(value=input_options, filename='input_options.sav')

['input_options.sav']

# Test Input

In [33]:
# start the test input from an independent copy of the defaults, so the
# overrides below leave `defaults` untouched
t_input = defaults.copy(deep=True)

## Numerical Input

In [34]:
# generate sample numerical input values
# Draw one value uniformly between the lowest and highest sufficiency rating
# and round it to one decimal. The bounds are read from `suff_rtg` (computed
# from df_num earlier in this notebook) so the cell runs on a fresh kernel
# instead of relying on leftover variables.
np.random.seed(0)
num_input = np.round(np.random.uniform(suff_rtg['min'], suff_rtg['max'], 1), 1)[0]
num_input

66.4

In [35]:
# override the default with the sampled numerical input for every entry
# whose label is listed in num_items
t_input.loc[t_input.index.isin(num_items)] = num_input

## Categorical Input

In [36]:
# Build a sample categorical input in one chain:
#   1. pick one observed code at random for each reserved categorical column
#   2. put the picks in a single-row frame and cast them to strings
#   3. one-hot encode the row
#   4. flatten the encoded row into a series indexed by dummy column name
np.random.seed(0)
cat_input = (
    pd.DataFrame(
        data=[[np.random.choice(list(df_cat[col].unique())) for col in cat_items]],
        columns=cat_items,
    )
    .astype(str)
    .pipe(pd.get_dummies)
    .pipe(lambda encoded: pd.Series(encoded.values[0], index=encoded.columns))
)
cat_input

SUPERSTRUCTURE_COND_059_4    1
DECK_STRUCTURE_TYPE_107_6    1
STRUCTURE_KIND_043A_7        1
SURFACE_TYPE_108A_0          1
HISTORY_037_2.0              1
FUNCTIONAL_CLASS_026_8.0     1
OPEN_CLOSED_POSTED_041_D     1
SERVICE_UND_042B_0           1
DECK_PROTECTION_108C_1       1
MEMBRANE_TYPE_108B_2         1
dtype: uint8

In [42]:
# override the defaults with the one-hot entries of the sampled categories
t_input.loc[cat_input.index] = cat_input

## Run Model

In [62]:
# display the assembled test vector (defaults plus the sampled overrides)
t_input

MIN_VERT_CLR_010                98.647907
DETOUR_KILOS_019                27.702257
AGE                             52.727883
TRAFFIC_LANES_ON_028A            2.178157
TRAFFIC_LANES_UND_028B           0.782184
ADT_029                       8298.545455
APPR_WIDTH_MT_032                8.782001
DEGREES_SKEW_034                13.001220
NAV_VERT_CLR_MT_039              0.156681
NAV_HORR_CLR_MT_040              0.437157
MAIN_UNIT_SPANS_045              2.597926
APPR_SPANS_046                   0.342892
HORR_CLR_MT_047                  8.366626
MAX_SPAN_LEN_MT_048             15.430506
STRUCTURE_LEN_MT_049            51.870897
LEFT_CURB_MT_050A                0.335937
RIGHT_CURB_MT_050B               0.345149
ROADWAY_WIDTH_MT_051             8.642099
DECK_WIDTH_MT_052                9.885906
VERT_CLR_OVER_MT_053            98.916571
VERT_CLR_UND_054B                1.377322
LAT_UND_MT_055B                  1.128920
LEFT_LAT_UND_MT_056              2.551556
RECON_AGE                       49

In [66]:
# load the saved model from the model output directory
# NOTE: joblib.load unpickles the file, so only load model files from a
# trusted source.
model = joblib.load(filename='../model_output/model.sav')

In [68]:
# predict the label for the test vector; it is wrapped in a list so the
# model receives a single sample
# NOTE(review): assumes t_input's entries are in the column order the model
# was trained on -- confirm
model.predict([t_input])

array(['not poor'], dtype=object)

In [69]:
# class labels known to the loaded model
model.classes_

array(['not poor', 'poor'], dtype=object)

In [70]:
# per-class probabilities for the same single test sample
# NOTE(review): columns presumably follow the order of model.classes_ -- confirm
model.predict_proba([t_input])

array([[0.93019513, 0.06980487]])