This notebook builds the default values and the input options used by the bridge rating Flask app, then tests them against the saved model.

In [1]:
import numpy as np
import pandas as pd
import json
# sklearn.externals.joblib was deprecated in scikit-learn 0.21 and removed in 0.23;
# prefer the standalone joblib package and fall back to the vendored copy on old installs.
try:
    import joblib
except ImportError:
    from sklearn.externals import joblib

In [2]:
# numerical features, categorical features and the model feature frame;
# each CSV stores its row index in the first column
df_num, df_cat, feat_df = (
    pd.read_csv(csv_path, index_col=0)
    for csv_path in (
        '../data/bridges_num.csv',
        '../data/bridges_cat.csv',
        '../data/feat_df.csv',
    )
)

In [3]:
# numerical columns the app lets the user set (every other numerical column keeps its default)
num_items = ['SUFFICIENCY_RATING_feat_yr']

In [4]:
# categorical columns the app lets the user set.
# Order matters: the test section pairs these positionally with the option
# groups stored in input_options.
cat_items = list((
    'FUNCTIONAL_CLASS_026',     # Item 26: functional class
    'HISTORY_037',              # Item 37: history
    'OPEN_CLOSED_POSTED_041',   # Item 41: load posting
    'SERVICE_UND_042B',         # Item 42B: service under bridge
    'STRUCTURE_KIND_043A',      # Item 43A: structure type
    'SUPERSTRUCTURE_COND_059',  # Item 59: superstructure condition rating
    'DECK_STRUCTURE_TYPE_107',  # Item 107: deck structure type
    'SURFACE_TYPE_108A',        # Item 108A: surface type
    'MEMBRANE_TYPE_108B',       # Item 108B: membrane type
    'DECK_PROTECTION_108C',     # Item 108C: deck protection
))

# Default Values

In [5]:
# blank model input: one integer zero per column of feat_df, indexed by column name
feature_names = feat_df.columns
zero_row = [0] * len(feature_names)
input_series = pd.Series(data=zero_row, index=feature_names)
input_series

MIN_VERT_CLR_010              0
DETOUR_KILOS_019              0
AGE                           0
TRAFFIC_LANES_ON_028A         0
TRAFFIC_LANES_UND_028B        0
ADT_029                       0
APPR_WIDTH_MT_032             0
DEGREES_SKEW_034              0
NAV_VERT_CLR_MT_039           0
NAV_HORR_CLR_MT_040           0
MAIN_UNIT_SPANS_045           0
APPR_SPANS_046                0
HORR_CLR_MT_047               0
MAX_SPAN_LEN_MT_048           0
STRUCTURE_LEN_MT_049          0
LEFT_CURB_MT_050A             0
RIGHT_CURB_MT_050B            0
ROADWAY_WIDTH_MT_051          0
DECK_WIDTH_MT_052             0
VERT_CLR_OVER_MT_053          0
VERT_CLR_UND_054B             0
LAT_UND_MT_055B               0
LEFT_LAT_UND_MT_056           0
RECON_AGE                     0
PERCENT_ADT_TRUCK_109         0
SUFFICIENCY_RATING_feat_yr    0
TOLL_020_1.0                  0
TOLL_020_2.0                  0
TOLL_020_3.0                  0
MAINTENANCE_021_1.0           0
                             ..
MEMBRANE

In [6]:
# save as json file
# NOTE: export is disabled. The "Test Input" section reads 'input_series.json',
# so uncomment the line below to (re)generate that file.
# input_series.to_json('input_series.json')

## Numerical

In [7]:
# default for each numerical feature = its column-wise mean over df_num
num_def = df_num.mean(axis=0)
num_def

MIN_VERT_CLR_010                98.647907
DETOUR_KILOS_019                27.702257
AGE                             52.727883
TRAFFIC_LANES_ON_028A            2.178157
TRAFFIC_LANES_UND_028B           0.782184
ADT_029                       8298.545455
APPR_WIDTH_MT_032                8.782001
DEGREES_SKEW_034                13.001220
NAV_VERT_CLR_MT_039              0.156681
NAV_HORR_CLR_MT_040              0.437157
MAIN_UNIT_SPANS_045              2.597926
APPR_SPANS_046                   0.342892
HORR_CLR_MT_047                  8.366626
MAX_SPAN_LEN_MT_048             15.430506
STRUCTURE_LEN_MT_049            51.870897
LEFT_CURB_MT_050A                0.335937
RIGHT_CURB_MT_050B               0.345149
ROADWAY_WIDTH_MT_051             8.642099
DECK_WIDTH_MT_052                9.885906
VERT_CLR_OVER_MT_053            98.916571
VERT_CLR_UND_054B                1.377322
LAT_UND_MT_055B                  1.128920
LEFT_LAT_UND_MT_056              2.551556
RECON_AGE                       49

In [8]:
# save as json file
# NOTE: export is disabled. The "Test Input" section reads 'num_def.json'
# (into num_feat), so uncomment the line below to (re)generate that file.
# num_def.to_json('num_def.json')

## Categorical

In [9]:
# default for each categorical feature = its most frequent value.
# DataFrame.mode() can return more than one row when values tie; only the
# first row is kept and turned into a Series indexed by column name.
mode_frame = df_cat.mode()
cat_def = pd.Series(mode_frame.values[0], index=mode_frame.columns)
cat_def

TOLL_020                   3
MAINTENANCE_021            1
FUNCTIONAL_CLASS_026       9
DESIGN_LOAD_031            6
MEDIAN_CODE_033            0
STRUCTURE_FLARED_035       0
RAILINGS_036A              0
TRANSITIONS_036B           0
APPR_RAIL_036C             1
APPR_RAIL_END_036D         0
HISTORY_037                5
NAVIGATION_038             0
OPEN_CLOSED_POSTED_041     A
SERVICE_ON_042A            1
SERVICE_UND_042B           5
STRUCTURE_KIND_043A        3
STRUCTURE_TYPE_043B        2
APPR_KIND_044A             0
APPR_TYPE_044B             0
DECK_STRUCTURE_TYPE_107    1
SURFACE_TYPE_108A          6
MEMBRANE_TYPE_108B         0
DECK_PROTECTION_108C       0
DECK_COND_058              6
SUPERSTRUCTURE_COND_059    6
SUBSTRUCTURE_COND_060      6
dtype: object

In [10]:
# save as json file
# NOTE: export is disabled. The "Test Input" section reads 'cat_def.json'
# (into cat_feat), so uncomment the line below to (re)generate that file.
# cat_def.to_json('cat_def.json')

# Input Value Options

In [11]:
# option groups for the app, filled in section by section below (insertion order is relied on later)
input_options = dict()

## Sufficiency Rating

In [12]:
# Allowed range for the sufficiency rating input.
# min()/max() return numpy scalars; cast to plain floats so the dictionary can
# always be written with json.dump (numpy integer scalars are not JSON serializable).
suff_rtg = {
    'min': float(df_num['SUFFICIENCY_RATING_feat_yr'].min()),
    'max': float(df_num['SUFFICIENCY_RATING_feat_yr'].max())
}

In [13]:
# registered first: the test section skips this numerical group positionally with [1:]
input_options.update({'Sufficiency Rating': suff_rtg})

## 26 - Functional Class

In [14]:
# distinct Item 26 codes present in the data
sorted(df_cat.loc[:, 'FUNCTIONAL_CLASS_026'].unique())

[1.0, 2.0, 6.0, 7.0, 8.0, 9.0, 11.0, 12.0, 14.0, 16.0, 17.0, 19.0]

In [15]:
# Item 26: display label -> functional class code (float codes, as listed from the data)
fun_class = dict([
    ('Rural Intrst. Arterial', 1.0),
    ('Rural Other Arterial', 2.0),
    ('Rural Minor Arterial', 6.0),
    ('Rural Major Collector', 7.0),
    ('Rural Minor Collector', 8.0),
    ('Rural Local', 9.0),
    ('Urban Intrst. Arterial', 11.0),
    ('Urban Exprswy. Arterial', 12.0),
    ('Urban Other Arterial', 14.0),
    ('Urban Minor Arterial', 16.0),
    ('Urban Collector', 17.0),
    ('Urban Local', 19.0),
])

In [16]:
# register the Item 26 choices under their group name
input_options.update({'Item 26: Functional Class': fun_class})

## 37 - History

In [17]:
# distinct Item 37 codes present in the data
sorted(df_cat.loc[:, 'HISTORY_037'].unique())

[1.0, 2.0, 3.0, 4.0, 5.0]

In [18]:
# Item 37: display label -> history code; labels are listed in code order, codes are 1.0 .. 5.0
hist = {
    label: float(code)
    for code, label in enumerate(
        [
            'Registered as historic',
            'Eligible to register',
            'Possibly eligible',
            'Significance indeterminable',
            'Not eligible',
        ],
        start=1,
    )
}

In [19]:
# register the Item 37 choices under their group name
input_options.update({'Item 37: History': hist})

## 41 - Load Posting

In [20]:
# distinct Item 41 codes present in the data
sorted(df_cat.loc[:, 'OPEN_CLOSED_POSTED_041'].unique())

['A', 'D', 'P', 'R']

In [21]:
# Item 41: display label -> single-letter posting code
post = dict(zip(
    ('Open', 'Open, with shoring', 'Posted for load', 'Posted for other'),
    ('A', 'D', 'P', 'R'),
))

In [22]:
# register the Item 41 choices under their group name
input_options.update({'Item 41: Load Posting': post})

## 42B - Service under Bridge

In [23]:
# distinct Item 42B codes present in the data
sorted(df_cat.loc[:, 'SERVICE_UND_042B'].unique())

[0, 1, 2, 3, 4, 5, 6, 7, 8]

In [24]:
# Item 42B: display label -> service code; labels are listed in code order, codes are 0 .. 8
serv_und = {
    label: code
    for code, label in enumerate([
        'Other',
        'Highway',
        'Railroad',
        'Pedestrian-bicycle',
        'Highway-railroad',
        'Waterway',
        'Highway-waterway',
        'Railroad-waterway',
        'Highway-waterway-railroad',
    ])
}

In [25]:
# register the Item 42B choices under their group name
input_options.update({'Item 42B: Service under Bridge': serv_und})

## 43A - Structure Type

In [26]:
# distinct Item 43A codes present in the data
sorted(df_cat.loc[:, 'STRUCTURE_KIND_043A'].unique())

[1, 2, 3, 4, 5, 6, 7, 8, 9]

In [27]:
# Item 43A: display label -> structure code; labels are listed in code order, codes are 1 .. 9
struct_type = {
    label: code
    for code, label in enumerate(
        [
            'Concrete',
            'Concrete continuous',
            'Steel',
            'Steel continuous',
            'Prestressed concrete',
            'Prestressed concrete continuous',
            'Wood or Timber',
            'Masonry',
            'Aluminum or Iron',
        ],
        start=1,
    )
}

In [28]:
# register the Item 43A choices under their group name
input_options.update({'Item 43A: Structure Type': struct_type})

## 59 - Superstructure Condition Rating

In [29]:
# distinct Item 59 codes present in the data
sorted(df_cat.loc[:, 'SUPERSTRUCTURE_COND_059'].unique())

['4', '5', '6', '7', '8', '9', 'N']

In [30]:
# Item 59: display label -> condition rating code.
# NOTE(review): the data lists these codes as strings ('4' .. '9', 'N') while the
# numeric codes here are ints; confirm the downstream encoding treats both alike.
sup_rtg = dict(zip(
    (
        '4 - Poor',
        '5 - Fair',
        '6 - Satisfactory',
        '7 - Good',
        '8 - Very Good',
        '9 - Excellent',
        'Not Applicable',
    ),
    (4, 5, 6, 7, 8, 9, 'N'),
))

In [31]:
# register the Item 59 choices under their group name
input_options.update({'Item 59: Superstructure Condition Rating': sup_rtg})

## 107 - Deck Structure Type

In [32]:
# distinct Item 107 codes present in the data
sorted(df_cat.loc[:, 'DECK_STRUCTURE_TYPE_107'].unique())

['1', '2', '3', '5', '6', '8', '9', 'N']

In [33]:
# Item 107: display label -> deck structure code.
# NOTE(review): the data lists these codes as strings; the numeric codes here are ints.
deck_type = dict([
    ('Concrete Cast-in-Place', 1),
    ('Concrete Precast Panels', 2),
    ('Open Grating', 3),
    ('Steel plate', 5),
    ('Corrugated Steel', 6),
    ('Wood or Timber', 8),
    ('Other', 9),
    ('Not applicable', 'N'),
])

In [34]:
# register the Item 107 choices under their group name
input_options.update({'Item 107: Deck Type': deck_type})

## 108A - Surface Type

In [35]:
# distinct Item 108A codes present in the data
sorted(df_cat.loc[:, 'SURFACE_TYPE_108A'].unique())

['0', '1', '2', '3', '5', '6', '7', '8', '9', 'N']

In [36]:
# Item 108A: display label -> wearing surface code.
# NOTE(review): the data lists these codes as strings; the numeric codes here are ints.
surf_type = dict(zip(
    (
        'None',
        'Monolithic Concrete',
        'Integral Concrete',
        'Latex Concrete',
        'Epoxy Overlay',
        'Bituminous',
        'Wood or Timber',
        'Gravel',
        'Other',
        'Not applicable',
    ),
    (0, 1, 2, 3, 5, 6, 7, 8, 9, 'N'),
))

In [37]:
# register the Item 108A choices under their group name
input_options.update({'Item 108A: Surface Type': surf_type})

## 108B - Membrane Type

In [38]:
# distinct Item 108B codes present in the data
sorted(df_cat.loc[:, 'MEMBRANE_TYPE_108B'].unique())

['0', '1', '2', '3', '8', '9', 'N']

In [39]:
# Item 108B: display label -> membrane code.
# NOTE(review): the data lists these codes as strings; the numeric codes here are ints.
mem_type = dict([
    ('None', 0),
    ('Built-up', 1),
    ('Preformed Fabric', 2),
    ('Epoxy', 3),
    ('Unknown', 8),
    ('Other', 9),
    ('Not Applicable', 'N'),
])

In [40]:
# register the Item 108B choices under their group name
input_options.update({'Item 108B: Membrane Type': mem_type})

## 108C - Deck Protection

In [41]:
# distinct Item 108C codes present in the data
sorted(df_cat.loc[:, 'DECK_PROTECTION_108C'].unique())

['0', '1', '3', '8', '9', 'N']

In [42]:
# Item 108C: display label -> deck protection code.
# NOTE(review): the data lists these codes as strings; the numeric codes here are ints.
deck_prot = dict(zip(
    (
        'None',
        'Epoxy Coated Reinforcing',
        'Other Coated Reinforcing',
        'Unknown',
        'Other',
        'Not Applicable',
    ),
    (0, 1, 3, 8, 9, 'N'),
))

In [43]:
# register the Item 108C choices under their group name
input_options.update({'Item 108C: Deck Protection': deck_prot})

## Export Input Dictionary

In [44]:
# export input options dictionary
# NOTE: export is disabled. The "Test Input" section reads 'input_options.json',
# so uncomment the two lines below to (re)generate that file.
# with open('input_options.json', 'w') as outfile:
#     json.dump(input_options, outfile)

# Test Input

## Load

In [45]:
# Reload everything from the JSON files on disk; input_options and input_series
# replace the in-memory objects built earlier in the notebook.
with open('input_options.json', 'r') as options_file:
    input_options = json.load(options_file)

# blank input series, numerical defaults, categorical defaults (in that order)
input_series, num_feat, cat_feat = [
    pd.read_json(json_path, typ='series')
    for json_path in ('input_series.json', 'num_def.json', 'cat_def.json')
]

## Categorical Input

In [46]:
# draw one random label per categorical option group (seeded for repeatability);
# the first group is skipped because it holds the numerical sufficiency rating range
np.random.seed(0)
categorical_groups = list(input_options)[1:]
cat_input = []
for group_name in categorical_groups:
    group_labels = list(input_options[group_name])
    cat_input.append(np.random.choice(group_labels))
cat_input

['Rural Local',
 'Registered as historic',
 'Posted for other',
 'Pedestrian-bicycle',
 'Masonry',
 '5 - Fair',
 'Steel plate',
 'Bituminous',
 'Preformed Fabric',
 'Other']

## Numerical Input

In [47]:
# draw one sufficiency rating uniformly between the stored min and max,
# rounded to one decimal place (seeded for repeatability)
np.random.seed(0)
rating_bounds = input_options['Sufficiency Rating']
sampled_rating = np.random.uniform(rating_bounds['min'], rating_bounds['max'], 1)
num_input = np.round(sampled_rating, 1)[0]
num_input

66.4

## Process Input

### Categorical

In [48]:
# translate each selected label into its code, pairing option groups and
# selections by position, then index the codes by the model's column names.
# NOTE: cat_input is rebound from a list of labels to a Series of codes here,
# so re-running this cell on its own would look the codes up as labels.
option_groups = list(input_options)[1:]
selected_codes = [
    input_options[group_name][label]
    for group_name, label in zip(option_groups, cat_input)
]
cat_input = pd.Series(selected_codes, index=cat_items)
cat_input

FUNCTIONAL_CLASS_026       9
HISTORY_037                1
OPEN_CLOSED_POSTED_041     R
SERVICE_UND_042B           3
STRUCTURE_KIND_043A        8
SUPERSTRUCTURE_COND_059    5
DECK_STRUCTURE_TYPE_107    5
SURFACE_TYPE_108A          6
MEMBRANE_TYPE_108B         2
DECK_PROTECTION_108C       9
dtype: object

In [49]:
# write the user-selected codes over the matching default entries (matched by label)
selected_columns = cat_input.index
cat_feat.loc[selected_columns] = cat_input
cat_feat

TOLL_020                   3
MAINTENANCE_021            1
FUNCTIONAL_CLASS_026       9
DESIGN_LOAD_031            6
MEDIAN_CODE_033            0
STRUCTURE_FLARED_035       0
RAILINGS_036A              0
TRANSITIONS_036B           0
APPR_RAIL_036C             1
APPR_RAIL_END_036D         0
HISTORY_037                1
NAVIGATION_038             0
OPEN_CLOSED_POSTED_041     R
SERVICE_ON_042A            1
SERVICE_UND_042B           3
STRUCTURE_KIND_043A        8
STRUCTURE_TYPE_043B        2
APPR_KIND_044A             0
APPR_TYPE_044B             0
DECK_STRUCTURE_TYPE_107    5
SURFACE_TYPE_108A          6
MEMBRANE_TYPE_108B         2
DECK_PROTECTION_108C       9
DECK_COND_058              6
SUPERSTRUCTURE_COND_059    5
SUBSTRUCTURE_COND_060      6
dtype: object

In [50]:
# one-hot encode the single row of categorical values; only the generated
# '<column>_<value>' names are kept, each flagged with 1
cat_feat_row = pd.DataFrame(cat_feat).T
dummy_columns = pd.get_dummies(cat_feat_row).columns
cat_feat_enc = pd.Series(1, index=dummy_columns)
cat_feat_enc

TOLL_020_3.0                 1
MAINTENANCE_021_1.0          1
FUNCTIONAL_CLASS_026_9.0     1
DESIGN_LOAD_031_6.0          1
MEDIAN_CODE_033_0.0          1
STRUCTURE_FLARED_035_0.0     1
RAILINGS_036A_0              1
TRANSITIONS_036B_0           1
APPR_RAIL_036C_1             1
APPR_RAIL_END_036D_0         1
HISTORY_037_1.0              1
NAVIGATION_038_0             1
OPEN_CLOSED_POSTED_041_R     1
SERVICE_ON_042A_1            1
SERVICE_UND_042B_3           1
STRUCTURE_KIND_043A_8        1
STRUCTURE_TYPE_043B_2        1
APPR_KIND_044A_0.0           1
APPR_TYPE_044B_0.0           1
DECK_STRUCTURE_TYPE_107_5    1
SURFACE_TYPE_108A_6          1
MEMBRANE_TYPE_108B_2         1
DECK_PROTECTION_108C_9       1
DECK_COND_058_6              1
SUPERSTRUCTURE_COND_059_5    1
SUBSTRUCTURE_COND_060_6      1
dtype: int64

### Numerical

In [51]:
# override default with numerical input
# Compare against None instead of relying on truthiness: a rating of 0 is falsy,
# so `if num_input:` would silently keep the default for a valid zero rating.
if num_input is not None:
    num_feat['SUFFICIENCY_RATING_feat_yr'] = num_input
num_feat

MIN_VERT_CLR_010                98.647907
DETOUR_KILOS_019                27.702257
AGE                             52.727883
TRAFFIC_LANES_ON_028A            2.178157
TRAFFIC_LANES_UND_028B           0.782184
ADT_029                       8298.545455
APPR_WIDTH_MT_032                8.782001
DEGREES_SKEW_034                13.001220
NAV_VERT_CLR_MT_039              0.156681
NAV_HORR_CLR_MT_040              0.437157
MAIN_UNIT_SPANS_045              2.597926
APPR_SPANS_046                   0.342892
HORR_CLR_MT_047                  8.366626
MAX_SPAN_LEN_MT_048             15.430506
STRUCTURE_LEN_MT_049            51.870897
LEFT_CURB_MT_050A                0.335937
RIGHT_CURB_MT_050B               0.345149
ROADWAY_WIDTH_MT_051             8.642099
DECK_WIDTH_MT_052                9.885906
VERT_CLR_OVER_MT_053            98.916571
VERT_CLR_UND_054B                1.377322
LAT_UND_MT_055B                  1.128920
LEFT_LAT_UND_MT_056              2.551556
RECON_AGE                       49

## Run Model

In [52]:
# override blank input series
# The blank series is created from integer zeros; cast it to float first so that
# writing the fractional numerical defaults does not depend on implicit
# int -> float upcasting during assignment (deprecated in recent pandas).
input_series = input_series.astype('float64')
input_series[num_feat.index] = num_feat
input_series[cat_feat_enc.index] = cat_feat_enc

In [53]:
# final model input after the numerical and categorical overrides
input_series

MIN_VERT_CLR_010                98.647907
DETOUR_KILOS_019                27.702257
AGE                             52.727883
TRAFFIC_LANES_ON_028A            2.178157
TRAFFIC_LANES_UND_028B           0.782184
ADT_029                       8298.545455
APPR_WIDTH_MT_032                8.782001
DEGREES_SKEW_034                13.001220
NAV_VERT_CLR_MT_039              0.156681
NAV_HORR_CLR_MT_040              0.437157
MAIN_UNIT_SPANS_045              2.597926
APPR_SPANS_046                   0.342892
HORR_CLR_MT_047                  8.366626
MAX_SPAN_LEN_MT_048             15.430506
STRUCTURE_LEN_MT_049            51.870897
LEFT_CURB_MT_050A                0.335937
RIGHT_CURB_MT_050B               0.345149
ROADWAY_WIDTH_MT_051             8.642099
DECK_WIDTH_MT_052                9.885906
VERT_CLR_OVER_MT_053            98.916571
VERT_CLR_UND_054B                1.377322
LAT_UND_MT_055B                  1.128920
LEFT_LAT_UND_MT_056              2.551556
RECON_AGE                       49

In [54]:
# load saved model
# NOTE: joblib files are pickle-based, so only load model files from a trusted source
model_path = '../model_output/model.sav'
model = joblib.load(model_path)

In [60]:
# predict() receives a one-row batch; print the label for that single row
predicted_labels = model.predict([input_series])
print(predicted_labels[0])

not poor


In [56]:
# class labels known to the model; presumably in the same order as the
# predict_proba columns (confirm against the scikit-learn documentation)
model.classes_

array(['not poor', 'poor'], dtype=object)

In [57]:
# class probabilities for the same one-row batch
class_probabilities = model.predict_proba([input_series])
class_probabilities

array([[0.99563995, 0.00436005]])