In [1]:
import pymongo
from pymongo import MongoClient
import time
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import csv
from sklearn import preprocessing
from nbi import *

In [2]:
# Open a handle on the "bridges" database via the nbi helper connectToNBI.
# NOTE(review): the username/password are embedded in this URI and will be
# committed with the notebook -- consider reading the URI from an environment
# variable instead. `collection` is not referenced again in the visible cells.
collection = connectToNBI("bridges","mongodb://research:superSMART1%3A%3A@ist177a-mongo.ist.unomaha.edu/admin")

In [20]:
# Lookup table: two-digit state code (as a zero-padded string) -> two-letter
# state/territory abbreviation. Keys are strings, so '04' and 4 are not
# interchangeable when indexing this dict.
code_state_mapping = {'25':'MA',
                            '04':'AZ',
                            '08':'CO',
                            '38':'ND',
                            '09':'CT',
                            '19':'IA',
                            '26':'MI',
                            '48':'TX',
                            '35':'NM',
                            '17':'IL',
                            '51':'VA',
                            '23':'ME',
                            '16':'ID',
                            '36':'NY',
                            '56':'WY',
                            '29':'MO',
                            '39':'OH',
                            '28':'MS',
                            '11':'DC',
                            '21':'KY',
                            '18':'IN',
                            '06':'CA',
                            '47':'TN',
                            '12':'FL',
                            '24':'MD',
                            '34':'NJ',
                            '46':'SD',
                            '13':'GA',
                            '55':'WI',
                            '30':'MT',
                            '54':'WV',
                            '15':'HI',
                            '32':'NV',
                            '37':'NC',
                            '10':'DE',
                            '33':'NH',
                            '44':'RI',
                            '50':'VT',
                            '42':'PA',
                            '05':'AR',
                            '20':'KS',
                            '45':'SC',
                            '22':'LA',
                            '40':'OK',
                            '72':'PR',
                            '41':'OR',
                            '27':'MN',
                            '53':'WA',
                            '01':'AL',
                            '31':'NE',
                            '02':'AK',
                            '49':'UT'
                   }

In [33]:
# State codes to query (these are numeric code strings, not initials).
# '31' maps to 'NE' in code_state_mapping; to pull every state instead use:
#   states = list(code_state_mapping.keys())
states = ['31']

# Survey years to pull: every year from 1992 through 2017 inclusive.
years = list(range(1992, 2018))

# Fields requested for each survey record. 1/0 entries include/exclude a
# field; the "$..." entries expose nested structureTypeMain values under a
# flat name (presumably a MongoDB projection -- confirm in getSurveyRecords).
fields = {
    "_id": 0,
    "year": 1,  # year of the survey
    "stateCode": 1,
    "structureNumber": 1,
    "yearBuilt": 1,
    "deck": 1,
    "substructure": 1,
    "superstructure": 1,
    "lengthOfMaximumSpan": 1,
    "kindOfMaterial": "$structureTypeMain.kindOfMaterialDesign",
    "Structure Type": "$structureTypeMain.typeOfDesignConstruction",
}

# Database name
db = "bridges"

# Connection string
# NOTE(review): credentials are embedded in this URI; consider loading it from
# an environment variable before sharing or committing the notebook.
connection_string = "mongodb://research:superSMART1%3A%3A@ist177a-mongo.ist.unomaha.edu/admin"

survey_records = getSurveyRecords(states, years, fields, db, connection_string)

In [34]:
len(survey_records)

415869

In [35]:
survey_records['year'].unique()

array([1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
       2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013,
       2014, 2015, 2016, 2017])

In [36]:
# Keep only the columns used by the rest of the notebook, in display order.
# 'substructure' and 'superstructure' were requested in `fields` but are
# dropped here. This rebinds survey_records, so re-running the cell after new
# columns have been added will discard them again.
survey_records = survey_records[['structureNumber', 'stateCode','year', 'yearBuilt', 'deck', 'lengthOfMaximumSpan','kindOfMaterial','Structure Type']]

In [37]:
survey_records.head()

Unnamed: 0,structureNumber,stateCode,year,yearBuilt,deck,lengthOfMaximumSpan,kindOfMaterial,Structure Type
0,C000100405,31,1992,1925,5,8.8,1,11
1,C000100305,31,1992,1935,6,9.4,7,2
2,C000100505P,31,1992,1974,8,9.4,3,2
3,C000100305P,31,1992,1935,7,6.7,1,1
4,C000100910,31,1992,1968,7,12.2,1,1


In [38]:
def retMaterialNames(structure_type_coding):
    """Translate kind-of-material codes into human-readable material names.

    :param structure_type_coding: pandas Series of integer kind-of-material
        codes (one entry per survey record).
    :return: pandas Series with the same index, holding the material name for
        each code. Codes that are not in the lookup table become NaN.

    Code 0 is mapped to "Other", matching the convention used by
    retStructureType; previously 0 was missing and surfaced as NaN.
    """
    kind_of_material = {
        0: "Other",
        1: "Concrete",
        2: "Concrete Continuous",
        3: "Steel",
        4: "Steel Continuous",
        5: "Prestressed Concrete",
        6: "Prestressed Concrete Continuous",
        7: "Wood or Timber",
        8: "Masonry",
        9: "Aluminum, Wrought Iron, or Cast Iron",
        # Retained so callers that relied on the old 10 -> "Other" entry
        # keep getting the same answer.
        10: "Other",
    }

    material_names = structure_type_coding.map(kind_of_material)

    return material_names

In [39]:
survey_records['Kind of Material'] = retMaterialNames(survey_records['kindOfMaterial'])

In [40]:
def retStructureType(structure_type_coding):
    """
      Translate structure-type (design/construction) codes into their names.

      :param structure_type_coding: pandas Series of integer structure type codes
      :return: pandas Series (same index) of structure type names; codes that
               are absent from the lookup table map to NaN

    """
    # Lookup table: code -> design/construction description.
    names_by_code = {
        0: "Other",
        1: "Slab",
        2: "Stringer/Multi-beam or Girder",
        3: "Girder and Floorbeam System",
        4: "Tee Beam",
        5: "Box Beam or Girders - Multiple",
        6: "Box Beam or Girders - Single or Spread",
        7: "Frame (except frame culverts)",
        8: "Orthotropic",
        9: "Truss - Deck",
        10: "Truss - Thru",
        11: "Arch - Deck",
        12: "Arch - Thru",
        13: "Suspension",
        14: "Stayed Girder",
        15: "Movable - Lift",
        16: "Movable - Bascule",
        17: "Movable - Swing",
        18: "Tunnel",
        19: "Culvert (includes frame culverts)",
        20: "Mixed types",
        21: "Segmental Box Girder",
        22: "Channel Beam",
    }

    return structure_type_coding.map(names_by_code)

In [41]:
survey_records['Structure Type'] = retStructureType(survey_records['Structure Type'])

In [42]:
survey_records.shape

(415869, 9)

### Data Filtering

##### Bridges constructed between 1987 and 1991

In [16]:
#survey_records = survey_records[(survey_records['yearBuilt']>=1987) & (survey_records['yearBuilt'] <=1991)]

#### Test: Bridges constructed between 1987 and 1991

In [18]:
#survey_records.shape

(33163, 9)

In [19]:
#survey_records['yearBuilt'].unique()

array([1987, 1988, 1990, 1991, 1989])

#### Bridges with steel and girder design

In [31]:
# survey_records = survey_records[survey_records['Kind of Material'] == 'Steel']
# survey_records = survey_records[survey_records['Structure Type'] == 'Stringer/Multi-beam or Girder']

In [32]:
survey_records.shape

(139879, 9)

##### Test Kind of Material = Steel

In [43]:
survey_records['Kind of Material'].unique()

array(['Concrete', 'Wood or Timber', 'Steel', 'Prestressed Concrete',
       'Concrete Continuous', 'Steel Continuous',
       'Prestressed Concrete Continuous', 'Masonry',
       'Aluminum, Wrought Iron, or Cast Iron', nan], dtype=object)

#### Test: Structure Type = Stringer/Multi-beam or Girder

In [46]:
survey_records['Structure Type'].unique()

array(['Stringer/Multi-beam or Girder'], dtype=object)

In [44]:
survey_records.columns

(415869, 9)

### Create Timeseries of Bridge Data

In [47]:
def createTimeseries(survey_records):
    """Collapse per-survey rows into one time-series row per bridge.

    Records are grouped by 'structureNumber'. For every group the result
    holds one list of the form
    [structureNumber, stateCode values, year values, yearBuilt values,
     deck values, lengthOfMaximumSpan values, kindOfMaterial values,
     'Kind of Material' values, 'Structure Type' values],
    where each "values" entry is a plain Python list in the group's row order.

    :param survey_records: DataFrame containing the columns named above
    :return: list of such rows, in the order pandas groupby yields the groups
    """
    # Columns gathered for each bridge, in output order.
    gathered_columns = (
        'stateCode',
        'year',
        'yearBuilt',
        'deck',
        'lengthOfMaximumSpan',
        'kindOfMaterial',
        'Kind of Material',
        'Structure Type',
    )

    survey_timeseries = []
    for structure_number, bridge_rows in survey_records.groupby('structureNumber'):
        timeseries_row = [structure_number]
        for column_name in gathered_columns:
            timeseries_row.append(bridge_rows[column_name].tolist())
        survey_timeseries.append(timeseries_row)
    return survey_timeseries
    

In [48]:
survey_recs  = createTimeseries(survey_records)

In [54]:
len(survey_recs)

17469

In [55]:
structureNumbers_NE = [i[0] for i in survey_recs]

In [58]:
df = pd.DataFrame({'Structure Number':structureNumbers_NE})

In [61]:
df.to_excel('bridges-NE.xlsx')

In [50]:
# One column name per element of each createTimeseries row: the structure
# number followed by the eight per-bridge lists, in the order they are built
# (stateCode, year, yearBuilt, deck, lengthOfMaximumSpan, kindOfMaterial,
# 'Kind of Material', 'Structure Type'). The previous list carried an extra
# 'StructureType' name (10 names for 9 values), which makes the DataFrame
# constructor raise ValueError.
survey_timeseries_df = pd.DataFrame(
    survey_recs,
    columns=['Structure Number', 'State Code', 'Year Of Survey', 'Year Built', 'Deck',
             'Length Of Maximum Span', 'kindOfMaterial', 'Kind of Material', 'Structure Type'],
)

In [51]:
survey_timeseries_df = survey_timeseries_df[['Structure Number','State Code','Year Of Survey','Year Built', 'Deck', \
                                            'Length Of Maximum Span', 'Kind of Material', 'Structure Type']]

In [52]:
survey_timeseries_df.shape

(9064, 8)

#### Flatten the fields

In [53]:
def all_the_same(elements):
    """Return True when every item of the list `elements` equals its first item.

    An empty (falsy) input counts as "all the same" and yields True.
    The check compares `elements` against a list made of its first item
    repeated len(elements) times, so a non-list sequence such as a tuple
    compares unequal and yields False.
    """
    if elements:
        first_item = elements[0]
        repeated_first = [first_item] * len(elements)
        return repeated_first == elements
    return True

survey_timeseries_df['No Change Span'] = [all_the_same(row) for row in survey_timeseries_df['Length Of Maximum Span']]

In [54]:
# Replace each per-bridge list in these columns with its first recorded value.
# The isinstance guard leaves values that are already scalars untouched, so
# re-running this cell is a no-op instead of slicing strings ('31' -> '3') or
# raising TypeError on numbers.
for _column in ['State Code', 'Year Built', 'Length Of Maximum Span',
                'Kind of Material', 'Structure Type']:
    survey_timeseries_df[_column] = [
        _values[0] if isinstance(_values, list) else _values
        for _values in survey_timeseries_df[_column]
    ]
del _column  # do not leave the loop variable behind in the notebook namespace

### Testing length of the span

In [55]:
survey_timeseries_df['No Change Span'].unique()

array([ True, False])

In [56]:
survey_timeseries_df.shape

(9064, 9)

In [58]:
final_df = survey_timeseries_df[survey_timeseries_df['No Change Span'] == True]

In [59]:
final_df.shape

(6985, 9)

In [60]:
final_df

Unnamed: 0,Structure Number,State Code,Year Of Survey,Year Built,Deck,Length Of Maximum Span,Kind of Material,Structure Type,No Change Span
0,000000000000015,51,"[1992, 1993, 2017]",1987,"[9, 9, 6]",22.3,Steel,Stringer/Multi-beam or Girder,True
1,000000000000117,51,"[2003, 2003, 2004, 2004, 2005, 2005, 2006, 200...",1988,"[7, NA, 7, NA, 7, NA, 7, NA, 7, NA, 7, NA, 7, ...",23.2,Steel,Stringer/Multi-beam or Girder,True
2,000000000000158,51,"[1992, 1993, 1994, 1995, 1996, 1997, 1998, 199...",1987,"[8, 8, 8, 8, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, ...",18.0,Steel,Stringer/Multi-beam or Girder,True
4,000000000000502,26,"[2011, 2012, 2013, 2014, 2015, 2016, 2017]",1987,"[7, 7, 7, 7, 7, 7, 7]",22.6,Steel,Stringer/Multi-beam or Girder,True
6,000000000001027,35,"[2009, 2010, 2011, 2012]",1987,"[7, 7, 7, 7]",63.4,Steel,Stringer/Multi-beam or Girder,True
7,000000000001054,37,"[1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999]",1989,"[7, 7, 7, 7, 7, 7, 7, 7]",12.2,Steel,Stringer/Multi-beam or Girder,True
8,000000000001056,42,"[2011, 2012, 2013, 2014, 2015, 2016, 2017]",1987,"[7, 7, 7, 7, 7, 7, 6]",36.6,Steel,Stringer/Multi-beam or Girder,True
9,000000000001130,37,"[1992, 1992, 1993, 1993, 1994, 1994, 1995, 199...",1991,"[9, NA, 9, NA, 7, NA, 7, NA, 7, NA, 7, NA, 6, ...",36.6,Steel,Stringer/Multi-beam or Girder,True
10,000000000001140,37,"[1992, 1992, 1993, 1993, 1994, 1994, 1995, 199...",1991,"[9, NA, 9, NA, 7, NA, 7, NA, 7, NA, 7, NA, 7, ...",29.3,Steel,Stringer/Multi-beam or Girder,True
11,000000000001142,37,"[1992, 1992, 1993, 1993, 1994, 1994, 1995, 199...",1991,"[9, NA, 9, NA, 7, NA, 7, NA, 6, NA, 6, NA, 7, ...",28.3,Steel,Stringer/Multi-beam or Girder,True


In [61]:
final_df.to_csv('steel-girder-1987-1991-bridges-same-span.csv')