In [2]:
import pymongo
from pymongo import MongoClient
import time
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import csv

In [392]:
def connectToNBI(collection_name,string):
    """Connects to NBI mongodb instance and returns a collection"""
    Client = MongoClient(string)
    db = Client.nbi
    collection = db[collection_name]
    return collection

def getSurveyRecords(states, years, collection_name):
    """returns survey records of provided states, years, and collection. need one more argument for fields"""
    masterdec = []
    for yr in years:
        for state in states:
            pipeline = [{"$match":{"$and":[{"year":yr},{"stateCode":state}]}},
                        {"$project":{"_id":0,
                         "year":1,
                         "stateCode":1, 
                         "countyCode":1,
                         "structureNumber":1,
                         "yearBuilt":1,
                         "averageDailyTraffic":1,
                         "deck":1,
                         "substructure":1, ## rating of substructure
                         "superstructure":1, ## rating of superstructure
                         "Structure Type":"$structureTypeMain.kindOfMaterialDesign",
                         "Type of Wearing Surface":"$wearingSurface/ProtectiveSystem.deckProtection",
                         }}]
            dec = collection.aggregate(pipeline)

            for i in list(dec):
                masterdec.append(i)
    survey_records = pd.DataFrame(masterdec)
    return survey_records

def retMaterialNames(structure_type_coding):
    """ return kind of material name of the structure type code"""
    
    kind_of_material = {
                            1:"Concrete",
                            2:"Concrete Continuous",
                            3:"Steel",
                            4:"Steel Continuous",
                            5:"Prestressed Concrete",
                            6:"Prestressed Concrete Continuous",
                            7:"Wood or Timber",
                            8:"Masonry",
                            9:"Aluminum, Wrought Iron, or Cast Iron",
                            10:"Other",
                       }
    
    material_names = structure_type_coding.map(kind_of_material)
    
    return material_names

def retDeckProctectionNames(type_of_wearing_surface_coding):
    """ return deck protection name of the type of wearing code """
    
    deck_protection = {
                        '1':'Epoxy Coated Reinforcing',
                        '2':'Galvanized Reinforcing',
                        '3':'Other Coated Reinforcing',
                        '4':'Cathodic Protection',
                        '6':'Polymer Impregnated',
                        '7':'Internally Sealed',
                        '8':'Unknown',
                        '9':'Other',
                        '0':'None',
                        'N':'Not Applicable'
        
                      }
    
    deck_protection_names = type_of_wearing_surface_coding.map(deck_protection)
    
    return deck_protection_names

def filterSurveyData(survey_records):
    """ returns filtred survey records, length of survey records before filteration and after filteration """   
    
    before_filtr = len(survey_records) ## Length of survey record before filtering
    
    ## Filtring Criteria for  deck, substructure and superstructure
    survey_records = survey_records.loc[~survey_records['deck'].isin(['N','NA'])]
    survey_records = survey_records.loc[~survey_records['substructure'].isin(['N','NA'])]
    survey_records = survey_records.loc[~survey_records['superstructure'].isin(['N','NA'])]
    
    ## discards survey records of Structure type - 19  and Type of Wearing Surface - 6
    survey_records = survey_records.loc[~survey_records['Structure Type'].isin([19])]
    survey_records = survey_records.loc[~survey_records['Type of Wearing Surface'].isin(['6'])]
  

    after_filtr = len(survey_records) ## Length of survey record before filtering
    
    return survey_records, before_filtr, after_filtr

def getAges(bridgeSurveyYear, builtSurveyYearBuilt):
    """ returns age of bridges """
    return bridgeSurveyYear - builtSurveyYearBuilt


def categorizeBridgesByADT(ADT): 
    """ returns A list of class of the bridge as define by Author in so and so """
    class_of_bridges_adt = []
    for adt in ADT:
        if adt < 100:
            class_of_bridges_adt.append('Very Light')     
        elif 100 <= adt < 1000:
            class_of_bridges_adt.append('Light')
        elif 1000 <= adt < 5000:
            class_of_bridges_adt.append('Moderate')
        elif 5000 <= adt:
            class_of_bridges_adt.append('Heavy')
        else:
            class_of_bridges_adt.append('IDK')
    return class_of_bridges_adt

def countCats(list_of_classes):
    """ returns a dictionary of count of all categories of the bridges """
    return {cls:list_of_classes.count(cls) for cls in set(list_of_classes)}



def codeToState(list_of_statecode):
    """return a list of name of states """
    code_state_mapping =   {'25':'MA',
                            '04':'AZ',
                            '08':'CO',
                            '38':'ND',
                            '09':'CT',
                            '19':'IA',
                            '26':'MI',
                            '48':'TX',
                            '35':'NM',
                            '17':'IL',
                            '51':'VA',
                            '23':'ME',
                            '16':'ID',
                            '36':'NY',
                            '56':'WY',
                            '29':'MO',
                            '39':'OH',
                            '28':'MS',
                            '11':'DC',
                            '21':'KY',
                            '18':'IN',
                            '06':'CA',
                            '47':'TN',
                            '12':'FL',
                            '24':'MD',
                            '34':'NJ',
                            '46':'SD',
                            '13':'GA',
                            '55':'WI',
                            '30':'MT',
                            '54':'WV',
                            '15':'HI',
                            '32':'NV',
                            '37':'NC',
                            '10':'DE',
                            '33':'NH',
                            '44':'RI',
                            '50':'VT',
                            '42':'PA',
                            '05':'AR',
                            '20':'KS',
                            '45':'SC',
                            '22':'LA',
                            '40':'OK',
                            '72':'PR',
                            '41':'OR',
                            '27':'MN',
                            '53':'WA',
                            '01':'AL',
                            '31':'NE',
                            '02':'AK',
                            '49':'UT'
                   }
    
    state_names = [code_state_mapping[statecode] for statecode in  list_of_statecode]
    return state_codes

def stateToCode(list_of_statename):
    """ return a list of state code from state name """
    code_state_mapping =   {'25':'MA',
                            '04':'AZ',
                            '08':'CO',
                            '38':'ND',
                            '09':'CT',
                            '19':'IA',
                            '26':'MI',
                            '48':'TX',
                            '35':'NM',
                            '17':'IL',
                            '51':'VA',
                            '23':'ME',
                            '16':'ID',
                            '36':'NY',
                            '56':'WY',
                            '29':'MO',
                            '39':'OH',
                            '28':'MS',
                            '11':'DC',
                            '21':'KY',
                            '18':'IN',
                            '06':'CA',
                            '47':'TN',
                            '12':'FL',
                            '24':'MD',
                            '34':'NJ',
                            '46':'SD',
                            '13':'GA',
                            '55':'WI',
                            '30':'MT',
                            '54':'WV',
                            '15':'HI',
                            '32':'NV',
                            '37':'NC',
                            '10':'DE',
                            '33':'NH',
                            '44':'RI',
                            '50':'VT',
                            '42':'PA',
                            '05':'AR',
                            '20':'KS',
                            '45':'SC',
                            '22':'LA',
                            '40':'OK',
                            '72':'PR',
                            '41':'OR',
                            '27':'MN',
                            '53':'WA',
                            '01':'AL',
                            '31':'NE',
                            '02':'AK',
                            '49':'UT'
                   }
    
    reverse_code_state_map = {value: key for key, value in code_state_mapping.items()}
    state_names = [code_state_mapping[statename] for statecode in  list_of_statename]
    return state_names

def getDictFipsCounty(fips, counties):
    """return a dictionary of key: fip and  value: county"""
    return {fip:county for fip, county in zip(fips,counties)}
    
def getDictFipsAdp(fips, daily_precps):
    """return a dictionary of key: fips and value: daily_precps"""
    return {fip:dlp for fip, dlp in zip(fips,daily_precps)}
    
def correctFips(stateCodes, FIPS):
    """ return a list of correctFips from FIPS"""
    return [int(str(stateCode) + str(fcode).zfill(3)) for stateCode,fcode in zip(stateCodes,FIPS)]

def getCounty(FIPS, counties):
    """ returns a list of county from FIPS"""
    county = []
    for fip in FIPS:
        try:
            county.append(getCountyFips(FIPS, counties)[int(fip)])
        except:
            county.append("NA")
    return county

def getAverageDailyPrecp(FIPS, daily_precps):
    """ returns a list of average daily precipitation from  FIPS"""
    avg_daily_precp = []
    for fip in FIPS:
        try:
            avg_daily_precp.append(getDailyPrecpOfFips(fips, daily_precps)[int(fip)])
        except:
            avg_daily_precp.append(-1)
    return avg_daily_precp  


def getDict(keys, values):
    """ return a dictionary of key and value"""
    return {key:value for key, value in zip(keys,values)}


################################################## FUNCTIONS TO SPLIT RECORDS (INTERVENTIONS) ####################################################################
def createTimeseries(survey_records):
    """Create time series data from the loose records"""
    survey_timeseries = [[key]+[col for col in value] for key, value in {k:[g['Age'].tolist(),g['ADT Type'].tolist(),g['superstructure'].tolist()] for k, g in survey_records.groupby('structureNumber')}.items()]
    # for key, value in {k:[g['Age'].tolist(),g['ADT Type'].tolist(),g['Category'],g['superstructure'].tolist()] for k, g in survey_records.groupby('structureNumber')}.items():
    return survey_timeseries
    

## 2nd in sequence
def createProfile(data):
    """ this function creates a profile to split records"""
    counter = 0
    profile = [True]
    while counter+1 < len(data):
        if data[counter]<data[counter+1]:
            profile.append(True)
        else:
            profile.append(False)
            profile.append(True)
        counter = counter + 1
    return profile


def utilitySplitBridgeRecords(data, profile):
    """ The ultility function to split records by intervention"""
    counter = 0
    main_list = []
    temp_list = []
    for bval in profile:
        if bval == True:
            temp_list.append(data[counter])
            counter  = counter + 1 
        else:
            main_list.append(temp_list)
            temp_list = []
            
    main_list.append(temp_list)
    return main_list


def splitSurveyRecords(survey_timeseries,profile):
    """return split records of bridge to account intervention like Rebuilt, Reconstruction, and Rehabilitation"""
    temp = []
    for i in survey_timeseries:
        profile = createProfile((i[1]))
        temp_list = []
        temp_list.append(i[0])
        for row in i[1:]:
            split_records = utilitySplitBridgeRecords(row, profile)
            temp_list.append(split_records)
        temp.append(temp_list)
    return temp

## combine function:
def combinedStructureNumberWithRecords(structure_numbers_split_records, s):
    """Combine function of split structure numbers with the rest of the records"""
    combined_records = []
    for h,j in zip(structure_numbers_split_records, s):
        combined_records.append([h]+j[1:])
    return combined_records

def splitStructureNumbers(s):
    structure_numbers_split_records = []
    for i in s:
        len_K = len(i[1])
        structureNumber = i[0]
        structureNumbers = []
        for k in range(len_K):
            stNumber=(structureNumber+'_'+str(k+1))
            structureNumbers.append(stNumber)
        structure_numbers_split_records.append(structureNumbers)
    return structure_numbers_split_records

def createIndividualRecords(survey_records):
    """ create split records from individual records """
    split_by_intervention_survey_records = []
    length_i = len(combined_function[0])
    for i in combined_function:
        length = len(i[1])
        for j in range(length):
            split_temp1 = []
            for k in range(0,length_i):
                split_temp1.append(i[k][j])
            split_by_intervention_survey_records.append(split_temp1)
    return split_by_intervention_survey_records


################################### SLOPES ##########################################################
def computeSlope(AgeFlatList,subsRatingsFlatList):
    """ compute slope of the bridge from its condition rating and age"""
    ## the code goes here
    Slopes = []
    averageSlopes = []
    temp = []

    ## Note subs  = ConditionRatings
    for age, subs in zip(AgeFlatList, subsRatingsFlatList):
        j = 0
        first_pointer = 0
        second_pointer = 1
        temp_1 = []
        finalList = []
        intervention = []
        for i in range(0,len(subs)):
            if second_pointer < len(subs): 
                # if the first pointer Condition Ratings is less that the second Condition Rating number
                if subs[first_pointer] <  subs[second_pointer]:
                    ## split points
        #             print(conditionRating[first_pointer])
                    temp.append(subs[first_pointer])
                    finalList.append(temp_1)
                    temp_1 = []
                    #print(from_to_matrix[str(conditionRating[first_pointer])+'-'+str(conditionRating[second_pointer])])
                    #intervention.append(from_to_matrix[str(subs[first_pointer])+'-'+str(subs[second_pointer])])

                else:
                    temp_1.append(subs[first_pointer])

                first_pointer = first_pointer + 1
                second_pointer = second_pointer + 1

            else:

                temp_1.append(subs[first_pointer])

                finalList.append(temp_1)

        slopes_of_the_bridges = []
        for rating in finalList:
            if len(rating) != 0:
                try:
                    slopes_of_the_bridges.append(((int(rating[-1]) - int(rating[0])) / len(rating))*len(rating))
                except:
                    pass
                    #print(rating[-1])
            else:
                pass
        #print(slopes_of_the_bridges)
        count = 0
        for i in finalList:
            for j in i:
                count = count + 1
        Slopes.append(sum(slopes_of_the_bridges) / count)
    return Slopes



############################## COMPUTATION OF BASELINE DIFFERENCE SCORE ########################

In [72]:
collection = connectToNBI("bridges","mongodb://research:superSMART1%3A%3A@ist177a-mongo.ist.unomaha.edu/admin")

survey_records = getSurveyRecords(['31'],[1992,1993],collection)

survey_records, before, after = filterSurveyData(survey_records)

survey_records['Material'] = retMaterialNames(survey_records['Structure Type'])

survey_records['Deck Protection'] = retDeckProctectionNames(survey_records['Type of Wearing Surface'])

survey_records['Age'] = getAges(survey_records['year'], survey_records['yearBuilt'])

survey_records['ADT Type'] = categorizeBridgesByADT(survey_records['averageDailyTraffic']) 

df_precp = pd.read_csv('new_climate_data.csv')

df_precp.drop('Unnamed: 0', axis = 1, inplace = True)

survey_records['FIPS'] = correctFips(survey_records['stateCode'], survey_records['countyCode'])

survey_records['County '] = survey_records['FIPS'].map(getDict(df_precp['FIPS'], df_precp['County']))

survey_records['Avg. Daily Precipitation (mm)'] = survey_records['FIPS'].map(getDict(df_precp['County Code'], df_precp['Avg Daily Precipitation (mm)']))

survey_timeseries = createTimeseries(survey_records)

profile = createProfile(survey_timeseries)

survey_timeseries = splitSurveyRecords(survey_timeseries, profile)

survey_timeseries_split_structureNumbers = splitStructureNumbers(survey_timeseries)

survey_timeseries = combinedStructureNumberWithRecords(survey_timeseries_split_structureNumbers, survey_timeseries)

survey_timeseries = pd.DataFrame(survey_timerseries,  columns=['Structure Number', 'Age', 'ADT Type', 'Superstructure'])

age_flat_list = survey_timeseries_df['Age'].tolist()

superstructure_flat_list = survey_timeseries_df['Superstructure'].tolist()

slopes = computeSlope(age_flat_list,superstructure_flat_list)

survey_timeseries_df['Deterioration Score'] = slopes

############################## COMPUTATION OF BASELINE DIFFERENCE SCORE ##################################
list_of_avgs, dict_of_avgs, counts_of_aba = getListOfAvgs(age_flat_list, superstructure_flat_list)

baselineScores = computeBaselineScore(age_flat_list, superstructure_flat_list, dict_of_avgs)

  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)


In [339]:
def getListOfAvgs(AgeFlatList, subsRatingsFlatList):
    """ returns average condition rating of the bridges at a age"""
    sum_by_age = {}
    counts_of_aba = {}
    
    for i in range(0,150,1):
        sum_by_age[i] = 0
        counts_of_aba[i] = 0


    for i,j in zip(subsRatingsFlatList,AgeFlatList):
        for rating, age in zip(i,j):
            #print(rating, age)
            try:
                counts_of_aba[age]= counts_of_aba[age] + 1
                sum_by_age[age] = sum_by_age[int(age)] + int(rating)
            except:
                pass

    list_of_avgs = [sums/count for sums, count in zip(sum_by_age.values(), counts_of_aba.values()) if count !=0 ]
    ages = [age for age in range(1,101,1)]
    dict_of_avgs = {age:avg for age, avg in zip(ages,list_of_avgs)}
    return list_of_avgs, dict_of_avgs, counts_of_aba

In [380]:
list_of_avgs, dict_of_avgs, counts_of_aba = getListOfAvgs(age_flat_list, superstructure_flat_list)

In [388]:
baselineScores = computeBaselineScore(age_flat_list, superstructure_flat_list, dict_of_avgs)

  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)


In [387]:
def computeBaselineScore(ages, condition_ratings, dict_of_avgs):
    """Computation of Baseline Score"""
    scores_temp = []
    for ratings, age in zip(condition_ratings, ages):
        temp_list = []
        for rating, a in zip(ratings,age):
            try:
                temp_list.append(int(rating) -  dict_of_avgs[a])
            except:
                pass
        scores_temp.append(temp_list)
    scores = [np.mean(score) for score in scores_temp]
    return scores


# west_df['Method3 Score'] = spc_west
# west_df = west_df.dropna()
# standardized_method3_score = preprocessing.scale(west_df['Method3 Score'])
# # west_df['Scaled Slope Score'] = standardized_slope_score
# west_df['Scaled Method3 Score'] = standardized_method3_score

In [396]:
survey_timeseries_df['Deterioration Score'] = slopes
survey_timeseries_df['Baseline Difference Score'] = baselineScores

In [398]:
standardized_method3_score = preprocessing.scale(west_df['Method3 Score'])
survey_timeseries_df

Unnamed: 0,Structure Number,Age,ADT Type,Superstructure,Deterioration Score,Baseline Difference Score
0,C000100305_1,"[57, 58]","[Very Light, Very Light]","[7, 7]",0.0,0.990234
1,C000100305P_1,"[57, 58]","[Very Light, Very Light]","[7, 7]",0.0,0.990234
2,C000100405_1,"[67, 68]","[Very Light, Very Light]","[7, 7]",0.0,1.142500
3,C000100505P_1,"[18, 19]","[Very Light, Very Light]","[7, 7]",0.0,-0.453455
4,C000100905_1,"[30, 31]","[Light, Light]","[7, 7]",0.0,-0.073556
5,C000100910_1,"[24, 25]","[Light, Light]","[8, 8]",0.0,0.906511
6,C000101004_1,"[7, 8]","[Light, Light]","[9, 9]",0.0,0.559512
7,C000101005_1,"[5, 6]","[Light, Light]","[9, 9]",0.0,0.386703
8,C000101010_1,"[6, 7]","[Very Light, Very Light]","[9, 9]",0.0,0.468825
9,C000101105_1,"[57, 58]","[Very Light, Very Light]","[7, 7]",0.0,0.990234
