In [94]:
# import libraries
import pandas as pd
import html5lib
import matplotlib.pyplot as plt
import numpy as np
import logging

#define the relative values of the trucks according to economic value equation
Heavy_value = 30.0
Medium_value = 25.0
Small_value = 16.0

In [95]:
#function that takes the roadname as an argument and returns a panda dataframe with the traffic data for that road
def htm_import(roadname):
        
        #make a temporary dataframe that will be returned by the function
        df_current_road = pd.DataFrame()
        
        #get filename and open the .htm file
        filename = "RMMS/{}.traffic.htm".format(roadname)
        f =  open(filename, "rb")
        
        #find all the dataframes that contain the string of the roadname
        list_road = pd.read_html(f, skiprows = {0,0,5},header  = 1, match = roadname) #the third data frame has 5 skip rows
        df_current_road = list_road[2] #third table in the file is the data frame with the traffic data
        f.close()

        #subset to remove all columns after the 25th column. 
        #The 26th column is a repeat of total AADT data in 25th column and after that there are a number of empty columns 
        #as a result of parsing from .htm that should also be removed
        column_remove =list(df_current_road.columns.values)[25:]
        df_current_road = df_current_road.drop(labels=column_remove, axis = 1)

        #rename the columns
        df_current_road.columns = ["Link no","Link Name","Start LRP","Start Offset","Start Chainage","End LRP","End Offset","End Chainage","Link Length", "Heavy Truck","Medium Truck","Small Truck","Large Bus","Medium Bus","Micro Bus","Utility","Car","Auto Rickshaw","Motor Cycle","Bi-Cycle","Cycle Rickshaw","Cart","Motorized Total","Non Motorized Total","Total Traffic"]       

        #include the roadname in the dataframe
        df_current_road['Road name'] = roadname
        return(df_current_road)



In [96]:
#involves calculating the economic value of the traffic on each 'link', and processing duplicates for L and R on a link.
#the average vaule of L and R traffc is used, and one of them is removed
#also removes links for which there is no data at all, and returns a list of the links that have been removed
def process_traffic(df):

    end_index = len(df)
    drop_indices = [] #list containing indices of duplictates to be removed
    for i in range(end_index):
        
        #check for missing data in the link
        if df.iloc[i]['Heavy Truck'] == 'NS' :
            #all the rows with NS data (i.e missing data) have NS values in this column
            #remove the 'bad data row' - but maybe change to NA instead?
            print(i)
            drop_indices.append(i)
            #report deletion to log
            logging.warning('%s link deleted due to no data',df.iloc[i]['Link no'] )
        elif  i != (end_index-1):
            if df.iloc[i+1]['Heavy Truck'] != 'NS':
     
                #no missing values so continue as normal...

                #calculate the "economic value of traffic" according to formula
                #df.iloc[i, df.columns.get_loc("Economic Traffic")] = df.iloc[i]['Heavy Truck']*Heavy_value+df.iloc[i]['Medium Truck']*Medium_value + df.iloc[i]['Small Truck']*Small_value


                #EXTEND THIS CONDITION TO MAKE IT MORE ACCURATE (MAYBE NOT NECESSARY): same chainage/name etc also
                if 'L' in df.iloc[i]['Link no'] and 'R' in df.iloc[i+1]['Link no']:

                    if not (df.iloc[i+1]['Heavy Truck'] == 'NS'): #cannot average with next row's value if the next row is corrupt data


                                #use average of all the traffic data from L and R 
                                df.iloc[i, df.columns.get_loc('Heavy Truck')] = (float(df.iloc[i]['Heavy Truck'])+float(df.iloc[i+1]['Heavy Truck']))/2
                                df.iloc[i, df.columns.get_loc('Medium Truck')] = (float(df.iloc[i]['Medium Truck'])+float(df.iloc[i+1]['Medium Truck']))/2
                                df.iloc[i, df.columns.get_loc('Small Truck')] = (float(df.iloc[i]['Small Truck'])+float(df.iloc[i+1]['Small Truck']))/2

                                df.iloc[i, df.columns.get_loc('Large Bus')] = (float(df.iloc[i]['Large Bus'])+float(df.iloc[i+1]['Large Bus']))/2
                                df.iloc[i, df.columns.get_loc('Medium Bus')] = (float(df.iloc[i]['Medium Bus'])+float(df.iloc[i+1]['Medium Bus']))/2
                                df.iloc[i, df.columns.get_loc('Micro Bus')] = (float(df.iloc[i]['Micro Bus'])+float(df.iloc[i+1]['Micro Bus']))/2

                                df.iloc[i, df.columns.get_loc('Utility')] = (float(df.iloc[i]['Utility'])+float(df.iloc[i+1]['Utility']))/2

                                df.iloc[i, df.columns.get_loc('Car')] = (float(df.iloc[i]['Car'])+float(df.iloc[i+1]['Car']))/2
                                df.iloc[i, df.columns.get_loc('Auto Rickshaw')] = (float(df.iloc[i]['Auto Rickshaw'])+float(df.iloc[i+1]['Auto Rickshaw']))/2
                                df.iloc[i, df.columns.get_loc('Motor Cycle')] = (float(df.iloc[i]['Motor Cycle'])+float(df.iloc[i+1]['Motor Cycle']))/2
                                df.iloc[i, df.columns.get_loc('Bi-Cycle')] = (float(df.iloc[i]['Bi-Cycle'])+float(df.iloc[i+1]['Bi-Cycle']))/2        
                                df.iloc[i, df.columns.get_loc('Cycle Rickshaw')] = (float(df.iloc[i]['Cycle Rickshaw'])+float(df.iloc[i+1]['Cycle Rickshaw']))/2
                                df.iloc[i, df.columns.get_loc('Cart')] = (float(df.iloc[i]['Cart'])+float(df.iloc[i+1]['Cart']))/2

                                df.iloc[i, df.columns.get_loc('Motorized Total')] = (float(df.iloc[i]['Motorized Total'])+float(df.iloc[i+1]['Motorized Total']))/2        
                                df.iloc[i, df.columns.get_loc('Non Motorized Total')] = (float(df.iloc[i]['Non Motorized Total'])+float(df.iloc[i+1]['Non Motorized Total']))/2        
                                df.iloc[i, df.columns.get_loc('Total Traffic')] = (float(df.iloc[i]['Total Traffic'])+float(df.iloc[i+1]['Total Traffic']))/2        

                                #remove the duplicate
                                drop_indices.append(i+1)
                                logging.warning('%s link deleted due to duplicate',df.iloc[i]['Link no'] )

                                #rename the Link no (e.g. N1-1R is removed and N1-1L is renamed N1-1)
                                df.iloc[i, df.columns.get_loc('Link no')] = df.iloc[i]['Link no'].replace('L','')

    df.drop(labels = drop_indices, inplace = True)
    
    for i in range(len(df)):      
        #calculate the "economic value of traffic" according to formula
        df.iloc[i, df.columns.get_loc("Economic Traffic")] = float(df.iloc[i]['Heavy Truck'])*Heavy_value+float(df.iloc[i]['Medium Truck'])*Medium_value + float(df.iloc[i]['Small Truck'])*Small_value


    return(df)
    #question: do we also want to rename any single R or L link numbers? Or is this okay? I think it is more informative this way...?

In [97]:
#function to return the 'average number of lanes over a segment of road'...weighted by the length for which the road
#has that many lanes
#input arguments: start chainage of road segment (km), end chainage of road segment (km), segment length (km), data frame of lane/width dtaa for that road
#output arguments: 'average number of lanes over that segment of road'
def average_lanes (a,b,segment_length, df):
    weighted_lanes = 0.0 
    
    for i in range(len(df)):
        start = df.iloc[i]['startChainage']
        end = df.iloc[i]['endChainage']
        lanes = df.iloc[i]['nrLanes']
        if not((a > end) or (b < start)):
            
            if a <= start and b >= end:
                #entire part of the lane segment is on the road segment
                weighted_lanes += (end-start)*lanes

            elif a >= start and b>= end:
                #end part of the lane segment is on the road segment
                weighted_lanes += (end-a)*lanes

            elif (a < start) and (b < end):
                #beginning part of the lane segment is on the road segment
                weighted_lanes += (b-start)*lanes

            elif (a >= start) and (b<= end):
                #entire road segment is within a lane segment
                weighted_lanes += (b-a)*lanes

        #else no part of that lane segment is on the road segment
    if weighted_lanes == 0.0:
        return (-1) #no lane data found that matched, -1 is the indicator of this error
    
    
        #finally, return the number of lanes, averaged by length of the total lanes
    return (weighted_lanes/segment_length)

In [102]:
#MAIN BLOCK OF CODE, PART 1: Road Segments

#create the output data frame with corresponding columns for output
df_traffic_allroads = pd.DataFrame(columns =["Road name","Link no","Link Name","Start LRP","Start Offset","Start Chainage","End LRP","End Offset","End Chainage","Link Length", "Economic Traffic","No Lanes","Heavy Truck","Medium Truck","Small Truck","Large Bus","Medium Bus","Micro Bus","Utility","Car","Auto Rickshaw","Motor Cycle","Bi-Cycle","Cycle Rickshaw","Cart","Motorized Total","Non Motorized Total","Total Traffic"])


#get list of all the roads 
roadnames = pd.read_csv('_roadnames_list.csv') # need to have roadnames list in same folder path 
roadnames_list = list(roadnames.columns.unique())


#iterate through each road and append it to the main dataframe using defined function htm_import
for roadname in roadnames_list:
    df_traffic_allroads = df_traffic_allroads.append(htm_import(roadname), ignore_index = True, sort = False)


#call function to process the data from htm: ie. remove duplicates, and find the criticality of each segment
df_traffic_allroads = process_traffic(df_traffic_allroads)

#next section of code (for loop) calls the function to calculate the average number of lanes in each road segment
#iterate through each road segment on the database
road_name_previous = 'Na' #initialise for first comparison
for i in range(len(df_traffic_allroads)):
    
    road_name_current = df_traffic_allroads.iloc[i]['Road name']
    #check if segmnet of road is on a different road
    if (road_name_current != road_name_previous):
        #open new file
        filename = "RMMS/{}.widths.processed.txt".format(road_name_current)
        f =  open(filename, "rb")
        df_lanes = pd.read_csv(f, sep='\t', lineterminator='\n')
        df_lanes.columns = ['roadNo', 'roadId', 'startChainage', 'endChainage', 'width','nrLanes']

    a = df_traffic_allroads.iloc[i]['Start Chainage']
    b = df_traffic_allroads.iloc[i]['End Chainage']
    length = df_traffic_allroads.iloc[i]['Link Length']
    
    #calculate average value of lanes using function and update the data frame
    df_traffic_allroads.iloc[i, df_traffic_allroads.columns.get_loc('No Lanes')] = average_lanes(a,b,length,df_lanes)
    
    #pass into memory for next comparison
    road_name_previous = road_name_current

#call function that converts all traffic modes into traffic density for that mode
df = df_traffic_allroads
for i in range(len(df)):
    lanes = df.iloc[i]['No Lanes']

    if  lanes == -1 :
    #mark as NA using numpy.nan
        df.iloc[i, df.columns.get_loc('Heavy Truck')] = np.nan
        df.iloc[i, df.columns.get_loc('Medium Truck')] = np.nan
        df.iloc[i, df.columns.get_loc('Small Truck')] = np.nan
                    
        df.iloc[i, df.columns.get_loc('Large Bus')] = np.nan
        df.iloc[i, df.columns.get_loc('Medium Bus')] = np.nan
        df.iloc[i, df.columns.get_loc('Micro Bus')] = np.nan
                    
        df.iloc[i, df.columns.get_loc('Utility')] = np.nan
                    
        df.iloc[i, df.columns.get_loc('Car')] = np.nan
        df.iloc[i, df.columns.get_loc('Auto Rickshaw')] =np.nan
        df.iloc[i, df.columns.get_loc('Motor Cycle')] = np.nan
        df.iloc[i, df.columns.get_loc('Bi-Cycle')] = np.nan    
        df.iloc[i, df.columns.get_loc('Cycle Rickshaw')] = np.nan
        df.iloc[i, df.columns.get_loc('Cart')] = np.nan
                    
        df.iloc[i, df.columns.get_loc('Motorized Total')] = np.nan     
        df.iloc[i, df.columns.get_loc('Non Motorized Total')] = np.nan      
        df.iloc[i, df.columns.get_loc('Total Traffic')] = np.nan
                           
    else:

        #convert from traffic to traffic density
        df.iloc[i, df.columns.get_loc('Heavy Truck')] = float((df.iloc[i]['Heavy Truck']))/lanes
        df.iloc[i, df.columns.get_loc('Medium Truck')] = float((df.iloc[i]['Medium Truck']))/lanes
        df.iloc[i, df.columns.get_loc('Small Truck')] = float((df.iloc[i]['Small Truck']))/lanes
                    
        df.iloc[i, df.columns.get_loc('Large Bus')] = float((df.iloc[i]['Large Bus']))/lanes
        df.iloc[i, df.columns.get_loc('Medium Bus')] = float((df.iloc[i]['Medium Bus']))/lanes
        df.iloc[i, df.columns.get_loc('Micro Bus')] = float((df.iloc[i]['Micro Bus']))/lanes
                    
        df.iloc[i, df.columns.get_loc('Utility')] = float((df.iloc[i]['Utility']))/lanes
                    
        df.iloc[i, df.columns.get_loc('Car')] = float((df.iloc[i]['Car']))/lanes
        df.iloc[i, df.columns.get_loc('Auto Rickshaw')] = float((df.iloc[i]['Auto Rickshaw']))/lanes
        df.iloc[i, df.columns.get_loc('Motor Cycle')] = float((df.iloc[i]['Motor Cycle']))/lanes
        df.iloc[i, df.columns.get_loc('Bi-Cycle')] = float((df.iloc[i]['Bi-Cycle']))/lanes       
        df.iloc[i, df.columns.get_loc('Cycle Rickshaw')] = float((df.iloc[i]['Cycle Rickshaw']))/lanes
        df.iloc[i, df.columns.get_loc('Cart')] = float((df.iloc[i]['Cart']))/lanes
                    
        df.iloc[i, df.columns.get_loc('Motorized Total')] = float((df.iloc[i]['Motorized Total']))/lanes     
        df.iloc[i, df.columns.get_loc('Non Motorized Total')] = float((df.iloc[i]['Non Motorized Total']))/lanes      
        df.iloc[i, df.columns.get_loc('Total Traffic')] = float((df.iloc[i]['Total Traffic']))/lanes        

   
    
#save output file
df.to_csv('data_traffic_road_segments.csv',sep= ',', header=True)





152
153
154
155
156
157
158
159




278




321




528




847
861




947
953
954
956
960
961
967
970
971
972
973
974
975
976
977
978
979
983
986
989
991
995
997
1000
1001
1003
1007
1008
1010




1017
1024
1028
1032
1035
1044
1047
1054
1055
1056
1057
1058
1060
1062
1075
1077
1078
1083
1084
1085
1088
1092
1094
1096
1098
1103
1104
1105
1107
1108




1109
1111
1117
1124
1125
1126
1145
1152
1161
1167
1172
1174
1176
1177
1184
1186
1189
1196
1202
1205
1209
1211
1213
1215
1222
1224
1225
1227
1229
1241
1246




1248
1251
1257
1262
1264
1267
1269
1272
1276
1280
1282
1288
1289
1290
1297
1298
1302
1313
1314
1315
1317
1320
1324
1331
1332
1334
1335
1336
1339




1348
1353
1356
1360
1363
1364
1366
1368
1379
1389
1394
1397
1398
1400
1415
1417
1422
1424
1425
1426
1430
1433
1435
1437
1439
1441
1443
1448
1449




1454
1456
1457
1458
1463
1464
1470
1474
1475
1476
1480
1482
1484
1488
1497
1501
1502
1511
1513
1515
1517
1520
1531
1535
1536
1539
1542
1545
1548
1555
1558




1562
1565
1566
1567
1568
1572
1574
1580
1584
1585
1590
1591
1596
1597
1605
1607
1609
1610
1616
1619
1623
1625
1628
1632
1642
1643
1647
1648
1650
1652
1653




1656
1657
1658
1659
1661
1663
1665
1666
1669
1672
1673
1674
1677
1682
1683
1689
1692
1702
1704
1707
1714
1721
1725
1726
1727
1730




1731
1733
1734
1735
1741
1746
1749
1761
1762
1772
1780
1783
1785
1787
1790
1793
1796
1802
1807
1810
1816
1823
1841
1847
1849
1861
1870
1873




1874
1875


In [89]:
df


Unnamed: 0,Road name,Link no,Link Name,Start LRP,Start Offset,Start Chainage,End LRP,End Offset,End Chainage,Link Length,...,Utility,Car,Auto Rickshaw,Motor Cycle,Bi-Cycle,Cycle Rickshaw,Cart,Motorized Total,Non Motorized Total,Total Traffic
0,N5,N5-1,Mirpur Bridge-Turag (Int.with Z5069) (Left),LRPS,0,0.000,LRP002,635,2.590,2.590,...,38.5,645,227.5,328.5,10.5,41.75,0,3977.75,52.25,4030
2,N5,N5-2,Turag (Int.with Z5069)-Hamayetpur (Int.with R5...,LRP002,635,2.590,LRP007,20,6.945,4.355,...,38.5,645,227.5,328.5,10.5,41.75,0,3977.75,52.25,4030
4,N5,N5-3,Hamayetpur(Int.with R504)-Nabinagar(Int.with N...,LRP007,20,6.945,LRP020,2478,22.343,15.398,...,80.6536,652.806,289.986,211.41,34.7055,72.4661,0,3891.66,107.172,3998.83
6,N5,N5-4,Nabinagar(Int.with N540)-Dulivita (Int.with R315),LRP020,2478,22.343,LRP027,160,27.075,4.732,...,79.1879,488.116,227.089,211.587,99.2991,72.9031,0,4645.27,172.202,4817.47
7,N5,N5-5,Dulivita(Int.with R315) -Kalampur Bus Stand (I...,LRP027,160,27.075,LRP033,265,33.180,6.105,...,91.9398,566.719,263.658,245.659,115.29,84.643,0,5393.32,199.933,5593.25
8,N5,N5-6,Kalampur Bus Stand(Int.with Z5061) -Golra (Int...,LRP033,265,33.180,LRP044,1583,45.498,12.318,...,78.0928,481.366,223.949,208.661,97.9259,71.8949,0,4581.03,169.821,4750.85
9,N5,N5-7,Golra(Int.with Z5063) -Manikganj (Int.with R504),LRP044,1583,45.498,LRP051,325,51.255,5.757,...,60.271,371.512,172.841,161.042,75.578,55.4876,0,3535.58,131.066,3666.65
10,N5,N5-8,Manikganj (Int.with R504) -Baniajuri (Int.with...,LRP051,325,51.255,LRP057,445,57.375,6.120,...,60.0405,370.091,172.18,160.426,75.2889,55.2754,0,3522.06,130.564,3652.62
11,N5,N5-9,Baniajuri (Int.with Z5064) -Barangail (Int.wit...,LRP057,445,57.375,LRP065,860,65.785,8.410,...,77.0849,475.153,221.058,205.968,96.662,70.967,0,4521.9,167.629,4689.53
12,N5,N5-10,Barangail(Int.with R506) -Utholi (Int.with N503),LRP065,860,65.785,LRP072,160,72.095,6.310,...,1.10831,0.738876,24.3829,29.555,38.4215,41.377,1.10831,69.8238,80.9069,150.731


In [103]:
#MAIN BLOCK OF CODE, PART 2: Summary per Road

#this section of code agregates the data from the road segments, to the total road (for each road)

df1 =df #road segment data frame (input), indexed with i
df2 = pd.DataFrame(columns =["Road name","Economic Traffic","No Lanes","Heavy Truck","Medium Truck","Small Truck","Large Bus","Medium Bus","Micro Bus","Utility","Car","Auto Rickshaw","Motor Cycle","Bi-Cycle","Cycle Rickshaw","Cart","Motorized Total","Non Motorized Total","Total Traffic"], index = range(1000))
df2 = df2.fillna(0) #same data frame aggregated to road level (output), indexed with j
j = 0
segment_length = 0
total_length = 0

road_name_previous = 'Na' #initialise for first comparison
for i in range(len(df1)):

#iterating through segments
    segment_length = df1.iloc[i]['Link Length'] #record segment length each time

    road_name_current = df1.iloc[i]['Road name']
    #check if segmnet of road is on a different road i.e. the next road in the df
    if (road_name_current != road_name_previous):
        #divide data by number of segments in the road to get average
        total_length = 0 #reset number of segments for the road
        j = j+1 #update to next row in road_summary dataframe for next road
        df2.iloc[j, df.columns.get_loc('Road name')] = road_name_current
        #set the road name for the row in road_summary
    else:
        total_length += segment_length
    if type(df1.iloc[i]['Economic Traffic'] ) != str:
        #sum the data from this segment to the data already in the summary file for that roads row
        df2.iloc[j, df2.columns.get_loc('No Lanes')] += (df1.iloc[i]['No Lanes'])*segment_length      
        df2.iloc[j, df2.columns.get_loc('Economic Traffic')] += (df1.iloc[i]['Economic Traffic']) *segment_length
        #not sure whether economic traffic should be averaged this way - check definitions with Emma !!


        df2.iloc[j, df2.columns.get_loc('Heavy Truck')] += (df1.iloc[i]['Heavy Truck'])*segment_length
        df2.iloc[j, df2.columns.get_loc('Medium Truck')] += (df1.iloc[i]['Medium Truck'])*segment_length
        df2.iloc[j, df2.columns.get_loc('Small Truck')] += (df1.iloc[i]['Small Truck'])*segment_length

        df2.iloc[j, df2.columns.get_loc('Large Bus')] += (df1.iloc[i]['Large Bus'])*segment_length
        df2.iloc[j, df2.columns.get_loc('Medium Bus')] += (df1.iloc[i]['Medium Bus'])*segment_length
        df2.iloc[j, df2.columns.get_loc('Micro Bus')] += (df1.iloc[i]['Micro Bus'])*segment_length

        df2.iloc[j, df2.columns.get_loc('Utility')] += (df1.iloc[i]['Utility'])*segment_length
                    
        df2.iloc[j, df2.columns.get_loc('Car')] += (df1.iloc[i]['Car'])*segment_length
        df2.iloc[j, df2.columns.get_loc('Auto Rickshaw')] += (df1.iloc[i]['Auto Rickshaw'])*segment_length
        df2.iloc[j, df2.columns.get_loc('Motor Cycle')] += (df1.iloc[i]['Motor Cycle'])*segment_length
        df2.iloc[j, df2.columns.get_loc('Bi-Cycle')] += (df1.iloc[i]['Bi-Cycle'])*segment_length      
        df2.iloc[j, df2.columns.get_loc('Cycle Rickshaw')] += (df1.iloc[i]['Cycle Rickshaw'])*segment_length
        df2.iloc[j, df2.columns.get_loc('Cart')] += (df1.iloc[i]['Cart'])*segment_length
                    
        df2.iloc[j, df2.columns.get_loc('Motorized Total')] += (df1.iloc[i]['Motorized Total'])*segment_length    
        df2.iloc[j, df2.columns.get_loc('Non Motorized Total')] += (df1.iloc[i]['Non Motorized Total'])*segment_length      
        df2.iloc[j, df2.columns.get_loc('Total Traffic')] += (df1.iloc[i]['Total Traffic'])*segment_length       

    
    #pass into memory for next comparison
    road_name_previous = road_name_current
    
    
#save output file
df2.to_csv('summary_data_traffic_per_road.csv',sep= ',', header=True)            

In [None]:
##Insert Ivar Data Prep things here - !!
##MAIN BLOCK OF CODE, PART 3: Calculate Vulnerability per Road Segment