In [64]:
# import libraries
import pandas as pd
import html5lib
import matplotlib.pyplot as plt
import numpy as np

#define the relative values of the trucks according to economic value equation
Heavy_value = 3
Medium_value = 2
Small_value = 1

In [55]:
#function that takes the roadname as an argument and returns a panda dataframe with the traffic data for that road
def htm_import(roadname):
        
        #make a temporary dataframe that will be returned by the function
        df_current_road = pd.DataFrame()
        
        #get filename and open the .htm file
        filename = "RMMS/{}.traffic.htm".format(roadname)
        f =  open(filename, "rb")
        
        #find all the dataframes that contain the string of the roadname
        list_road = pd.read_html(f, skiprows = {0,0,5},header  = 1, match = roadname) #the third data frame has 5 skip rows
        df_current_road = list_road[2] #third table in the file is the data frame with the traffic data
        f.close()

        #subset to remove all columns after the 25th column. 
        #The 26th column is a repeat of total AADT data in 25th column and after that there are a number of empty columns 
        #as a result of parsing from .htm that should also be removed
        column_remove =list(df_current_road.columns.values)[25:]
        df_current_road = df_current_road.drop(labels=column_remove, axis = 1)

        #rename the columns
        df_current_road.columns = ["Link no","Link Name","Start LRP","Start Offset","Start Chainage","End LRP","End Offset","End Chainage","Link Length", "Heavy Truck","Medium Truck","Small Truck","Large Bus","Medium Bus","Micro Bus","Utility","Car","Auto Rickshaw","Motor Cycle","Bi-Cycle","Cycle Rickshaw","Cart","Motorized Total","Non Motorized Total","Total Traffic"]       

        #include the roadname in the dataframe
        df_current_road['Road name'] = roadname
        return(df_current_road)



In [85]:
#involves calculating the economic value of the traffic on each 'link', and processing duplicates for L and R on a link.
#the average vaule of L and R traffc is used, and one of them is removed
def process_traffic(df):
    end_index = len(df)
    drop_indices = [] #list containing indices of duplictates to be removed
    for i in range(end_index):
        
        #calculate the "economic value of traffic" according to formula
        df.iloc[i, df.columns.get_loc("Economic Traffic")] = df.iloc[i]['Heavy Truck']*Heavy_value+df.iloc[i]['Medium Truck']*Medium_value + df.iloc[i]['Small Truck']*Small_value


        #EXTEND THIS CONDITION TO MAKE IT MORE ACCURATE (MAYBE NOT NECESSARY): same chainage/name etc also
        if 'L' in df.iloc[i]['Link no'] and 'R' in df.iloc[i+1]['Link no']:
            
            #use average of all the traffic data from L and R 
            df.iloc[i, df.columns.get_loc('Heavy Truck')] = (df.iloc[i]['Heavy Truck']+df.iloc[i+1]['Heavy Truck'])/2
            df.iloc[i, df.columns.get_loc('Medium Truck')] = (df.iloc[i]['Medium Truck']+df.iloc[i+1]['Medium Truck'])/2
            df.iloc[i, df.columns.get_loc('Small Truck')] = (df.iloc[i]['Small Truck']+df.iloc[i+1]['Small Truck'])/2
            #df.iloc[i, df.columns.get_loc('Economic Traffic')] = (df.iloc[i]['Economic Traffic']+df.iloc[i+1]['Economic Traffic'])/2
            
            df.iloc[i, df.columns.get_loc('Large Bus')] = (df.iloc[i]['Large Bus']+df.iloc[i+1]['Large Bus'])/2
            df.iloc[i, df.columns.get_loc('Medium Bus')] = (df.iloc[i]['Medium Bus']+df.iloc[i+1]['Medium Bus'])/2
            df.iloc[i, df.columns.get_loc('Micro Bus')] = (df.iloc[i]['Micro Bus']+df.iloc[i+1]['Micro Bus'])/2
            df.iloc[i, df.columns.get_loc('Utility')] = (df.iloc[i]['Utility']+df.iloc[i+1]['Utility'])/2
            df.iloc[i, df.columns.get_loc('Car')] = (df.iloc[i]['Car']+df.iloc[i+1]['Car'])/2
            df.iloc[i, df.columns.get_loc('Auto Rickshaw')] = (df.iloc[i]['Auto Rickshaw']+df.iloc[i+1]['Auto Rickshaw'])/2
            df.iloc[i, df.columns.get_loc('Motor Cycle')] = (df.iloc[i]['Motor Cycle']+df.iloc[i+1]['Motor Cycle'])/2
            df.iloc[i, df.columns.get_loc('Bi-Cycle')] = (df.iloc[i]['Bi-Cycle']+df.iloc[i+1]['Bi-Cycle'])/2        
            df.iloc[i, df.columns.get_loc('Cycle Rickshaw')] = (df.iloc[i]['Cycle Rickshaw']+df.iloc[i+1]['Cycle Rickshaw'])/2
            df.iloc[i, df.columns.get_loc('Cart')] = (df.iloc[i]['Cart']+df.iloc[i+1]['Cart'])/2
            df.iloc[i, df.columns.get_loc('Motorized Total')] = (df.iloc[i]['Motorized Total']+df.iloc[i+1]['Motorized Total'])/2        
            df.iloc[i, df.columns.get_loc('Non Motorized Total')] = (df.iloc[i]['Non Motorized Total']+df.iloc[i+1]['Non Motorized Total'])/2        
            df.iloc[i, df.columns.get_loc('Total Traffic')] = (df.iloc[i]['Total Traffic']+df.iloc[i+1]['Total Traffic'])/2        

            #remove the duplicate
            drop_indices.append(i+1)

            #rename the Link no (e.g. N1-1R is removed and N1-1L is renamed N1-1)
            df.iloc[i, df.columns.get_loc('Link no')] = df.iloc[i]['Link no'].replace('L','')
    
    for i in range(end_index):      
        #calculate the "economic value of traffic" according to formula
        df.iloc[i, df.columns.get_loc("Economic Traffic")] = df.iloc[i]['Heavy Truck']*Heavy_value+df.iloc[i]['Medium Truck']*Medium_value + df.iloc[i]['Small Truck']*Small_value

    df = df.drop(labels = drop_indices)

    return(df)
    #question: do we also want to rename any single R or L link numbers? Or is this okay? I think it is more informative this way...?

In [90]:
#create the output data frame with corresponding columns for output
df_traffic_allroads = pd.DataFrame(columns =["Road name","Link no","Link Name","Start LRP","Start Offset","Start Chainage","End LRP","End Offset","End Chainage","Link Length", "Economic Traffic","No Lanes","Heavy Truck","Medium Truck","Small Truck","Large Bus","Medium Bus","Micro Bus","Utility","Car","Auto Rickshaw","Motor Cycle","Bi-Cycle","Cycle Rickshaw","Cart","Motorized Total","Non Motorized Total","Total Traffic"])


#get list of all the roads 

roadnames = pd.read_csv('_roadnames_list.csv') # need to have roadnames list in same folder path 
roadnames_list = list(roadnames.columns.unique())
#
#
#
#JUST FOR TESTING N1 !!!!
#NB: works for N1, but need to clean NAs from other small roads before running on entire road list
roadnames_list = ['N1']

#iterate through each road and append it to the main dataframe using defined function htm_import
for roadname in roadnames_list:
    df_traffic_allroads = df_traffic_allroads.append(htm_import(roadname), ignore_index = True, sort = False)

    


df_traffic_allroads = process_traffic(df_traffic_allroads)


In [93]:
#save output file
df_traffic_allroads.to_csv('data_traffic.csv',sep= ',', header=True)


In [89]:
#testing - for economic values of trucks
df_traffic_allroads = pd.DataFrame(columns =["Road name","Link no","Link Name","Start LRP","Start Offset","Start Chainage","End LRP","End Offset","End Chainage","Link Length", "Economic Traffic","No Lanes","Heavy Truck","Medium Truck","Small Truck","Large Bus","Medium Bus","Micro Bus","Utility","Car","Auto Rickshaw","Motor Cycle","Bi-Cycle","Cycle Rickshaw","Cart","Motorized Total","Non Motorized Total","Total Traffic"])


#get list of all the roads 

roadnames = pd.read_csv('_roadnames_list.csv') # need to have roadnames list in same folder path 
roadnames_list = list(roadnames.columns.unique())
roadnames_list = ['N1']
#

df_traffic_allroads.append(htm_import(roadname), ignore_index = True, sort = False)

df_traffic_allroads

Unnamed: 0,Road name,Link no,Link Name,Start LRP,Start Offset,Start Chainage,End LRP,End Offset,End Chainage,Link Length,...,Utility,Car,Auto Rickshaw,Motor Cycle,Bi-Cycle,Cycle Rickshaw,Cart,Motorized Total,Non Motorized Total,Total Traffic


In [75]:
roadname = 'N1'
link_chainage_s = 0
link_chainage_e = 1
filename = "RMMS/{}.widths.processed.txt".format(roadname)
f =  open(filename, "rb")
df = pd.read_csv(f, sep='\t', lineterminator='\n')

df


Unnamed: 0,roadNo,roadId,startChainage,endChainage,width,nrLanes
0,N1,1585,0.000,0.130,29.7,8
1,N1,1585,0.130,0.400,14.0,4
2,N1,1585,0.400,1.050,28.3,8
3,N1,1585,1.050,5.100,16.5,5
4,N1,1585,5.100,6.200,16.2,5
5,N1,1585,6.200,6.890,16.0,5
6,N1,1585,6.890,7.330,26.0,8
7,N1,1585,7.330,7.900,28.0,8
8,N1,1585,7.900,8.900,14.9,4
9,N1,1585,8.900,9.800,15.0,4


In [92]:
df_traffic_allroads['Economic Traffic']


0      14940.5
2      14037.0
3       7632.5
5       7813.5
7      13224.0
9      13224.0
11     13224.0
13     12272.0
15     12272.0
17     12272.0
19     12272.0
21     11663.0
23     11663.0
25     11663.0
27     11663.0
29     11663.0
31     12545.5
33     12545.5
35     12545.5
37     12545.5
39     13054.5
41     13054.5
43     13054.5
45     13054.5
47      9433.0
49      9433.0
51     12843.5
53     12843.5
55     14183.0
57     12435.0
        ...   
74      3081.0
75      3081.0
76      3039.0
77      3039.0
78      3316.0
79      3316.0
80      3316.0
81      2878.0
82      2878.0
83      2878.0
84      2878.0
85      6069.0
86      6069.0
87      6069.0
88      6069.0
89      1929.0
90      1929.0
91      1929.0
92      1929.0
93      2684.0
94      1454.0
95      1454.0
96      1454.0
97      1454.0
98      1454.0
99      1119.0
100     1119.0
101     1119.0
102     1119.0
103     1119.0
Name: Economic Traffic, Length: 69, dtype: float64

In [None]:


#find the number of lanes for each road

#match according to chainage


