In [29]:
## Data Preparation File
## Group 14

In [30]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [31]:
# Read both raw input files from the current working directory.
# Road file (CSV):
roads = pd.read_csv(filepath_or_buffer='_roads3.csv')
# Bridge file (Excel workbook):
bridges = pd.read_excel(io='BMMS_overview.xlsx')

In [32]:
# Sorted list of every distinct road name that occurs in the road file
road_list = sorted(roads['road'].unique())

In [33]:
# EDIT THIS LIST OF ROADS IF YOU DON'T WANT TO LOAD IN ALL THE ROAD FILES!
# road_list = road_list[1:3]
# road_list

In [34]:
# Prep road file
def prep_road(roadname, roads_df=None):
    """Extract the rows of one road from the road table and normalise the columns.

    Parameters
    ----------
    roadname : value compared against the ``road`` column to select rows.
    roads_df : pandas.DataFrame, optional
        Table to read from; must contain the columns ``road``, ``lrp``,
        ``lat``, ``lon`` and ``chainage``. When omitted, the module-level
        ``roads`` frame is used, so existing ``prep_road(name)`` calls behave
        as before.

    Returns
    -------
    pandas.DataFrame
        A new frame with columns ``Road, LRPName, Lat, Lon, Chainage, RLRPN``,
        where ``RLRPN`` is ``road + lrp`` (presumably both are strings, so this
        is a concatenation - confirm against the input file).
    """
    source = roads if roads_df is None else roads_df
    wanted = ['road', 'lrp', 'lat', 'lon', 'chainage']
    # .copy() so the column added below is written to an independent frame
    # instead of a slice of the source table (avoids SettingWithCopyWarning).
    data_fr = source.loc[source['road'] == roadname, wanted].copy()
    data_fr["RLRPN"] = data_fr['road'] + data_fr['lrp']
    data_fr.columns = ['Road', 'LRPName', 'Lat', 'Lon', 'Chainage', "RLRPN"]
    return data_fr

In [35]:
# Prep bridge file
def prep_bridge(roadname, bridges_df=None):
    """Extract the bridges of one road from the bridge table and normalise the columns.

    Rows without a ``width`` value are discarded, the remaining rows are
    sorted by ``Chainage`` then ``RLRPN`` and the index is renumbered from 0.
    A road without any bridge rows yields an empty frame with the same
    columns, so callers can treat both cases alike.

    Parameters
    ----------
    roadname : value compared against the ``road`` column to select rows.
    bridges_df : pandas.DataFrame, optional
        Table to read from; must contain ``road, LRPName, name, chainage,
        length, condition, lat, lon, width``. When omitted, the module-level
        ``bridges`` frame is used, so existing ``prep_bridge(name)`` calls
        behave as before.

    Returns
    -------
    pandas.DataFrame
        A new frame with columns ``Road, LRPName, Description, Chainage,
        Length, Cat, Lat, Lon, RLRPN`` (``RLRPN`` is ``road + LRPName``).
    """
    source = bridges if bridges_df is None else bridges_df
    wanted = ['road', 'LRPName', 'name', 'chainage', 'length', 'condition', 'lat', 'lon']

    selected = source[source['road'] == roadname]
    # Discard bridge records that have no width recorded.
    selected = selected.dropna(subset=['width'])
    # .copy() so the column added below does not write into a slice of the source.
    data_fb = selected[wanted].copy()
    data_fb["RLRPN"] = data_fb['road'] + data_fb['LRPName']
    data_fb.columns = ['Road', 'LRPName', 'Description', 'Chainage', 'Length',
                       'Cat', 'Lat', 'Lon', "RLRPN"]
    # Sorting and re-indexing are harmless on an empty frame, so no separate
    # empty-road branch is needed.
    data_fb = data_fb.sort_values(by=['Chainage', 'RLRPN'])
    data_fb = data_fb.reset_index(drop=True)
    return data_fb

In [36]:
# Clean duplicate L/R bridges before merge
# where adjacent rows share the same RLRPN, compare Cat and keep the row with the smaller Cat value (if Cat is the same, drop the second one)
def clean_lr(data_fb):
    """Collapse runs of adjacent bridge rows that share the same ``RLRPN``.

    Within each run of consecutive rows with an equal ``RLRPN`` exactly one
    row survives: the one whose ``Cat`` compares smallest, and on a tie the
    earliest row. A row with a missing ``Cat`` never replaces a row that has
    one. Only neighbouring rows are compared, so the input is expected to be
    ordered such that duplicates are adjacent.

    The previous implementation dropped rows while iterating by position,
    which shifted the remaining rows and left runs of three or more
    duplicates only partly cleaned; it also raised ``TypeError`` when a text
    ``Cat`` was compared with a missing one.

    Parameters
    ----------
    data_fb : pandas.DataFrame with at least the columns ``RLRPN`` and ``Cat``.

    Returns
    -------
    pandas.DataFrame
        A new frame holding the surviving rows in their original order with
        their original index labels. The input frame is not modified, so
        callers must use the return value.
    """
    if len(data_fb) == 0:
        return data_fb

    names = data_fb['RLRPN'].tolist()
    cats = data_fb['Cat'].tolist()

    keep = []  # positions of surviving rows, always increasing
    for pos, name in enumerate(names):
        if keep and names[keep[-1]] == name:
            challenger, holder = cats[pos], cats[keep[-1]]
            if pd.isna(challenger):
                # A row without Cat cannot win; the held row stays.
                continue
            if pd.isna(holder) or challenger < holder:
                # Strictly smaller Cat wins; ties leave the earlier row in place.
                keep[-1] = pos
        else:
            keep.append(pos)
    return data_fb.iloc[keep]

In [37]:
# Join road and bridge files
def join_roadbridge(data_fb, data_fr):
    """Stack the road rows and the bridge rows of one road and order them by ``RLRPN``.

    Uses ``pd.concat`` because ``DataFrame.append`` no longer exists in
    pandas 2.x. Columns present in only one of the inputs are kept and filled
    with NaN for the rows of the other input.

    Parameters
    ----------
    data_fb : pandas.DataFrame of bridge rows (may be empty).
    data_fr : pandas.DataFrame of road rows.

    Returns
    -------
    pandas.DataFrame
        Road rows followed by bridge rows, sorted by ``RLRPN`` with a fresh
        0..n-1 index. The sort is stable, so for equal ``RLRPN`` the road row
        always precedes the bridge row.
    """
    data = pd.concat([data_fr, data_fb], ignore_index=True, sort=False)
    # mergesort is stable: ties keep their concat order, making the result deterministic.
    data = data.sort_values(by=['RLRPN'], kind='mergesort')
    data = data.reset_index(drop=True)
    return data

In [38]:
# Clean duplicate LRP Numbers between road and bridge files
# there are cases where within the same km there is both a bridge and a non-bridge LRP, drop the non bridge ones
def bridge_priority(data):
    """Resolve adjacent rows that share an ``RLRPN`` in favour of the bridge row.

    A row counts as a bridge point when its ``Description`` is neither
    missing nor the empty string; road points carry no description (after the
    road/bridge frames are stacked their ``Description`` is NaN, not ``""``,
    which the previous ``== ""`` test never matched, so the bridge row was
    the one being dropped whenever the road row came first).

    Within each run of consecutive rows with an equal ``RLRPN`` one row
    survives: the first bridge row if there is one, otherwise the last row
    of the run. Only neighbouring rows are compared, so the input is expected
    to be ordered by ``RLRPN``.

    Parameters
    ----------
    data : pandas.DataFrame with at least the columns ``RLRPN`` and ``Description``.

    Returns
    -------
    pandas.DataFrame
        A new frame with the surviving rows in their original order and with
        their original index labels. The input frame is not modified, so
        callers must use the return value.
    """
    if len(data) == 0:
        return data

    names = data['RLRPN'].tolist()
    description = data['Description']
    is_bridge = (description.notna() & (description != "")).tolist()

    keep = []  # positions of surviving rows, always increasing
    for pos, name in enumerate(names):
        if keep and names[keep[-1]] == name:
            if not is_bridge[keep[-1]]:
                # The held row is a plain road point: the newer row replaces it.
                keep[-1] = pos
            # Otherwise a bridge row is already held and the newer row is dropped.
        else:
            keep.append(pos)
    return data.iloc[keep]

In [39]:
# Set up columns for simio
def simio_columns(data):
    """Add the ``Next Node`` and ``Add On Process`` columns and replace NaN with 0.

    Steps:
      1. Sort by ``Chainage`` then ``RLRPN`` and renumber the index.
      2. ``Next Node`` is the ``RLRPN`` of the following row; rows whose
         ``RLRPN`` contains "LRPE" (case-insensitive) get "PreRome" instead.
      3. ``Add On Process`` is "BridgesDelay" where ``Length`` is present and
         "BlankProcess" where it is missing.
      4. Every remaining NaN in the frame (including the ``Next Node`` of the
         last row, if it is not an "LRPE" row) becomes 0.

    Parameters
    ----------
    data : pandas.DataFrame with at least ``Chainage``, ``RLRPN`` and ``Length``.

    Returns
    -------
    pandas.DataFrame
        A new, re-indexed frame; the input is not modified.
    """
    # Node ordering and successor column
    data = data.sort_values(by=['Chainage', 'RLRPN'])
    data = data.reset_index(drop=True)
    data["Next Node"] = data.RLRPN.shift(-1)
    # na=False: a missing RLRPN must count as "no match"; without it the mask
    # contains NaN and the .loc assignment below raises.
    is_end_node = data.RLRPN.str.contains("LRPE", case=False, na=False)
    data.loc[is_end_node, "Next Node"] = "PreRome"

    # Add-on process: rows with a Length are treated as bridges
    data.loc[data.Length.notnull(), "Add On Process"] = "BridgesDelay"
    data.loc[data.Length.isnull(), "Add On Process"] = "BlankProcess"

    # Replace NaN with 0
    data = data.fillna(0)
    return data

In [40]:
# Save result
def save_for_simio(data_out, path="data_clean_all.csv"):
    """Write the combined table to a comma-separated file.

    Parameters
    ----------
    data_out : pandas.DataFrame to export; its index is written as the first column.
    path : str or path-like, optional
        Destination file. Defaults to "data_clean_all.csv" in the current
        working directory, which is what existing one-argument calls produce.
    """
    data_out.to_csv(path, sep=',')

In [41]:
# Main function -- call this
def do_it():
    """Run the full preparation pipeline for every road in ``road_list`` and save the result.

    For each road name the road rows and bridge rows are prepared, duplicate
    bridge rows are cleaned, both are joined, road/bridge duplicates are
    resolved and the Simio columns are added. The per-road tables are then
    combined into one frame and handed to ``save_for_simio``.

    The per-road frames are collected in a list and concatenated once:
    ``DataFrame.append`` no longer exists in pandas 2.x, and growing a frame
    inside the loop copies all earlier rows on every iteration.
    """
    per_road = []
    for roadname in road_list:
        data_fr = prep_road(roadname)
        data_fb = prep_bridge(roadname)
        data_fb = clean_lr(data_fb)
        data = join_roadbridge(data_fb, data_fr)
        data = bridge_priority(data)
        data = simio_columns(data)
        per_road.append(data)
    # pd.concat rejects an empty list; an empty road_list still writes an empty table.
    if per_road:
        data_out = pd.concat(per_road, ignore_index=True)
    else:
        data_out = pd.DataFrame()
    save_for_simio(data_out)

In [42]:
# Run the whole pipeline for every road in road_list and write the combined CSV.
do_it()