In [6]:
import pandas as pd
import numpy as np

In [7]:
#Function to check monotony in datasets

def check_chainage_monotony(df):
    # convert the chainage column to numeric values, if value cannot be converted it becomes NaN
    df["chainage"] = pd.to_numeric(df["chainage"], errors="coerce")
    # create empty list to store True or False values indicating whether there's monotony within each road group
    report = []
    # loop through each group of rows grouped by "road"
    for road, group in df.groupby("road"):
        # reset index inside each group so iteration start from 0 (and drop old index column)
        group = group.reset_index(drop=True)
        # variable to keep track of previous chainage value
        prev_chainage = None
        # iterate through each row in the grouped DataFrame
        for i, row in group.iterrows():
            current_chainage = row["chainage"]

            # it it's the first row in the group, it will automatically be marked as monotone (True)
            if prev_chainage is None:
                report.append(True)
            else:
                # Check if current chainage is greater than or equal to the previous one
                report.append(current_chainage >= prev_chainage)
            # update chainage for next iteration
            prev_chainage = current_chainage

    # add the report as new column to original dataframe
    df["monotone"] = report

    not_monotone = df.groupby("road")["monotone"].all().eq(False).sum()
    # print results
    if not_monotone == 0:
        print("All roads in the dataset have chainage monotony")
    else:
        print(not_monotone, "roads break chainage monotony")
        road_monotone = df.groupby("road")["monotone"].all()
        broken_roads = road_monotone[road_monotone ==False].index
        print("Roads that break monotony:", broken_roads)

In [8]:
df_roads = pd.read_csv("../data/raw/Roads_InfoAboutEachLRP.csv")

check_chainage_monotony(df_roads)

All roads in the dataset have chainage monotony


In [9]:
df_bridges = pd.read_excel("../data/raw/BMMS_overview.xlsx")

check_chainage_monotony(df_bridges)

# the output is only one road with broken montony R750, 
# but seems this doesn't have coordinates nor much data, likely to be road
# in conclusion, both datasets 


507 roads break chainage monotony
Roads that break monotony: Index(['N1', 'N102', 'N104', 'N105', 'N106', 'N107', 'N108', 'N110', 'N2',
       'N204',
       ...
       'Z8716', 'Z8717', 'Z8803', 'Z8804', 'Z8806', 'Z8810', 'Z8814', 'Z8910',
       'Z8913', 'Z8915'],
      dtype='object', name='road', length=507)


In [26]:
# Observing the dataset, the roads seem to be all over the place
# N1 in initial rows but also afterwards
# Here's a function to group and sort them in order

def fix_chainage_monotony(df): 
    df = df.copy()
    # group frist by road and sort each road by chainage
    df_sorted = df.sort_values(by=["road", "chainage"]).reset_index(drop=True)
    return df_sorted

df_sorted_bridges = fix_chainage_monotony(df_bridges)
check_chainage_monotony(df_sorted_bridges)




1 roads break chainage monotony
Roads that break monotony: Index(['R750'], dtype='object', name='road')


In [34]:
# Check of road R750
roadR750 = df_sorted_bridges[df_sorted_bridges["road"] == "R750"]
roadR750.tail(30)

Unnamed: 0,road,km,type,LRPName,name,length,condition,structureNr,roadName,chainage,...,constructionYear,spans,zone,circle,division,sub-division,lat,lon,EstimatedLoc,monotone
7668,R750,5.765,Arch Masonry,LRP006a,.,1.9,C,103281,Jessore-Narail Road,5.765,...,1980.0,1.0,Khulna,Jessore,Jessore,Jessore-2,23.166583,89.2765,bcs1,True
7669,R750,6.135,RCC Girder Bridge,LRP006b,Daitala Bridge,41.6,C,104032,Jessore-Narail Road,6.135,...,1976.0,3.0,Khulna,Jessore,Jessore,Jessore-2,23.168056,89.279722,bcs1,True
7670,R750,8.932,Slab Culvert,LRP009a,.,3.8,B,104035,Jessore-Narail Road,8.932,...,1980.0,1.0,Khulna,Jessore,Jessore,Jessore-2,23.184361,89.299778,bcs1,True
7671,R750,10.016,Slab Culvert,LRP010a,.,3.8,C,104036,Jessore-Narail Road,10.016,...,1985.0,1.0,Khulna,Jessore,Jessore,Jessore-2,23.184444,89.310278,bcs1,True
7672,R750,11.78,Slab Culvert,LRP012a,.,4.2,C,104039,Jessore-Narail Road,11.78,...,1985.0,1.0,Khulna,Jessore,Jessore,Jessore-2,23.185778,89.327472,bcs1,True
7673,R750,12.825,Slab Culvert,LRP013a,.,4.0,C,104046,Jessore-Narail Road,12.825,...,1980.0,1.0,Khulna,Jessore,Jessore,Jessore-2,23.187778,89.3375,bcs1,True
7674,R750,14.05,Arch Masonry,LRP014b,.,3.1,B,104050,Jessore-Narail Road,14.05,...,1976.0,1.0,Khulna,Jessore,Jessore,Jessore-2,23.187778,89.349167,bcs1,True
7675,R750,14.623,Slab Culvert,LRP015a,.,4.15,A,104063,Jessore-Narail Road,14.623,...,1985.0,1.0,Khulna,Jessore,Jessore,Jessore-2,23.186833,89.354722,bcs1,True
7676,R750,15.157,Slab Culvert,LRP016a,.,4.1,B,104069,Jessore-Narail Road,15.157,...,1985.0,1.0,Khulna,Jessore,Jessore,Jessore-2,23.185556,89.359722,bcs1,True
7677,R750,16.028,Slab Culvert,LRP017a,.,4.16,C,104078,Jessore-Narail Road,16.028,...,1980.0,1.0,Khulna,Jessore,Jessore,Jessore-2,23.187222,89.3675,bcs1,True


In [43]:
# Chainage breaks because one entry in the road has a NaN value in the chainage column 
# Since this row has the same LRP, name and length as another entry, we assume it refers to the same bridge
# We drop the row with NaN value in chainage

df_cleaned_bridges = df_sorted_bridges.drop(df_sorted_bridges[(df_sorted_bridges['chainage'].isna()) & (df_sorted_bridges["road"] == "R750")].index)
check_chainage_monotony(df_cleaned_bridges)

All roads in the dataset have chainage monotony


df_sorted_bridges.to_csv()