In [59]:
import pandas as pd
import xml.etree.ElementTree as ET

ns = {'kml': 'http://www.opengis.net/kml/2.2'}

def read_xml(path):
    """Parse the XML/KML document at ``path`` and return its root element.

    Args:
        path: Filesystem location (or file object) accepted by ``ET.parse``.

    Returns:
        The root ``Element`` of the parsed tree.
    """
    return ET.parse(path).getroot()

def get_placemarks(root):
    """Collect every KML ``Placemark`` element located anywhere beneath ``root``.

    Args:
        root: Element to search from (typically the document root).

    Returns:
        A list of matching elements in document order; empty if none exist.
    """
    placemark_path = './/kml:Placemark'
    return list(root.iterfind(placemark_path, namespaces=ns))

def get_line_strings(placemark):
    """Return all KML ``LineString`` descendants of ``placemark`` as a list.

    Args:
        placemark: Element whose subtree is searched.

    Returns:
        Matching elements in document order; empty list when there are none.
    """
    line_string_path = './/kml:LineString'
    return list(placemark.iterfind(line_string_path, namespaces=ns))

def get_points(placemark):
    """Return all KML ``Point`` descendants of ``placemark`` as a list.

    Args:
        placemark: Element whose subtree is searched.

    Returns:
        Matching elements in document order; empty list when there are none.
    """
    point_path = './/kml:Point'
    return list(placemark.iterfind(point_path, namespaces=ns))

def filter_placemarks(placemarks, attribute_name, attribute_value):
    """Keep only the placemarks whose named ``SimpleData`` field equals a value.

    For each placemark the first ``SimpleData`` descendant whose ``name``
    attribute is ``attribute_name`` is looked up; the placemark is retained
    when that element exists and its text is exactly ``attribute_value``.

    Args:
        placemarks: Iterable of placemark elements to examine.
        attribute_name: Value of the ``name`` attribute to look for.
        attribute_value: Text the matching element must contain (exact match).

    Returns:
        A new list with the retained placemarks, in their original order.
    """
    simple_data_path = f".//kml:SimpleData[@name='{attribute_name}']"

    def _has_value(candidate):
        # Only the first matching SimpleData element is consulted.
        node = candidate.find(simple_data_path, namespaces=ns)
        return node is not None and node.text == attribute_value

    return [candidate for candidate in placemarks if _has_value(candidate)]

# Input files: pavement geometry (KML), crash records (KML) and the pavement
# attribute table (CSV) that receives the crash counts.
# NOTE(review): these are absolute, machine-specific paths; consider moving
# them to a configurable data directory.
xml_file_pavement = '/Users/lebakuprathyushkumarreddy/Pavement.xml'
xml_file_crash = "/Users/lebakuprathyushkumarreddy/Downloads/Crash_Data_2022.xml"
csv_file_pavement = '/Users/lebakuprathyushkumarreddy/Pavement.csv'

# Maximum per-axis difference between a crash point and a line vertex for the
# crash to be attributed to that line. Expressed in the coordinate units of
# the KML files (presumably decimal degrees -- TODO confirm).
CRASH_MATCH_TOLERANCE = 0.0002


def _parse_line_vertices(coordinates_element):
    """Return the ``(x, y)`` vertices stored in a KML ``<coordinates>`` element.

    KML separates coordinate tuples with arbitrary whitespace (spaces, tabs or
    newlines), so the text is split on any whitespace rather than on a single
    space character. A missing element or empty text yields an empty list.
    Only the first two components of each tuple are used.
    """
    if coordinates_element is None or not coordinates_element.text:
        return []
    vertices = []
    for token in coordinates_element.text.split():
        parts = token.split(',')
        vertices.append((float(parts[0]), float(parts[1])))
    return vertices


xml_root_pavement = read_xml(xml_file_pavement)
placemarks_pavement = get_placemarks(xml_root_pavement)

xml_root_crash = read_xml(xml_file_crash)
placemarks_crash = get_placemarks(xml_root_crash)
# Keep only crash placemarks whose RDTYP field is exactly '1'.
placemarks_crash_filtered = filter_placemarks(placemarks_crash, 'RDTYP', '1')

# One vertex list per LineString, in document order. A LineString without
# usable coordinates still contributes an (empty) entry so that positions in
# this list are not shifted relative to the rows they are written to below.
linestring_list = []
for placemark in placemarks_pavement:
    for line_string in get_line_strings(placemark):
        linestring_list.append(
            _parse_line_vertices(line_string.find('.//kml:coordinates', ns))
        )

# One (x, y) tuple per crash Point; points without coordinates are skipped.
points_list = []
for placemark in placemarks_crash_filtered:
    for point in get_points(placemark):
        coordinates_element = point.find('.//kml:coordinates', ns)
        if coordinates_element is None or not coordinates_element.text:
            continue
        point_text = coordinates_element.text.strip()
        if not point_text:
            continue
        coords = point_text.split(',')
        points_list.append((float(coords[0]), float(coords[1])))

df_pavement = pd.read_csv(csv_file_pavement)
df_pavement['Crashes'] = 0

# A crash counts towards a line when it lies within the tolerance (on both
# axes) of at least one of the line's vertices. A single crash may therefore
# be counted for several lines.
# NOTE(review): this writes the count for the i-th LineString to the row
# labelled i, i.e. it assumes the CSV rows are in the same order as the
# LineStrings in the KML and that there is exactly one LineString per row.
# If there are more LineStrings than rows, ``.loc`` will append new rows.
# Verify against the data.
for i, line_string in enumerate(linestring_list):
    crashes = sum(
        1 for point_x, point_y in points_list
        if any(
            abs(x - point_x) <= CRASH_MATCH_TOLERANCE
            and abs(y - point_y) <= CRASH_MATCH_TOLERANCE
            for x, y in line_string
        )
    )
    df_pavement.loc[i, 'Crashes'] = crashes

print(df_pavement.head())



# xml_file_pavement = '/Users/lebakuprathyushkumarreddy/Pavement.xml'
# xml_file_crash = "/Users/lebakuprathyushkumarreddy/Downloads/Crash_Data_2022.xml"
# csv_file_pavement = '/Users/lebakuprathyushkumarreddy/Pavement.csv'

  df_pavement = pd.read_csv(csv_file_pavement)


   OBJECTID              ORIGKEY  PMISYR     ROUTE_ID  FROM_MEASURE  \
0       1.0  00131085 24086 5452  2022.0  S001930001N      85.85317   
1       2.0  00131086 54086 9752  2022.0  S001930001N      86.58392   
2       3.0  00131086 97087 6152  2022.0  S001930001N      87.01304   
3       4.0  00131087 61087 9652  2022.0  S001930001N      87.97725   
4       5.0  00131087 96089 4452  2022.0  S001930001N      88.14779   

   TO_MEASURE  SYSTEM  ROUTE  DIR   BPOST  ... FAILURE_AREA FAILURE_AREA_MI  \
0    86.58392     3.0    1.0  1.0  085 24  ...         32.0            43.8   
1    87.01304     3.0    1.0  1.0  086 54  ...          0.0             0.0   
2    87.97725     3.0    1.0  1.0  086 97  ...          0.0             0.0   
3    88.14779     3.0    1.0  1.0  087 61  ...          3.0            17.4   
4    89.56268     3.0    1.0  1.0  087 96  ...         38.0            26.9   

  FAILURE_CNT  FAILURE_CNT_MI  \
0        10.0            13.7   
1         0.0             0.0   

In [60]:
# Crash rate = crashes * 100,000,000 / (AADT * PMIS_LENGTH * 365).
# NOTE(review): presumably crashes per 100 million vehicle-distance units for
# a one-year period, with AADT as daily traffic and PMIS_LENGTH as segment
# length -- confirm the units. Rows whose denominator is 0 yield inf/NaN.
df_pavement["Crash_Rate"] = (
    df_pavement["Crashes"] * 100000000
    / (df_pavement["AADT"] * df_pavement["PMIS_LENGTH"] * 365)
)

In [61]:
# Display the pavement table, which now carries the Crashes and Crash_Rate columns.
df_pavement

Unnamed: 0,OBJECTID,ORIGKEY,PMISYR,ROUTE_ID,FROM_MEASURE,TO_MEASURE,SYSTEM,ROUTE,DIR,BPOST,...,FAILURE_AREA_MI,FAILURE_CNT,FAILURE_CNT_MI,PATHWEB,STRUC_C_PCT,UNSEALCRK,Shape__Length,New_col,Crashes,Crash_Rate
0,1.0,00131085 24086 5452,2022.0,S001930001N,85.85317,86.58392,3.0,1.0,1.0,085 24,...,43.8,10.0,13.7,http://rams.iowadot.gov/pathweb/?cycle='Curren...,6.0,100.0,1572.384335,1.0,12.0,286.255592
1,2.0,00131086 54086 9752,2022.0,S001930001N,86.58392,87.01304,3.0,1.0,1.0,086 54,...,0.0,0.0,0.0,http://rams.iowadot.gov/pathweb/?cycle='Curren...,16.0,73.0,924.786561,2.0,0.0,0.000000
2,3.0,00131086 97087 6152,2022.0,S001930001N,87.01304,87.97725,3.0,1.0,1.0,086 97,...,0.0,0.0,0.0,http://rams.iowadot.gov/pathweb/?cycle='Curren...,9.0,73.0,2082.699791,3.0,6.0,271.798217
3,4.0,00131087 61087 9652,2022.0,S001930001N,87.97725,88.14779,3.0,1.0,1.0,087 61,...,17.4,2.0,11.6,http://rams.iowadot.gov/pathweb/?cycle='Curren...,0.0,100.0,367.455262,4.0,0.0,0.000000
4,5.0,00131087 96089 4452,2022.0,S001930001N,88.14779,89.56268,3.0,1.0,1.0,087 96,...,26.9,8.0,5.7,http://rams.iowadot.gov/pathweb/?cycle='Curren...,3.0,100.0,3049.606515,5.0,5.0,54.427678
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4115,4116.0,17531109 00110 4713,2022.0,S001930175E,109.47359,110.95011,3.0,175.0,1.0,109 00,...,0.0,0.0,0.0,http://rams.iowadot.gov/pathweb/?cycle='Curren...,0.0,,3207.441024,4116.0,0.0,0.000000
4116,4117.0,17531110 47111 6413,2022.0,S001930175E,110.95011,112.11939,3.0,175.0,1.0,110 47,...,0.0,0.0,0.0,http://rams.iowadot.gov/pathweb/?cycle='Curren...,2.0,100.0,2540.013140,4117.0,0.0,0.000000
4117,4118.0,17531111 64117 6594,2022.0,S001930175E,112.11939,118.12912,3.0,175.0,1.0,111 64,...,0.0,0.0,0.0,http://rams.iowadot.gov/pathweb/?cycle='Curren...,0.0,100.0,13054.969770,4118.0,0.0,0.000000
4118,4119.0,17531117 65122 7794,2022.0,S001930175E,118.12912,123.21996,3.0,175.0,1.0,117 65,...,0.0,0.0,0.0,http://rams.iowadot.gov/pathweb/?cycle='Curren...,16.0,100.0,11058.948600,4119.0,1.0,33.369906


In [62]:
# Persist the pavement table (including the crash columns) to CSV without
# writing the DataFrame index as a column.
df_pavement.to_csv(
    path_or_buf='/Users/lebakuprathyushkumarreddy/Downloads/pavement_with_crashes_for_each_collisiontype_csvfile/Pavement_with_road_type_non_intersecting_collision_2022.csv',
    index=False,
)