In [282]:
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point, LineString
from bs4 import BeautifulSoup
from IPython.display import display

# Step 1: Read the CSV and KML files
csv_file_path = "/Users/lebakuprathyushkumarreddy/Downloads/pavement_with_crashes_for_each_collisiontype_csvfile/Crash_Data_53640.csv"
kml_file_path = '/Users/lebakuprathyushkumarreddy/Pavement.kml'

# Read the CSV file and create a GeoDataFrame
csv_data = pd.read_csv(csv_file_path)
geometry = [Point(lon, lat) for lon, lat in zip(csv_data['X'], csv_data['Y'])]
csv_gdf = gpd.GeoDataFrame(csv_data, geometry=geometry, crs='EPSG:4326')  # Use the appropriate CRS for your data

# Parse the KML file with BeautifulSoup
with open(kml_file_path, 'r') as kml_file:
    kml_content = kml_file.read()

soup = BeautifulSoup(kml_content, 'xml')

# Extract LineString geometries from KML
line_geometries = []
iri_values = []  # List to store IRI values
origkey_values=[]

attributes =["OBJECTID", "ORIGKEY", "PMISYR", "ROUTE_ID", "FROM_MEASURE", "TO_MEASURE", "SYSTEM", 
    "ROUTE", "DIR", "BPOST", "EPOST", "PAVTYP", "DESCRIPT", "CONYR", "RESYR", "PMIS_LENGTH", 
    "LANE_MILES", "MDIST", "COUNTY", "CITY", "PCI_2", "PCI_2DEF", "RUT_INDX", "IRI_INDX", 
    "FAULT_INDX", "CRACK_INDX", "IRI", "FRICT", "FRIDATE", "FAULT", "FAULTAV", "RUT", 
    "CRACK_RATIO", "T_INDX", "L_INDX", "LW_INDX", "LLW_INDX", "A_INDX", "ACRACKH", "ACRACKM", 
    "ACRACKL", "TCRACKH", "TCRACKM", "TCRACKL", "LCRACKH", "LCRACKM", "LCRACKL", "LCRACKWH", 
    "LCRACKWM", "LCRACKWL", "DCRACKH", "DCRACKM", "DCRACKL", "JTSPALLH", "JTSPALLM", 
    "JTSPALL_LONG", "JTSPALL_TRANS", "PATCHAB", "PATCHAG", "PATCHES", "COVERAGE", "CAPDAT", 
    "STRUC80", "STRUCT_NEED80", "STRUCAV", "STRUCJTR", "AVEK", "FWD_DATE", "TRYR", "AADT", 
    "TRUCKS", "NHS", "SYS_NHS", "URBAN", "PCLASS", "FCLASS", "MEDIAN", "MEDIAN_PCT", "LANES", 
    "SPEED", "SURFACE_TYPE", "PAVTHICK", "TACCDPTH", "TPCCDPTH", "BASEDPTH", "MLEVEL", "WIDTH", 
    "WDRIVELN", "RCURB", "LCURB", "ISHLDTIE", "ISHLDTYP", "ISHLDWID", "OSHLDTIE", "OSHLDTYP", 
    "OSHLDWID", "COMPLEX", "SUBDRAIN", "SUBDPCT", "SUBDMULT", "TREATMENT", "LAYR1", "LAYR2", 
    "LAYR3", "LAYR4", "LAYR5", "LAYR6", "LAYR7", "LAYR8", "PROJECT1", "PROJECT2", "PROJECT3", 
    "PROJECT4", "PROJECT5", "PROJECT6", "PROJECT7", "PROJECT8", "PROJTYP1", "PROJTYP2", 
    "PROJTYP3", "PROJTYP4", "PROJTYP5", "PROJTYP6", "PROJTYP7", "PROJTYP8", "SURTYP1", "SURTYP2", 
    "SURTYP3", "SURTYP4", "SURTYP5", "SURTYP6", "SURTYP7", "SURTYP8", "SURTHK1", "SURTHK2", 
    "SURTHK3", "SURTHK4", "SURTHK5", "SURTHK6", "SURTHK7", "SURTHK8", "BASTYP1", "BASTYP2", 
    "BASTYP3", "BASTYP4", "BASTYP5", "BASTYP6", "BASTYP7", "BASTYP8", "AGGCLAS1", "AGGCLAS2", 
    "AGGCLAS3", "AGGCLAS4", "AGGCLAS5", "AGGCLAS6", "AGGCLAS7", "AGGCLAS8", "BASTHK1", "BASTHK2", 
    "BASTHK3", "BASTHK4", "BASTHK5", "BASTHK6", "BASTHK7", "BASTHK8", "SUBTHK1", "SUBTHK2", 
    "SUBTHK3", "SUBTHK4", "SUBTHK5", "SUBTHK6", "SUBTHK7", "SUBTHK8", "RMVTHK1", "RMVTHK2", 
    "RMVTHK3", "RMVTHK4", "RMVTHK5", "RMVTHK6", "RMVTHK7", "RMVTHK8", "AGGSRC1", "AGGSRC2", 
    "AGGSRC3", "AGGSRC4", "AGGSRC5", "AGGSRC6", "AGGSRC7", "AGGSRC8", "REMARKS1", "REMARKS2", 
    "REMARKS3", "REMARKS4", "REMARKS5", "REMARKS6", "REMARKS7", "REMARKS8", "RMVTYP1", "RMVTYP2", 
    "RMVTYP3", "RMVTYP4", "RMVTYP5", "RMVTYP6", "RMVTYP7", "RMVTYP8", "AGGTYP1", "AGGTYP2", 
    "AGGTYP3", "AGGTYP4", "AGGTYP5", "AGGTYP6", "AGGTYP7", "AGGTYP8", "SUBTYP1", "SUBTYP2", 
    "SUBTYP3", "SUBTYP4", "SUBTYP5", "SUBTYP6", "SUBTYP7", "SUBTYP8", "REST_UPDATED", 
    "REST_UTC_OFFSET", "GLOBALID", "PATCH_PCT", "CONRTE", "FAILURE_AREA", "FAILURE_AREA_MI", 
    "FAILURE_CNT", "FAILURE_CNT_MI", "PATHWEB", "STRUC_C_PCT", "UNSEALCRK", "Shape__Length"]

# Initialize a dictionary to hold lists of values for each attribute
values_dict = {attribute: [] for attribute in attributes}
line_geometries = []

placemarks = soup.find_all('Placemark')
for placemark in placemarks:
    line = placemark.find('LineString')
    coordinates = line.find('coordinates').text.strip()
    coordinates = [tuple(map(float, coord.split(','))) for coord in coordinates.split(',0') if coord!='']
    line_geometry = LineString(coordinates)
    line_geometries.append(line_geometry)

    for attribute in attributes:
        simpledata = placemark.find('SimpleData', {'name': attribute})
        value = simpledata.text.strip() if simpledata else None
        values_dict[attribute].append(value)

# Create a GeoDataFrame for KML LineStrings
kml_gdf = gpd.GeoDataFrame(geometry=line_geometries, crs='EPSG:4326')  # Use the appropriate CRS for your data

# Add IRI values as a column to the GeoDataFrame
for attribute in values_dict:
    kml_gdf[attribute] = values_dict[attribute]
# Buffer the lines to create a buffer around them
buffer_distance = 0.0001  # Adjust this distance as needed (in degrees)
kml_gdf['buffered_geometry'] = kml_gdf['geometry'].buffer(buffer_distance)

# # Set the active geometry column for csv_gdf
csv_gdf = csv_gdf.set_geometry('geometry')
kml_gdf = kml_gdf.set_geometry('buffered_geometry')

# Perform spatial analysis to find points close to buffered lines
points_near_lines = gpd.sjoin(csv_gdf, kml_gdf, op='intersects')

# Print the result or perform further analysis
# print("Number of data points located near KML lines:", len(points_near_lines))


  super().__setitem__(key, value)
  super().__setitem__(key, value)
  super().__setitem__(key, value)
  super().__setitem__(key, value)
  super().__setitem__(key, value)
  super().__setitem__(key, value)
  super().__setitem__(key, value)
  super().__setitem__(key, value)
  super().__setitem__(key, value)
  super().__setitem__(key, value)
  super().__setitem__(key, value)
  super().__setitem__(key, value)
  super().__setitem__(key, value)
  super().__setitem__(key, value)
  super().__setitem__(key, value)
  super().__setitem__(key, value)
  super().__setitem__(key, value)
  super().__setitem__(key, value)
  super().__setitem__(key, value)
  super().__setitem__(key, value)
  super().__setitem__(key, value)
  super().__setitem__(key, value)
  super().__setitem__(key, value)
  super().__setitem__(key, value)
  super().__setitem__(key, value)
  super().__setitem__(key, value)
  super().__setitem__(key, value)
  super().__setitem__(key, value)
  super().__setitem__(key, value)
  super().__se