In [204]:
import geopandas as gpd
import pandas as pd
import numpy as np
from shapely.geometry import Point
from shapely.ops import linemerge, unary_union
from shapely.geometry import LineString, MultiLineString




In [205]:
# --- Paths -----------------------------------------------------------------
# Root of the thesis project plus the two data folders used by this notebook.
base_dir = 'D:/Desktop/MSc Thesis - Copy/'
dir_osm = 'D:/Desktop/MSc Thesis - Copy/Data/GIS data/south-korea-osm.shp/'
dir_urban = 'D:/Desktop/MSc Thesis - Copy/shapefiles/crop/Urban cropped/'

# --- Layers ----------------------------------------------------------------
# Crop neighbourhood layer, read as stored on disk.
crop_neig_offset = gpd.read_file(base_dir + 'shapefiles/crop/crop_neighborhood_offset.shp')

# OSM railway segments, reprojected to EPSG:5174 right after loading.
railways = gpd.read_file(dir_osm + 'gis_osm_railways_free_1.shp').to_crs('EPSG:5174')

# Sanity check: print the CRS of each layer so they can be compared by eye.
for layer in (crop_neig_offset, railways):
    print(layer.crs)



EPSG:5174
EPSG:5174


In [206]:
# Dissolve the crop neighbourhood layer into a single geometry once.
# Newer geopandas releases deprecate the `unary_union` attribute in favour of
# `union_all()`; fall back to the old attribute when the method is unavailable.
if hasattr(crop_neig_offset, 'union_all'):
    crop_area = crop_neig_offset.union_all()
else:
    crop_area = crop_neig_offset.unary_union

# Keep only railway segments that intersect the crop area and carry a name.
# `.copy()` detaches the result from the unfiltered frame so that columns can
# be added to it later without writing into a filtered slice.
railways = railways[railways.intersects(crop_area)]
railways = railways[~railways['name'].isna()].copy()

  railways = railways[railways.intersects(crop_neig_offset.unary_union)]


In [209]:
# Stitch the raw segments of every named railway into continuous lines.
# Each resulting line becomes one record: {'name': ..., 'geometry': LineString}.
merged_lines = []

for line_name, segments in railways.groupby('name'):
    # Dissolve all segments sharing this name into a single geometry.
    dissolved = unary_union(segments.geometry)

    # A lone LineString is already merged; anything else goes through linemerge.
    stitched = dissolved if isinstance(dissolved, LineString) else linemerge(dissolved)

    # Normalise the result to a list of LineStrings; other geometry types
    # contribute nothing.
    if isinstance(stitched, LineString):
        parts = [stitched]
    elif isinstance(stitched, MultiLineString):
        parts = list(stitched.geoms)
    else:
        parts = []

    merged_lines.extend({'name': line_name, 'geometry': part} for part in parts)

merged_railways = gpd.GeoDataFrame(merged_lines, crs=railways.crs)

In [210]:
import geopandas as gpd
from shapely.geometry import LineString, MultiLineString

buffer_distance = 300  # meters


def _drop_contained_lines(lines, distance):
    """Return the lines that are not swallowed by another kept line's buffer.

    Every line is buffered by ``distance`` (round caps). Lines are then visited
    from least to most "contained" (number of *other* buffers the line lies
    completely within; ties keep their input order) and a line is kept only if
    it is not within the buffer of a line that was already kept.

    Parameters
    ----------
    lines : list of shapely geometries
        Candidate lines belonging to one railway name.
    distance : float
        Buffer radius, in the units of the geometries' coordinates.

    Returns
    -------
    list
        The surviving lines, in their original input order. Non-empty whenever
        ``lines`` is non-empty, because the first visited line is always kept.
    """
    buffers = [line.buffer(distance, cap_style=1) for line in lines]

    # For each line, count how many other lines' buffers fully contain it.
    containment = [
        sum(1 for j, buf in enumerate(buffers) if j != i and line.within(buf))
        for i, line in enumerate(lines)
    ]

    # Stable sort: least-contained lines are considered first.
    visit_order = sorted(range(len(lines)), key=containment.__getitem__)

    kept = []
    for i in visit_order:
        # Nothing has been kept yet on the first pass, so the first line always
        # survives and no separate "keep at least one" fallback is needed.
        if not any(lines[i].within(buffers[j]) for j in kept):
            kept.append(i)

    # Emit survivors in input order so the output does not depend on the
    # order in which they happened to be selected.
    return [lines[i] for i in sorted(kept)]


cleaned_lines = []

for name, group in merged_railways.groupby('name'):
    survivors = _drop_contained_lines(list(group.geometry), buffer_distance)

    # One survivor stays a plain line; several are bundled into a multi-part
    # geometry so that every name maps to exactly one row.
    if len(survivors) == 1:
        final_geom = survivors[0]
    else:
        final_geom = MultiLineString(survivors)

    cleaned_lines.append({
        'name': name,
        'geometry': final_geom
    })

cleaned_gdf = gpd.GeoDataFrame(cleaned_lines, crs=merged_railways.crs)

In [211]:
# Railway names to exclude from the cleaned layer. Each name is listed once;
# repeated entries have no effect on the `isin` filter below.
# NOTE(review): '경인제2본' looks like a truncated form of '경인제2본선' - confirm
# that it is a real name in the data before removing either entry.
drop_names = [
    '경부제1본선', '경부제3본선', '경부선', '용산역 인상선',
    '경인선', '경부제2선', '경인제2본', '경인제2본선',
    '구로기지선', '사당역 인상선', '남태령역 인상선'
]

# Keep only the rows whose name is not in the exclusion list.
cleaned_gdf = cleaned_gdf[~cleaned_gdf['name'].isin(drop_names)]


In [212]:
# Record each segment's index label as an explicit column. `assign` builds a
# new frame instead of writing a column into a previously filtered one.
railways = railways.assign(segment_id=railways.index)

# For every cleaned railway, collect the index labels of the original segments
# that share its name and intersect its cleaned geometry.
matched_labels = []

for name, cleaned_geom in zip(cleaned_gdf['name'], cleaned_gdf.geometry):
    candidates = railways[railways['name'] == name]

    # Vectorised test of all same-named segments against the cleaned geometry.
    hits = candidates.intersects(cleaned_geom)
    matched_labels.extend(candidates.index[hits.to_numpy()])

# Selecting rows by label keeps the GeoDataFrame type, column dtypes and CRS of
# `railways`, and yields an empty frame (rather than failing) when no segment
# matched.
final_segments = railways.loc[matched_labels]

In [213]:


# Write the matched railway segments to disk as an ESRI Shapefile.
output_path = base_dir + 'Data/GIS data/Urban cropped/tests/railways_cropped11.shp'
final_segments.to_file(output_path, driver='ESRI Shapefile')