In [1]:
import pandas as pd
import geopandas as gpd
import numpy as np
from scipy.spatial import cKDTree
from shapely.geometry import Point, LineString, Polygon
from shapely.ops import nearest_points
from shapely import affinity
from datetime import datetime

## Set up variables

In [2]:
# url for Chicago's street center lines layer, from their data portal
cdot_cenerlines_shapefile_path = 'https://data.cityofchicago.org/api/geospatial/6imu-meau?method=export&format=Original'

# path to csv file containing columns named (at minimum)"On", "From", "To", and "ID".
# ID should be a unique id for each row in the list.
on_from_to_list_path = 'on_from_to_list.csv'

# path to folder the processed data should be saved in
save_folder_path = r'processed_files/'

# timestamp for file naming
current_time = datetime.now().strftime('%Y%m%d-%H%M%S')

# file paths for final processed data to be exported
file_path_mapped_gpkg = f"{save_folder_path}{current_time}_on_from_to_mapped.gpkg"
file_path_unmapped_csv = f"{save_folder_path}{current_time}_on_from_to_unmapped.csv"
file_path_mapping_status_list_csv = f"{save_folder_path}{current_time}_on_from_to_all_mapping_status.csv"

## Download and prepare city street centerlines shapefile

In [3]:
# Download CDOT streets layer
cdot_centerlines_gdf = gpd.read_file(cdot_cenerlines_shapefile_path)

In [4]:
# Combine street names and types into a single column
# to match the expected format of the on/from/to street lists
gdf = cdot_centerlines_gdf.copy() 
    
gdf['On_Street'] = gdf['STREET_NAM'] + ' ' + gdf['STREET_TYP']

gdf.head(5)

Unnamed: 0,OBJECTID,FNODE_ID,TNODE_ID,TRANS_ID,PRE_DIR,STREET_NAM,STREET_TYP,SUF_DIR,STREETNAME,L_F_ADD,...,FLAG_STRIN,EWNS_DIR,EWNS_COORD,CREATE_USE,CREATE_TIM,UPDATE_USE,UPDATE_TIM,SHAPE_LEN,geometry,On_Street
0,510,10809,16581,127104,S,YALE,AVE,,1782,0,...,,W,232,EXISTING,1999-01-01,EXISTING,1999-01-01,220.566012,"LINESTRING (1175570.097 1863498.080, 1175577.8...",YALE AVE
1,511,6501,34082,128895,S,COTTAGE GROVE,AVE,,1236,7301,...,,,0,EXISTING,1999-01-01,EXISTING,1999-01-01,664.774607,"LINESTRING (1182822.668 1856787.427, 1182824.9...",COTTAGE GROVE AVE
2,512,15338,22358,142645,S,CAMPBELL,AVE,,1177,10801,...,,W,2500,EXISTING,1999-01-01,EXISTING,1999-01-01,665.378453,"LINESTRING (1161631.239 1832936.206, 1161634.6...",CAMPBELL AVE
3,513,15799,28881,148189,S,SANGAMON,ST,,1696,0,...,,W,932,EXISTING,1999-01-01,EXISTING,1999-01-01,152.564966,"LINESTRING (1172013.812 1831615.472, 1171905.1...",SANGAMON ST
4,514,36407,36534,139728,W,118TH,ST,,1823,1933,...,,S,11800,EXISTING,1999-01-01,EXISTING,1999-01-01,332.691382,"LINESTRING (1165307.502 1826592.692, 1165260.9...",118TH ST


## Map a single corridor based on on, from, and to street names

In [5]:
def scale_linestring(line, scale_length):
    """
    Scale a linestring to a specified length from its midpoint.
    """

    # Calculate the scaling factor
    current_length = line.length
    scaling_factor = scale_length / current_length

    # Scale the line
    midpoint = line.interpolate(0.5, normalized=True)
    scaled_line = affinity.scale(line, xfact=scaling_factor, yfact=scaling_factor, origin=midpoint)

    return scaled_line

In [6]:
def find_nearest_segments(street1_gdf, street2_gdf):
    """
    Find the nearest segment from each street based on their proximity to the other street.
    """
    # Find the segment in street1 that is closest to any point on street2
    min_distance_1 = street1_gdf.distance(street2_gdf.geometry.unary_union).min()
    nearest_segment_1 = street1_gdf[street1_gdf.distance(street2_gdf.geometry.unary_union) == min_distance_1].geometry.iloc[0]

    # Find the segment in street2 that is closest to any point on street1
    min_distance_2 = street2_gdf.distance(street1_gdf.geometry.unary_union).min()
    nearest_segment_2 = street2_gdf[street2_gdf.distance(street1_gdf.geometry.unary_union) == min_distance_2].geometry.iloc[0]
    
    return nearest_segment_1, nearest_segment_2

In [7]:
def extend_segments_to_intersection(segment1, segment2, scale_length=10560):  # 10560 feet = 2 miles
    """
    Extend the two given segments to create an intersection line.
    """
    # Create extended lines from the segments
    extended_line_1 = scale_linestring(segment1, scale_length)
    extended_line_2 = scale_linestring(segment2, scale_length)
    
    # Check intersection of the extended lines
    intersection = extended_line_1.intersection(extended_line_2)
    
    # If they intersect, return the intersection
    if not intersection.is_empty:
        return intersection
    else:
        # If they don't, return both extended lines for visualization
        return extended_line_1, extended_line_2

In [8]:
def get_intersection_point(street1_gdf, street2_gdf, scale_length=2640):  # 2640 ft = 1/2 mile
    """
    Return the intersection point of two streets. If they don't intersect, find the closest features
    and create a virtual intersection by extending the features to the specified scale_length.
    """
    intersection = street1_gdf.geometry.unary_union.intersection(street2_gdf.geometry.unary_union)
    
    # If intersection exists and is a point, return it
    if not intersection.is_empty:
        if intersection.geom_type == "Point":
            return intersection
        # if multiple intersections exist, return only the first one.
        elif intersection.geom_type == "MultiPoint":
            points = [p for p in intersection.geoms]
            return points[0]
    
    # If no intersection, find the closest points and create a virtual intersection
    nearest_segment_1, nearest_segment_2 = find_nearest_segments(street1_gdf, street2_gdf)
    
    virtual_intersection = extend_segments_to_intersection(nearest_segment_1, nearest_segment_2)
    
    return virtual_intersection


In [9]:
def filter_segments_between_points_keep_details(on_street_gdf, from_intersection, to_intersection):
    """
    Filter the on_street segments based on the orientation of the line formed by the intersections.
    """
    # Determine the orientation of the intersection line
    delta_x = abs(to_intersection.x - from_intersection.x)
    delta_y = abs(to_intersection.y - from_intersection.y)
    
    filtered_segments = []
    
    # If intersection_line is oriented more in the x direction
    if delta_x > delta_y:
        min_x, max_x = sorted([from_intersection.x, to_intersection.x])
        for index, row in on_street_gdf.iterrows():
            midpoint_x = row['geometry'].centroid.x
            if min_x <= midpoint_x <= max_x:
                filtered_segments.append(row)
    # If intersection_line is oriented more in the y direction
    else:
        min_y, max_y = sorted([from_intersection.y, to_intersection.y])
        for index, row in on_street_gdf.iterrows():
            midpoint_y = row['geometry'].centroid.y
            if min_y <= midpoint_y <= max_y:
                filtered_segments.append(row)
                
    # Convert the list of filtered segments to a GeoDataFrame
    filtered_gdf = gpd.GeoDataFrame(filtered_segments, crs=on_street_gdf.crs)
    
    return filtered_gdf

In [10]:
    
def extract_street_segments(gdf, on_street, from_street, to_street):
    '''
    Extract the segment of on_street that is between its intersection with from_street and to_street.
    
    on_street, from_street, and to_street are strings representing cleaned official street names found
    in the gdf.  (For example, "Madison St")
    
    gdf is the CDOT Street Centerline shapefile downloaded and converted to a 
    GeoDataFrame.
    '''

    # This version is for the CDOT map base layer, downloaded as a shapefile.
    # It has separate street_nam and street_typ fields.

    # Filter the GeoDataFrame for the given streets, ignoring case 
    on_street_gdf = gdf[gdf['On_Street'].str.lower() == on_street.lower()]
    
    from_street_gdf = gdf[gdf['On_Street'].str.lower() == from_street.lower()]
    
    to_street_gdf = gdf[gdf['On_Street'].str.lower() == to_street.lower()]
    
    # Get the intersection points
    on_from_point = get_intersection_point(on_street_gdf, from_street_gdf)
    on_to_point = get_intersection_point(on_street_gdf, to_street_gdf)
    
    # Filter the segments based on the orientation of the line formed by the intersections
    filtered_segments_gdf = filter_segments_between_points_keep_details(on_street_gdf, on_from_point, on_to_point)
    
    # dissolve so the entire corridor is one feature
    # dissolved_gdf = filtered_segments_gdf.dissolve()
    
    # add on/from/to street data to the feature
    # dissolved_gdf['On'] = on_street
    # dissolved_gdf['From'] = from_street
    # dissolved_gdf['To'] = to_street

    # return dissolved_gdf
    return filtered_segments_gdf



In [11]:
## Run code to test ##

on_street = ('Halsted St')
from_street = ('98th St')
to_street = ('74th St')

test_gdf = extract_street_segments(gdf, on_street, from_street, to_street)

test_gdf.explore()

## Map a list of on/from/to streets

In [12]:
# read the original on/from/to list as a DataFrame
oft_df = pd.read_csv(on_from_to_list_path)

# Set up a dataframe to list items that cannot be mapped
oft_unmapped = pd.DataFrame()


In [13]:
def extract_segments_from_list(row, gdf):
    '''Extract street segments based on the on, from, and to streets. 
    Return the extracted data.'''

    try:
        gdf_extracted = extract_street_segments(gdf, row['On'], row['From'], row['To'])
        return gdf_extracted

    # if the location can't be mapped, return None.
    except:
        return None



In [14]:

# # Add geometry data to the on/from/to list.
# oft_df['geometry'] = oft_df.apply(get_oft_geometry, args=(gdf,), axis=1)

# # Turn on/from/to list into a GeoDataFrame
# oft_gdf = gpd.GeoDataFrame(oft_df, crs="EPSG:3435", geometry='geometry')

# Set up dataframe to contain all mapped segments
on_from_to_mapped_df = pd.DataFrame()

# Add a column in the on/from/to list to flag whether rows were mapped
oft_df['Mapped'] = ''

# Loop through rows, extract segments, and add the segment info 
# from the on-from-to list
for index, row in oft_df.iterrows():
    extracted_gdf = extract_segments_from_list(row, gdf)
    # if there are any features in the extracted geodataframe
    if not (extracted_gdf is None):
        # add on/from/to list info to the extracted data
        extracted_gdf['oft_list_on'] = row['On']
        extracted_gdf['oft_list_from'] = row['From']
        extracted_gdf['oft_list_to'] = row['To']
        extracted_gdf['oft_list_id'] = row['ID']
        # add extracted data to the full mapped file
        on_from_to_mapped_df = pd.concat([on_from_to_mapped_df,extracted_gdf])
        # Flag as mapped in the on/from/to list
        oft_df['Mapped'].iloc[index] = 'Yes'
    else:
        # flag as unmapped in the on/from/to list
        oft_df['Mapped'].iloc[index] = 'No'


# filter out the unmapped locations
unmapped_filter = oft_df['Mapped']=='No'

# # Create a geoDataFrame with only mapped locations
on_from_to_unmapped_df = oft_df[unmapped_filter]

on_from_to_mapped_gdf = gpd.GeoDataFrame(on_from_to_mapped_df, crs='EPSG:3435')

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  oft_df['Mapped'].iloc[index] = 'Yes'
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  oft_df['Mapped'].iloc[index] = 'Yes'
  return lib.intersection(a, b, **kwargs)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  oft_df['Mapped'].iloc[index] = 'No'
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  oft_df['Mapped']

In [20]:
on_from_to_mapped_gdf.explore()


In [16]:
oft_df

Unnamed: 0,ID,Orig_ON,Orig_FROM,Orig_TO,On,From,To,Mapped
0,1,E. 53rd St. (5300S),S. State St. (0E),S. Martin Luther King Jr. Dr. (344E),53RD ST,STATE ST,DR MARTIN LUTHER KING JR DR,Yes
1,2,W. 123rd St. (12300S),S. Sangamon St. (932W),S. Halsted St. (800W),123RD ST,SANGAMON ST,HALSTED ST,Yes
2,3,S. Normal Ave. (500W),W. 59th St. (5900S),W. 63rd St. (6300S),NORMAL AVE,59TH ST,63RD ST,No
3,4,. Fort Dearborn Dr. (),. Dead end (),E. 31st St. (3100S),FORT DEARBORN,CERMAK RD,31ST ST,No
4,5,E. 86th St. (8600S),S. State St. (0E),S. Dr. Martin Luther King Jr. Dr. (400E),86TH ST,STATE ST,DR MARTIN LUTHER KING JR DR,Yes
...,...,...,...,...,...,...,...,...
228,229,W. Lunt Av. (7000N),N. Oriole Ave. (7600W),N. Harlem Ave. (7200W),LUNT AVE,ORIOLE AVE,HARLEM AVE,Yes
229,230,N. Lamon Ave. (4900W),N. Roscoe St. (3400N),N. Nelson St. (3032N),LAMON AVE,ROSCOE ST,NELSON ST,Yes
230,231,W. Berteau Ave. (4200N),N. Kedzie Ave. (3200W),N. Sacramento Ave. (3000W),BERTEAU AVE,KEDZIE AVE,SACRAMENTO AVE,Yes
231,232,W. Grace St. (3800N),N. St. Louis Ave. (3500W),N. Kedzie Ave. (3200W),GRACE ST,ST LOUIS AVE,KEDZIE AVE,Yes


## Export mapped and unmapped streets as geopackages and csv files

In [17]:
# export mapped locations to gpkg
on_from_to_mapped_gdf.to_file(file_path_mapped_gpkg, driver='GPKG', mode='w')

# export list of unmapped locations to csv
on_from_to_unmapped_df.to_csv(file_path_unmapped_csv)

# export location list with mapped / unmapped status to csv
oft_df.to_csv(file_path_mapping_status_list_csv)


In [18]:
on_from_to_unmapped_df

Unnamed: 0,ID,Orig_ON,Orig_FROM,Orig_TO,On,From,To,Mapped
2,3,S. Normal Ave. (500W),W. 59th St. (5900S),W. 63rd St. (6300S),NORMAL AVE,59TH ST,63RD ST,No
3,4,. Fort Dearborn Dr. (),. Dead end (),E. 31st St. (3100S),FORT DEARBORN,CERMAK RD,31ST ST,No
41,42,E. 82nd St. (8200S),S. Yates Ave. (2435E),S. Marquette Ave. (2700E),82ND ST,YATES AVE,MARQUETTE AVE,No
46,47,E. 107th St. (10700S),S. Mackinaw Ave. (3332E),S. Dead End (3910E),107TH ST,MACKINAW AVE,AVENUE E,No
47,48,W. Henderson St. (3334N),N. Kimball Ave. (3400W),N. Avondale Ave. (1800W),HENDERSON ST,KIMBALL AVE,AVONDALE AVE,No
49,50,E. 136th St. (13600S),S. Avenue O (3432E),S. Avenue K (3624E),136TH ST,AVENUE O,AVENUE K,No
52,53,E. 111th St. (11100S),S. Green Bay Ave. (3400E),s. Avenue G (3800E),111TH ST,GREEN BAY AVE,AVENUE G,No
53,54,E. 133rd St. (13300S),S. Brainard Ave. (2900E),S. Dead End (4032E),133RD ST,BRAINARD AVE,STATE LINE RD,No
63,64,S. Campbell Ave. (2500W),W. Congress Dr. (500S),W. Roosevelt Dr. (1200S),CAMPBELL AVE,CONGRESS DR,ROOSEVELT DR,No
72,73,W. Fulton St. (300N),N. Austin Ave. (6000W),N. Cicero Ave. (4800W),FULTON ST,AUSTIN AVE,CICERO AVE,No
