In [1]:
import requests
import xlrd
import pandas as pd
import numpy as np
import json
import time as time_module 
import geopandas as gpd
import matplotlib.pyplot as plt
import polyline
from shapely.geometry import LineString, MultiLineString
from shapely.ops import linemerge
from collections import defaultdict
import math
from geopy.distance import geodesic
from shapely.ops import nearest_points
import folium
import geopandas as gpd
import pandas as pd
from pprint import pprint
import os
import re
import glob
from dotenv import load_dotenv
from shapely.geometry import Point


pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
os.environ['OGR_GEOMETRY_ACCEPT_UNCLOSED_RING'] = 'NO'

# Retrieving api key
load_dotenv("../key.env")
api_key = os.getenv("API_KEY")
TOKEN = os.getenv('ONEMAPTOKEN')

#### Reading in dataset

In [2]:
geospatial_train_path = "../datasets/geospatial_layer/TrainStation_Jul2024/RapidTransitSystemStation.shp"
train_stations = pd.read_excel("../datasets/Train_Stations.xls")
geospatial_train_gdf = gpd.read_file(geospatial_train_path)

In [3]:
%run get_bus_info_function.ipynb
bus_services_df = get_bus_info("https://datamall2.mytransport.sg/ltaodataservice/BusServices", api_key)
bus_routes_df = get_bus_info("https://datamall2.mytransport.sg/ltaodataservice/BusRoutes", api_key)
bus_stops_df = get_bus_info("https://datamall2.mytransport.sg/ltaodataservice/BusStops", api_key)

In [13]:
bus_routes_gdf = gpd.read_file('../datasets/routes/filtered_bus_routes.geojson')

#### Data Pre-processing

In [4]:
bus_routes_stops = pd.merge(bus_routes_df, bus_stops_df, on = "BusStopCode", how = 'left')
bus_routes_stops = bus_routes_stops.merge(
    bus_services_df[['ServiceNo', 'Category']],  # Select only the columns needed for merging
    on='ServiceNo',  # Merge on BusStopCode
    how='left'  # Use 'left' join to keep all rows from bus_routes_stops
)

In [5]:
# Drop duplicates and assign it back to the original DataFrame
bus_routes_stops = bus_routes_stops.drop_duplicates().reset_index(drop=True)

# Filter the DataFrame for rows with 'Category' equal to 'TRUNK'
bus_routes_stops = bus_routes_stops[bus_routes_stops['Category'] == 'TRUNK']

In [6]:
# Step 1: Union the geometries for the same station
unioned_gdf = geospatial_train_gdf.dissolve(by='STN_NAM_DE',aggfunc='first')

# Step 2: Calculate the centroid of the unioned polygon
unioned_gdf['centroid'] = unioned_gdf.centroid
unioned_gdf['geometry'] = unioned_gdf['centroid']

# Reset index to clean up
unioned_gdf.reset_index(inplace=True)

  merged_geom = block.unary_union


In [7]:
# Function to normalize station names in train_stations_df
def normalize_station_name(name):
    return name.strip().upper()  # Ensure names are uppercase for consistent merging

# Apply normalization function to train_stations_df
train_stations['Normalized_Station'] = train_stations['MRT_Station'].apply(normalize_station_name)

# Create a column to append " MRT STATION" or " LRT STATION" based on the MRT_Line
train_stations['Station_MRT_LRT'] = train_stations.apply(
    lambda row: f"{row['Normalized_Station']} MRT STATION" if "LRT" not in row['MRT_Line'] else f"{row['Normalized_Station']} LRT STATION",
    axis=1
)

# Apply normalization to geospatial_train_df
# Strip ' MRT STATION' and ' LRT STATION' and normalize to uppercase
unioned_gdf['Normalized_Station'] = unioned_gdf['STN_NAM_DE'].str.strip().str.upper()

# Perform the merge on 'Station_MRT_LRT' from train_stations and 'Normalized_Station' from unioned_gdf
merged_train_stations = train_stations.merge(
    unioned_gdf,
    how='left',
    left_on='Station_MRT_LRT',
    right_on='Normalized_Station'
)

# Keeping necessary columns
columns_to_keep = ['Station_Code', 'MRT_Station', 'MRT_Line', 'TYP_CD_DES', 'geometry']
merged_train_stations = merged_train_stations[columns_to_keep]

# Check the resulting column names and sample data
print(merged_train_stations.head())


  Station_Code    MRT_Station           MRT_Line TYP_CD_DES  \
0          NS1    Jurong East  North-South Line         MRT   
1          NS2    Bukit Batok  North-South Line         MRT   
2          NS3   Bukit Gombak  North-South Line         MRT   
3          NS4  Choa Chu Kang  North-South Line         MRT   
4          NS5        Yew Tee  North-South Line         MRT   

                      geometry  
0  POINT (17866.487 35045.184)  
1  POINT (18676.448 36790.872)  
2  POINT (18940.178 37860.706)  
3  POINT (18101.056 40790.989)  
4  POINT (18438.643 42159.628)  


In [8]:
#  Convert Pandas DataFrame to a GeoDataFrame
gdf = gpd.GeoDataFrame(merged_train_stations, geometry='geometry')

#  Reproject the GeoDataFrame to EPSG:4326 (WGS 84 - latitude/longitude)
gdf_4326 = gdf.to_crs(epsg=4326)

# Extract Longitude and Latitude from the reprojected geometries
gdf_4326['Longitude'] = gdf_4326.geometry.x
gdf_4326['Latitude'] = gdf_4326.geometry.y

#  Convert back to a Pandas DataFrame (if you don't need the geometry anymore)
merged_train_stations = pd.DataFrame(gdf_4326)

# Removing redundant columns
columns_to_keep = ['Station_Code', 'MRT_Station', 'MRT_Line', 'Longitude', 'Latitude']
merged_train_stations = merged_train_stations[columns_to_keep]
merged_train_stations['Train_Line'] = merged_train_stations['Station_Code'].str.extract(r'([A-Za-z]+)')
merged_train_stations['Station_No'] = merged_train_stations['Station_Code'].str.extract(r'(\d+)').fillna(1).astype(int)
print(merged_train_stations.head())

  Station_Code    MRT_Station           MRT_Line   Longitude  Latitude  \
0          NS1    Jurong East  North-South Line   103.742263  1.333209   
1          NS2    Bukit Batok  North-South Line   103.749541  1.348997   
2          NS3   Bukit Gombak  North-South Line   103.751910  1.358672   
3          NS4  Choa Chu Kang  North-South Line   103.744369  1.385172   
4          NS5        Yew Tee  North-South Line   103.747402  1.397550   

  Train_Line  Station_No  
0         NS           1  
1         NS           2  
2         NS           3  
3         NS           4  
4         NS           5  


In [9]:
#  This is the train_lines gdf created in "./parallel_routes.ipynb" solely by constructing line strings between mrt stations
train_routes_gdf = gpd.read_file("../datasets/routes/train_lines.geojson")

In [10]:
# Convert bus stops (Pandas DataFrame) to GeoDataFrame with geometry points
bus_routes_stops_gdf = gpd.GeoDataFrame(
    bus_routes_stops,
    geometry=gpd.points_from_xy(bus_routes_stops['Longitude'], bus_routes_stops['Latitude']),
    crs="EPSG:4326"
)

## Method 1

In [None]:
ALPHA = 0.005 
# Custom function to merge dictionaries
def merge_dicts(dicts):
    merged = defaultdict(list)
    for d in dicts:
        for key, value in d.items():
            merged[key].extend(value if isinstance(value, list) else [value])
    return dict(merged)

# Function to calculate parallelness and aggregate by ServiceNo across directions
def calculate_parallelness_with_aggregation(bus_routes_stops_gdf, bus_routes_gdf, train_routes_gdf):
    parallel_scores = []

    for _, bus_route in bus_routes_gdf.iterrows():
        service_no = bus_route['index'].split('_')[0]
        direction = int(bus_route['index'].split('_')[-1].replace('direction_', ''))
        
        # Filter stops for the current service and direction
        route_stops = bus_routes_stops_gdf[(bus_routes_stops_gdf['ServiceNo'] == service_no) & 
                                           (bus_routes_stops_gdf['Direction'] == direction)]
        total_stops = len(route_stops)

        # Skip if no stops for this direction
        if total_stops == 0:
            continue

        # Dictionary to store parallel stop details per train line
        train_line_parallel_data = {}
        total_softmax_score = 0  # Aggregate softmax score for all train lines

        # Process each MRT line and calculate softmax-based parallelness
        for mrt_line_id, mrt_line_geom in train_routes_gdf.set_index('Train_Line').geometry.items():
            softmax_scores = []
            parallel_stop_codes = []
            parallel_stop_coords = []

            for _, stop in route_stops.iterrows():
                bus_stop_geom = stop.geometry
                bus_stop_code = stop['BusStopCode']

                # Calculate distance from bus stop to MRT line
                distance = bus_stop_geom.distance(mrt_line_geom)

                # Calculate softmax-based score
                softmax_score = np.exp(-ALPHA * distance)

                # Consider it as parallel if the softmax score is significant (e.g., above 0.5)
                if softmax_score > 0.5:
                    softmax_scores.append(softmax_score)
                    parallel_stop_codes.append(bus_stop_code)
                    parallel_stop_coords.append((bus_stop_geom.x, bus_stop_geom.y))

            # Sum softmax scores for the current train line
            line_softmax_score = sum(softmax_scores)
            total_softmax_score += line_softmax_score

            # Store parallel stop details if any parallel stops found
            if parallel_stop_codes:
                train_line_parallel_data[mrt_line_id] = {
                    'ParallelStopCodes': parallel_stop_codes,
                    'ParallelStopCoordinates': parallel_stop_coords,
                    'LineSoftmaxScore': line_softmax_score
                }

        # Calculate normalized parallelness score across all lines
        normalized_parallelness = total_softmax_score / total_stops if total_stops > 0 else 0

        # Append the result for the current bus service and direction
        parallel_scores.append({
            'ServiceNo': service_no,
            'Direction': direction,
            'TotalStops': total_stops,
            'CombinedParallelnessScore': normalized_parallelness,
            'TrainLineParallelData': train_line_parallel_data  # Stores details for each train line
        })

    # Convert to DataFrame
    parallel_results_df = pd.DataFrame(parallel_scores)

    # Step 2: Aggregate by ServiceNo across both directions
    aggregated_results = (
        parallel_results_df.groupby('ServiceNo')
        .agg({
            'TotalStops': 'sum',
            'CombinedParallelnessScore': lambda x: np.average(x, weights=parallel_results_df.loc[x.index, 'TotalStops']),
            'TrainLineParallelData': merge_dicts  # Use custom function to merge dictionary data
        })
        .reset_index()
    )

    # Sort by CombinedParallelnessScore for top results
    aggregated_results = aggregated_results.sort_values(by='CombinedParallelnessScore', ascending=False)

    return aggregated_results


In [15]:
# Now proceed with the parallelness calculation function
final_results_1 = calculate_parallelness_with_aggregation(bus_routes_stops_gdf, bus_routes_gdf, train_routes_gdf)

# Display the top-ranked results
print(final_results_1[['ServiceNo', 'TotalStops', 'CombinedParallelnessScore', 'TrainLineParallelData']].head(15))


    ServiceNo  TotalStops  CombinedParallelnessScore  \
216       45A           6                  12.997017   
75        146          16                  12.996993   
228       53M          18                  12.996992   
22       113A          20                  12.996978   
27       116A          11                  12.996971   
15       109A          15                  12.996965   
21        113          44                  12.996965   
25        115          13                  12.996960   
26        116          37                  12.996960   
3         101          46                  12.996960   
19        112          47                  12.996939   
2        100A          13                  12.996923   
20       112A          12                  12.996919   
281       80A          31                  12.996915   
283        82          43                  12.996891   

                                 TrainLineParallelData  
216  {'BP': [{'ParallelStopCodes': ['63291', '

### Method 2 V1: Using Euclidean to calculate angle and distance difference


In [16]:
# Constants
DISTANCE_THRESHOLD = 0.001  # Distance in meters
ANGLE_THRESHOLD = 25  # Maximum angle in degrees for parallelness
CONSECUTIVE_WEIGHT = 1.2  # Weight multiplier for consecutive parallel stops

# Custom function to merge dictionaries
def merge_dicts(dicts):
    merged = defaultdict(list)
    for d in dicts:
        for key, value in d.items():
            if isinstance(value, dict):
                merged[key].append(value)
            else:
                merged[key].extend(value if isinstance(value, list) else [value])
    return dict(merged)

# Function to calculate angle between two points
def calculate_angle(bus_p1, bus_p2, train_p1, train_p2):
    bus_dx = bus_p1.x - bus_p2.x
    bus_dy = bus_p1.y - bus_p2.y
    bus_norm = math.sqrt(bus_dy ** 2 + bus_dx ** 2)
    
    train_dx = train_p1.x - train_p2.x
    train_dy = train_p1.y - train_p2.y
    train_norm = math.sqrt(train_dy ** 2 + train_dx ** 2)

    if bus_norm == 0 or train_norm == 0:
        return 0

    norm_dot_prod = (bus_dx * train_dx + bus_dy * train_dy) / (bus_norm * train_norm)
    angle = np.degrees(np.arccos(norm_dot_prod))

    if angle >= 90:
        angle = 180 - angle

    return angle

# Updated function to calculate parallelness with consecutive parallel segments and combine directions
def calculate_parallelness_combined_directions(bus_routes_stops_gdf, bus_routes_gdf, train_routes_gdf):
    parallel_scores = []

    # Group by ServiceNo and Direction to process each bus service route individually
    for service_no, service_data in bus_routes_stops_gdf.groupby('ServiceNo'):
        directions = service_data['Direction'].unique()

        # Process each direction and aggregate if both directions exist
        for direction in directions:
            route_stops = service_data[service_data['Direction'] == direction]
            bus_route_row = bus_routes_gdf[bus_routes_gdf['index'] == f"{service_no}_direction_{direction}"]
            
            if bus_route_row.empty:
                print(f"Skipping {service_no} direction {direction} as it is missing in bus_routes_gdf.")
                continue

            # Retrieve the LineString geometry for the bus route
            total_stops = len(route_stops)

            if total_stops < 2:
                continue

            train_line_parallel_data = {}
            total_parallel_count = 0
            consecutive_parallel_count = 0  # Tracks consecutive parallel segments

            # Process each MRT line to calculate both distance-based parallelness and angular similarity
            for mrt_line_id, mrt_line_geom in train_routes_gdf.set_index('Train_Line').geometry.items():
                parallel_stop_codes = []
                parallel_stop_coords = []
                line_parallel_count = 0  # Track count for this MRT line
                consecutive_segment_length = 0  # Length of the current consecutive parallel sequence

                for i in range(total_stops - 1):
                    bus_stop_geom1 = route_stops.iloc[i].geometry
                    bus_stop_geom2 = route_stops.iloc[i + 1].geometry
                    bus_stop_code = route_stops.iloc[i]['BusStopCode']

                    bus_segment = LineString([bus_stop_geom1, bus_stop_geom2])
                    segment_midpoint = bus_segment.interpolate(0.5, normalized=True)

                    # Distance and angle calculations
                    distance = segment_midpoint.distance(mrt_line_geom)
                    nearest_train_segment = mrt_line_geom.interpolate(mrt_line_geom.project(segment_midpoint))
                    nearest_train_point = mrt_line_geom.interpolate(mrt_line_geom.project(nearest_train_segment) + 0.01)
                    angle_difference = calculate_angle(bus_stop_geom1, bus_stop_geom2, nearest_train_segment, nearest_train_point)

                    # Check parallel conditions and manage consecutive parallel segments
                    if distance <= DISTANCE_THRESHOLD and angle_difference <= ANGLE_THRESHOLD:
                        line_parallel_count += 1
                        parallel_stop_codes.append(bus_stop_code)
                        parallel_stop_coords.append((bus_stop_geom1.x, bus_stop_geom1.y))

                        # Track consecutive segments
                        consecutive_segment_length += 1
                    else:
                        # Apply weight if there was a consecutive parallel segment
                        if consecutive_segment_length > 1:
                            consecutive_parallel_count += consecutive_segment_length * CONSECUTIVE_WEIGHT
                        consecutive_segment_length = 0

                # Finalize consecutive parallel count for the last sequence
                if consecutive_segment_length > 1:
                    consecutive_parallel_count += consecutive_segment_length * CONSECUTIVE_WEIGHT

                # Store parallel stop details if any parallel stops found for this MRT line
                if parallel_stop_codes:
                    train_line_parallel_data[mrt_line_id] = {
                        'ParallelStopCodes': parallel_stop_codes,
                        'ParallelStopCoordinates': parallel_stop_coords,
                        'ParallelCount': line_parallel_count
                    }
                    total_parallel_count += line_parallel_count

            # Calculate a weighted score considering consecutive segments
            weighted_parallel_score = (total_parallel_count + consecutive_parallel_count) / total_stops if total_stops > 0 else 0

            parallel_scores.append({
                'ServiceNo': service_no,
                'Direction': direction,
                'TotalStops': total_stops,
                'WeightedParallelScore': weighted_parallel_score,
                'TrainLineParallelData': train_line_parallel_data
            })

    # Convert to DataFrame
    parallel_results_df = pd.DataFrame(parallel_scores)

    # Step 2: Aggregate by ServiceNo to combine directions
    aggregated_results = (
        parallel_results_df.groupby('ServiceNo')
        .agg({
            'TotalStops': 'sum',
            'WeightedParallelScore': 'mean',  # Average score across directions
            'TrainLineParallelData': merge_dicts  # Merge parallel data across directions
        })
        .reset_index()
    )

    # Sort by WeightedParallelScore for top results
    aggregated_results = aggregated_results.sort_values(by='WeightedParallelScore', ascending=False)

    return aggregated_results

# Run the function and display the top-ranked results aggregated by ServiceNo
final_results_4 = calculate_parallelness_combined_directions(bus_routes_stops_gdf, bus_routes_gdf, train_routes_gdf)


Skipping 50A direction 1 as it is missing in bus_routes_gdf.


### Method 2 V2: Using Geodesic Distance 

In [17]:
# Constants
DISTANCE_THRESHOLD = 300  # Distance in meters
ANGLE_THRESHOLD = 25  # Maximum angle in degrees for parallelness
CONSECUTIVE_WEIGHT = 1.2  # Weight multiplier for consecutive parallel stops

# Custom function to merge dictionaries
def merge_dicts(dicts):
    merged = defaultdict(list)
    for d in dicts:
        for key, value in d.items():
            if isinstance(value, dict):
                merged[key].append(value)
            else:
                merged[key].extend(value if isinstance(value, list) else [value])
    return dict(merged)

# Function to calculate angle between two segments using geodesic distances
def calculate_geodesic_angle(bus_p1, bus_p2, train_p1, train_p2):
    # Convert points to geopy-compatible tuples (lat, lon)
    bus_start = (bus_p1.y, bus_p1.x)
    bus_end = (bus_p2.y, bus_p2.x)
    train_start = (train_p1.y, train_p1.x)
    train_end = (train_p2.y, train_p2.x)
    
    # Vector representation of segments
    bus_dx, bus_dy = bus_p2.x - bus_p1.x, bus_p2.y - bus_p1.y
    bus_norm = math.sqrt(bus_dx**2 + bus_dy**2)
    
    train_dx, train_dy = train_p2.x - train_p1.x, train_p2.y - train_p1.y
    train_norm = math.sqrt(train_dx**2 + train_dy**2)
    
    if bus_norm == 0 or train_norm == 0:
        return 0

    # Calculate dot product for angle
    norm_dot_prod = (bus_dx * train_dx + bus_dy * train_dy) / (bus_norm * train_norm)
    angle = np.degrees(np.arccos(np.clip(norm_dot_prod, -1.0, 1.0)))

    # Ensure angle is at most 90 degrees
    if angle > 90:
        angle = 180 - angle

    return angle

# Updated function to calculate parallelness with consecutive parallel segments and combine directions
def calculate_parallelness(bus_routes_stops_gdf, train_routes_gdf):
    parallel_scores = []

    # Group by ServiceNo and Direction to process each bus service route individually
    for service_no, service_data in bus_routes_stops_gdf.groupby('ServiceNo'):
        directions = service_data['Direction'].unique()

        # Process each direction
        for direction in directions:
            route_stops = service_data[service_data['Direction'] == direction]
            total_stops = len(route_stops)

            if total_stops < 2:
                continue

            train_line_parallel_data = {}
            total_parallel_count = 0
            consecutive_parallel_count = 0  # Tracks consecutive parallel segments

            # Process each MRT line to calculate both distance-based parallelness and angular similarity
            for mrt_line_id, mrt_line_geom in train_routes_gdf.set_index('Train_Line').geometry.items():
                parallel_stop_codes = []
                parallel_stop_coords = []
                line_parallel_count = 0  # Track count for this MRT line
                consecutive_segment_length = 0  # Length of the current consecutive parallel sequence

                for i in range(total_stops - 1):
                    bus_stop_geom1 = route_stops.iloc[i].geometry
                    bus_stop_geom2 = route_stops.iloc[i + 1].geometry
                    bus_stop_code = route_stops.iloc[i]['BusStopCode']

                    bus_segment = LineString([bus_stop_geom1, bus_stop_geom2])

                    # Find the closest point on the bus segment to the MRT line
                    nearest_point_on_bus, nearest_point_on_train = nearest_points(bus_segment, mrt_line_geom)
                    distance = geodesic((nearest_point_on_bus.y, nearest_point_on_bus.x), (nearest_point_on_train.y, nearest_point_on_train.x)).meters

                    # Angle calculation using geodesic
                    next_point_on_train = mrt_line_geom.interpolate(mrt_line_geom.project(nearest_point_on_train) + 0.01)
                    angle_difference = calculate_geodesic_angle(nearest_point_on_bus, Point(bus_stop_geom2.x, bus_stop_geom2.y), nearest_point_on_train, next_point_on_train)

                    # Check parallel conditions and manage consecutive parallel segments
                    if distance <= DISTANCE_THRESHOLD and angle_difference <= ANGLE_THRESHOLD:
                        line_parallel_count += 1
                        parallel_stop_codes.append(bus_stop_code)
                        parallel_stop_coords.append((bus_stop_geom1.x, bus_stop_geom1.y))

                        # Track consecutive segments
                        consecutive_segment_length += 1
                    else:
                        # Apply weight if there was a consecutive parallel segment
                        if consecutive_segment_length > 1:
                            consecutive_parallel_count += consecutive_segment_length * CONSECUTIVE_WEIGHT
                        consecutive_segment_length = 0

                # Finalize consecutive parallel count for the last sequence
                if consecutive_segment_length > 1:
                    consecutive_parallel_count += consecutive_segment_length * CONSECUTIVE_WEIGHT

                # Store parallel stop details if any parallel stops found for this MRT line
                if parallel_stop_codes:
                    train_line_parallel_data[mrt_line_id] = {
                        'ParallelStopCodes': parallel_stop_codes,
                        'ParallelStopCoordinates': parallel_stop_coords,
                        'ParallelCount': line_parallel_count
                    }
                    total_parallel_count += line_parallel_count

            # Calculate a weighted score considering consecutive segments
            weighted_parallel_score = (total_parallel_count + consecutive_parallel_count) / total_stops if total_stops > 0 else 0

            parallel_scores.append({
                'ServiceNo': service_no,
                'Direction': direction,
                'TotalStops': total_stops,
                'WeightedParallelScore': weighted_parallel_score,
                'TrainLineParallelData': train_line_parallel_data
            })

    # Convert to DataFrame
    parallel_results_df = pd.DataFrame(parallel_scores)

    # Step 2: Aggregate by ServiceNo to combine directions
    aggregated_results = (
        parallel_results_df.groupby('ServiceNo')
        .agg({
            'TotalStops': 'sum',
            'WeightedParallelScore': 'mean',  # Average score across directions
            'TrainLineParallelData': merge_dicts  # Merge parallel data across directions
        })
        .reset_index()
    )

    # Sort by WeightedParallelScore for top results
    aggregated_results = aggregated_results.sort_values(by='WeightedParallelScore', ascending=False)

    return aggregated_results

# Run the function and display the top-ranked results aggregated by ServiceNo
final_results = calculate_parallelness(bus_routes_stops_gdf, train_routes_gdf)


  return lib.shortest_line(a, b, **kwargs)
  return lib.shortest_line(a, b, **kwargs)


In [18]:
final_results.head(20)

Unnamed: 0,ServiceNo,TotalStops,WeightedParallelScore,TrainLineParallelData
278,7A,35,1.931429,"{'CC': [{'ParallelStopCodes': ['82029', '82109..."
145,190,64,1.926588,"{'BP': [{'ParallelStopCodes': ['44009', '44539..."
254,67,163,1.889949,"{'BP': [{'ParallelStopCodes': ['44461', '44451..."
198,36B,51,1.87451,"{'CC': [{'ParallelStopCodes': ['02149', '02171..."
13,107M,68,1.802941,"{'CC': [{'ParallelStopCodes': ['01629', '02089..."
12,107,79,1.797372,"{'CC': [{'ParallelStopCodes': ['01629'], 'Para..."
248,63,102,1.764706,"{'CC': [{'ParallelStopCodes': ['71221', '01039..."
249,63A,53,1.750943,"{'CC': [{'ParallelStopCodes': ['71221', '01039..."
196,36,61,1.708197,"{'CC': [{'ParallelStopCodes': ['02149', '02171..."
18,111,56,1.707143,"{'CC': [{'ParallelStopCodes': ['11009', '08138..."


In [19]:
# Filter the final results to exclude ServiceNos with alphabetic characters
filtered_final_results = final_results[~final_results['ServiceNo'].str.contains(r'[A-Za-z]')]

# Display filtered results
print(filtered_final_results[['ServiceNo', 'TotalStops', 'WeightedParallelScore', 'TrainLineParallelData']].head(20))

    ServiceNo  TotalStops  WeightedParallelScore  \
145       190          64               1.926588   
254        67         163               1.889949   
12        107          79               1.797372   
248        63         102               1.764706   
196        36          61               1.708197   
18        111          56               1.707143   
207       400          21               1.695238   
38        122          39               1.687179   
281        80         118               1.656034   
54        133          86               1.639069   
55        134          36               1.616667   
219        48         105               1.603157   
368       972          56               1.592857   
10        106          98               1.529417   
41        124          90               1.510143   
252        65         128               1.500952   
292       851         108               1.491071   
1         100         111               1.443957   
259         

In [20]:
validation_check = final_results[final_results['ServiceNo'] == '107']['TrainLineParallelData'].iloc[0]
pprint(validation_check)

{'CC': [{'ParallelCount': 1,
         'ParallelStopCodes': ['01629'],
         'ParallelStopCoordinates': [(103.856894, 1.297085)]},
        {'ParallelCount': 2,
         'ParallelStopCodes': ['03011', '62121'],
         'ParallelStopCoordinates': [(103.85342223246427, 1.28561840945474),
                                     (103.87156809881778, 1.34682778451192)]}],
 'DT': [{'ParallelCount': 7,
         'ParallelStopCodes': ['60179',
                               '60221',
                               '60039',
                               '60029',
                               '60019',
                               '01529',
                               '03019'],
         'ParallelStopCoordinates': [(103.86834025123765, 1.32634025655419),
                                     (103.87081431318353, 1.32212380559601),
                                     (103.86983042567824, 1.31996473842763),
                                     (103.86600136758759, 1.31717045698013),
             