In [24]:
import geopandas as gpd
import pandas as pd
import csv
from pyproj import Transformer
import re

In [None]:
# Load the two raw TfL tables used by the rest of the notebook:
# the station-points table and the stations table.
csv_stationpoints = pd.read_csv(
    'tfl-stationdata-detailed/StationPoints.csv'
)
csv_stations = pd.read_csv(
    'tfl-stationdata-detailed/Stations.csv'
)

# Show both frames, one after the other, as a quick sanity check of the load.
print(csv_stationpoints, csv_stations, sep="\n")

                        UniqueId StationUniqueId AreaName  AreaId  Level  \
0       910GACTNCTL-1001002-AC-3     910GACTNCTL       AC       3      0   
1      910GACTNCTL-1001002-Bus-1     910GACTNCTL      Bus       1      0   
2     910GACTNCTL-1001002-ENTR-7     910GACTNCTL     ENTR       7      0   
3      910GACTNCTL-1001002-RLY-4     910GACTNCTL      RLY       4      1   
4      910GACTNCTL-1001002-RPL-2     910GACTNCTL      RPL       2      1   
...                          ...             ...      ...     ...    ...   
4059       HUBZWL-1000268-RLY-16          HUBZWL      RLY      16      0   
4060        HUBZWL-1000268-RLY-7          HUBZWL      RLY       7     -1   
4061      HUBZWL-1000268-RPL-N-4          HUBZWL    RPL-N       4     -4   
4062      HUBZWL-1000268-RPL-S-5          HUBZWL    RPL-S       5     -4   
4063        HUBZWL-1000268-TH-14          HUBZWL       TH      14      1   

            Lat       Lon FriendlyName  
0     51.508624 -0.263507           AC  
1    

In [None]:
# Attach station-level attributes to every station point.
# Inner join: a point is kept only when its StationUniqueId has a matching
# UniqueId in the stations table. Both inputs carry a "UniqueId" column, so
# pandas suffixes them as UniqueId_x (points) and UniqueId_y (stations).
merged_stations = csv_stationpoints.merge(
    csv_stations,
    how="inner",
    left_on="StationUniqueId",
    right_on="UniqueId",
)


Index(['UniqueId_x', 'StationUniqueId', 'AreaName', 'AreaId', 'Level', 'Lat',
       'Lon', 'FriendlyName', 'UniqueId_y', 'Name', 'FareZones',
       'HubNaptanCode', 'Wifi', 'OutsideStationUniqueId',
       'BlueBadgeCarParking', 'BlueBadgeCarParkSpaces',
       'TaxiRanksOutsideStation', 'MainBusInterchange', 'PierInterchange',
       'NationalRailInterchange', 'AirportInterchange',
       'EmiratesAirLineInterchange'],
      dtype='object')


In [None]:
# Select columns we need
# .copy() detaches the selection from the full merge result, so that columns
# can later be overwritten or added on this frame without pandas raising a
# SettingWithCopyWarning (relevant on pandas versions without copy-on-write).
merged_stations = merged_stations[['StationUniqueId', 'Name', 'FriendlyName', 'Lat', 'Lon']].copy()

print(merged_stations.head())

  StationUniqueId           Name FriendlyName        Lat       Lon
0     910GACTNCTL  Acton Central           AC  51.508624 -0.263507
1     910GACTNCTL  Acton Central          Bus  51.506514 -0.263717
2     910GACTNCTL  Acton Central         ENTR  51.508925 -0.262602
3     910GACTNCTL  Acton Central          RLY  51.508651 -0.262930
4     910GACTNCTL  Acton Central          RPL  51.508653 -0.263059


In [None]:
# Regex patterns and replacements
# Keys are case-insensitive regexes, values are the normalized category label.
# Patterns are tried in insertion order and the first match wins.
friendly_name_mapping = {
    r"(?i)\bsub[-_]?wa\b|\bsub[-_]?\b": "Subway",  # Matches Subwa, Sub-W, SUB, sub
    r"(?i)\brly[-_]?.*?\b": "Railway",  # Matches RLY, rly-xyz, etc.
    r"(?i)\brpl[-_]?.*?\b": "Railplatform",  # Matches RPL, RPL-N, rpl_s
    r"(?i)\bbus[-_]?[a-z]*\b": "Bus"  # Matches BUS, BusAO, bus
}

# Apply regex replacements
def normalize_friendly_name(name):
    """Map a raw FriendlyName to its normalized category label.

    Parameters
    ----------
    name : object
        Raw FriendlyName value. Anything that is not a ``str`` (for example
        NaN or None) is treated as "no category".

    Returns
    -------
    str or None
        The label from ``friendly_name_mapping`` whose pattern is the first to
        match anywhere in ``name``; ``name`` itself when it already is one of
        the normalized labels; ``None`` when nothing matches.
    """
    # re.search raises TypeError on non-strings; such values carry no category.
    if not isinstance(name, str):
        return None

    # Already-normalized labels pass through unchanged. The patterns do not
    # match "Subway", "Railway" or "Railplatform", so without this a second
    # run over normalized data would map them to None and the rows would be
    # dropped.
    if name in friendly_name_mapping.values():
        return name

    for pattern, replacement in friendly_name_mapping.items():
        if re.search(pattern, name):
            return replacement
    return None

# Normalize every FriendlyName, then remove rows where FriendlyName is None
# (i.e. the raw name matched none of the known categories).
# .assign() returns a new frame instead of writing a column into
# merged_stations in place, so no SettingWithCopyWarning is raised when
# merged_stations was produced by selecting columns from another frame.
merged_stations = merged_stations.assign(
    FriendlyName=merged_stations["FriendlyName"].astype(str).apply(normalize_friendly_name)
).dropna(subset=["FriendlyName"])


In [None]:
# Change crs to British National Grid (EPSG:27700)
# always_xy=True makes the transformer take (lon, lat) and return
# (easting, northing), regardless of the axis order declared by each CRS.
transformer = Transformer.from_crs("EPSG:4326", "EPSG:27700", always_xy=True)

# Convert Lat/Lon to Easting/Northing
# The transformer accepts whole arrays, so all points are converted in a single
# call instead of one Python-level call per row; this also works when the frame
# is empty. .assign() appends the two columns on a new frame rather than
# writing into a frame that may be a filtered slice.
easting, northing = transformer.transform(
    merged_stations["Lon"].to_numpy(),
    merged_stations["Lat"].to_numpy(),
)
merged_stations = merged_stations.assign(Easting=easting, Northing=northing)

# Save to CSV
merged_stations.to_csv("converted_stations_norm.csv", index=False)
print("Conversion complete. Saved as 'converted_stations_norm.csv'")

Conversion complete. Saved as 'converted_stations_norm.csv'
