In [1]:
import pandas as pd
import folium

In [2]:
# Website: https://www.data.gv.at/katalog/dataset/ab4a73b6-1c2d-42e1-b4d9-049e04889cf0#resources
# ZIP: http://www.wienerlinien.at/ogd_realtime/doku/ogd/gtfs/gtfs.zip
# Load GTFS data
df_agency = pd.read_csv("data/agency.txt")
df_calendar_dates = pd.read_csv("data/calendar_dates.txt")
df_calendar = pd.read_csv("data/calendar.txt")
df_routes = pd.read_csv("data/routes.txt")
df_shapes = pd.read_csv("data/shapes.txt")
df_stop_times = pd.read_csv("data/stop_times.txt")
df_stops = pd.read_csv("data/stops.txt")
df_trips = pd.read_csv("data/trips.txt")

Rename the columns so Grafana pics up the `latitude` and `logitude` automaticaly

In [None]:
df_stops.columns = ["id", "name", "latitude", "logitude", "zone_id"]
df_stops

Unnamed: 0,id,name,latitude,logitude,zone_id
0,at:43:3121:0:1,Baden Josefsplatz,48.005988,16.233761,3045
1,at:43:3134:0:1,Baden Viadukt,48.003836,16.240965,3045
2,at:43:3134:0:2,Baden Viadukt,48.003734,16.240912,3045
3,at:43:3142:0:3,Baden Leesdorf,47.999634,16.251350,3045
4,at:43:3142:0:4,Baden Leesdorf,47.999556,16.251637,3045
...,...,...,...,...,...
4535,at:49:997:0:1,Paulinensteig,48.218717,16.299077,100
4536,at:49:997:0:2,Paulinensteig,48.218442,16.299311,100
4537,at:49:998:0:1,Pellmanngasse,48.130860,16.288109,100
4538,at:49:998:0:2,Pellmanngasse,48.130656,16.288783,100


Get a understanding of the shapes.txt and why some line have multiple versions (id)

In [3]:
# pd.set_option('display.max_rows', None)
# pd.reset_option('display.max_rows')

# rename columns 
df_shapes.columns = ["id", "latitude", "longitude", "sequence", "shape_dist_traveled"]

# Filter for lines with 'H' in the ID indicating the line direction
# H = Hin, R = Retour (just an assumption)
df_shapes_filtered = df_shapes[df_shapes['id'].str.contains(r'.*H', regex=True)].copy()

# Extract version and line from the ID
df_shapes_filtered['version'] = df_shapes_filtered['id'].str.extract(r'-j25-(.*)\.H').astype(float)
df_shapes_filtered['line'] = df_shapes_filtered['id'].str.extract(r'^[^-]+-([^-]+)-')

# Get row count per id, version, and line
rows_count = df_shapes_filtered.groupby(['id', 'version', 'line']).size().reset_index(name='count')
df_shapes_clean = rows_count.sort_values(
                            by=['line', 'count', 'version'],
                            ascending=[True, False, False]
                          ).groupby('line').head(1)

df_shapes_clean


Unnamed: 0,id,version,line,count
162,22-1-j25-31.3.H,31.30,1,307
169,22-10-j25-1.12.H,1.12,10,266
750,23-10A-j25-31.3.H,31.30,10A,211
240,22-11-j25-31.2.H,31.20,11,316
761,23-11A-j25-2.7.H,2.70,11A,134
...,...,...,...,...
93,21-U2-j25-90.2.H,90.20,U2,305
103,21-U3-j25-25.2.H,25.20,U3,188
110,21-U4-j25-31.2.H,31.20,U4,364
128,21-U6-j25-31.4.H,31.40,U6,369


In [5]:
# Pick one shape id to draw, e.g. the first unique one
shape_id = df_shapes_clean[(df_shapes_clean['line'] == 'U3') ]['id']
shape_df = df_shapes[df_shapes['id'] == shape_id.values[0]] #df_shapes_clean[df_shapes_clean['line'] == "U2"].sort_values('sequence')

# Create folium map centered at first point
m = folium.Map(location=[shape_df.iloc[0]['latitude'], shape_df.iloc[0]['longitude']], zoom_start=13)

# Add the shape line
coords = shape_df[['latitude', 'longitude']].values.tolist()
folium.PolyLine(coords, color="blue", weight=3).add_to(m)

m

In [11]:
# Add color column
def __get_color(line):
  colors = {
    'u2': 'rgb(128, 0, 128)',    # purple
    'u3': 'rgb(255, 165, 0)',    # orange
    'u4': 'rgb(0, 128, 0)',      # green
    'u6': 'rgb(165, 42, 42)'     # brown
  }
  return colors.get(str(line).lower(), 'rgb(255, 0, 0)')  # default red

df_shapes_filtered['color'] = df_shapes_filtered['line'].apply(__get_color)
df_shapes_filtered[df_shapes_filtered['id'].isin(df_shapes_clean['id'])]

Unnamed: 0,id,latitude,longitude,sequence,shape_dist_traveled,version,line,color
34591,11-WLB-j25-31.6.H,48.202024,16.370539,1,0.00,31.6,WLB,"rgb(255, 0, 0)"
34592,11-WLB-j25-31.6.H,48.202061,16.370372,2,13.02,31.6,WLB,"rgb(255, 0, 0)"
34593,11-WLB-j25-31.6.H,48.202074,16.370255,3,21.84,31.6,WLB,"rgb(255, 0, 0)"
34594,11-WLB-j25-31.6.H,48.202078,16.370125,4,31.51,31.6,WLB,"rgb(255, 0, 0)"
34595,11-WLB-j25-31.6.H,48.202088,16.370019,5,39.38,31.6,WLB,"rgb(255, 0, 0)"
...,...,...,...,...,...,...,...,...
508649,25-N91-j25-1.1.H,48.212117,16.440207,200,6667.58,1.1,N91,"rgb(255, 0, 0)"
508650,25-N91-j25-1.1.H,48.212121,16.440156,201,6671.39,1.1,N91,"rgb(255, 0, 0)"
508651,25-N91-j25-1.1.H,48.212130,16.440109,202,6675.05,1.1,N91,"rgb(255, 0, 0)"
508652,25-N91-j25-1.1.H,48.212149,16.440064,203,6679.02,1.1,N91,"rgb(255, 0, 0)"


In [4]:
# Create base map centered around first stop
center_lat = df_stops['stop_lat'].mean()
center_lon = df_stops['stop_lon'].mean()
m = folium.Map(location=[center_lat, center_lon], zoom_start=12)

# Plot stops
for _, row in df_stops.iterrows():
  folium.CircleMarker(
    location=[row['stop_lat'], row['stop_lon']],
    radius=3,
    popup=row['stop_name'],
    color='blue',
    fill=True
  ).add_to(m)

# Draw shapes if present
# if not df_shapes.empty:
df_lines = df_shapes[
  (df_shapes['shape_id'].str.contains('U1')) |
  (df_shapes['shape_id'].str.contains('U2')) |
  (df_shapes['shape_id'].str.contains('U3')) |
  (df_shapes['shape_id'].str.contains('U4')) |
  (df_shapes['shape_id'].str.contains('U6'))
  ]

for shape_id in df_lines['shape_id'].unique():
  shape_points = df_shapes[df_shapes['shape_id'] == shape_id].sort_values('shape_pt_sequence')
  coords = list(zip(shape_points['shape_pt_lat'], shape_points['shape_pt_lon']))
  folium.PolyLine(coords, weight=2, color='orange').add_to(m)


# Save or display
m.save("gtfs_map.html")