In [1]:
import json
import math
import datetime
from time import sleep, strptime
from collections import OrderedDict

import pandas as pd
import numpy as np
import requests

#import plotly.offline as pyo
#pyo.init_notebook_mode()

import plotly.express as px
import plotly.graph_objects as go

### Load CSV files provided by Wiener Linien

In [2]:
def _read_ogd_csv(path):
    """Read one semicolon-separated CSV file into a DataFrame."""
    return pd.read_csv(path, sep=";")


# Line ids and stop ids, including the order of the stops.
fahrwegverlaeufe = _read_ogd_csv('../../resources/wienerlinien_csv/1_wienerlinien-ogd-fahrwegverlaeufe.csv')

# Polygon coordinates between stops, including distances in meters.
gps_punkte = _read_ogd_csv('../../resources/wienerlinien_csv/2_wienerlinien-ogd-gps-punkte.csv')

# Stops. Holds the StopID that is used for realtime data requests.
haltepunkte = _read_ogd_csv('../../resources/wienerlinien_csv/3_wienerlinien-ogd-haltepunkte.csv')

# Like the stops, but on DIVA level (DIVA = summary of multiple stops of one station area).
haltestellen = _read_ogd_csv('../../resources/wienerlinien_csv/4_wienerlinien-ogd-haltestellen.csv')

# Line information.
linien = _read_ogd_csv('../../resources/wienerlinien_csv/5_wienerlinien-ogd-linien.csv')

# "Steige" table, so far described as staircase info.
# NOTE(review): "Steig" presumably means platform rather than staircase - confirm against the dataset docs.
steige = _read_ogd_csv('../../resources/wienerlinien_csv/6_wienerlinien-ogd-steige.csv')

# Distances between stops. Like gps_punkte but without the in-between points;
# probably more accurate because it is not derived from a polygon.
verbindungen = _read_ogd_csv('../../resources/wienerlinien_csv/7_wienerlinien-ogd-verbindungen.csv')

# Data validity information. Not needed in this notebook.
version = _read_ogd_csv('../../resources/wienerlinien_csv/8_wienerlinien-ogd-version.csv')

# Undocumented files. Not needed in this notebook.
teilstrecken_lonlat = _read_ogd_csv('../../resources/wienerlinien_csv/wienerlinien-ogd-teilstrecken-lonlat.csv')
teilstrecken_wkt = _read_ogd_csv('../../resources/wienerlinien_csv/wienerlinien-ogd-teilstrecken-wkt.csv')

In [3]:
# Build ubahn_df (same construction as in the data retrieval notebook):
# keep the metro lines only, then join the route rows and the stop details onto them.
is_metro = linien.MeansOfTransport == "ptMetro"
ubahn_linien = linien[is_metro]
ubahn_df = (
    fahrwegverlaeufe
    .merge(ubahn_linien, how="inner", on="LineID")
    .merge(haltepunkte, how="inner", on="StopID")
)

# Clean-up: Meidling shows up under two different StopText values, so unify them.
meidling_rows = ubahn_df.StopText.str.contains("Meidling")
ubahn_df.loc[meidling_rows, "StopText"] = "Meidling Hauptstraße"

# Two U2 stops of PatternID 1 are in the wrong order; set their sequence numbers explicitly.
u2_pattern_1 = (ubahn_df.LineText == "U2") & (ubahn_df.PatternID == 1)
ubahn_df.loc[u2_pattern_1 & (ubahn_df.StopText == "Donaustadtbrücke"), "StopSeqCount"] = 8
ubahn_df.loc[u2_pattern_1 & (ubahn_df.StopText == "Donaumarina"), "StopSeqCount"] = 9

### Functions

In [4]:
def get_diva_from_name(diva_df, stop_text):
    """
    Look up the DIVA id that belongs to a station name.

    diva_df must provide the columns "DIVA" and "StopText".
    Returns the DIVA id as a string of its integer value.
    Raises ValueError when the name matches no station or more than one DIVA id.
    """
    pairs = diva_df[["DIVA", "StopText"]].drop_duplicates()
    matches = pairs.loc[pairs.StopText == stop_text, "DIVA"].astype(int)
    n_matches = len(matches)
    if n_matches == 0:
        raise ValueError("stop_text not found")
    if n_matches > 1:
        raise ValueError("stop_text is not unique")
    return str(matches.values[0])


def get_name_from_diva(diva_df, diva):
    """
    Look up the station name that belongs to a DIVA id.

    diva_df must provide the columns "DIVA" and "StopText".
    diva may be an integer or its string form (as returned by
    get_diva_from_name); it is compared numerically against the DIVA column.
    Returns the name of the first matching station as a string.
    Raises ValueError when no station has the given DIVA id.
    """
    lookup = diva_df[["DIVA", "StopText"]].drop_duplicates()
    # Compare numerically so that "60200014", 60200014 and 60200014.0 all match;
    # values that cannot be parsed become NaN and never match.
    diva_ids = pd.to_numeric(lookup.DIVA, errors="coerce")
    names = lookup.loc[diva_ids == int(diva), "StopText"]
    if len(names) == 0:
        # Same error style as get_diva_from_name instead of an IndexError from values[0]
        raise ValueError("diva not found")
    return str(names.values[0])

### Data preparation

In [5]:
# Load the travel-time matrix from file (data was retrieved for 1 December 2021 at 12h).
travel_df = pd.read_csv("../../resources/data/retrieved_data/ubahn_travel_times_2021_12_01__12_00.csv").set_index("idx")
travel_df.index.name = None

# Stations that were skipped get removed from the matrix:
# both Neue Donau DIVA ids, plus Museumsquartier and Rathaus looked up by name.
diva_ids_to_remove = [60200455, 60201668]
diva_ids_to_remove += [
    int(get_diva_from_name(ubahn_df, station_name))
    for station_name in ("Museumsquartier", "Rathaus")
]

# Row labels are dropped as integers, column labels as their string form.
travel_df = travel_df.drop(
    index=diva_ids_to_remove,
    columns=[str(diva_id) for diva_id in diva_ids_to_remove],
)

In [6]:
# Reshape the travel-time matrix into long format: one row per pair of stations.
travel_df_long = travel_df.unstack().reset_index()
travel_df_long.columns = ["source", "destination", "time"]

# "source" is taken from the matrix column labels; convert it to integer ids.
travel_df_long["source"] = travel_df_long["source"].astype(int)

travel_df_long.head()

Unnamed: 0,source,destination,time
0,60200014,60200014,0
1,60200014,60200027,2160
2,60200014,60200031,1380
3,60200014,60200033,120
4,60200014,60200048,1920


In [7]:
# Add the station names for both ends of every station pair.

# "DIVA <-> StopText" lookup, restricted to the stations present in the travel data
diva_stop_mapping = ubahn_df[["DIVA", "StopText"]].drop_duplicates()
diva_stop_mapping = diva_stop_mapping[diva_stop_mapping.DIVA.isin(list(travel_df_long.source))]

# Start from the three base columns and rename the joined column by name.
# Overwriting .columns by position only works on the first run of the cell;
# this form gives the same result and can be re-run safely.
travel_df_long = (
    travel_df_long[["source", "destination", "time"]]
    .merge(diva_stop_mapping, left_on="source", right_on="DIVA")
    .drop(columns="DIVA")
    .rename(columns={"StopText": "source_name"})
    .merge(diva_stop_mapping, left_on="destination", right_on="DIVA")
    .drop(columns="DIVA")
    .rename(columns={"StopText": "destination_name"})
)

travel_df_long.head()

Unnamed: 0,source,destination,time,source_name,destination_name
0,60200014,60200014,0,Alaudagasse,Alaudagasse
1,60200027,60200014,1920,Alser Straße,Alaudagasse
2,60200031,60200014,1260,Alte Donau,Alaudagasse
3,60200033,60200014,120,Altes Landgut,Alaudagasse
4,60200048,60200014,1800,Schöpfwerk,Alaudagasse


In [8]:
# Attach coordinates to both ends of every station pair.

travel_df_long = travel_df_long[["source","destination","time","source_name","destination_name"]]

# One coordinate pair per DIVA: the mean longitude/latitude over all rows of that DIVA
diva_coords = ubahn_df[["DIVA", "Longitude", "Latitude"]].groupby("DIVA").mean().reset_index()

# Join the coordinates once for the source and once for the destination
coord_df = (
    travel_df_long
    .merge(diva_coords, how="inner", left_on="source", right_on="DIVA")
    .drop(columns="DIVA")
    .rename(columns={"Longitude": "source_longitude", "Latitude": "source_latitude"})
    .merge(diva_coords, how="inner", left_on="destination", right_on="DIVA")
    .drop(columns="DIVA")
    .rename(columns={"Longitude": "destination_longitude", "Latitude": "destination_latitude"})
)

coord_df.head()

Unnamed: 0,source,destination,time,source_name,destination_name,source_longitude,source_latitude,destination_longitude,destination_latitude
0,60200014,60200014,0,Alaudagasse,Alaudagasse,16.382337,48.153693,16.382337,48.153693
1,60200027,60200014,1920,Alser Straße,Alaudagasse,16.341828,48.21682,16.382337,48.153693
2,60200031,60200014,1260,Alte Donau,Alaudagasse,16.424612,48.23819,16.382337,48.153693
3,60200033,60200014,120,Altes Landgut,Alaudagasse,16.383596,48.161794,16.382337,48.153693
4,60200048,60200014,1800,Schöpfwerk,Alaudagasse,16.324468,48.161045,16.382337,48.153693


The dataset with stations, travel times and coordinates is now ready.
Next, we need to transform the spatial coordinates into time-based coordinates.

### Time-space transformation

In [9]:
# First version of the time-space transformation.
# NOTE(review): the next cell rebuilds transformed_df from coord_df again,
# so the result of this cell is overwritten there.

# Work on a copy so that coord_df stays untouched
transformed_df = coord_df.copy()

# Translate destination coordinates to the center (0,0)
# (subtract source coordinates from destination coordinates)
transformed_df["destination_latitude_center"] = transformed_df["destination_latitude"] - transformed_df["source_latitude"]
transformed_df["destination_longitude_center"] = transformed_df["destination_longitude"] - transformed_df["source_longitude"]

# Scale: divide both offsets of a row by the larger absolute offset of that row.
# For a pair with identical coordinates this is 0/0 and yields NaN.
max_coord = transformed_df[["destination_longitude_center", "destination_latitude_center"]].abs().max(axis=1)
transformed_df["destination_latitude_center"] = transformed_df["destination_latitude_center"] / max_coord
transformed_df["destination_longitude_center"] = transformed_df["destination_longitude_center"] / max_coord

# Travel time from seconds into minutes
transformed_df["time"] = transformed_df["time"] / 60

# We want the distance from the origin (0,0) to be the same as the travel time.
# First calculate the angle from the absolute offsets, then recalculate the
# latitude and longitude offsets from it.
transformed_df["angle"] = np.arctan(
    abs(transformed_df["destination_longitude_center"]) / abs(transformed_df["destination_latitude_center"])
)

# cos/sin of the angle times the travel time give the magnitude;
# np.copysign restores the sign of the original offset.
transformed_df["destination_latitude_center"] = (
    np.cos(transformed_df["angle"]) * transformed_df["time"]
) * np.copysign(1, transformed_df["destination_latitude_center"])
transformed_df["destination_longitude_center"] = (
    np.sin(transformed_df["angle"]) * transformed_df["time"]
) * np.copysign(1, transformed_df["destination_longitude_center"])

# Pairs without a defined angle end up as NaN; place them at the origin.
# The result is assigned back instead of calling fillna(inplace=True) on the
# column: that chained form acts on an intermediate Series and does not
# update the frame when pandas Copy-on-Write is active.
transformed_df["destination_latitude_center"] = transformed_df["destination_latitude_center"].fillna(0)
transformed_df["destination_longitude_center"] = transformed_df["destination_longitude_center"].fillna(0)

# Add line info: the LineText values of each destination DIVA joined with ","
transformed_df = transformed_df.merge(
    ubahn_df[["DIVA", "LineText"]].drop_duplicates().groupby(["DIVA"]).agg(lambda col: ','.join(col)),
    how="inner",
    left_on="destination",
    right_on="DIVA",
).drop_duplicates()
transformed_df

Unnamed: 0,source,destination,time,source_name,destination_name,source_longitude,source_latitude,destination_longitude,destination_latitude,destination_latitude_center,destination_longitude_center,angle,LineText
0,60200014,60200014,0.0,Alaudagasse,Alaudagasse,16.382337,48.153693,16.382337,48.153693,0.000000,0.000000,,U1
1,60200027,60200014,32.0,Alser Straße,Alaudagasse,16.341828,48.216820,16.382337,48.153693,-26.931716,17.282439,0.570528,U1
2,60200031,60200014,21.0,Alte Donau,Alaudagasse,16.424612,48.238190,16.382337,48.153693,-18.780612,-9.396202,0.463899,U1
3,60200033,60200014,2.0,Altes Landgut,Alaudagasse,16.383596,48.161794,16.382337,48.153693,-1.976309,-0.306924,0.154071,U1
4,60200048,60200014,30.0,Schöpfwerk,Alaudagasse,16.324468,48.161045,16.382337,48.153693,-3.780947,29.760787,1.444429,U1
...,...,...,...,...,...,...,...,...,...,...,...,...,...
9020,60201860,60201894,22.0,Rennbahnweg,Stadion,16.449594,48.257574,16.420228,48.210852,-18.626365,-11.707200,0.561133,U2
9021,60201891,60201894,6.0,Taborstraße,Stadion,16.381411,48.219182,16.420228,48.210852,-1.258841,5.866457,1.359419,U2
9022,60201892,60201894,2.0,Messe,Stadion,16.404998,48.217768,16.420228,48.210852,-0.826912,1.821048,1.144550,U2
9023,60201893,60201894,1.0,Krieau,Stadion,16.413788,48.214719,16.420228,48.210852,-0.514709,0.857365,1.030128,U2


In [10]:
# Work on a copy so that coord_df stays untouched
transformed_df = coord_df.copy()

# Translate destination coordinates to the center (0,0)
# (subtract source coordinates from destination coordinates)
transformed_df["destination_latitude_center"] = transformed_df["destination_latitude"] - transformed_df["source_latitude"]
transformed_df["destination_longitude_center"] = transformed_df["destination_longitude"] - transformed_df["source_longitude"]

# Transform time from seconds into minutes
transformed_df["time"] = transformed_df["time"] / 60

# We want the distance from the origin (0,0) to be the same as the travel time.
# First calculate the angle from the absolute offsets, then recalculate the
# latitude and longitude offsets from it.
# For a pair with identical coordinates this is 0/0 and the angle is NaN.
# NOTE(review): the angle is taken from raw degree differences. A degree of
# longitude covers less ground than a degree of latitude away from the equator,
# so these directions are not true compass bearings - confirm this is intended.
transformed_df["angle"] = np.arctan(
    abs(transformed_df["destination_longitude_center"]) / abs(transformed_df["destination_latitude_center"])
)

# Final offsets: cos/sin of the angle times the travel time give the magnitude;
# np.copysign restores the sign of the original offset.
transformed_df["destination_latitude_center"] = (
    np.cos(transformed_df["angle"]) * transformed_df["time"]
) * np.copysign(1, transformed_df["destination_latitude_center"])
transformed_df["destination_longitude_center"] = (
    np.sin(transformed_df["angle"]) * transformed_df["time"]
) * np.copysign(1, transformed_df["destination_longitude_center"])

# Pairs without a defined angle end up as NaN; place them at the origin.
# The result is assigned back instead of calling fillna(inplace=True) on the
# column: that chained form acts on an intermediate Series and does not
# update the frame when pandas Copy-on-Write is active.
transformed_df["destination_latitude_center"] = transformed_df["destination_latitude_center"].fillna(0)
transformed_df["destination_longitude_center"] = transformed_df["destination_longitude_center"].fillna(0)

# Add line info: the LineText values of each destination DIVA joined with ","
transformed_df = transformed_df.merge(
    ubahn_df[["DIVA", "LineText"]].drop_duplicates().groupby(["DIVA"]).agg(lambda col: ','.join(col)),
    how="inner",
    left_on="destination",
    right_on="DIVA",
).drop_duplicates()
transformed_df

Unnamed: 0,source,destination,time,source_name,destination_name,source_longitude,source_latitude,destination_longitude,destination_latitude,destination_latitude_center,destination_longitude_center,angle,LineText
0,60200014,60200014,0.0,Alaudagasse,Alaudagasse,16.382337,48.153693,16.382337,48.153693,0.000000,0.000000,,U1
1,60200027,60200014,32.0,Alser Straße,Alaudagasse,16.341828,48.216820,16.382337,48.153693,-26.931716,17.282439,0.570528,U1
2,60200031,60200014,21.0,Alte Donau,Alaudagasse,16.424612,48.238190,16.382337,48.153693,-18.780612,-9.396202,0.463899,U1
3,60200033,60200014,2.0,Altes Landgut,Alaudagasse,16.383596,48.161794,16.382337,48.153693,-1.976309,-0.306924,0.154071,U1
4,60200048,60200014,30.0,Schöpfwerk,Alaudagasse,16.324468,48.161045,16.382337,48.153693,-3.780947,29.760787,1.444429,U1
...,...,...,...,...,...,...,...,...,...,...,...,...,...
9020,60201860,60201894,22.0,Rennbahnweg,Stadion,16.449594,48.257574,16.420228,48.210852,-18.626365,-11.707200,0.561133,U2
9021,60201891,60201894,6.0,Taborstraße,Stadion,16.381411,48.219182,16.420228,48.210852,-1.258841,5.866457,1.359419,U2
9022,60201892,60201894,2.0,Messe,Stadion,16.404998,48.217768,16.420228,48.210852,-0.826912,1.821048,1.144550,U2
9023,60201893,60201894,1.0,Krieau,Stadion,16.413788,48.214719,16.420228,48.210852,-0.514709,0.857365,1.030128,U2


In [11]:
# Persist the transformed data as a CSV file (the row index is not written)
output_csv_path = "../../resources/data/prepared_data/time_space_map_ubahn_2021_12_01__12_00.csv"
transformed_df.to_csv(output_csv_path, index=False)

### Interactive chart
Unfortunately, it is not possible to run interactive charts in Jupyter notebooks.

For this purpose, a demo was created: https://martinvolk91.github.io/time-space-maps/