In [470]:
import pandas as pd
import numpy as np
import scipy.spatial.distance as scidist
import json

In [471]:
from math import radians, degrees, sin, cos, asin, acos, sqrt

In [472]:
def great_circle_dist(lonlat1,lonlat2):
    '''Great-circle distance between two (lon, lat) points.

    Applies the spherical law of cosines on a sphere of radius 6371
    (Earth's mean radius in km), so the result is in kilometres.
    Formula taken from
    https://medium.com/@petehouston/calculate-distance-of-two-locations-on-earth-using-python-1501b1944d97

    Parameters
    ----------
    lonlat1, lonlat2 : sequence of two numbers
        (longitude, latitude) in decimal degrees.

    Returns
    -------
    float
        Distance along the sphere's surface. NaN if any coordinate is NaN.
    '''
    lon1, lat1 = lonlat1
    lon2, lat2 = lonlat2
    lon1, lat1, lon2, lat2 = map(radians, [lon1, lat1, lon2, lat2])
    cos_angle = sin(lat1) * sin(lat2) + cos(lat1) * cos(lat2) * cos(lon1 - lon2)
    # Floating-point rounding can push the value a hair outside [-1, 1] for
    # identical (or antipodal) points, which would make acos raise
    # "math domain error". Explicit comparisons are used instead of min/max
    # so that a NaN input still propagates to a NaN result.
    if cos_angle > 1.0:
        cos_angle = 1.0
    elif cos_angle < -1.0:
        cos_angle = -1.0
    return 6371 * acos(cos_angle)

In [473]:
# Load the raw stop table from CSV; the following cells rely on its
# LTYP, HTXTK, HLINIEN and SHAPE columns.
hs = pd.read_csv(filepath_or_buffer='OEFFHALTESTOGD.csv')

In [474]:
# LTYP 4 = subway
# LTYP 1 = tram
# Keep tram stops only. The explicit copy makes `hs` an independent frame, so
# the in-place drops and column assignments in the following cells do not
# operate on a slice of the loaded table (pandas' SettingWithCopyWarning).
hs = hs.loc[hs['LTYP'] == 1].copy()

In [475]:
# Discard the columns that are not referenced again further down.
hs = hs.drop(
    columns=['FID','OBJECTID','HTXT','DIVA_ID','WEBLINK1','SE_ANNO_CAD_DATA','LTYP']
)

In [476]:
# Renumber the rows 0..n-1 after filtering; the previous row labels are kept
# in a new 'index' column.
hs = hs.reset_index()

In [477]:
# Normalise the line list: split on ", " and re-join with a bare ","
# so the separator no longer carries a trailing space.
hs["LINES"] = (
    hs["HLINIEN"]
    .str.split(', ')
    .str.join(',')
)

In [478]:
#hs["LINES"] =hs['HLINIEN'].str.findall(r'(?P<LINE>U\d{1}[EZ]?)').str.join(',')

In [479]:
# Extract longitude and latitude (still as strings) from text of the form
# "POINT (<lon> <lat>)" into the named columns 'lon' and 'lat'.
# The pattern is a raw string: it is full of regex escapes (\(, \d, \.) that
# are not valid Python string escapes and trigger a warning in a plain literal.
# NOTE: only values with a two-digit integer part and 10-15 decimals match;
# rows that do not match yield NaN in both columns.
hspos = hs.SHAPE.str.extract(r"POINT \((?P<lon>\d{2}\.\d{10,15}) (?P<lat>\d{2}\.\d{10,15})\)")

In [480]:
# Append the extracted 'lon'/'lat' columns to the stop table (aligned on the row index).
hs = pd.concat(objs=[hs, hspos], axis='columns')

In [481]:
# SHAPE and HLINIEN have been parsed into lon/lat and LINES, so drop them.
# The second reset_index adds a 'level_0' column because an 'index' column
# already exists from the earlier reset.
hs = hs.drop(columns=['SHAPE', 'HLINIEN']).reset_index()

In [482]:
# The regex extract produced strings; convert latitude to float64 for numeric use.
hs['lat'] = hs['lat'].astype(float)

In [483]:
# The regex extract produced strings; convert longitude to float64 for numeric use.
hs['lon'] = hs['lon'].astype(float)

In [484]:
# filter some stations
#hs = hs.loc[hs['HTXTK'].isin(['Stephansplatz', 'Schwedenplatz', 'Stubentor', 'Mitte-Landstraße', 'Neubaugasse', 'Ottakring', 'Simmering', 'Oberlaa'])]

# merge stations with identical name: average the coordinates of all stops
# that share the same HTXTK so each station name appears exactly once
uniq_stations = hs[['HTXTK','lon','lat']].groupby('HTXTK').mean()
uniq_stations.reset_index(inplace=True)

# filter lines on ring: reduce every stop's line list to the ring lines in one
# pass, then keep only the stops that are served by at least one of them.
# The column is written before filtering and the later steps return new
# frames, so nothing is assigned into a filtered slice.
ring_lines = {'1', '2', '71', 'D'}
hs['LINES_SPLIT'] = hs['LINES'].str.split(',').apply(lambda x: set(x).intersection(ring_lines))
hs = hs.loc[hs['LINES_SPLIT'].apply(bool)]

# Coordinates now live in uniq_stations and LINES is superseded by LINES_SPLIT;
# 'level_0' and 'index' are leftovers of the two earlier reset_index calls.
hs = hs.drop(columns=['lon','lat','LINES','level_0','index'])



In [485]:
# Collapse to one row per station name: union the ring-line sets of all stops
# that share a name.
hs = hs.groupby(by='HTXTK').agg({'LINES_SPLIT':lambda x: set.union(*x)})
# Attach the averaged coordinates; the inner join keeps only station names
# present on both sides.
hs = pd.merge(uniq_stations, hs, how='inner',on='HTXTK')
# A set of strings has no stable iteration order across interpreter runs
# (hash randomisation), so sort each set before joining. This keeps 'LINES',
# and everything derived from it, reproducible between runs.
hs['LINES']=hs['LINES_SPLIT'].apply(sorted).str.join(',')
hs = hs.drop(columns=['LINES_SPLIT'])
hs

Unnamed: 0,HTXTK,lon,lat,LINES
0,Absberggasse,16.390614,48.176064,D
1,Albertgasse,16.343960,48.210668,2
2,Alfred-Adler-Straße,16.381552,48.182055,D
3,Althanstraße,16.359489,48.228849,D
4,Am Heumarkt,16.376822,48.198641,71
...,...,...,...,...
129,Zentralfriedhof 1.Tor,16.433115,48.159185,71
130,Zentralfriedhof 2.Tor,16.441950,48.154547,71
131,Zentralfriedhof 3.Tor,16.448481,48.152716,71
132,Zentralfriedhof 4.Tor,16.453219,48.151706,71


In [486]:
# Full square matrix of pairwise great-circle distances between the stations;
# pdist returns the condensed form, squareform expands it. Row/column order
# follows the row order of hs.
hs_dist = scidist.squareform(
    scidist.pdist(hs[['lon','lat']], metric=great_circle_dist)
)

In [487]:
# Sorted list of every distinct line that appears at any station.
lines = sorted({
    line
    for station_lines in hs['LINES'].str.split(',').tolist()
    for line in station_lines
})
# One 0/1 indicator row per station, columns ordered like `lines`.
lines_vec = [
    [1 if line in station_lines else 0 for line in lines]
    for station_lines in hs['LINES'].str.split(',').tolist()
]
# Pairwise Jaccard distance between the stations' line-indicator rows,
# expanded to a full square matrix.
hs_dist_lines = scidist.squareform(
    scidist.pdist(np.array(lines_vec), scidist.jaccard)
)

In [488]:
# Export everything as a single JSON document:
#   E  - station names
#   EA - station-by-station great-circle distance matrix
#   SR - list of lines per station
#   S  - sorted list of all lines
#   SA - station-by-station Jaccard distance matrix of the line sets
with open('wienerlinien_ring.json', mode='w', encoding='utf8') as f:
    jsonstr = dict(
        E=hs['HTXTK'].tolist(),
        EA=hs_dist.tolist(),
        SR=hs['LINES'].str.split(',').tolist(),
        S=lines,
        SA=hs_dist_lines.tolist(),
    )
    # ensure_ascii=False keeps non-ASCII station names readable in the file.
    json.dump(jsonstr, f, ensure_ascii=False)