# make_stations_with_ags prototype
Prototype for the script

In [1]:
'''
This script is used to create the stations_with_ags.csv
for the hystreet aggregator. Needs to be run only when
new stations are added.
'''

import os
import requests



# Get Hystreet Locations
headers = {'Content-Type': 'application/json',
           'X-API-Token': os.getenv('HYSTREET_TOKEN')}
res = requests.get('https://hystreet.com/api/locations/', headers=headers)
locations = res.json()



In [2]:
# Search Lat/Lon for Hystreet Locations

from geopy.geocoders import Nominatim
from tqdm import tqdm

geolocator = Nominatim(user_agent="everyonecounts")
for location in tqdm(locations):
    query = location["city"]+" "+location["name"].split("(")[0]
    geoloc = geolocator.geocode(query,exactly_one=True)
    if geoloc==None and "seite" in query:
        # Berlin Kurfürstendamm Nordseite --> Berlin Kurfürstendamm
        query2 = query.replace("Nordseite","").replace("Ostseite","").replace("Südseite","").replace("Westseite","").split("(")[0]
        geoloc = geolocator.geocode(query2,exactly_one=True)
    if geoloc==None:
        print("!!! NOT FOUND: ",query,query2)
        location["lat"]=None
        location["lon"]=None
        location["address"]=None
    else:
        #print(query,"==>",geoloc.address)
        location["lat"]=geoloc.latitude
        location["lon"]=geoloc.longitude
        location["address"]=geoloc.address

100%|████████████████████████████████████████████████████████████████████████████████| 127/127 [01:06<00:00,  1.92it/s]


In [7]:
import sys
import pandas as pd
sys.path.append('../../')
from coords_to_kreis import coords_convert
df_locations = pd.DataFrame(locations)
df_locations['landkreis'] = coords_convert(df_locations)

  "(%s != %s)" % (left_df.crs, right_df.crs)


In [6]:
# Write output to file
outfile = "stations_with_ags.csv"
df_locations = df_locations[["id","name","city","landkreis","lat","lon"]]
df_locations = df_locations.sort_values(by="id")
df_locations = df_locations.rename(columns={'id':'stationid'})
df_locations.to_csv(outfile, index=False)


# Validation

## Validate data
Test by how much old and new station positions differ

### Load data

In [88]:
import pandas as pd
from IPython.display import display

file1 = "../stations_with_ags.csv"
file2 = "stations_with_ags.csv"

with open(file1,"r") as f:
    df1 = pd.read_csv(f)
with open(file2,"r") as f:
    df2 = pd.read_csv(f)
df = df1.join(df2, on="stationid", how='left', lsuffix='old', rsuffix='new', sort=False)
print("--------------\nOLD FILE:")
display(df1.describe())
print(df1.keys())
print("--------------\nNEW FILE:")
display(df2.describe())
print(df2.keys())

--------------
OLD FILE:


Unnamed: 0,stationid,lon,lat,ags,distanceinmeters
count,117.0,117.0,117.0,117.0,117.0
mean,127.111111,8.971354,50.936013,3192945.0,710.405872
std,55.872512,1.825708,1.561711,3901324.0,540.968477
min,47.0,6.091435,47.994755,2000.0,5.681937
25%,84.0,7.461371,49.795161,5711.0,281.143654
50%,123.0,8.574648,51.044405,1003000.0,612.612593
75%,159.0,10.083537,52.263388,5774032.0,923.236858
max,254.0,13.735441,54.784319,14713000.0,2424.48525


Index(['name', 'city', 'stationid', 'lon', 'lat', 'earliest_measurement_at',
       'latest_measurement_at', 'ags', 'ascii', 'distanceinmeters'],
      dtype='object')
--------------
NEW FILE:


Unnamed: 0,stationid,ags,lat,lon,distance_m
count,127.0,127.0,127.0,127.0,127.0
mean,131.866142,4810596.0,51.025878,8.956859,987.937008
std,61.223551,3773963.0,1.572162,1.845314,801.949725
min,47.0,1003.0,47.991972,6.092855,83.0
25%,84.5,1001500.0,49.882458,7.30526,356.0
50%,125.0,5124000.0,51.224626,8.57864,815.0
75%,162.5,6872004.0,52.263998,10.043159,1203.5
max,261.0,16051000.0,54.784201,13.735514,3771.0


Index(['stationid', 'name', 'city', 'ags', 'lat', 'lon', 'address',
       'distance_m'],
      dtype='object')


### Distances
Calculate distances in meter between old and new positions:

In [89]:
from geopy import distance
merged = df1.merge(df2, on="stationid")
merged["distance"] = merged.apply(lambda x: distance.distance( (x["lat_x"],x["lon_x"]), (x["lat_y"],x["lon_y"]) ), axis=1)
merged["distance"] = merged.apply(lambda x: x["distance"].m, 1) # distance in meter
merged["distance"].describe()

count     121.000000
mean      339.768499
std      1267.045401
min         1.412466
25%        43.974709
50%        97.141962
75%       187.154503
max      9687.414294
Name: distance, dtype: float64