In [1]:
import sys
from time import sleep
import argparse
import requests  # for calling api
import logging
import pandas as pd
import json
import os.path
pd.set_option('display.max_colwidth', 100)
pd.set_option('display.width', 180)
pd.set_option('display.max_columns', 20)

In [2]:
api_stations = f"https://geo.irceline.be/sos/api/v1/stations/?expanded=false"
api_stations_exp = "https://geo.irceline.be/sos/api/v1/stations/?expanded=true"
api_stations_gent = f"https://geo.irceline.be/sos/api/v1/stations/1207?expanded=true"

In [3]:
def data_from_api(url, norm):
    response_status = requests.get(url)
    response = response_status.json()
    if isinstance(response, dict): print(response)
    else: print(response[0])
    if norm: df = pd.json_normalize(response)
    else: df = pd.DataFrame(response)
    return df

In [4]:
stations_info = data_from_api(api_stations, norm=True)
stations_info

{'properties': {'id': 1030, 'label': '40AL01 - Linkeroever'}, 'geometry': {'coordinates': [4.385223684454717, 51.23619419990248, 'NaN'], 'type': 'Point'}, 'type': 'Feature'}


Unnamed: 0,type,properties.id,properties.label,geometry.coordinates,geometry.type
0,Feature,1030,40AL01 - Linkeroever,"[4.385223684454717, 51.23619419990248, NaN]",Point
1,Feature,1031,40AL02 - Beveren,"[4.234832753144059, 51.30452079034428, NaN]",Point
2,Feature,1032,40AL03 - Beveren,"[4.201460395126572, 51.25396488066997, NaN]",Point
3,Feature,1033,40AL04 - Beveren,"[4.293329476940385, 51.2906675267468, NaN]",Point
4,Feature,1034,40AL05 - Beveren,"[4.278889821667828, 51.2631177168737, NaN]",Point
...,...,...,...,...,...
118,Feature,1241,47E716 - Mariakerke,"[3.6828130172031206, 51.06698790152168, NaN]",Point
119,Feature,1716,47E814 - Ham,"[5.129718074812025, 51.08218722351412, NaN]",Point
120,Feature,1752,48R515 - Aeroport 1,"[4.425464746587246, 50.45160018054414, NaN]",Point
121,Feature,1753,48R516 - Aeroport 2,"[4.484957959246502, 50.46913160387411, NaN]",Point


In [5]:
# get timeseries ids for parameters of interest from station gent
station_gent = data_from_api(api_stations_gent, norm=False)
timeseries_gent = pd.DataFrame(station_gent.loc["timeseries", "properties"]).transpose().reset_index(names="timeseries_id")


{'properties': {'timeseries': {'7087': {'service': {'id': '1', 'label': 'IRCEL - CELINE: timeseries-api (SOS 2.0)'}, 'offering': {'id': '7087', 'label': '7087 - FIDAS 200 - procedure'}, 'feature': {'id': '1207', 'label': '44R701 - Gent'}, 'procedure': {'id': '7087', 'label': '7087 - FIDAS 200 - procedure'}, 'phenomenon': {'id': '6001', 'label': 'Particulate Matter < 2.5 µm'}, 'category': {'id': '6001', 'label': 'Particulate Matter < 2.5 µm'}}, '99906': {'service': {'id': '1', 'label': 'IRCEL - CELINE: timeseries-api (SOS 2.0)'}, 'offering': {'id': '99906', 'label': '99906 - Unknown device - procedure'}, 'feature': {'id': '1207', 'label': '44R701 - Gent'}, 'procedure': {'id': '99906', 'label': '99906 - Unknown device - procedure'}, 'phenomenon': {'id': '61102', 'label': 'Wind Direction'}, 'category': {'id': '61102', 'label': 'Wind Direction'}}, '7078': {'service': {'id': '1', 'label': 'IRCEL - CELINE: timeseries-api (SOS 2.0)'}, 'offering': {'id': '7078', 'label': '7078 - Env. O341M tot

In [6]:
def get_tsinfo_per_station(station_id):
    response_status = requests.get(f"https://geo.irceline.be/sos/api/v1/stations/{str(station_id)}?expanded=true")
    response = response_status.json()
    df = pd.DataFrame(response)
    df = pd.DataFrame(df.loc["timeseries", "properties"]).transpose().reset_index(names="timeseries_id")
    return df

# get timeseries ids for per station
for station_id in stations_info["properties.id"]:
   which_ts_per_station = get_tsinfo_per_station(station_id)


In [7]:
which_ts_per_station

Unnamed: 0,timeseries_id,service,offering,feature,procedure,phenomenon,category
0,10756,"{'id': '1', 'label': 'IRCEL - CELINE: timeseries-api (SOS 2.0)'}","{'id': '10756', 'label': '10756 - Temperature 80m - procedure'}","{'id': '1725', 'label': 'T2H801 - Zwijndrecht'}","{'id': '10756', 'label': '10756 - Temperature 80m - procedure'}","{'id': '62101', 'label': 'Temperature'}","{'id': '62101', 'label': 'Temperature'}"
1,10755,"{'id': '1', 'label': 'IRCEL - CELINE: timeseries-api (SOS 2.0)'}","{'id': '10755', 'label': '10755 - Temperature 48m - procedure'}","{'id': '1725', 'label': 'T2H801 - Zwijndrecht'}","{'id': '10755', 'label': '10755 - Temperature 48m - procedure'}","{'id': '62101', 'label': 'Temperature'}","{'id': '62101', 'label': 'Temperature'}"
2,10758,"{'id': '1', 'label': 'IRCEL - CELINE: timeseries-api (SOS 2.0)'}","{'id': '10758', 'label': '10758 - Temperature 153m - procedure'}","{'id': '1725', 'label': 'T2H801 - Zwijndrecht'}","{'id': '10758', 'label': '10758 - Temperature 153m - procedure'}","{'id': '62101', 'label': 'Temperature'}","{'id': '62101', 'label': 'Temperature'}"
3,10757,"{'id': '1', 'label': 'IRCEL - CELINE: timeseries-api (SOS 2.0)'}","{'id': '10757', 'label': '10757 - Temperature 114m - procedure'}","{'id': '1725', 'label': 'T2H801 - Zwijndrecht'}","{'id': '10757', 'label': '10757 - Temperature 114m - procedure'}","{'id': '62101', 'label': 'Temperature'}","{'id': '62101', 'label': 'Temperature'}"
4,10753,"{'id': '1', 'label': 'IRCEL - CELINE: timeseries-api (SOS 2.0)'}","{'id': '10753', 'label': '10753 - Temperature 8m - procedure'}","{'id': '1725', 'label': 'T2H801 - Zwijndrecht'}","{'id': '10753', 'label': '10753 - Temperature 8m - procedure'}","{'id': '62101', 'label': 'Temperature'}","{'id': '62101', 'label': 'Temperature'}"


In [8]:
# get timeseries metadata for those timeseries available in station of interest
def get_timeseries_meta(ts_id):
    response_status = requests.get(f"https://geo.irceline.be/sos/api/v1/timeseries/{ts_id}")
    response = response_status.json()
    try:
        os.makedirs("../local_data/metadata")
    except Exception:
        pass
    fn = f"../local_data/metadata/{ts_id}.json"
    with open(fn, 'w') as fp:
        json.dump(response, fp)
    # timeseries_meta = pd.json_normalize(response)  # 1 rij voor 1 timeseries, maar sommige timeseries hebben 26 kolommen sommige 27

In [9]:
for ts_id in timeseries_gent.timeseries_id:
    get_timeseries_meta(ts_id)

In [None]:
# "https://geo.irceline.be/sos/api/v1/timeseries/7087"
# https://geo.irceline.be/sos/api/v1/timeseries/7087/getData?timespan=PT24H/2023-11-27
# get timeseries datapoints

def get_timeseries_datapoints(ts_id, date="2023-08-01"):
    date = date
    datefn = date.replace("-", "")
    timespan = f"?timespan=PT24H/{date}"
    #timespan = ""
    response_status = requests.get(f"https://geo.irceline.be/sos/api/v1/timeseries/{ts_id}/getData{timespan}")
    response = response_status.json()
    df = pd.DataFrame(response)
    df = pd.DataFrame(df['values'].values.tolist())
    try:
        os.makedirs(f"../local_data/timeseries_data/{datefn}/Gent")
    except Exception:
        pass
    df.to_csv(f"../local_data/timeseries_data/{datefn}/Gent/{ts_id}_data.txt", sep="\t", index=False)

In [None]:
dates = ["2023-08-01", "2023-08-02", "2023-08-03", "2023-08-04"]

for date in dates:
    for ts_id in timeseries_gent.timeseries_id:
        get_timeseries_datapoints(ts_id, date)

# EXtra: more days for 1 station

In [None]:
def get_timeseries_datapts_1M(ts_id, date="2023-08-01"):
    date = date
    datefn = date.replace("-", "")
    timespan = f"?timespan=P31DT24H/{date}"
    #timespan = ""
    response_status = requests.get(f"https://geo.irceline.be/sos/api/v1/timeseries/{ts_id}/getData{timespan}")
    response = response_status.json()
    df = pd.DataFrame(response)
    df = pd.DataFrame(df['values'].values.tolist())
    try:
        os.makedirs(f"../local_data_Gent_1M/")
    except Exception:
        pass
    df.to_csv(f"../local_data_Gent_1M/{ts_id}_data.txt", sep="\t", index=False)


for ts_id in timeseries_gent.timeseries_id:
        get_timeseries_datapts_1M(ts_id)
