In [None]:
# This code downloads precipitation or river discharge data from SMHI's OpenData API 
# Author: Marlon Passos
# Adapted from Kristoffer Bäckman https://github.com/thebackman/SMHI

In [None]:
# Hydrological Observations (for river discharge)
# NOTE: the meteorological cell that follows assigns the same ADR_* names, so
# whichever of the two endpoint cells is executed last decides which API is used.
_HYDRO_API = "http://opendata-download-hydroobs.smhi.se/api/version/1.0"
_HYDRO_STATION = _HYDRO_API + "/parameter/{parameter}/station/{station}"

ADR_VERSION = _HYDRO_API + ".json"
ADR_PARAMETER = _HYDRO_API + "/parameter/{parameter}.json"
ADR_STATION = _HYDRO_STATION + ".json"
ADR_LATEST_MONTHS = _HYDRO_STATION + "/period/latest-months/data.json"
ADR_CORRECTED = _HYDRO_STATION + "/period/corrected-archive/data.csv"

In [None]:
# Meteorological Observations (for precipitation)
# NOTE: these assignments reuse the ADR_* names of the hydrological cell before
# this one; running this cell points every helper at the meteorological API.
_METEO_API = "http://opendata-download-metobs.smhi.se/api/version/1.0"
_METEO_STATION = _METEO_API + "/parameter/{parameter}/station/{station}"

ADR_VERSION = _METEO_API + ".json"
ADR_PARAMETER = _METEO_API + "/parameter/{parameter}.json"
ADR_STATION = _METEO_STATION + ".json"
ADR_LATEST_MONTHS = _METEO_STATION + "/period/latest-months/data.json"
ADR_LATEST_DAY = _METEO_STATION + "/period/latest-day/data.json"
ADR_CORRECTED = _METEO_STATION + "/period/corrected-archive/data.csv"

In [None]:
import datetime
import sys
import logging
import json
import requests

# -- logging

# Root logger: everything from DEBUG upwards goes to smhi.log, which is
# opened with filemode "w" (i.e. rewritten rather than appended to).
FORMAT = '%(asctime)s %(levelname)s: %(module)s: %(funcName)s(): %(message)s'
logging.basicConfig(filename = "smhi.log", filemode = "w", format = FORMAT, level = logging.DEBUG)

# Only let warnings (and worse) through from the HTTP libraries.
for _http_logger in ("requests", "urllib3"):
    logging.getLogger(_http_logger).setLevel(logging.WARNING)

# -- functions

def write_json(json_obj, file_name = 'file.json'):
    """
    Dump a JSON-serialisable object to a file.

    json_obj  -- the object handed to json.dump
    file_name -- path of the file to (over)write; defaults to 'file.json'
                 in the current working directory
    """
    with open(file_name, 'w') as sink:
        json.dump(obj = json_obj, fp = sink)

def api_return_data(adr, timeout = 30):
    """
    Call an API endpoint and return the decoded JSON body.

    adr     -- full URL to request
    timeout -- seconds to wait for the server before giving up (default 30);
               without a timeout requests.get can block indefinitely

    Raises requests.HTTPError for 4xx/5xx answers (so an error page is not
    mistaken for data), the usual requests connection/timeout exceptions,
    and a JSON decoding error if the body is not valid JSON.
    """
    # initiate the call
    req_obj = requests.get(adr, timeout = timeout)
    # fail loudly on HTTP errors instead of trying to parse an error body
    req_obj.raise_for_status()
    # decode the payload (exceptions are left for the caller to handle)
    return req_obj.json()

In [None]:
import datetime
import sys
import logging
import json
import requests

# -- logging

# NOTE: this cell repeats the logging setup of the previous cell verbatim.
# Root logger: everything from DEBUG upwards goes to smhi.log, which is
# opened with filemode "w" (i.e. rewritten rather than appended to).
FORMAT = '%(asctime)s %(levelname)s: %(module)s: %(funcName)s(): %(message)s'
logging.basicConfig(filename = "smhi.log", filemode = "w", format = FORMAT, level = logging.DEBUG)

# Only let warnings (and worse) through from the HTTP libraries.
for _http_logger in ("requests", "urllib3"):
    logging.getLogger(_http_logger).setLevel(logging.WARNING)

# -- functions

def write_json(json_obj, file_name = 'file.json'):
    """
    Dump a JSON-serialisable object to a file.

    json_obj  -- the object handed to json.dump
    file_name -- path of the file to (over)write; defaults to 'file.json'
                 in the current working directory
    """
    with open(file_name, 'w') as sink:
        json.dump(obj = json_obj, fp = sink)

def api_return_data(adr, timeout = 30):
    """
    Call an API endpoint and return the decoded JSON body.

    adr     -- full URL to request
    timeout -- seconds to wait for the server before giving up (default 30);
               without a timeout requests.get can block indefinitely

    Raises requests.HTTPError for 4xx/5xx answers (so an error page is not
    mistaken for data), the usual requests connection/timeout exceptions,
    and a JSON decoding error if the body is not valid JSON.
    """
    # initiate the call
    req_obj = requests.get(adr, timeout = timeout)
    # fail loudly on HTTP errors instead of trying to parse an error body
    req_obj.raise_for_status()
    # decode the payload (exceptions are left for the caller to handle)
    return req_obj.json()
    

In [None]:
#import api_endpoints
#import helpers
import requests
import pandas as pd
import json
import logging

# -- functions

def list_params():
    """
    Print the parameters offered by the API and return their keys.

    Requests ADR_VERSION, prints one "title | summary | key" line for every
    entry under "resource", and returns the list of keys in the same order.
    (Previously the list was built but never returned, so callers that stored
    the result always got None.)
    """

    # -- API call

    data_json = api_return_data(ADR_VERSION)

    # -- print collected data

    params = []
    for param in data_json["resource"]:
        print(param["title"] + " | " +  param["summary"] + " | " + param["key"] )
        params.append(param["key"])

    return params


def list_stations(param):
    """
    Return the stations that report a given parameter as a DataFrame.

    param -- parameter key substituted into ADR_PARAMETER

    The title of the chosen parameter is printed. The returned frame keeps the
    columns name, id, latitude, longitude, active, starting, ending, updated,
    title and key, where "starting"/"ending" are the API's "from"/"to" fields;
    starting, ending and updated are converted from epoch milliseconds to
    datetimes.
    """

    # -- API call

    endpoint = ADR_PARAMETER.format(parameter = param)
    payload = api_return_data(endpoint)
    print("Parameter choosen: " + payload["title"])

    # -- tabulate the station entries

    wanted = ["name", "id", "latitude", "longitude", "active", "from", "to", "updated", "title", "key"]
    # "from" is a Python keyword, hence the friendlier column names
    renamed = pd.DataFrame(payload["station"])[wanted].rename(
        columns={"from": "starting", "to": "ending"}
    )

    # -- make the timestamps readable

    converted = {
        column: pd.to_datetime(renamed[column], unit = "ms")
        for column in ("starting", "ending", "updated")
    }
    return renamed.assign(**converted)


def get_latest_months(param, station):
    """
    Download the "latest-months" JSON data for one station.

    param   -- parameter key substituted into ADR_LATEST_MONTHS
    station -- station id substituted into ADR_LATEST_MONTHS

    Returns a DataFrame built from the "value" entries of the response, with
    "from"/"to" renamed to "starting"/"ending" and converted from epoch
    milliseconds, "value" cast to float, and a "station_id" column added.
    """

    # -- API call

    endpoint = ADR_LATEST_MONTHS.format(parameter = param, station = station)
    payload = api_return_data(endpoint)

    # -- shape the observations

    observations = pd.DataFrame(payload["value"]).rename(
        columns={"from": "starting", "to": "ending"}
    )
    starting = pd.to_datetime(observations["starting"], unit = "ms")
    ending = pd.to_datetime(observations["ending"], unit = "ms")

    return observations.assign(
        starting = starting,
        ending = ending,
        value = observations.value.astype(float),
        station_id = station,
    )


def get_corrected(param, station):
    """
    Download the "corrected-archive" CSV for one station.

    param   -- parameter key substituted into ADR_CORRECTED
    station -- station id substituted into ADR_CORRECTED

    The CSV is read with ';' as delimiter after skipping the first 7 lines.
    Columns at positions 0, 1, 2 and 5 are kept and named "date", "value",
    "quality" and "ref"; "date" is parsed to datetime and a "station_id"
    column is added.

    Raises whatever pandas.read_csv raises when the download or parsing fails.
    """
    # -- API call

    # create the API adress
    adr_full = ADR_CORRECTED.format(parameter = param, station = station)

    # download the csv data
    df = pd.read_csv(filepath_or_buffer= adr_full, skiprows= 7, delimiter=";")

    # Keep only the needed columns. The explicit copy makes df_lim an
    # independent frame, so the assignments below are not chained assignment
    # on a slice of df (which pandas may flag with SettingWithCopyWarning).
    df_lim = df.iloc[:, [0, 1, 2, 5]].copy()

    # rename the columns
    df_lim.columns = ["date", "value", "quality", "ref"]

    # fix datetime columns
    df_lim["date"] = pd.to_datetime(df_lim["date"])

    # add the station id
    df_lim["station_id"] = station

    return df_lim


def get_stations(param, station_keys):
    """
    Download the corrected archive for a set of stations and stack the results.

    param        -- parameter key passed on to get_corrected
    station_keys -- the stations to fetch: a DataFrame (its "key" column is
                    used), any list-like of station ids (tuple, list, Series,
                    ...), or a single bare station id

    Returns a dict with the stacked frames under "df_corrected" and/or
    "df_latest", or None when nothing could be downloaded. The latest-months
    download is currently disabled, so in practice only "df_corrected" occurs.

    A failing station is logged (with the error) and skipped; it does not
    abort the remaining downloads.
    """

    # -- create the iterable

    if isinstance(station_keys, pd.DataFrame):
        iterable = station_keys["key"]
    elif pd.api.types.is_list_like(station_keys):
        iterable = station_keys
    else:
        # a single bare id (is_list_like is False for scalars and strings)
        iterable = (station_keys,)

    # -- Construct some holder structures for data frames

    df_new = dict()
    df_old = dict()

    # -- loop through set of stations

    print(">>> Start downloading each station")
    for station_id in iterable:
        print(f">>> Downloading {station_id}")
        # The latest-months download is switched off; to re-enable it, fill
        # df_new[station_id] = get_latest_months(param = param, station = station_id)
        # here inside its own try/except.

        # get the corrected archive
        logging.info(f"downloading corrected archive for {station_id}")
        try:
            df_old[station_id] = get_corrected(param = param, station = station_id)
            logging.debug(f"downloaded corrected archive for {station_id} successful")
        except Exception as error:
            # keep going with the next station, but record why this one failed
            logging.error(f"not possible to download corrected archive for {station_id}: {error!r}")

    # -- gather the data

    # Stack the latest months of all stations into one data frame
    df_latest = pd.concat(list(df_new.values()), ignore_index=True) if df_new else None

    # Stack the corrected archives of all stations into one data frame
    df_corrected = pd.concat(list(df_old.values()), ignore_index=True) if df_old else None

    # return all data
    print("Check smhi.log for data download details!")
    if df_latest is not None and df_corrected is not None :
        logging.debug("both df_latest and df_corrected contains data")
        dictus = {"df_latest": df_latest, "df_corrected": df_corrected}
    elif df_latest is not None:
        logging.info("only df_latest contain data")
        dictus = {"df_latest": df_latest}
    elif df_corrected is not None:
        logging.info("only df_corrected contain data")
        dictus = {"df_corrected": df_corrected}
    else:
        logging.info("no data frame contains data")
        dictus = None

    # -- make sure the log is written out

    # Flush rather than logging.shutdown(): shutdown closes the handlers, and
    # this function is called repeatedly, so later calls would otherwise lose
    # (or truncate) the log file.
    for handler in logging.getLogger().handlers:
        handler.flush()

    return dictus

In [None]:

# -- imports (the helper functions come from the cells above, not from an smhi module)

import pandas as pd

# -- print the parameters that the API offers

params = list_params()

# -- for one parameter: which stations report it, over what period, and where

df_stations = list_stations(param=1)

# -- download the data for a hand-picked pair of stations

station = (2506, 2372)
dict_df = get_stations(param=1, station_keys=station)

# -- show the stacked corrected archive (dict_df["df_latest"] would hold the latest months)
dict_df["df_corrected"]

In [None]:
# Stations ordered by the start of their record (display only; df_stations is not modified)
df_stations.sort_values(by='starting')

In [None]:
import numpy as np
import pandas as pd

# Length of each station's record as a timedelta
df_stations['duration']=df_stations['ending']-df_stations['starting']
# Convert the timedelta to (fractional) years. 365.2425 days is the span that
# np.timedelta64(1, 'Y') stands for, so the numbers are unchanged, but unlike
# the 'Y' unit (which pandas 2.x refuses as ambiguous) a fixed-length
# Timedelta works across pandas versions.
df_stations['duration']=df_stations['duration']/ pd.Timedelta(days=365.2425)
df_stations.sort_values(by='duration',ascending=False).head(30)

In [None]:
#df_stations.to_excel('discharge_stations.xlsx')

In [None]:
import pandas as pd

# Load the station table from discharge_stations.xlsx (replaces any df_stations already in memory)
df_stations = pd.read_excel(io='discharge_stations.xlsx')

In [None]:
# Load the station table from rain_stations.xlsx (replaces any df_stations already in memory)
df_stations = pd.read_excel(io='rain_stations.xlsx')

In [None]:
# Display the station table that is currently loaded
df_stations

In [None]:
import pandas as pd

# Make sure 'starting' holds datetimes; values that cannot be parsed become NaT
df_stations['starting'] = pd.to_datetime(df_stations['starting'], errors='coerce')

# Keep only the stations whose record starts after the cut-off
start_date_str = '1922-01-01 00:00:00'
start_date = pd.to_datetime(start_date_str)

started_after_cutoff = df_stations['starting'] > start_date
filtered_df = df_stations.loc[started_after_cutoff]

# Now you can use the filtered_df for plotting


In [None]:
import os

import matplotlib.pyplot as plt
import cartopy.crs as ccrs
import cartopy.feature as cfeature
import cartopy.io.img_tiles as cimgt
from cartopy.io.shapereader import Reader
import numpy as np
import matplotlib
import matplotlib.colors as mcolors

# Sort your DataFrame
df_stations = df_stations.sort_values(by='duration')

plt.figure(figsize=(8, 5))

# Creates the map
sw_map = plt.axes(projection=ccrs.PlateCarree())

# To add county lines
# The shapefile directory can be overridden with the SMHI_SHAPEFILE_DIR
# environment variable; the default is the original author's local folder.
mypath = os.environ.get(
    'SMHI_SHAPEFILE_DIR',
    r'C:\Users\MarlonVieiraPassos\OneDrive - SEI\Documents\HydroHazards\Codes\OSM\halmstad\data\shapefiles',
)
data = Reader(os.path.join(mypath, 'se_10km.shp'))
sw_border = list(data.geometries())
sw_border2 = cfeature.ShapelyFeature(sw_border, ccrs.PlateCarree())
sw_map.add_feature(sw_border2, facecolor='none', edgecolor='gray')

# Background tiles. The Mapbox token is read from the environment instead of
# being committed with the notebook (the token that used to be hardcoded here
# should be considered leaked and rotated).
mapbox_token = os.environ.get('MAPBOX_ACCESS_TOKEN')
if mapbox_token:
    request = cimgt.MapboxTiles(map_id='light-v10', access_token=mapbox_token)
    sw_map.add_image(request, 5, cmap='gray', interpolation='spline36', regrid_shape=2000)
else:
    print("MAPBOX_ACCESS_TOKEN is not set - drawing the map without background tiles")

sw_map.xaxis.set_visible(True)
sw_map.yaxis.set_visible(True)

# Plots the data onto map
cmap = matplotlib.cm.coolwarm
norm = matplotlib.colors.Normalize(vmin=0, vmax=100)

plt.scatter(df_stations['longitude'], df_stations['latitude'], alpha=0.8,
            s=df_stations['duration'] / 2,
            c=df_stations['duration'],
            edgecolors='black',
            cmap=cmap,
            norm=norm,  # Apply the normalization to the scatter plot
            transform=ccrs.PlateCarree())

# Colorbar
cbar = plt.colorbar(extend='max')
cbar.set_label('Data Availability (years)', fontsize=16)
cbar.ax.tick_params(labelsize=14)
# Plot labels
plt.ylabel("Latitude", fontsize=14)
plt.xlabel("Longitude", fontsize=14)
plt.title('Precipitation Observation Stations (1922-2021)', fontsize=16)

plt.savefig('precipitation_stations_smhi2.png', format="png", dpi=120)
plt.show()


In [None]:
import os

import matplotlib.pyplot as plt
import cartopy.crs as ccrs
import cartopy.feature as cfeature
import cartopy.io.img_tiles as cimgt
from cartopy.io.shapereader import Reader
import numpy as np
import matplotlib
import matplotlib.colors as mcolors

# Sort your DataFrame
df_stations = df_stations.sort_values(by='duration')

plt.figure(figsize=(8, 5))

# Creates the map
sw_map = plt.axes(projection=ccrs.PlateCarree())

# To add county lines
# The shapefile directory can be overridden with the SMHI_SHAPEFILE_DIR
# environment variable; the default is the original author's local folder.
mypath = os.environ.get(
    'SMHI_SHAPEFILE_DIR',
    r'C:\Users\MarlonVieiraPassos\OneDrive - SEI\Documents\HydroHazards\Codes\OSM\halmstad\data\shapefiles',
)
data = Reader(os.path.join(mypath, 'se_10km.shp'))
sw_border = list(data.geometries())
sw_border2 = cfeature.ShapelyFeature(sw_border, ccrs.PlateCarree())
sw_map.add_feature(sw_border2, facecolor='none', edgecolor='gray')

# Background tiles. The Mapbox token is read from the environment instead of
# being committed with the notebook (the token that used to be hardcoded here
# should be considered leaked and rotated).
mapbox_token = os.environ.get('MAPBOX_ACCESS_TOKEN')
if mapbox_token:
    request = cimgt.MapboxTiles(map_id='light-v10', access_token=mapbox_token)
    sw_map.add_image(request, 5, cmap='gray', interpolation='spline36', regrid_shape=2000)
else:
    print("MAPBOX_ACCESS_TOKEN is not set - drawing the map without background tiles")

sw_map.xaxis.set_visible(True)
sw_map.yaxis.set_visible(True)

# Plots the data onto map
cmap = matplotlib.cm.coolwarm
norm = matplotlib.colors.Normalize(vmin=0, vmax=100)

plt.scatter(df_stations['longitude'], df_stations['latitude'], alpha=0.8,
            s=df_stations['duration'] / 2,
            c=df_stations['duration'],
            edgecolors='black',
            cmap=cmap,
            norm=norm,  # Apply the normalization to the scatter plot
            transform=ccrs.PlateCarree())

# Colorbar
cbar = plt.colorbar(extend='max')
cbar.set_label('Data Availability (years)', fontsize=16)
cbar.ax.tick_params(labelsize=14)
# Plot labels
plt.ylabel("Latitude", fontsize=14)
plt.xlabel("Longitude", fontsize=14)
plt.title('Modelled Discharge Locations (1922-2021)', fontsize=16)

plt.savefig('discharge_stations_smhi2.png', format="png", dpi=120)
plt.show()


In [None]:
# Download data for all stations (monthly/daily rainfall)
# NOTE(review): the value below is 1 while the hint lists 23 and 5 - confirm
# which parameter is actually intended before running the full download.
pnumber = 1 # 23: monthly rainfall, 5: daily rainfall
id_list=df_stations['id'].unique()

# Collect the per-station frames in a list and concatenate once at the end:
# DataFrame.append no longer exists in pandas 2.x, and the former bare
# "except: pass" silently hid that failure (and every other one).
station_frames = []
for station_id in id_list:
    sid = (station_id,)
    try:
        dict_df = get_stations(param = pnumber, station_keys = sid)
    except Exception as error:
        # best effort: report the station and carry on with the next one
        print(f"Station {station_id}: download failed ({error!r})")
        continue
    if dict_df is None or "df_corrected" not in dict_df:
        print(f"Station {station_id}: no corrected archive returned")
        continue
    station_frames.append(dict_df["df_corrected"])

# Same result as repeated appends (original row labels are kept); an empty
# DataFrame when nothing could be downloaded.
df_all = pd.concat(station_frames) if station_frames else pd.DataFrame()