In [1]:
from pathlib import Path
import getpass

import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import pandas as pd
import geopandas as gpd
import rasterio
import rasterio.plot
from rasterio import features
import datetime as dt
import time
import os

from sentinelhub import (
    SHConfig,
    CRS,
    BBox,
    DataCollection,
    DownloadRequest,
    MimeType,
    MosaickingOrder,
    SentinelHubDownloadClient,
    SentinelHubStatisticalDownloadClient,
    SentinelHubRequest,
    bbox_to_dimensions,
    SentinelHubStatistical,
    Geometry,
    parse_time,
)

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Load the SentinelHub configuration saved under the "cdse" profile.
#
# First-time setup: uncomment the block below and run it once to create and
# save that profile. It is kept commented out so that re-running the notebook
# does not overwrite an existing profile.

# config = SHConfig()
# config.sh_client_id = getpass.getpass("Enter your SentinelHub client id")
# config.sh_client_secret = getpass.getpass("Enter your SentinelHub client secret")
# config.sh_token_url = "https://identity.dataspace.copernicus.eu/auth/realms/CDSE/protocol/openid-connect/token"
# config.sh_base_url = "https://sh.dataspace.copernicus.eu"
# config.save("cdse")
config = SHConfig("cdse")

In [3]:
# Evalscript (JavaScript) kept as a Python string so it can be passed to the
# download request. It reads the "NO2" band together with "dataMask" and
# returns a single FLOAT32 band: the NO2 value where dataMask == 1, NaN
# everywhere else, so missing pixels can be recognised as non-finite values.
evalscript_raw = """
//VERSION=3
function setup() {
   return {
    input: ["NO2", "dataMask"],
    output: 
      {
        id: "default",
        bands: 1,
        sampleType: "FLOAT32"
      },
    mosaicking: "SIMPLE"
  };
}
function evaluatePixel(sample) {
  if (sample.dataMask == 1)  {
    return [sample.NO2];
  } else {
    return [NaN];
  }
}
"""

In [4]:
# Candidate areas of interest as WGS84 bounding boxes.
# Presumably ordered [min_lon, min_lat, max_lon, max_lat] -- confirm against the BBox docs.
cologne_coords_wgs84 = [6.8, 50.8, 7.2, 51.1]
ruhrgebiet_coords_wgs84 = [6.380946, 51.315164, 7.93203, 51.738085]
germany_coords_wgs84 = [5.866315, 47.270111, 15.041896, 55.099161]

# Area actually used below: the Ruhrgebiet box, declared in WGS84 and then
# transformed to CRS(3857).
# NOTE(review): the `resolution` passed to the requests is presumably
# interpreted in the units of this CRS -- confirm.
aoi_bbox = BBox(bbox=ruhrgebiet_coords_wgs84, crs=CRS.WGS84).transform(CRS(3857))

In [5]:
# Study period (both bounds inclusive) and the matching list of one-day
# request windows, each running from 00:00:00Z to 23:59:59Z of the same date.
time_range = ("2019-01-01", "2021-12-31")

start, end = map(pd.to_datetime, time_range)

daily_intervals = [
    (f"{stamp}T00:00:00Z", f"{stamp}T23:59:59Z")
    for stamp in pd.date_range(start, end, freq="D").strftime("%Y-%m-%d")
]

In [6]:
def download_tropomi_data(
    evalscript: str,
    aoi_bbox,
    time_range,
    freq="D",
    resolution=(1000, 1000),
    save_data=True,
    save_data_folder="./data",
):
    """Download one Sentinel-5P raster per time step and report on each download.

    For every timestamp produced by ``pd.date_range(start, end, freq=freq)`` a
    separate ``SentinelHubRequest`` is issued covering that calendar day
    (00:00:00Z to 23:59:59Z). A failed request is printed and recorded in the
    report instead of aborting the loop.

    The function reads the notebook-level ``config`` object for the request
    configuration and the service URL, so the cell defining ``config`` must
    have been run first.

    Parameters
    ----------
    evalscript : str
        Evalscript forwarded unchanged to every request.
    aoi_bbox
        Bounding box passed as ``bbox`` to ``SentinelHubRequest``.
    time_range
        Pair ``(start, end)``; both entries are parsed with ``pd.to_datetime``.
    freq : str, default "D"
        Step passed to ``pd.date_range``. Every step still requests a single
        day only, so a coarser step samples days rather than widening the
        request window.
    resolution : tuple, default (1000, 1000)
        Passed as ``resolution`` to ``SentinelHubRequest``.
    save_data : bool, default True
        Forwarded to ``get_data``.
    save_data_folder : str, default "./data"
        Parent folder; each request uses the sub-folder
        ``<save_data_folder>/<YYYY-MM-DD>`` as its ``data_folder``.

    Returns
    -------
    raw_data_list : list
        One entry per time step, in request order: the first item returned by
        ``get_data``, or ``None`` when the request failed.
    df_report : pandas.DataFrame
        One row per time step with the columns ``date``, ``success``,
        ``load_time_s``, ``total_pixels``, ``valid_pixels``,
        ``fraction_valid`` and ``mean_NO2`` (mean over finite pixels, NaN when
        there are none).
    """
    start, end = pd.to_datetime(time_range[0]), pd.to_datetime(time_range[1])

    # One (from, to) pair per step; both ends lie on the same calendar day.
    daily_intervals = [
        (
            day.strftime("%Y-%m-%dT00:00:00Z"),
            day.strftime("%Y-%m-%dT23:59:59Z")
        )
        for day in pd.date_range(start, end, freq=freq)
    ]

    raw_data_list = []
    records = []  # rows for the dataframe

    # Sentinel-5P collection bound to the service URL of the active config.
    data_5p = DataCollection.SENTINEL5P.define_from("5p", service_url=config.sh_base_url)

    for t_from, t_to in daily_intervals:
        date_label = t_from[:10]  # "YYYY-MM-DD" part of the ISO timestamp

        print(f"Downloading: {t_from} -> {t_to}")

        # Build request
        request_raw = SentinelHubRequest(
            evalscript=evalscript,
            input_data=[
                SentinelHubRequest.input_data(
                    data_collection=data_5p,
                    time_interval=(t_from, t_to)
                )
            ],
            responses=[SentinelHubRequest.output_response("default", MimeType.TIFF)],
            bbox=aoi_bbox,
            resolution=resolution,
            config=config,
            data_folder=os.path.join(save_data_folder, date_label),
        )

        # Execute request with timing + safety. perf_counter() is monotonic,
        # so the measured duration cannot be distorted by system clock changes.
        t0 = time.perf_counter()
        try:
            raw = request_raw.get_data(save_data=save_data, redownload=True)
            arr = raw[0]
            success = True
        except Exception as exc:
            # Best effort: keep going with the remaining days and flag this one.
            print("Request failed:", exc)
            arr = None
            success = False
        load_time = time.perf_counter() - t0

        # Summarise the download; non-finite pixels (NaN) count as invalid.
        if success and arr is not None:
            total_px = arr.size
            valid_px = np.count_nonzero(np.isfinite(arr))
            frac_valid = valid_px / total_px if total_px > 0 else 0.0
            # Guard against nanmean's all-NaN warning when nothing is valid.
            mean_val = np.nanmean(arr) if valid_px > 0 else np.nan
        else:
            total_px = valid_px = 0
            frac_valid = 0.0
            mean_val = np.nan

        # Keep the raw array (None for failed requests) aligned with the report rows.
        raw_data_list.append(arr)

        # Append one row to the report
        records.append({
            "date": date_label,
            "success": success,
            "load_time_s": load_time,
            "total_pixels": total_px,
            "valid_pixels": valid_px,
            "fraction_valid": frac_valid,
            "mean_NO2": mean_val,
        })

        print(f"  success={success}, fraction_valid={frac_valid}, mean={mean_val}\n")

    # Build the DataFrame once from all collected rows.
    df_report = pd.DataFrame(records)

    return raw_data_list, df_report




In [7]:
# Download the Ruhrgebiet area of interest for the whole `time_range`.
# With the default daily step this issues one request per day, so the cell is
# long-running; each day's raster is saved under ./data/ruhrgebiet/<YYYY-MM-DD>/.
raw_data_list, df_report = download_tropomi_data(
    evalscript=evalscript_raw,
    aoi_bbox=aoi_bbox,
    time_range=time_range,
    resolution=(5000, 3500),
    save_data_folder="./data/ruhrgebiet",
)

Downloading: 2019-01-01T00:00:00Z -> 2019-01-01T23:59:59Z
  success=True, fraction_valid=0.04415584415584416, mean=4.247078686603345e-05

Downloading: 2019-01-02T00:00:00Z -> 2019-01-02T23:59:59Z
  success=True, fraction_valid=0.6402597402597403, mean=2.755404966592323e-05

Downloading: 2019-01-03T00:00:00Z -> 2019-01-03T23:59:59Z
  success=True, fraction_valid=0.01948051948051948, mean=1.972021891560871e-05

Downloading: 2019-01-04T00:00:00Z -> 2019-01-04T23:59:59Z
  success=True, fraction_valid=0.0, mean=nan

Downloading: 2019-01-05T00:00:00Z -> 2019-01-05T23:59:59Z
  success=True, fraction_valid=0.0, mean=nan

Downloading: 2019-01-06T00:00:00Z -> 2019-01-06T23:59:59Z
  success=True, fraction_valid=0.03766233766233766, mean=-6.968668913032161e-06

Downloading: 2019-01-07T00:00:00Z -> 2019-01-07T23:59:59Z
  success=True, fraction_valid=0.0, mean=nan

Downloading: 2019-01-08T00:00:00Z -> 2019-01-08T23:59:59Z
  success=True, fraction_valid=0.274025974025974, mean=-1.568251354910899e-05


In [8]:
# Persist the per-day download report next to the downloaded rasters.
# Create the target folder first so the export also works when no raster was
# written to disk (e.g. save_data=False), in which case it may not exist yet.
report_path = Path("./data/ruhrgebiet/tropomi_no2_report.csv")
report_path.parent.mkdir(parents=True, exist_ok=True)
df_report.to_csv(report_path, index=False)

In [9]:
# Preview the first rows of the download report (one row per requested day).
df_report.head()

Unnamed: 0,date,success,load_time_s,total_pixels,valid_pixels,fraction_valid,mean_NO2
0,2019-01-01,True,1.604921,770,34,0.044156,4.2e-05
1,2019-01-02,True,1.188231,770,493,0.64026,2.8e-05
2,2019-01-03,True,1.1647,770,15,0.019481,2e-05
3,2019-01-04,True,1.127776,770,0,0.0,
4,2019-01-05,True,1.229359,770,0,0.0,
