## DOWNLOADING SCRIPT

This notebook is meant for use on the CDSE platform.

This notebook requires no input data.

The output of the notebook will be GeoJSON files with ground-level data on ambient particulate matter in the atmosphere.

See: CAMS Air Qu

_CAUTION:_ CAMS downloads have a tendency to crash, in which case the user must babysit the downloading process a bit.

*TODO:* rework the downloading process.

In [44]:
#!pip install xarray  numpy  cdsapi  
# Python Standard Libraries
import os
import datetime as dt
import zipfile

# Data Manipulation Libraries
import numpy as np
import xarray as xr

# libraries
import geopandas
from shapely.geometry import Point

# Climate Data Store API for retrieving climate data
import cdsapi

In [49]:
# key to atmosphere monitoring platform
URL =  'https://ads.atmosphere.copernicus.eu/api'
KEY = # get from platform

In [47]:
# define working directory
%cd /home/jovyan/mystorage/testdir

/home/jovyan/mystorage/testdir


In [90]:
# decide later if these variables are needed
variables = ["co", "ectot", "no", "no2", "o3", "pm10", "sia"]
c = cdsapi.Client(url=URL, key=KEY)

# swe fi bbox
# redefine bounding box if desired
xmin = 11.00
ymin = 55.30
xmax = 32.00
ymax = 70.17

# define any desired years
#years = ["2019", "2020", "2021", "2022", "2023"]
years = ["2022", "2023"]
# define any desired months
months = ["01", "02", "03", "04", "05", "06", "07", "08", "09", "10", "11", "12"]

# API request automatically generated by the CAMS service
dataset = "cams-europe-air-quality-reanalyses"

for yyyy in years:
    for mm in months:
        if ((yyyy == "2022") & ((mm == "02"))): # for some reason request fails for this month???
            continue
        if ((yyyy != "2022") & ((mm != "01") )):
            request = {
                "variable": [ # TODO: remove unnecessary variables
                    "carbon_monoxide",
                    "nitrogen_dioxide",
                    "nitrogen_monoxide",
                    "ozone",
                    "secondary_inorganic_aerosol",
                    "total_elementary_carbon",
                    "particulate_matter_10um",
                    "sulphur_dioxide"
                ],
                "model": ["lotos"], # EUROS-LOTOS model reanalysis by ECMWF
                "level": ["0"], # ground-level
                "type": ["interim_reanalysis"],
                "year": [yyyy],
                "month": [
                    mm
                ],
                 "data_format": "nc5", # faster download
                "download_format": "archived" # faster download
            }
            
            c.retrieve(dataset, request, target="download.nc")
            with zipfile.ZipFile("download.nc", "r") as zip_ref:
                zip_ref.extractall("download"+yyyy)

            # extract SO2 first
            fname = "download"+yyyy+"/cams.eaq.ira.KNMa.so2.l0."+yyyy+"-"+mm+".nc"
            xr_so2 = xr.open_dataset(fname, engine="netcdf4")
            # slice sweden-finland
            data_swefi = xr_so2.sel(lon=slice(xmin, xmax), lat=slice(ymin, ymax))
        
    
        for var in variables:
    
            fname = "download"+yyyy+"/cams.eaq.ira.KNMa."+var+".l0."+yyyy+"-"+mm+".nc"
            xrds = xr.open_dataset(fname, engine="netcdf4")
            data_swefi[var] = xrds.sel(lon=slice(xmin, xmax), lat=slice(ymin, ymax))[var]
    
        agg_data = data_swefi.mean(dim="time")
        #agg_data.to_netcdf("./data/demo2-y"+yyyy+"m"+mm+".nc")

        # collection cell: I am too lazy to clean this notebook
        
        # dataframe-ify and turn into geojson
        agg_data_df = agg_data.to_dataframe()
        agg_data_df = agg_data_df.reset_index()
        geom = [Point(x,y) for x, y in zip(agg_data_df['lon'], agg_data_df['lat'])]
        geodf_agg = geopandas.GeoDataFrame(agg_data_df, geometry=geom)
        geodf_agg.head()
        geodf_agg.to_file("./data/"+yyyy+"-"+mm+"demo.geojson", driver="GeoJSON")
        

FileNotFoundError: [Errno 2] No such file or directory: '/home/jovyan/mystorage/testdir/download2022/cams.eaq.ira.KNMa.co.l0.2022-04.nc'

### DEPRECATED CODE CELLS BELOW

## DO NOT RUN, ONLY READ OR USE AS MODEL

In [52]:
with zipfile.ZipFile("download", "r") as zip_ref:
        zip_ref.extractall("download"+yyyy)

In [94]:
# deprecated
"""
request = {
    "variable": [
        "carbon_monoxide",
        "nitrogen_dioxide",
        "nitrogen_monoxide",
        "ozone",
        "secondary_inorganic_aerosol",
        "total_elementary_carbon",
        "particulate_matter_10um",
        "sulphur_dioxide"
    ],
    "model": ["lotos"],
    "level": ["0"],
    "type": ["interim_reanalysis"],
    "year": ["2022"],
    "month": [
        "02"
    ],
     "data_format": "nc5",
    "download_format": "archived"
}
"""
#c.retrieve(dataset, request, target="download.nc")

2025-03-09 19:26:32,051 INFO Request ID is bf7c070b-589c-47b0-9957-c11dd52b7b8b
2025-03-09 19:26:32,234 INFO status has been updated to accepted
2025-03-09 19:26:44,724 INFO status has been updated to running
2025-03-09 19:30:49,776 INFO status has been updated to successful


c67140dbece64e2df15f05486e4d6511.zip:   0%|          | 0.00/5.89G [00:00<?, ?B/s]

'download.nc'

In [57]:
# downloading keeps crashing
# try processing manually
variables

['co', 'dust', 'ectot', 'no', 'no2', 'o3', 'pm10', 'sia']

In [67]:
fname = "download"+yyyy+"/cams.eaq.ira.KNMa.so2.l0.2022-03.nc"
xr_so2 = xr.open_dataset(fname, engine="netcdf4")

In [68]:
data_swefi = xr_so2.sel(lon=slice(xmin, xmax), lat=slice(ymin, ymax))

In [69]:
variables = ["co", "ectot", "no", "no2", "o3", "pm10", "sia"]

#yyyy = "2023"
yyyy = "2022"

for var in variables:

    fname = "download2022/cams.eaq.ira.KNMa."+var+".l0.2022-03.nc"
    xrds = xr.open_dataset(fname, engine="netcdf4")
    data_swefi[var] = xrds.sel(lon=slice(xmin, xmax), lat=slice(ymin, ymax))[var]

agg_data = data_swefi.mean(dim="time")
agg_data.to_netcdf("./data/demo2-y"+yyyy+".nc")

In [70]:
agg_data

In [6]:
import zipfile

In [7]:
with zipfile.ZipFile("e19b02f99a17469232c0db6c09b6dd96.zip","r") as zip_ref:
    zip_ref.extractall("data/download")

In [4]:
# import the SO2
nc_so2 = "data/download/cams.eaq.ira.KNMa.so2.l0.2023-07.nc"

xr_so2 = xr.open_dataset(nc_so2, engine="netcdf4")

In [5]:
xr_so2

In [6]:
#bbox_swefin = BBox([11.020, 55.30, 32.00, 70.17], crs=CRS.WGS84).transform(CRS(3857))

In [7]:
xmin = 11.00
ymin = 55.30
xmax = 32.00
ymax = 70.17

In [8]:
xr_so2_swefi = xr_so2.sel(lon=slice(xmin, xmax), lat=slice(ymin, ymax))

In [9]:
xr_so2_swefi

In [10]:
xr_so2_swefi.so2

### Downloading values experiment

In [11]:
xr_so2_swefi

In [12]:
data_swefi = xr_so2_swefi

In [16]:
variables = ["co", "dust", "ectot", "no", "no2", "o3", "pm10", "sia"]

for var in variables:

    fname = "data/download/cams.eaq.ira.KNMa."+var+".l0.2023-07.nc"
    xrds = xr.open_dataset(fname, engine="netcdf4")
    data_swefi[var] = xrds[var]

In [19]:
t0 = "2023-07-01T12:00:00"
t1 = "2023-07-31T12:00:00"

In [22]:
data_swefi.sel(time=[t0, t1])

In [24]:
data_swefi.sel(time=[t0, t1]).to_netcdf("./data/demo1.nc")

### GeoPandas experiments

In [71]:
# collection cell: I am too lazy to clean this notebook
# libraries
import geopandas
from shapely.geometry import Point

# dataframe-ify the 
agg_data_df = agg_data.to_dataframe()
agg_data_df = agg_data_df.reset_index()
geom = [Point(x,y) for x, y in zip(agg_data_df['lon'], agg_data_df['lat'])]
geodf_agg = geopandas.GeoDataFrame(agg_data_df, geometry=geom)
geodf_agg.head()
geodf_agg.to_file("./data/0322demo.geojson", driver="GeoJSON")

In [72]:
geodf_agg

Unnamed: 0,lon,lat,so2,co,ectot,no,no2,o3,pm10,sia,geometry
0,11.05,55.35,0.686881,161.124741,0.453183,1.159885,7.804810,75.554634,17.661661,9.834899,POINT (11.05000 55.35000)
1,11.05,55.45,0.611537,158.876892,0.405424,0.589704,5.123747,78.184448,17.464296,9.846318,POINT (11.05000 55.45000)
2,11.05,55.55,0.603566,158.734665,0.397017,0.421696,4.188167,78.497643,17.309443,9.755754,POINT (11.05000 55.55000)
3,11.05,55.65,0.874059,165.914688,0.457504,0.674063,5.732825,75.593452,17.877903,9.925507,POINT (11.05000 55.65000)
4,11.05,55.75,0.692359,161.418015,0.403106,0.412463,4.057068,77.179749,16.793957,9.578770,POINT (11.05000 55.75000)
...,...,...,...,...,...,...,...,...,...,...,...
31285,31.95,69.75,1.380116,144.081696,0.038266,0.013655,0.348096,86.602531,6.369429,1.603353,POINT (31.95000 69.75000)
31286,31.95,69.85,1.297200,146.069855,0.034849,0.013400,0.327246,87.804222,7.178958,1.593327,POINT (31.95000 69.85000)
31287,31.95,69.95,1.182105,147.992767,0.033112,0.012102,0.296824,89.076843,6.792434,1.608530,POINT (31.95000 69.95000)
31288,31.95,70.05,1.007965,147.916702,0.031571,0.009904,0.269554,89.041519,9.390265,1.583653,POINT (31.95000 70.05000)


In [34]:
import geopandas
from shapely.geometry import Point

In [28]:
df_t0 = data_swefi.sel(time=t0).to_dataframe()
df_t1 = data_swefi.sel(time=t1).to_dataframe()

In [32]:
df_t0 = df_t0.reset_index()
df_t0.head()

Unnamed: 0,lon,lat,time,so2,co,dust,ectot,no,no2,o3,pm10,sia
0,11.049999,55.349998,2023-07-01 12:00:00,0.091888,132.0,0.097046,0.086121,0.507568,2.430664,73.8125,5.386719,1.483398
1,11.049999,55.449997,2023-07-01 12:00:00,0.127136,130.5625,0.085693,0.073669,0.34314,1.89209,72.75,5.582031,1.84668
2,11.049999,55.549999,2023-07-01 12:00:00,0.110168,128.75,0.07486,0.061172,0.242615,1.455078,71.59375,5.443359,1.98291
3,11.049999,55.650002,2023-07-01 12:00:00,0.151306,129.25,0.074005,0.061676,0.347046,1.778809,68.8125,3.973633,1.881348
4,11.049999,55.75,2023-07-01 12:00:00,0.106079,128.5625,0.089691,0.057281,0.298584,1.622559,67.0625,4.089844,1.75293


In [35]:
#df_t0.reset_index()
geom = [Point(x,y) for x, y in zip(df_t0['lon'], df_t0['lat'])]
geodf_t0 = geopandas.GeoDataFrame(df_t0, geometry=geom)
geodf_t0.head()

Unnamed: 0,lon,lat,time,so2,co,dust,ectot,no,no2,o3,pm10,sia,geometry
0,11.049999,55.349998,2023-07-01 12:00:00,0.091888,132.0,0.097046,0.086121,0.507568,2.430664,73.8125,5.386719,1.483398,POINT (11.05000 55.35000)
1,11.049999,55.449997,2023-07-01 12:00:00,0.127136,130.5625,0.085693,0.073669,0.34314,1.89209,72.75,5.582031,1.84668,POINT (11.05000 55.45000)
2,11.049999,55.549999,2023-07-01 12:00:00,0.110168,128.75,0.07486,0.061172,0.242615,1.455078,71.59375,5.443359,1.98291,POINT (11.05000 55.55000)
3,11.049999,55.650002,2023-07-01 12:00:00,0.151306,129.25,0.074005,0.061676,0.347046,1.778809,68.8125,3.973633,1.881348,POINT (11.05000 55.65000)
4,11.049999,55.75,2023-07-01 12:00:00,0.106079,128.5625,0.089691,0.057281,0.298584,1.622559,67.0625,4.089844,1.75293,POINT (11.05000 55.75000)


In [36]:
## geometry Point
df_t1 = df_t1.reset_index()
df_t1.head()
geom1 = [Point(x,y) for x, y in zip(df_t1['lon'], df_t1['lat'])]
geodf_t1 = geopandas.GeoDataFrame(df_t1, geometry=geom)
geodf_t1.head()

Unnamed: 0,lon,lat,time,so2,co,dust,ectot,no,no2,o3,pm10,sia,geometry
0,11.049999,55.349998,2023-07-31 12:00:00,0.227295,141.1875,0.193054,0.088104,0.556396,1.63916,70.84375,14.195312,1.34082,POINT (11.05000 55.35000)
1,11.049999,55.449997,2023-07-31 12:00:00,0.157532,139.4375,0.18219,0.074066,0.44519,1.381348,72.375,12.558594,1.095215,POINT (11.05000 55.45000)
2,11.049999,55.549999,2023-07-31 12:00:00,0.105804,136.0625,0.172729,0.063293,0.488525,1.425293,71.125,12.027344,0.895264,POINT (11.05000 55.55000)
3,11.049999,55.650002,2023-07-31 12:00:00,0.154053,136.5,0.14624,0.052872,0.36084,1.100586,71.125,10.777344,0.737061,POINT (11.05000 55.65000)
4,11.049999,55.75,2023-07-31 12:00:00,0.124512,135.0,0.118744,0.039719,0.206116,0.669922,68.46875,11.085938,0.633545,POINT (11.05000 55.75000)


In [40]:
geodf_t0.drop("time", axis=1).to_file("./data/0701demo.geojson", driver="GeoJSON")
geodf_t1.drop("time", axis=1).to_file("./data/0731demo.geojson", driver="GeoJSON")