# Retrieving the Roemmich-Gilson ARGO-based T/S Climatology

In [1]:
import xarray as xr
from pathlib import Path
from glob import glob
import requests
import warnings
import subprocess as sp

# Silence every Python warning for the rest of the session (no category or module
# filter is given, so unrelated warnings are hidden as well).
# NOTE(review): presumably added to hide the certificate warnings caused by
# verify=False in url_retrieve — confirm before narrowing or removing.
warnings.filterwarnings("ignore")

def url_retrieve(url: str, outfile: Path, timeout: float = 60.0, verify: bool = False):
    """Download ``url`` and write the response body to ``outfile``.

    Parameters
    ----------
    url : str
        Address of the file to fetch; redirects are followed.
    outfile : Path
        Destination file. It is only written once the whole body has been
        received with status 200, and is overwritten if it already exists.
    timeout : float, optional
        Seconds requests waits for the server to connect / send data before
        giving up. Without a timeout a stalled server blocks the notebook
        indefinitely.
    verify : bool, optional
        Whether to verify the server's TLS certificate. Defaults to ``False``
        to preserve the historical behaviour of this helper.

    Raises
    ------
    ConnectionError
        If the server answers with any status code other than 200.

    Exceptions raised by ``requests`` itself (including timeouts) propagate
    unchanged.
    """
    # NOTE(review): verify=False skips certificate validation, so the download
    # is not protected against a man-in-the-middle; pass verify=True when the
    # server's certificate chain is known to be valid.
    response = requests.get(url, allow_redirects=True, verify=verify, timeout=timeout)
    if response.status_code != 200:
        raise ConnectionError('could not download {}\nerror code: {}'.format(url, response.status_code))

    outfile.write_bytes(response.content)

In [2]:
# Base URL that the download cells prepend to each RG_ArgoClim_* file name.
urlroot = "https://sio-argo.ucsd.edu/gilson/argo_climatology/"

## Download the 2004-2018 climatology

In [3]:
# Fetch the temperature and salinity climatology archives into the working directory.
# urlroot already ends with "/", so strip it before joining to avoid requesting
# ".../argo_climatology//RG_...".
base_url = urlroot.rstrip("/")
for field in ("Temperature", "Salinity"):
    archive = f"RG_ArgoClim_{field}_2019.nc.gz"
    url_retrieve(f"{base_url}/{archive}", Path(f"./{archive}"))

## Download the extension

In [4]:
files_ext = []   # names of the decompressed monthly files, in download order
lastyear = 2021  # last year (inclusive) of monthly extension files to fetch
# urlroot already ends with "/", so strip it before joining to avoid a "//" in the URL.
base_url = urlroot.rstrip("/")
for year in range(2019, lastyear + 1):
    for month in range(1, 12 + 1):
        # Build the name once; the archive is the same name with a .gz suffix.
        ncfile = f"RG_ArgoClim_{year}{month:02d}_2019.nc"
        archive = f"{ncfile}.gz"
        print(f"retrieving {archive}")
        files_ext.append(ncfile)
        url_retrieve(f"{base_url}/{archive}", Path(f"./{archive}"))

retrieving RG_ArgoClim_201901_2019.nc.gz
retrieving RG_ArgoClim_201902_2019.nc.gz
retrieving RG_ArgoClim_201903_2019.nc.gz
retrieving RG_ArgoClim_201904_2019.nc.gz
retrieving RG_ArgoClim_201905_2019.nc.gz
retrieving RG_ArgoClim_201906_2019.nc.gz
retrieving RG_ArgoClim_201907_2019.nc.gz
retrieving RG_ArgoClim_201908_2019.nc.gz
retrieving RG_ArgoClim_201909_2019.nc.gz
retrieving RG_ArgoClim_201910_2019.nc.gz
retrieving RG_ArgoClim_201911_2019.nc.gz
retrieving RG_ArgoClim_201912_2019.nc.gz
retrieving RG_ArgoClim_202001_2019.nc.gz
retrieving RG_ArgoClim_202002_2019.nc.gz
retrieving RG_ArgoClim_202003_2019.nc.gz
retrieving RG_ArgoClim_202004_2019.nc.gz
retrieving RG_ArgoClim_202005_2019.nc.gz
retrieving RG_ArgoClim_202006_2019.nc.gz
retrieving RG_ArgoClim_202007_2019.nc.gz
retrieving RG_ArgoClim_202008_2019.nc.gz
retrieving RG_ArgoClim_202009_2019.nc.gz
retrieving RG_ArgoClim_202010_2019.nc.gz
retrieving RG_ArgoClim_202011_2019.nc.gz
retrieving RG_ArgoClim_202012_2019.nc.gz
retrieving RG_Ar

### Decompress files

In [5]:
# Decompress every .gz archive in the working directory, keeping the archive
# (--keep) and overwriting any previously extracted file (--force).
for archive in glob("*.gz"):
    # Pass an argument list rather than an interpolated shell string so that
    # file names containing spaces or shell metacharacters are handled safely;
    # "--" stops a name starting with "-" from being parsed as an option.
    sp.check_call(["gunzip", "--keep", "--force", "--", archive])

## Merge all into a single file

In [6]:
# Open the temperature and salinity climatology files as a single dataset,
# leaving TIME values undecoded and chunking by 12 steps along TIME.
ds_clim = xr.open_mfdataset(
    [
        "RG_ArgoClim_Temperature_2019.nc",
        "RG_ArgoClim_Salinity_2019.nc",
    ],
    decode_times=False,
    chunks={"TIME": 12},
)

In [7]:
# Open all monthly extension files listed in files_ext as a single dataset,
# leaving TIME values undecoded and using one TIME step per chunk.
ds_extension = xr.open_mfdataset(
    files_ext,
    decode_times=False,
    chunks={"TIME": 1},
)

We first concatenate the time-varying variables along `TIME`, then add back the time-invariant fields (means and masks) from the climatology:

In [8]:
# Fields treated as invariant: they are kept out of the TIME concatenation and
# copied back from the climatology dataset afterwards.
invariants = ["ARGO_SALINITY_MEAN", "ARGO_TEMPERATURE_MEAN", "BATHYMETRY_MASK", "MAPPING_MASK"]

clim_time_varying = ds_clim.drop_vars(invariants)
ds = xr.concat([clim_time_varying, ds_extension], dim="TIME")
ds = ds.assign({name: ds_clim[name] for name in invariants})

In [9]:
# Write the merged dataset to one NetCDF file whose name ends with the last
# year and month covered by the extension.
ds.to_netcdf(
    f"RG_ArgoClim_Extended_200401-{lastyear}12.nc",
    format="NETCDF3_64BIT",
)