# Preprocessing script for CMIP6 potential temperature, salinity, and DIC fields

All preprocessing is to be done on the *native* CMIP6 output fields, then included when calculating depth-integrated temperature, salinity, and DIC

Our preprocessing includes the following:
1. Calculation of annual averages using momlevel (which takes irregular month lengths into account), saving the result to a zarr directory
2. Calculation of linear drifts from preindustrial control (piControl) runs. Using a linear drift rather than a higher-order drift is in line with how XMIP and Pangeo operate (**TODO:** double-check this)
3. Regridding CMIP fields to a standard 1x1 (or 1.25x1.25) grid (1x1 for theoretical work, 1.25x1.25 for work with Argo data)
4. Integrating CMIP fields to various depths (100 m, 500 m, 1000 m, 2000 m)
5. Concatenating the integrated CMIP fields into a super-timeseries

In [1]:
import xarray as xr
import momlevel as ml
import numpy as np

In [2]:
from dask.distributed import Client

# Connect to a Dask scheduler at a fixed address on the local machine instead
# of starting a new cluster from inside the notebook. The scheduler must
# already be listening on this address before the cell is run.
scheduler_address = 'tcp://127.0.0.1:8786'
client = Client(scheduler_address)

In [3]:
# Display the client summary to confirm the scheduler connection and see the
# workers, threads, and memory that are available.
client

0,1
Connection method: Direct,
Dashboard: http://127.0.0.1:8787/status,

0,1
Comm: tcp://192.168.0.4:8786,Workers: 4
Dashboard: http://192.168.0.4:8787/status,Total threads: 8
Started: Just now,Total memory: 14.90 GiB

0,1
Comm: tcp://127.0.0.1:50933,Total threads: 2
Dashboard: http://127.0.0.1:50935/status,Memory: 3.73 GiB
Nanny: tcp://127.0.0.1:50928,
Local directory: /Users/keturner/ENOI/calculate_drifts/dask-worker-space/worker-uu8ku8eb,Local directory: /Users/keturner/ENOI/calculate_drifts/dask-worker-space/worker-uu8ku8eb
Tasks executing: 0,Tasks in memory: 0
Tasks ready: 0,Tasks in flight: 0
CPU usage: 0.0%,Last seen: Just now
Memory usage: 122.20 MiB,Spilled bytes: 0 B
Read bytes: 8.00 kiB,Write bytes: 6.00 kiB

0,1
Comm: tcp://127.0.0.1:50934,Total threads: 2
Dashboard: http://127.0.0.1:50936/status,Memory: 3.73 GiB
Nanny: tcp://127.0.0.1:50925,
Local directory: /Users/keturner/ENOI/calculate_drifts/dask-worker-space/worker-5rq1t80z,Local directory: /Users/keturner/ENOI/calculate_drifts/dask-worker-space/worker-5rq1t80z
Tasks executing: 0,Tasks in memory: 0
Tasks ready: 0,Tasks in flight: 0
CPU usage: 0.0%,Last seen: Just now
Memory usage: 123.59 MiB,Spilled bytes: 0 B
Read bytes: 7.99 kiB,Write bytes: 5.99 kiB

0,1
Comm: tcp://127.0.0.1:50937,Total threads: 2
Dashboard: http://127.0.0.1:50938/status,Memory: 3.73 GiB
Nanny: tcp://127.0.0.1:50926,
Local directory: /Users/keturner/ENOI/calculate_drifts/dask-worker-space/worker-wa1xz5er,Local directory: /Users/keturner/ENOI/calculate_drifts/dask-worker-space/worker-wa1xz5er
Tasks executing: 0,Tasks in memory: 0
Tasks ready: 0,Tasks in flight: 0
CPU usage: 0.0%,Last seen: Just now
Memory usage: 123.41 MiB,Spilled bytes: 0 B
Read bytes: 10.00 kiB,Write bytes: 8.00 kiB

0,1
Comm: tcp://127.0.0.1:50939,Total threads: 2
Dashboard: http://127.0.0.1:50940/status,Memory: 3.73 GiB
Nanny: tcp://127.0.0.1:50927,
Local directory: /Users/keturner/ENOI/calculate_drifts/dask-worker-space/worker-knscqhx3,Local directory: /Users/keturner/ENOI/calculate_drifts/dask-worker-space/worker-knscqhx3
Tasks executing: 0,Tasks in memory: 0
Tasks ready: 0,Tasks in flight: 0
CPU usage: 0.0%,Last seen: Just now
Memory usage: 123.30 MiB,Spilled bytes: 0 B
Read bytes: 10.00 kiB,Write bytes: 8.00 kiB


In [4]:
# Run configuration. These three values build the input file glob
# (f"{path}/{var}_Omon_{model}*.nc") and the output store name
# (f"ann_{var}_{model}") used further down.

# Alternative input location, kept for reference:
#path = "/Users/keturner/Downloads"
# Directory searched for the monthly input files (absolute, machine-specific path).
path = "/Volumes/KT-TOSHIBA/ENOI/CMIP6/preindustrial_controls"
# Variable name as it appears at the start of the input file names.
var = "dissic"
# Model name as it appears in the input file names.
# NOTE(review): the glob treats this as a prefix ("ACCESS*"), so files from
# several models sharing the prefix would be opened together -- confirm only
# one such model is present in `path`.
model = "ACCESS"

In [None]:
#years = [196, 200, 205, 210, 215]

In [None]:
#for i in np.arange(len(years)):
#    ds = xr.open_mfdataset(f"{path}/{var}_Omon_{model}*{years[i]}*.nc", use_cftime=True,
#                          chunks={"time":12, "lev":25, "j":330, "i":120})
#    so_ann = ml.util.annual_average(ds.so)
#    so_ann = so_ann.astype("float32").to_dataset(name="so_annual")
#    so_ann.chunk({"time":20}).to_netcdf(f"ann_{var}_{model}{i}.nc")

In [5]:
# Lazily open every monthly file for the configured variable/model as a single
# dask-backed dataset; nothing is read into memory here.
ds = xr.open_mfdataset(
    f"{path}/{var}_Omon_{model}*.nc",
    use_cftime=True,  # decode the time axis as cftime objects
    # Chunk sizes along each dimension; "i" is kept whole (360) in every chunk.
    chunks={"time": 120, "lev": 10, "j": 50, "i": 360},
)
# Look the variable up through `var` so this cell follows the configuration
# cell; a hard-coded attribute (ds.dissic) raises AttributeError as soon as
# `var` is changed to another field.
ds[var]

Unnamed: 0,Array,Chunk
Bytes,48.28 GiB,82.40 MiB
Shape,"(2400, 50, 300, 360)","(120, 10, 50, 360)"
Count,1220 Tasks,600 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 48.28 GiB 82.40 MiB Shape (2400, 50, 300, 360) (120, 10, 50, 360) Count 1220 Tasks 600 Chunks Type float32 numpy.ndarray",2400  1  360  300  50,

Unnamed: 0,Array,Chunk
Bytes,48.28 GiB,82.40 MiB
Shape,"(2400, 50, 300, 360)","(120, 10, 50, 360)"
Count,1220 Tasks,600 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,843.75 kiB,140.62 kiB
Shape,"(300, 360)","(50, 360)"
Count,489 Tasks,6 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 843.75 kiB 140.62 kiB Shape (300, 360) (50, 360) Count 489 Tasks 6 Chunks Type float64 numpy.ndarray",360  300,

Unnamed: 0,Array,Chunk
Bytes,843.75 kiB,140.62 kiB
Shape,"(300, 360)","(50, 360)"
Count,489 Tasks,6 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,843.75 kiB,140.62 kiB
Shape,"(300, 360)","(50, 360)"
Count,489 Tasks,6 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 843.75 kiB 140.62 kiB Shape (300, 360) (50, 360) Count 489 Tasks 6 Chunks Type float64 numpy.ndarray",360  300,

Unnamed: 0,Array,Chunk
Bytes,843.75 kiB,140.62 kiB
Shape,"(300, 360)","(50, 360)"
Count,489 Tasks,6 Chunks
Type,float64,numpy.ndarray


In [8]:
# Reduce the monthly field to annual means with momlevel (which accounts for
# irregular month lengths), store the result as float32, and wrap it in a
# Dataset so it can be written to disk under a descriptive variable name.
# The field and the output name are derived from `var` so that this cell stays
# consistent with the configuration cell instead of hard-coding "dissic".
dissic_ann = (
    ml.util.annual_average(ds[var])
    .astype("float32")
    .to_dataset(name=f"{var}_annual")
)

In [9]:
# Inspect the (still lazy) annual-mean array. The variable is looked up by the
# name derived from `var` rather than a hard-coded attribute, so the cell keeps
# working when the configured variable changes.
dissic_ann[f"{var}_annual"]

Unnamed: 0,Array,Chunk
Bytes,4.02 GiB,703.12 kiB
Shape,"(200, 50, 300, 360)","(1, 10, 50, 360)"
Count,103222 Tasks,6000 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 4.02 GiB 703.12 kiB Shape (200, 50, 300, 360) (1, 10, 50, 360) Count 103222 Tasks 6000 Chunks Type float32 numpy.ndarray",200  1  360  300  50,

Unnamed: 0,Array,Chunk
Bytes,4.02 GiB,703.12 kiB
Shape,"(200, 50, 300, 360)","(1, 10, 50, 360)"
Count,103222 Tasks,6000 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,843.75 kiB,140.62 kiB
Shape,"(300, 360)","(50, 360)"
Count,489 Tasks,6 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 843.75 kiB 140.62 kiB Shape (300, 360) (50, 360) Count 489 Tasks 6 Chunks Type float64 numpy.ndarray",360  300,

Unnamed: 0,Array,Chunk
Bytes,843.75 kiB,140.62 kiB
Shape,"(300, 360)","(50, 360)"
Count,489 Tasks,6 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,843.75 kiB,140.62 kiB
Shape,"(300, 360)","(50, 360)"
Count,489 Tasks,6 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 843.75 kiB 140.62 kiB Shape (300, 360) (50, 360) Count 489 Tasks 6 Chunks Type float64 numpy.ndarray",360  300,

Unnamed: 0,Array,Chunk
Bytes,843.75 kiB,140.62 kiB
Shape,"(300, 360)","(50, 360)"
Count,489 Tasks,6 Chunks
Type,float64,numpy.ndarray


In [10]:
%%time
dissic_ann.chunk({"time":20}).to_zarr(f"ann_{var}_{model}")

CPU times: user 10.7 s, sys: 717 ms, total: 11.4 s
Wall time: 1h 28s


<xarray.backends.zarr.ZarrStore at 0x7fc5f8c76970>

In [None]:
# Release the file handles held by the annual-mean dataset and the source
# dataset. The annual-mean object created above is named `dissic_ann`;
# `dic_ann` is never defined in this notebook and would raise NameError.
dissic_ann.close()
ds.close()

In [None]:
# Open files matching "ann_so_UKESM*" (presumably annual-mean output written
# by an earlier run of this workflow) to inspect their time axis.
# NOTE(review): this pattern does not match the store written above for the
# current configuration ("ann_dissic_ACCESS") -- confirm this is the dataset
# that is meant to be checked.
test = xr.open_mfdataset("ann_so_UKESM*", use_cftime=True)

In [None]:
# Show the calendar year of every time step as a sanity check on the time axis.
# `.dt` is an accessor on datetime-like DataArrays; a Dataset has no `.dt`
# attribute, so the years are read from the `time` coordinate.
test.time.dt.year