# Read in netcdfs

In [9]:
import dask.distributed
import tempfile
tempdir = tempfile.TemporaryDirectory("dask-worker-space")
dask.distributed.Client(local_directory=tempdir.name, memory_limit='14gb')


0,1
Client  Scheduler: tcp://127.0.0.1:37943  Dashboard: http://127.0.0.1:8787/status,Cluster  Workers: 4  Cores: 8  Memory: 52.15 GiB


In [10]:
import xarray as xr, matplotlib.pyplot as plt
from importlib import reload # need to use this if I edit a function file
import os
import numpy as np
import pandas as pd
import cartopy.crs as ccrs # to add in continents and change map projections 
from matplotlib.colors import LinearSegmentedColormap # to change colour bar????
import dask.diagnostics # dask allows you to check how long something is taking to load
import climtas # needed to count event statistics with a specified duration

In [11]:
# import custom functions
import sys 
sys.path.append('/home/563/kb6999/Functions') # use this if the function file is in a different directory to the notebook

import frequently_used_functions as func
import plotting_functions as fplot
import model_functions as funcM
import reanalysis_functions as funcR

# scott way of opening files

In [12]:
# create a list of member names excluding member 70 cos that file is problematic 
members = [*range(1,70),*range(71,81)]

## monthly anomalies

In [13]:
# create a list of the paths
pr_paths = [f"/g/data/w48/kb6999/20CR_PRATE_raw_members/R_raw_Glob_PRATE{m:02d}.nc" for m in members]

In [14]:
# # open all members of rainfall 
ds_pr = xr.open_mfdataset(pr_paths, combine='nested', concat_dim='member', chunks={'time': 200})
ds_pr.coords['member'] = members
ds_pr

Unnamed: 0,Array,Chunk
Bytes,76.84 GiB,100.00 MiB
Shape,"(79, 1992, 256, 512)","(1, 200, 256, 512)"
Count,2449 Tasks,790 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 76.84 GiB 100.00 MiB Shape (79, 1992, 256, 512) (1, 200, 256, 512) Count 2449 Tasks 790 Chunks Type float32 numpy.ndarray",79  1  512  256  1992,

Unnamed: 0,Array,Chunk
Bytes,76.84 GiB,100.00 MiB
Shape,"(79, 1992, 256, 512)","(1, 200, 256, 512)"
Count,2449 Tasks,790 Chunks
Type,float32,numpy.ndarray


In [15]:
# combine precipitation and temperature into one dataset
reanal = xr.Dataset({'pr': ds_pr.PRATE})
# change the pr units
reanal['pr'] = reanal.pr*86400
reanal

Unnamed: 0,Array,Chunk
Bytes,153.68 GiB,200.00 MiB
Shape,"(79, 1992, 256, 512)","(1, 200, 256, 512)"
Count,3239 Tasks,790 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 153.68 GiB 200.00 MiB Shape (79, 1992, 256, 512) (1, 200, 256, 512) Count 3239 Tasks 790 Chunks Type float64 numpy.ndarray",79  1  512  256  1992,

Unnamed: 0,Array,Chunk
Bytes,153.68 GiB,200.00 MiB
Shape,"(79, 1992, 256, 512)","(1, 200, 256, 512)"
Count,3239 Tasks,790 Chunks
Type,float64,numpy.ndarray


## area weighting and landmask

In [17]:
# area weighting 
reanal_w = reanal*np.cos(reanal.lat*(np.pi/180))
reanal_w

Unnamed: 0,Array,Chunk
Bytes,153.68 GiB,200.00 MiB
Shape,"(79, 1992, 256, 512)","(1, 200, 256, 512)"
Count,4030 Tasks,790 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 153.68 GiB 200.00 MiB Shape (79, 1992, 256, 512) (1, 200, 256, 512) Count 4030 Tasks 790 Chunks Type float64 numpy.ndarray",79  1  512  256  1992,

Unnamed: 0,Array,Chunk
Bytes,153.68 GiB,200.00 MiB
Shape,"(79, 1992, 256, 512)","(1, 200, 256, 512)"
Count,4030 Tasks,790 Chunks
Type,float64,numpy.ndarray


In [18]:
monthly_raw_glob = reanal_w

In [19]:
!ls /g/data/ua8/C20C/v3/

about_land_masks.nc  land.sflx.nc				      untar.sh
land.nc		     README_20C_Reanalysis_version_3_everymember.txt
LAND.nc		     tmp


In [20]:
# import landmask dataset 
# RAINFALL
landfrac_ds = xr.open_dataset('/g/data/w48/kb6999/20CR_data_netcdfs/land_pr_20CR.nc')
# TEMPERATURE
# landfrac_ds = xr.open_dataset('/g/data/w48/kb6999/20CR_data_netcdfs/land_20CR.nc')
landmask = landfrac_ds

In [21]:
landmask

In [22]:
# Select out SH and NH anomalies 
monthly_raw_SH = monthly_raw_glob.sel(lat=slice(0,-90)) 
monthly_raw_NH = monthly_raw_glob.sel(lat=slice(90,0)) 
# select out the Australian and E Australian anomalies 
with dask.config.set(**{'array.slicing.split_large_chunks': True}):
    monthly_raw_Aus = monthly_raw_glob.sel(lat=slice(-10,-50), lon=slice(110,160)).where(landmask.LAND==1.0, drop=True)
    monthly_raw_EA = monthly_raw_glob.sel(lat=slice(-10,-50), lon=slice(140,155)).where(landmask.LAND==1.0, drop=True)

## means and percentiles

In [23]:
# take the multi-member mean
mmm_raw_glob = monthly_raw_glob.mean(dim=['member'])

In [24]:
# take lat lon mean
llm_Glob = monthly_raw_glob.mean(dim=['lat','lon'])
llm_SH = monthly_raw_SH.mean(dim=['lat','lon'])
llm_NH = monthly_raw_NH.mean(dim=['lat','lon'])
llm_Aus = monthly_raw_Aus.mean(dim=['lat','lon'])
llm_EA = monthly_raw_EA.mean(dim=['lat','lon'])

In [25]:
# calculate the percentiles and then take the mean along the model dimension for monthly anomalies
p10_mon_Glob = llm_Glob.chunk({'member': -1}).quantile(0.1, dim=['member'])
p10_mon_NH = llm_NH.chunk({'member': -1}).quantile(0.1, dim=['member'])
p10_mon_SH = llm_SH.chunk({'member': -1}).quantile(0.1, dim=['member'])
p10_mon_Aus = llm_Aus.chunk({'member': -1}).quantile(0.1, dim=['member'])
p10_mon_EA = llm_EA.chunk({'member': -1}).quantile(0.1, dim=['member'])
# calculate the percentiles and then take the mean along the model dimension for monthly anomalies
p90_mon_Glob = llm_Glob.chunk({'member': -1}).quantile(0.9, dim=['member'])
p90_mon_NH = llm_NH.chunk({'member': -1}).quantile(0.9, dim=['member'])
p90_mon_SH = llm_SH.chunk({'member': -1}).quantile(0.9, dim=['member'])
p90_mon_Aus = llm_Aus.chunk({'member': -1}).quantile(0.9, dim=['member'])
p90_mon_EA = llm_EA.chunk({'member': -1}).quantile(0.9, dim=['member'])

In [26]:
# Take the multi-member mean for each region
mmm_mon_Glob = llm_Glob.mean(dim='member')
mmm_mon_NH = llm_NH.mean(dim='member')
mmm_mon_SH = llm_SH.mean(dim='member')
mmm_mon_Aus = llm_Aus.mean(dim='member')
mmm_mon_EA = llm_EA.mean(dim='member')

## Write to netcdf

In [27]:
#monthly
path = '/g/data/w48/kb6999/20CR_PRATE_raw_data_for_plots/'

In [28]:
with dask.diagnostics.ProgressBar():
    mmm_raw_glob.to_netcdf(f'{path}mmm_raw_glob.nc')

distributed.core - ERROR - Exception while handling op heartbeat_worker
Traceback (most recent call last):
  File "/g/data3/hh5/public/apps/miniconda3/envs/analysis3-21.04/lib/python3.8/site-packages/distributed/core.py", line 497, in handle_comm
    result = handler(comm, **msg)
  File "/g/data3/hh5/public/apps/miniconda3/envs/analysis3-21.04/lib/python3.8/site-packages/distributed/scheduler.py", line 3861, in heartbeat_worker
    ws._executing = {
  File "/g/data3/hh5/public/apps/miniconda3/envs/analysis3-21.04/lib/python3.8/site-packages/distributed/scheduler.py", line 3862, in <dictcomp>
    parent._tasks[key]: duration for key, duration in executing.items()
KeyError: "('mean_chunk-87ce0dfedbdce26dea39a3781fe95f16', 3, 0, 0, 0)"


KilledWorker: ("('open_dataset-6ab3580965b33cc1314d52a9c00f7f39PRATE-ba1cc2391b45d6f32e91e519563ff188', 3, 0, 0)", <WorkerState 'tcp://127.0.0.1:40055', name: 0, memory: 0, processing: 139>)

In [None]:
with dask.diagnostics.ProgressBar():
    mmm_mon_Glob.to_netcdf(f'{path}mmm_mon_pr_Glob.nc')
    mmm_mon_NH.to_netcdf(f'{path}mmm_mon_pr_NH.nc')
    mmm_mon_SH.to_netcdf(f'{path}mmm_mon_pr_SH.nc')
    mmm_mon_Aus.to_netcdf(f'{path}mmm_mon_pr_Aus.nc')
    mmm_mon_EA.to_netcdf(f'{path}mmm_mon_pr_EA.nc')

In [None]:
with dask.diagnostics.ProgressBar():
    llm_Glob.to_netcdf(f'{path}llm_pr_Glob.nc')
    llm_SH.to_netcdf(f'{path}llm_pr_SH.nc')
    llm_NH.to_netcdf(f'{path}llm_pr_NH.nc')
    llm_Aus.to_netcdf(f'{path}llm_pr_Aus.nc')
    llm_EA.to_netcdf(f'{path}llm_pr_EA.nc')

In [None]:
with dask.diagnostics.ProgressBar():
    p10_mon_Glob.to_netcdf(f'{path}p10_mon_pr_Glob.nc')
    p10_mon_NH.to_netcdf(f'{path}p10_mon_pr_NH.nc')
    p10_mon_SH.to_netcdf(f'{path}p10_mon_pr_SH.nc')
    p10_mon_Aus.to_netcdf(f'{path}p10_mon_pr_Aus.nc')
    p10_mon_EA.to_netcdf(f'{path}p10_mon_pr_EA.nc')

In [None]:
with dask.diagnostics.ProgressBar():
    p90_mon_Glob.to_netcdf(f'{path}p90_mon_pr_Glob.nc')
    p90_mon_NH.to_netcdf(f'{path}p90_mon_pr_NH.nc')
    p90_mon_SH.to_netcdf(f'{path}p90_mon_pr_SH.nc')
    p90_mon_Aus.to_netcdf(f'{path}p90_mon_pr_Aus.nc')
    p90_mon_EA.to_netcdf(f'{path}p90_mon_pr_EA.nc')

In [29]:
xr.open_dataset(f'{path}mmm_raw_glob')

ValueError: did not find a match in any of xarray's currently installed IO backends ['netcdf4', 'h5netcdf', 'scipy', 'cfgrib', 'pydap', 'rasterio', 'zarr']. Consider explicitly selecting one of the installed backends via the ``engine`` parameter to xarray.open_dataset(), or installing additional IO dependencies:
http://xarray.pydata.org/en/stable/getting-started-guide/installing.html
http://xarray.pydata.org/en/stable/user-guide/io.html