# Read in netcdfs

In [1]:
import dask.distributed
import tempfile
# Scratch directory handed to the dask workers; "dask-worker-space" is the
# suffix of the generated directory name.
tempdir = tempfile.TemporaryDirectory(suffix="dask-worker-space")
# Start a local dask cluster that uses the scratch directory, with a
# 14 GB memory limit, and display its summary.
client = dask.distributed.Client(
    local_directory=tempdir.name,
    memory_limit='14gb',
)
client


0,1
Client  Scheduler: tcp://127.0.0.1:37447  Dashboard: http://127.0.0.1:8787/status,Cluster  Workers: 4  Cores: 8  Memory: 52.15 GiB


In [2]:
import xarray as xr, matplotlib.pyplot as plt
from importlib import reload # need to use this if I edit a function file
import os
import numpy as np
import pandas as pd
import cartopy.crs as ccrs # to add in continents and change map projections 
from matplotlib.colors import LinearSegmentedColormap # to change colour bar????
import dask.diagnostics # dask allows you to check how long something is taking to load
import climtas # needed to count event statistics with a specified duration

In [3]:
# Import the project's custom helper modules.
# They live outside the notebook's directory, so that directory is added to
# the module search path first.
import sys 
sys.path.append('/home/563/kb6999/Functions') # use this if the function file is in a different directory to the notebook

# NOTE(review): none of these four aliases is referenced in the cells visible
# here -- confirm they are still needed.
import frequently_used_functions as func
import plotting_functions as fplot
import model_functions as funcM
import reanalysis_functions as funcR

# Scott's way of opening files

In [4]:
# Ensemble member numbers 1-80, leaving out member 70 because its file is problematic
members = [number for number in range(1, 81) if number != 70]

## monthly anomalies

In [5]:
# One raw PRATE file per ensemble member; the member number is zero-padded
# to two digits in the file name.
pr_path_template = "/g/data/w48/kb6999/20CR_PRATE_raw_members/R_raw_Glob_PRATE{m:02d}.nc"
pr_paths = [pr_path_template.format(m=m) for m in members]

In [6]:
# Open every member's rainfall file lazily (10 time steps per chunk), stack
# the files along a new 'member' dimension, and label that dimension with the
# member numbers.
ds_pr = xr.open_mfdataset(
    pr_paths,
    combine='nested',
    concat_dim='member',
    chunks={'time': 10},
).assign_coords(member=members)
ds_pr

Unnamed: 0,Array,Chunk
Bytes,76.84 GiB,5.00 MiB
Shape,"(79, 1992, 256, 512)","(1, 10, 256, 512)"
Count,47479 Tasks,15800 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 76.84 GiB 5.00 MiB Shape (79, 1992, 256, 512) (1, 10, 256, 512) Count 47479 Tasks 15800 Chunks Type float32 numpy.ndarray",79  1  512  256  1992,

Unnamed: 0,Array,Chunk
Bytes,76.84 GiB,5.00 MiB
Shape,"(79, 1992, 256, 512)","(1, 10, 256, 512)"
Count,47479 Tasks,15800 Chunks
Type,float32,numpy.ndarray


In [7]:
# Build the working dataset. Only precipitation is included here, stored
# under the name 'pr'.
# PRATE is multiplied by the number of seconds in a day to change its units
# (presumably a per-second rate -> per-day amount; confirm against the file's
# units attribute).
SECONDS_PER_DAY = 86400
reanal = xr.Dataset({'pr': ds_pr.PRATE * SECONDS_PER_DAY})
reanal

Unnamed: 0,Array,Chunk
Bytes,153.68 GiB,10.00 MiB
Shape,"(79, 1992, 256, 512)","(1, 10, 256, 512)"
Count,63279 Tasks,15800 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 153.68 GiB 10.00 MiB Shape (79, 1992, 256, 512) (1, 10, 256, 512) Count 63279 Tasks 15800 Chunks Type float64 numpy.ndarray",79  1  512  256  1992,

Unnamed: 0,Array,Chunk
Bytes,153.68 GiB,10.00 MiB
Shape,"(79, 1992, 256, 512)","(1, 10, 256, 512)"
Count,63279 Tasks,15800 Chunks
Type,float64,numpy.ndarray


## area weighting and landmask

In [8]:
# Area weighting: scale every grid cell by the cosine of its latitude.
# NOTE(review): this only multiplies by the weights. A spatial mean of the
# result is a true area-weighted mean only if it is also normalised by the
# sum of the weights.
lat_in_radians = reanal.lat * (np.pi / 180)
cos_lat = np.cos(lat_in_radians)
reanal_w = reanal * cos_lat
reanal_w

Unnamed: 0,Array,Chunk
Bytes,153.68 GiB,10.00 MiB
Shape,"(79, 1992, 256, 512)","(1, 10, 256, 512)"
Count,79080 Tasks,15800 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 153.68 GiB 10.00 MiB Shape (79, 1992, 256, 512) (1, 10, 256, 512) Count 79080 Tasks 15800 Chunks Type float64 numpy.ndarray",79  1  512  256  1992,

Unnamed: 0,Array,Chunk
Bytes,153.68 GiB,10.00 MiB
Shape,"(79, 1992, 256, 512)","(1, 10, 256, 512)"
Count,79080 Tasks,15800 Chunks
Type,float64,numpy.ndarray


In [9]:
# Global monthly data: a second name for the cos(lat)-scaled dataset (no copy is made)
monthly_raw_glob = reanal_w

In [10]:
# List the contents of the 20CR v3 directory (shell command; exploratory only)
!ls /g/data/ua8/C20C/v3/

about_land_masks.nc  land.sflx.nc				      untar.sh
land.nc		     README_20C_Reanalysis_version_3_everymember.txt
LAND.nc		     tmp


In [11]:
# Land mask dataset for RAINFALL.
# For TEMPERATURE use '/g/data/w48/kb6999/20CR_data_netcdfs/land_20CR.nc' instead.
landmask_file = '/g/data/w48/kb6999/20CR_data_netcdfs/land_pr_20CR.nc'
landfrac_ds = xr.open_dataset(landmask_file)
# `landmask` is a second name for the same dataset
landmask = landfrac_ds

In [12]:
# Display the land mask dataset to inspect its variables and coordinates
landmask

In [16]:
# Southern and Northern Hemisphere subsets.
# The latitude slices are written from high to low, which presumes that `lat`
# is stored in descending order -- confirm.
monthly_raw_SH = monthly_raw_glob.sel(lat=slice(0, -90))
monthly_raw_NH = monthly_raw_glob.sel(lat=slice(90, 0))

# Australia and eastern Australia: cut out a lat/lon box, then keep only the
# cells where the land mask equals 1.0 (drop=True removes coordinate labels
# for which the condition is False everywhere).
# If the slicing triggers dask's large-chunk warning, it can be wrapped in
#   with dask.config.set(**{'array.slicing.split_large_chunks': True}):
is_land = landmask.LAND == 1.0
aus_box = dict(lat=slice(-10, -50), lon=slice(110, 160))
ea_box = dict(lat=slice(-10, -50), lon=slice(140, 155))
monthly_raw_Aus = monthly_raw_glob.sel(**aus_box).where(is_land, drop=True)
monthly_raw_EA = monthly_raw_glob.sel(**ea_box).where(is_land, drop=True)

    >>> with dask.config.set(**{'array.slicing.split_large_chunks': False}):
    ...     array[indexer]

To avoid creating the large chunks, set the option
    >>> with dask.config.set(**{'array.slicing.split_large_chunks': True}):
    ...     array[indexer]
  value = value[(slice(None),) * axis + (subkey,)]
    >>> with dask.config.set(**{'array.slicing.split_large_chunks': False}):
    ...     array[indexer]

To avoid creating the large chunks, set the option
    >>> with dask.config.set(**{'array.slicing.split_large_chunks': True}):
    ...     array[indexer]
  value = value[(slice(None),) * axis + (subkey,)]


In [17]:
# Display the Australian subset to check its shape and chunking
monthly_raw_Aus

Unnamed: 0,Array,Chunk
Bytes,2.94 GiB,195.94 kiB
Shape,"(79, 1992, 44, 57)","(1, 10, 44, 57)"
Count,142281 Tasks,15800 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 2.94 GiB 195.94 kiB Shape (79, 1992, 44, 57) (1, 10, 44, 57) Count 142281 Tasks 15800 Chunks Type float64 numpy.ndarray",79  1  57  44  1992,

Unnamed: 0,Array,Chunk
Bytes,2.94 GiB,195.94 kiB
Shape,"(79, 1992, 44, 57)","(1, 10, 44, 57)"
Count,142281 Tasks,15800 Chunks
Type,float64,numpy.ndarray


## means and percentiles

In [18]:
# Multi-member mean of the global fields (average across the 'member' dimension).
# The input is the cos(lat)-scaled dataset, so the result carries that scaling too.
mmm_raw_glob = monthly_raw_glob.mean(dim='member')

In [19]:
def _area_weighted_mean(weighted):
    """Return the cos(lat) area-weighted mean over 'lat' and 'lon'.

    Parameters
    ----------
    weighted : xarray.Dataset
        Data that has ALREADY been multiplied by cos(lat), as the
        `monthly_raw_*` datasets in this notebook are. It must have 'lat' and
        'lon' dimensions, with 'lat' in degrees.

    Returns
    -------
    xarray.Dataset
        sum(cos(lat) * x) / sum(cos(lat)), where both sums run over the grid
        cells that hold data. A slice with no valid cells gives NaN.

    Notes
    -----
    A plain `.mean(dim=['lat', 'lon'])` of the pre-weighted data divides by the
    number of cells instead of the sum of the weights. That scales every
    regional mean by the region's average cos(lat), and the factor differs
    between regions. Normalising by the weights removes that bias.

    NOTE(review): this changes the magnitude of the saved regional series. If
    they are compared elsewhere with data processed using the un-normalised
    mean, update that processing too.
    """
    spatial_dims = ['lat', 'lon']
    cos_lat = np.cos(weighted.lat * (np.pi / 180))
    # Weight of each cell that holds data; NaN cells (e.g. masked-out ocean)
    # contribute zero weight, so they do not dilute the mean.
    valid_weights = weighted.notnull() * cos_lat
    return weighted.sum(dim=spatial_dims) / valid_weights.sum(dim=spatial_dims)


# Area-weighted lat/lon mean for each region
llm_Glob = _area_weighted_mean(monthly_raw_glob)
llm_SH = _area_weighted_mean(monthly_raw_SH)
llm_NH = _area_weighted_mean(monthly_raw_NH)
llm_Aus = _area_weighted_mean(monthly_raw_Aus)
llm_EA = _area_weighted_mean(monthly_raw_EA)

In [20]:
def _member_quantile(regional_mean, q):
    """Return quantile `q` of `regional_mean` taken across the 'member' dimension.

    The data are first rechunked so that 'member' sits in a single chunk
    before the quantile is taken.
    """
    single_member_chunk = regional_mean.chunk({'member': -1})
    return single_member_chunk.quantile(q, dim=['member'])


# 10th percentile across ensemble members for each region
p10_mon_Glob = _member_quantile(llm_Glob, 0.1)
p10_mon_NH = _member_quantile(llm_NH, 0.1)
p10_mon_SH = _member_quantile(llm_SH, 0.1)
p10_mon_Aus = _member_quantile(llm_Aus, 0.1)
p10_mon_EA = _member_quantile(llm_EA, 0.1)

# 90th percentile across ensemble members for each region
p90_mon_Glob = _member_quantile(llm_Glob, 0.9)
p90_mon_NH = _member_quantile(llm_NH, 0.9)
p90_mon_SH = _member_quantile(llm_SH, 0.9)
p90_mon_Aus = _member_quantile(llm_Aus, 0.9)
p90_mon_EA = _member_quantile(llm_EA, 0.9)

In [21]:
# Multi-member mean of each regional series (average across 'member')
mmm_mon_Glob, mmm_mon_NH, mmm_mon_SH, mmm_mon_Aus, mmm_mon_EA = (
    regional_series.mean(dim='member')
    for regional_series in (llm_Glob, llm_NH, llm_SH, llm_Aus, llm_EA)
)

## Write to netcdf

In [22]:
# Output directory for the monthly files. File names are appended directly to
# this string, so the trailing slash is required.
path = "/g/data/w48/kb6999/20CR_PRATE_raw_data_for_plots/"

In [19]:
# Scott's alternative way of saving to NetCDF: the throttled writer limits how
# many chunks dask loads at one time. It is a bit slower but can help with
# memory issues.
# NOTE(review): dask.diagnostics.ProgressBar is documented for the local
# schedulers; with the distributed Client it may have no effect. The bars in
# the output look like they come from climtas itself -- confirm.
mmm_raw_glob_file = f"{path}mmm_raw_glob.nc"
with dask.diagnostics.ProgressBar():
    climtas.io.to_netcdf_throttled(mmm_raw_glob, mmm_raw_glob_file)

  0%|          | 0/200 [00:00<?, ?it/s]

In [23]:
# Regional multi-member means to write, keyed by output file name.
# Writing of the Glob / NH / SH files (mmm_mon_pr_Glob.nc, mmm_mon_pr_NH.nc,
# mmm_mon_pr_SH.nc) is currently disabled; add them to the mapping to
# regenerate them.
mmm_mon_outputs = {
    "mmm_mon_pr_Aus.nc": mmm_mon_Aus,
    "mmm_mon_pr_EA.nc": mmm_mon_EA,
}
with dask.diagnostics.ProgressBar():
    for out_name, out_data in mmm_mon_outputs.items():
        climtas.io.to_netcdf_throttled(out_data, f"{path}{out_name}")

  0%|          | 0/200 [00:00<?, ?it/s]

  0%|          | 0/200 [00:00<?, ?it/s]

In [24]:
# Regional lat/lon means (all members) to write, keyed by output file name.
# Writing of the Glob / SH / NH files (llm_pr_Glob.nc, llm_pr_SH.nc,
# llm_pr_NH.nc) is currently disabled; add them to the mapping to regenerate
# them.
llm_outputs = {
    "llm_pr_Aus.nc": llm_Aus,
    "llm_pr_EA.nc": llm_EA,
}
with dask.diagnostics.ProgressBar():
    for out_name, out_data in llm_outputs.items():
        climtas.io.to_netcdf_throttled(out_data, f"{path}{out_name}")

  0%|          | 0/15800 [00:00<?, ?it/s]

  0%|          | 0/15800 [00:00<?, ?it/s]

In [None]:
# 10th-percentile series for every region, keyed by output file name
# (written in this order)
p10_outputs = {
    "p10_mon_pr_Glob.nc": p10_mon_Glob,
    "p10_mon_pr_NH.nc": p10_mon_NH,
    "p10_mon_pr_SH.nc": p10_mon_SH,
    "p10_mon_pr_Aus.nc": p10_mon_Aus,
    "p10_mon_pr_EA.nc": p10_mon_EA,
}
with dask.diagnostics.ProgressBar():
    for out_name, out_data in p10_outputs.items():
        climtas.io.to_netcdf_throttled(out_data, f"{path}{out_name}")

  0%|          | 0/200 [00:00<?, ?it/s]

  0%|          | 0/200 [00:00<?, ?it/s]

  0%|          | 0/200 [00:00<?, ?it/s]

  0%|          | 0/200 [00:00<?, ?it/s]

  0%|          | 0/200 [00:00<?, ?it/s]

In [None]:
# 90th-percentile series for every region, keyed by output file name
# (written in this order)
p90_outputs = {
    "p90_mon_pr_Glob.nc": p90_mon_Glob,
    "p90_mon_pr_NH.nc": p90_mon_NH,
    "p90_mon_pr_SH.nc": p90_mon_SH,
    "p90_mon_pr_Aus.nc": p90_mon_Aus,
    "p90_mon_pr_EA.nc": p90_mon_EA,
}
with dask.diagnostics.ProgressBar():
    for out_name, out_data in p90_outputs.items():
        climtas.io.to_netcdf_throttled(out_data, f"{path}{out_name}")

In [22]:
# Re-open the saved global multi-member-mean file to check that it can be read
xr.open_dataset('/g/data/w48/kb6999/20CR_PRATE_raw_data_for_plots/mmm_raw_glob.nc')