# Read in netcdfs of raw data for each member 
- take Tmax and Tmin
- take the mmm for the globe
- take out the regions and then take the lat lon mean and save as a netcdf
- take the llm AND mmm and save as a netcdf 

In [1]:
import xarray as xr, matplotlib.pyplot as plt
from importlib import reload # need to use this if I edit a function file
import os
import numpy as np
import pandas as pd
import cartopy.crs as ccrs # to add in continents and change map projections 
from matplotlib.colors import LinearSegmentedColormap # to change colour bar????
import dask.diagnostics # dask allows you to check how long something is taking to load
import climtas # needed to count event statistics with a specified duration

In [2]:
# import custom functions
import sys 
sys.path.append('/home/563/kb6999/Functions') # use this if the function file is in a different directory to the notebook

import frequently_used_functions as func
import plotting_functions as fplot
import model_functions as funcM
import reanalysis_functions as funcR

# scott way of opening files

In [3]:
# create a list of member names excluding member 70 cos that file is problematic 
members = [*range(1,70),*range(71,81)]

## monthly raw

In [4]:
# # create a list of the paths
tmp_paths = [f"/g/data/w48/kb6999/20CR_TMP_raw_members/R_raw_Glob_TMP{m:02d}.nc" for m in members]
# tmp_paths = [f"/g/data/w48/kb6999/20CR_TMP_members/R_anom_Glob_TMP{m:02d}.nc" for m in members]

# pr_paths = [f"/g/data/w48/kb6999/20CR_PRATE_members/R_anom_Glob_PRATE{m:02d}.nc" for m in members]

In [5]:
# open all members of temperature 
ds_tmp = xr.open_mfdataset(tmp_paths, combine='nested', concat_dim='member', chunks={'time': 200})
ds_tmp.coords['member'] = members
ds_tmp

Unnamed: 0,Array,Chunk
Bytes,82.51 GB,104.86 MB
Shape,"(79, 1992, 256, 512)","(1, 200, 256, 512)"
Count,2449 Tasks,790 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 82.51 GB 104.86 MB Shape (79, 1992, 256, 512) (1, 200, 256, 512) Count 2449 Tasks 790 Chunks Type float32 numpy.ndarray",79  1  512  256  1992,

Unnamed: 0,Array,Chunk
Bytes,82.51 GB,104.86 MB
Shape,"(79, 1992, 256, 512)","(1, 200, 256, 512)"
Count,2449 Tasks,790 Chunks
Type,float32,numpy.ndarray


In [6]:
# rename variable and put into celsius
reanal = xr.Dataset({'tmp': ds_tmp.TMP})#, 'pr': ds_pr.PRATE})
reanal['tmp'] = reanal.tmp-273
reanal

Unnamed: 0,Array,Chunk
Bytes,82.51 GB,104.86 MB
Shape,"(79, 1992, 256, 512)","(1, 200, 256, 512)"
Count,3239 Tasks,790 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 82.51 GB 104.86 MB Shape (79, 1992, 256, 512) (1, 200, 256, 512) Count 3239 Tasks 790 Chunks Type float32 numpy.ndarray",79  1  512  256  1992,

Unnamed: 0,Array,Chunk
Bytes,82.51 GB,104.86 MB
Shape,"(79, 1992, 256, 512)","(1, 200, 256, 512)"
Count,3239 Tasks,790 Chunks
Type,float32,numpy.ndarray


## area weighting and landmask

In [16]:
# area weighting 
reanal_w = reanal*np.cos(reanal.lat*(np.pi/180))
reanal_w

Unnamed: 0,Array,Chunk
Bytes,82.51 GB,104.86 MB
Shape,"(79, 1992, 256, 512)","(1, 200, 256, 512)"
Count,4030 Tasks,790 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 82.51 GB 104.86 MB Shape (79, 1992, 256, 512) (1, 200, 256, 512) Count 4030 Tasks 790 Chunks Type float32 numpy.ndarray",79  1  512  256  1992,

Unnamed: 0,Array,Chunk
Bytes,82.51 GB,104.86 MB
Shape,"(79, 1992, 256, 512)","(1, 200, 256, 512)"
Count,4030 Tasks,790 Chunks
Type,float32,numpy.ndarray


In [18]:
reanal_w.tmp.mean(dim=['lat','lon','member'])[-1].values

array(9.2151985, dtype=float32)

## trying to figure out amplitude of seasonal cycle

In [None]:
def seasonal_amp(dataset):
    # find the max monthly anomaly for each year
    smax = dataset.groupby('time.year').max(dim='time')
    # find the min monthly anomaly for each year
    smin = dataset.groupby('time.year').min(dim='time')
    # find the amplitude of the seasonal cycle (max-min) for each year 
    seasonal_cycle_amp = smax-smin
    
    return seasonal_cycle_amp

In [19]:
monthly_raw_glob = reanal_w

In [20]:
# import landmask dataset 
landfrac_ds = xr.open_dataset('/g/data/w48/kb6999/20CR_data_netcdfs/land_20CR.nc')
landmask = landfrac_ds

In [21]:
# Select out SH and NH anomalies 
monthly_raw_SH = monthly_raw_glob.sel(lat=slice(0,-90)) 
monthly_raw_NH = monthly_raw_glob.sel(lat=slice(90,0)) 
# select out the Australian and E Australian anomalies 
with dask.config.set(**{'array.slicing.split_large_chunks': True}):
    monthly_raw_Aus = monthly_raw_glob.sel(lat=slice(-10,-50), lon=slice(110,160)).where(landmask.LAND==1.0, drop=True)
    monthly_raw_EA = monthly_raw_glob.sel(lat=slice(-10,-50), lon=slice(140,155)).where(landmask.LAND==1.0, drop=True)

In [22]:
monthly_raw_Aus

Unnamed: 0,Array,Chunk
Bytes,1.58 GB,12.80 kB
Shape,"(79, 1992, 44, 57)","(1, 200, 4, 4)"
Count,277701 Tasks,130350 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 1.58 GB 12.80 kB Shape (79, 1992, 44, 57) (1, 200, 4, 4) Count 277701 Tasks 130350 Chunks Type float32 numpy.ndarray",79  1  57  44  1992,

Unnamed: 0,Array,Chunk
Bytes,1.58 GB,12.80 kB
Shape,"(79, 1992, 44, 57)","(1, 200, 4, 4)"
Count,277701 Tasks,130350 Chunks
Type,float32,numpy.ndarray


## means and percentiles

In [23]:
# take the multi-member mean
mmm_raw_glob = monthly_raw_glob.mean(dim=['member'])

In [24]:
# take lat lon mean
llm_Glob = monthly_raw_glob.mean(dim=['lat','lon'])
llm_SH = monthly_raw_SH.mean(dim=['lat','lon'])
llm_NH = monthly_raw_NH.mean(dim=['lat','lon'])
llm_Aus = monthly_raw_Aus.mean(dim=['lat','lon'])
llm_EA = monthly_raw_EA.mean(dim=['lat','lon'])

In [25]:
# calculate the percentiles and then take the mean along the model dimension for monthly anomalies
p10_mon_Glob = llm_Glob.chunk({'member': -1}).quantile(0.1, dim=['member'])
p10_mon_NH = llm_NH.chunk({'member': -1}).quantile(0.1, dim=['member'])
p10_mon_SH = llm_SH.chunk({'member': -1}).quantile(0.1, dim=['member'])
p10_mon_Aus = llm_Aus.chunk({'member': -1}).quantile(0.1, dim=['member'])
p10_mon_EA = llm_EA.chunk({'member': -1}).quantile(0.1, dim=['member'])
# calculate the percentiles and then take the mean along the model dimension for monthly anomalies
p90_mon_Glob = llm_Glob.chunk({'member': -1}).quantile(0.9, dim=['member'])
p90_mon_NH = llm_NH.chunk({'member': -1}).quantile(0.1, dim=['member'])
p90_mon_SH = llm_SH.chunk({'member': -1}).quantile(0.9, dim=['member'])
p90_mon_Aus = llm_Aus.chunk({'member': -1}).quantile(0.9, dim=['member'])
p90_mon_EA = llm_EA.chunk({'member': -1}).quantile(0.9, dim=['member'])

In [26]:
# Take the multi-member mean for each region
mmm_mon_Glob = llm_Glob.mean(dim='member')
mmm_mon_NH = llm_NH.mean(dim='member')
mmm_mon_SH = llm_SH.mean(dim='member')
mmm_mon_Aus = llm_Aus.mean(dim='member')
mmm_mon_EA = llm_EA.mean(dim='member')

## Write to netcdf

In [27]:
#raw 
path = '/g/data/w48/kb6999/20CR_TMP_raw_data_for_plots/'

In [28]:
# write seasonal cycle amplitude to netcdf
with dask.diagnostics.ProgressBar():
    mmm_raw_glob.to_netcdf(f'{path}mmm_raw_glob.nc')

[########################################] | 100% Completed |  4min 21.9s


In [None]:
# mmm and llm
with dask.diagnostics.ProgressBar():
    mmm_mon_Glob.to_netcdf(f'{path}mmm_mon_Glob.nc')
    mmm_mon_NH.to_netcdf(f'{path}mmm_mon_NH.nc')
    mmm_mon_SH.to_netcdf(f'{path}mmm_mon_SH.nc')
    mmm_mon_Aus.to_netcdf(f'{path}mmm_mon_Aus.nc')
    mmm_mon_EA.to_netcdf(f'{path}mmm_mon_EA.nc')

In [None]:
with dask.diagnostics.ProgressBar():
    llm_Glob.to_netcdf(f'{path}llm_mon_Glob.nc')
    llm_SH.to_netcdf(f'{path}llm_mon_NH.nc') 
    llm_NH.to_netcdf(f'{path}llm_mon_SH.nc')
    llm_Aus.to_netcdf(f'{path}llm_mon_Aus.nc')
    llm_EA.to_netcdf(f'{path}llm_mon_EA.nc')

In [None]:
with dask.diagnostics.ProgressBar():
    p10_mon_Glob.to_netcdf(f'{path}p10_mon_Glob.nc')
    p10_mon_NH.to_netcdf(f'{path}p10_mon_NH.nc')
    p10_mon_SH.to_netcdf(f'{path}p10_mon_SH.nc')
    p10_mon_Aus.to_netcdf(f'{path}p10_mon_Aus.nc')
    p10_mon_EA.to_netcdf(f'{path}p10_mon_EA.nc')

In [None]:
with dask.diagnostics.ProgressBar():
    p90_mon_Glob.to_netcdf(f'{path}p90_mon_Glob.nc')
    p90_mon_NH.to_netcdf(f'{path}p90_mon_NH.nc')
    p90_mon_SH.to_netcdf(f'{path}p90_mon_SH.nc')
    p90_mon_Aus.to_netcdf(f'{path}p90_mon_Aus.nc')
    p90_mon_EA.to_netcdf(f'{path}p90_mon_EA.nc')

In [None]:
mmm = xr.open_dataset(f'{path}mmm_mon_Aus.nc')