# save netcdfs of all model data 
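- Notebook that reads in the temperature (tas, ts), precipitation (pr) and sea level pressure (psl) data from the CMIP6 models
- It then calculates anomalies and regional means and saves the results as netCDF files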



In [1]:
import xarray as xr, matplotlib.pyplot as plt
from importlib import reload # need to use this if I edit a function file
import os
import numpy as np
import pandas as pd
import cartopy.crs as ccrs # to add in continents and change map projections 
from matplotlib.colors import LinearSegmentedColormap # to change colour bar????
import dask.diagnostics # dask allows you to check how long something is taking to load
import climtas # needed to count event statistics with a specified duration

In [2]:
# import custom functions
import sys 
sys.path.append('/home/563/kb6999/Functions') # use this if the function file is in a different directory to the notebook

import frequently_used_functions as func
import plotting_functions as fplot
import model_functions as funcM
import reanalysis_functions as funcR

In [3]:
import dask.distributed
import tempfile

# Scratch directory for the dask workers; it is removed when `tempdir` is cleaned up.
# The label is passed as `prefix=` because the first positional argument of
# TemporaryDirectory is `suffix`.
tempdir = tempfile.TemporaryDirectory(prefix="dask-worker-space")
# Keep a handle on the client so it can be shut down with `client.close()`
# before this cell is run again.
client = dask.distributed.Client(local_directory=tempdir.name, memory_limit='16gb')
client

Perhaps you already have a cluster running?
Hosting the HTTP server on port 41345 instead


0,1
Client  Scheduler: tcp://127.0.0.1:41399  Dashboard: http://127.0.0.1:41345/status,Cluster  Workers: 2  Cores: 2  Memory: 11.23 GiB


In [4]:
# The data path is assembled from pieces: a common root plus one sub-path per variable
institution_dir = '/g/data/lp01/CMIP6/CMIP/'

tas_dir, pr_dir, psl_dir, ts_dir = (
    '/historical/r1i1p1f1/Amon/tas/gr1.5/',  # surface temperature
    '/historical/r1i1p1f1/Amon/pr/gr1.5/',   # precipitation
    '/historical/r1i1p1f1/Amon/psl/gr1.5/',  # sea level pressure
    '/historical/r1i1p1f1/Amon/ts/gr1.5/',   # (sea) surface temperature
)

# echo the pieces so a wrong path is easy to spot
print(institution_dir, tas_dir, pr_dir, ts_dir, psl_dir)

/g/data/lp01/CMIP6/CMIP/ /historical/r1i1p1f1/Amon/tas/gr1.5/ /historical/r1i1p1f1/Amon/pr/gr1.5/ /historical/r1i1p1f1/Amon/ts/gr1.5/ /historical/r1i1p1f1/Amon/psl/gr1.5/


In [5]:
# load surface temperature (tas) through the project helper funcM.read_models;
# '1850-01' and '2015-01' are presumably the start/end of the period to keep (confirm in the helper)
models_tas = funcM.read_models(institution_dir, tas_dir, '1850-01','2015-01')

52 model paths found and loaded into the dictionary "models"
52 models have been successfully loaded into an xarray


In [6]:
# load precipitation (pr) with the same helper and the same two date arguments as tas
models_pr = funcM.read_models(institution_dir, pr_dir, '1850-01','2015-01')

51 model paths found and loaded into the dictionary "models"
Path for MCM-UA-1-0 does not exist
50 models have been successfully loaded into an xarray


In [7]:
# load (sea) surface temperature (ts) with the same helper and the same two date arguments as tas
models_ts = funcM.read_models(institution_dir, ts_dir, '1850-01','2015-01')

52 model paths found and loaded into the dictionary "models"
52 models have been successfully loaded into an xarray


In [8]:
# load sea level pressure (psl) with the same helper and the same two date arguments as tas
models_psl = funcM.read_models(institution_dir, psl_dir, '1850-01','2015-01')

52 model paths found and loaded into the dictionary "models"
Path for MCM-UA-1-0 does not exist
51 models have been successfully loaded into an xarray


In [9]:
# make sure the models are the same for both temperature (tas) and rainfall (pr)
# (models_ts and models_psl are not filtered in this cell)

# delete the height from the models_tas array (if it hasn't been deleted already)
if 'height' in models_tas:
    del models_tas['height']

# models present in both variables, listed in the order they appear in models_tas
pr_model_names = set(models_pr.model.values)
common_models = [name for name in models_tas.model.values if name in pr_model_names]

# Select by label instead of `.where(..., drop=True)`: `where` masks every data
# variable against the condition just to drop entries along `model`, whereas
# `.sel` only subsets the `model` dimension. Using one shared list also puts
# both datasets in the same model order.
models_tas = models_tas.sel(model=common_models)
models_pr = models_pr.sel(model=common_models)

# check that the models are the same for both variables and if so print True
models_tas.model.equals(models_pr.model)

True

In [10]:
# change the pr units: 86400 is the number of seconds in a day, so this turns a
# per-second rate into a per-day one (presumably kg m-2 s-1 -> mm/day; confirm
# against the units of the input files)
# NOTE(review): not idempotent; every re-run of this cell multiplies pr by 86400 again
models_pr['pr'] = models_pr.pr*86400

In [11]:
# The 43 models used in this project; each one is common to both tas and pr
models_43 = [
    'ACCESS-CM2', 'ACCESS-ESM1-5',
    'AWI-CM-1-1-MR', 'AWI-ESM-1-1-LR',
    'BCC-CSM2-MR', 'BCC-ESM1',
    'CAMS-CSM1-0', 'CAS-ESM2-0',
    'CESM2', 'CESM2-FV2', 'CESM2-WACCM', 'CESM2-WACCM-FV2',
    'CMCC-CM2-HR4', 'CMCC-CM2-SR5',
    'CanESM5',
    'E3SM-1-1', 'E3SM-1-1-ECA',
    'EC-Earth3', 'EC-Earth3-AerChem', 'EC-Earth3-Veg-LR',
    'FGOALS-f3-L', 'FGOALS-g3',
    'FIO-ESM-2-0',
    'GFDL-CM4', 'GFDL-ESM4',
    'GISS-E2-1-G', 'GISS-E2-1-G-CC', 'GISS-E2-1-H',
    'IITM-ESM',
    'INM-CM4-8', 'INM-CM5-0',
    'IPSL-CM6A-LR',
    'KACE-1-0-G',
    'MIROC6',
    'MPI-ESM-1-2-HAM', 'MPI-ESM1-2-LR',
    'MRI-ESM2-0',
    'NESM3',
    'NorCPM1', 'NorESM2-LM', 'NorESM2-MM',
    'SAM0-UNICON',
    'TaiESM1',
]

In [12]:
# restrict tas and pr to the 43 project models (label selection on `model`)
models_tas, models_pr = (
    dataset.sel(model=models_43) for dataset in (models_tas, models_pr)
)
models_tas

Unnamed: 0,Array,Chunk
Bytes,9.13 GiB,1.32 MiB
Shape,"(43, 1980, 120, 240)","(1, 12, 120, 240)"
Count,49488 Tasks,7095 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 9.13 GiB 1.32 MiB Shape (43, 1980, 120, 240) (1, 12, 120, 240) Count 49488 Tasks 7095 Chunks Type float32 numpy.ndarray",43  1  240  120  1980,

Unnamed: 0,Array,Chunk
Bytes,9.13 GiB,1.32 MiB
Shape,"(43, 1980, 120, 240)","(1, 12, 120, 240)"
Count,49488 Tasks,7095 Chunks
Type,float32,numpy.ndarray


In [13]:
# pr has already been multiplied by 86400 (seconds per day) in the earlier
# "change the pr units" cell, and the `.sel(model=...)` subset taken since then
# carries those converted values. Applying the factor a second time would scale
# pr by 86400**2, so no conversion is done here.

In [14]:
# area weighting: scale every grid cell by cos(latitude), with lat converted from degrees to radians
# NOTE(review): later cells take a plain `.mean(dim=['lat','lon'])` of this product, which is
# sum(w*x)/N rather than the normalised weighted mean sum(w*x)/sum(w). Confirm this is intended
# (xarray's `.weighted(weights).mean(...)` gives the normalised form).
models_w = models_tas*np.cos(models_tas.lat*(np.pi/180))
# alternative inputs: swap one of these in to process precipitation or sea level pressure instead
# models_w = models_pr*np.cos(models_pr.lat*(np.pi/180))
# models_w = models_psl*np.cos(models_psl.lat*(np.pi/180))

## calculate anomalies

In [16]:
# use functions to calculate the monthly anomalies for the globe
# ('1850-01' and '1881-01' are passed to func.monthly_anomaly, presumably as the bounds
# of the reference period the anomalies are taken against; confirm in the helper)
monthly_anom_glob = func.monthly_anomaly(models_w, '1850-01', '1881-01')

In [17]:
# import land fraction data
landfrac_ds = xr.open_dataset('/home/563/kb6999/Models/landfraction_file_grid1.5.nc')
# average over the time dimension; the result is still an xarray Dataset
# (later cells compare `landmask.data` with 1, presumably a variable named `data` in the file; confirm)
landmask = landfrac_ds.mean(dim='time')

  dtype = _decode_cf_datetime_dtype(data, units, calendar, self.use_cftime)
  return np.asarray(array[self.key], dtype=None)


In [20]:
# take the raw llm for the SEA: the lat-lon mean ("llm") of the area-weighted data
# over the whole grid, with no anomaly taken ("raw")
raw_llm_glob = models_w.mean(dim=['lat','lon'])

In [21]:
# Cut the area-weighted data down to each region (input to the raw llm for the SEA).
# Aus and EA share a latitude band and keep only cells where the land mask equals 1.
aus_ea_lats = slice(-50, -10)
is_land = landmask.data == 1

raw_SH = models_w.sel(lat=slice(-90, 0))
raw_Aus = models_w.sel(lat=aus_ea_lats, lon=slice(110, 160)).where(is_land, drop=True)
raw_EA = models_w.sel(lat=aus_ea_lats, lon=slice(140, 155)).where(is_land, drop=True)

In [22]:
# lat-lon mean ("llm") of each raw regional field, for the SEA
raw_llm_SH, raw_llm_Aus, raw_llm_EA = (
    region.mean(dim=['lat','lon']) for region in (raw_SH, raw_Aus, raw_EA)
)

In [23]:
# Regional subsets of the global monthly anomalies
monthly_anom_SH = monthly_anom_glob.sel(lat=slice(-90, 0))

# Aus and EA: same latitude band, different longitude band, land cells only (mask == 1)
lat_band = slice(-50, -10)
land_cells = landmask.data == 1
monthly_anom_Aus, monthly_anom_EA = (
    monthly_anom_glob.sel(lat=lat_band, lon=lon_band).where(land_cells, drop=True)
    for lon_band in (slice(110, 160), slice(140, 155))
)

In [24]:
# take the multi-model mean (average over the `model` dimension) but not the lat-lon mean,
# so the result keeps its spatial grid for spatial plots
mmm_anom_glob = monthly_anom_glob.mean(dim=['model'])

In [29]:
# take the std without taking the lat lon mean (to be used for spatial plots)
# std_s_Glob = monthly_anom_glob.std(dim = ['model']).sel(time=slice(1880,1886)).sel(season=['DJF','JJA'])

In [30]:
# take the percentiles without taking the lat lon mean (to be used for spatial plots)
# # calculate the percentiles and then take the mean along the member dimension for monthly anomalies
# p10_s_Glob = monthly_anom_glob.chunk({'member': -1}).quantile(0.1, dim=['member']).sel(seasonyear=slice(1880,1886)).sel(season=['DJF','JJA'])
# p05_s_Glob = monthly_anom_glob.chunk({'member': -1}).quantile(0.05, dim=['member']).sel(seasonyear=slice(1880,1886)).sel(season=['DJF','JJA'])
# # calculate the percentiles and then take the mean along the member dimension for monthly anomalies
# p95_s_Glob = monthly_anom_glob.chunk({'member': -1}).quantile(0.95, dim=['member']).sel(seasonyear=slice(1880,1886)).sel(season=['DJF','JJA'])
# p90_s_Glob = monthly_anom_glob.chunk({'member': -1}).quantile(0.9, dim=['member']).sel(seasonyear=slice(1880,1886)).sel(season=['DJF','JJA'])
# p10_s_Glob

In [31]:
# collapse each anomaly field to its lat-lon mean ("llm")
llm_Glob, llm_SH, llm_Aus, llm_EA = (
    anomalies.mean(dim=['lat','lon'])
    for anomalies in (monthly_anom_glob, monthly_anom_SH, monthly_anom_Aus, monthly_anom_EA)
)
# llm_NH = monthly_anom_NH.mean(dim=['lat','lon'])

In [32]:
def _model_quantile(llm, q):
    """Return quantile `q` of `llm` taken across the `model` dimension."""
    # rechunk so `model` sits in a single chunk before the quantile is taken
    return llm.chunk({'model': -1}).quantile(q, dim=['model'])


# 10th and 90th percentiles across the models for each region's monthly anomalies
# (the NH variants stay disabled, as llm_NH is not computed above)
p10_mon_Glob, p90_mon_Glob = (_model_quantile(llm_Glob, q) for q in (0.1, 0.9))
p10_mon_SH, p90_mon_SH = (_model_quantile(llm_SH, q) for q in (0.1, 0.9))
p10_mon_Aus, p90_mon_Aus = (_model_quantile(llm_Aus, q) for q in (0.1, 0.9))
p10_mon_EA, p90_mon_EA = (_model_quantile(llm_EA, q) for q in (0.1, 0.9))

In [33]:
# multi-model mean (average over `model`) of each region's lat-lon-mean anomalies
mmm_mon_Glob, mmm_mon_SH, mmm_mon_Aus, mmm_mon_EA = (
    llm.mean(dim='model') for llm in (llm_Glob, llm_SH, llm_Aus, llm_EA)
)
# mmm_mon_NH = llm_NH.mean(dim='member')

# save netcdfs

In [38]:
# output folder for the netCDFs written below (ends in '/' because file names are appended directly)
path = '/g/data/w48/kb6999/Models/data_for_plots/'

In [39]:
# variable label inserted into the output file names
# NOTE(review): set by hand; it must match the dataset used to build models_w
# in the area-weighting cell (currently models_tas)
var='tas'

In [40]:
# causes kernel to die 
# with dask.diagnostics.ProgressBar():
#     mmm_anom_glob.to_netcdf(f'{path}mmm_anom_glob_{var}.nc')

In [41]:
# write the global raw lat-lon mean used for the SEA plots, with a dask progress bar
glob_file = f'{path}raw_llm_glob_{var}.nc'
with dask.diagnostics.ProgressBar():
    raw_llm_glob.to_netcdf(glob_file)



In [None]:
# write the regional raw lat-lon means, one file per region, in the order SH, Aus, EA
regional_files = {
    f'{path}raw_llm_SH_{var}.nc': raw_llm_SH,
    f'{path}raw_llm_Aus_{var}.nc': raw_llm_Aus,
    f'{path}raw_llm_EA_{var}.nc': raw_llm_EA,
}
with dask.diagnostics.ProgressBar():
    for target, regional_mean in regional_files.items():
        regional_mean.to_netcdf(target)


In [26]:
def read_model(dataset, m, var):
    """Calculate the global monthly anomalies for one model and save them as netCDF.

    Parameters
    ----------
    dataset
        Data for one model; passed unchanged to ``func.monthly_anomaly``
        (presumably an xarray object with a ``time`` dimension; confirm).
    m
        Model identifier; it is written directly after ``var`` in the file name.
    var
        Variable name (e.g. 'tas'); used in both the folder and the file name.

    Returns
    -------
    None
        The anomalies are written to
        ``/g/data/w48/kb6999/Models/models_{var}_data/M_anom_Glob_{var}{m}.nc``.
    """
    # imports are kept local so the function carries its own dependencies
    import sys

    # use this if the function file is in a different directory to the notebook;
    # add the folder only once so repeated calls do not keep growing sys.path
    functions_dir = '/home/563/kb6999/Functions'
    if functions_dir not in sys.path:
        sys.path.append(functions_dir)
    import frequently_used_functions as func

    # group the data by month (take the monthly mean)
#     dataset = dataset.resample(time='M').mean(dim='time').chunk(chunks={'time':-1, 'lat':120, 'lon':240})

    # use functions to calculate the monthly anomalies for the globe
    monthly_anom_glob = func.monthly_anomaly(dataset, '1850-01', '1881-01')

    # write file to netcdf (uncompressed: the zlib/complevel=2 encoding that was
    # tried before took over an hour per file)
    monthly_anom_glob.to_netcdf(f'/g/data/w48/kb6999/Models/models_{var}_data/M_anom_Glob_{var}{m}.nc')
    print(f'Read in model {m}, taken anomalies and written to netcdf file in folder "models_{var}_data"')

    return None

In [14]:
# parent folder for saved model output (ends in '/' because file names are appended directly)
path_models = '/g/data/w48/kb6999/Models/'

In [15]:
# list what is currently stored in the model output folder (same folder as path_models)
!ls /g/data/w48/kb6999/Models

M_SAM_index.nc


In [None]:
# save the global monthly anomalies (all models, full lat-lon grid) to a single file
# NOTE(review): the file name hard-codes 'tas' instead of using `var`, and this write
# has no progress bar; expect it to be heavy, since an earlier cell notes that writing
# the multi-model mean of this field made the kernel die
monthly_anom_glob.to_netcdf(f'{path_models}models_tas.nc')

In [None]:
# NOTE(review): this opens 'models_tas+pr.nc', but the cell above writes 'models_tas.nc'
# and nothing in this notebook writes 'models_tas+pr.nc'; confirm which file is meant
xr.open_dataset(f'{path_models}models_tas+pr.nc')