# Read in SAM data and save as netcdfs 

In [1]:
import xarray as xr, matplotlib.pyplot as plt
from importlib import reload # need to use this if I edit a function file
import os
import numpy as np
import pandas as pd
import cartopy.crs as ccrs # to add in continents and change map projections 
from matplotlib.colors import LinearSegmentedColormap # to change colour bar????
import dask.diagnostics # dask allows you to check how long something is taking to load
import climtas # needed to count event statistics with a specified duration
import csv

In [2]:
# import custom functions
import sys 
sys.path.append('/home/563/kb6999/Functions') # use this if the function file is in a different directory to the notebook

import frequently_used_functions as func
import plotting_functions as fplot
import model_functions as funcM
import reanalysis_functions as funcR

## Read in reanalysis pressure data to calculate SAM

In [7]:
# !ls /g/data/ua8/LE_models/20CRv3/mean_daily/prmsl

In [6]:
# glob pattern matching every netCDF file in the 20CRv3 daily-mean prmsl directory
path_p = "/g/data/ua8/LE_models/20CRv3/mean_daily/prmsl/*.nc"

In [8]:
# Open all daily mean sea level pressure files (mean across all 20CR members)
# as one dataset, then discard the 'time_bnds' variable.
P_ds = xr.open_mfdataset(path_p).drop_vars('time_bnds')
P_ds

Unnamed: 0,Array,Chunk
Bytes,15.96 GiB,90.98 MiB
Shape,"(65744, 181, 360)","(366, 181, 360)"
Count,540 Tasks,180 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 15.96 GiB 90.98 MiB Shape (65744, 181, 360) (366, 181, 360) Count 540 Tasks 180 Chunks Type float32 numpy.ndarray",360  181  65744,

Unnamed: 0,Array,Chunk
Bytes,15.96 GiB,90.98 MiB
Shape,"(65744, 181, 360)","(366, 181, 360)"
Count,540 Tasks,180 Chunks
Type,float32,numpy.ndarray


In [9]:
# Resample the daily fields to monthly means.
# The SAM index is defined on monthly-mean sea level pressure, so each calendar
# month must be averaged; taking the monthly minimum instead would bias the
# pressure low and distort the normalised index.
P_mon = P_ds.resample(time='M').mean(dim='time')
P_mon

Unnamed: 0,Array,Chunk
Bytes,536.90 MiB,254.53 kiB
Shape,"(2160, 181, 360)","(1, 181, 360)"
Count,11340 Tasks,2160 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 536.90 MiB 254.53 kiB Shape (2160, 181, 360) (1, 181, 360) Count 11340 Tasks 2160 Chunks Type float32 numpy.ndarray",360  181  2160,

Unnamed: 0,Array,Chunk
Bytes,536.90 MiB,254.53 kiB
Shape,"(2160, 181, 360)","(1, 181, 360)"
Count,11340 Tasks,2160 Chunks
Type,float32,numpy.ndarray


In [10]:
# Write the monthly MSLP to a local netCDF file, showing a dask progress bar while it runs.
with dask.diagnostics.ProgressBar():
    P_mon.to_netcdf(path='MSLP_mon.nc')

[########################################] | 100% Completed |  1min 53.7s


$P^*_{40^\circ S}$ and $P^*_{65^\circ S}$ are the normalized monthly zonal sea level pressure (SLP) at 40°S and 65°S.
The 30-year period from 1971-2000 is used for the normalization period. Note that this period is arbitrary and a switch to a different period would change the exact SAM values but would be unlikely to significantly alter any trends.
(true zonal mean was simply the mean of the 144 points along the appropriate parallel)

In [11]:
# load the monthly pressure netCDF file ('MSLP_mon.nc') from the working directory
P_mon_ds = xr.open_dataset(filename_or_obj='MSLP_mon.nc')

In [12]:
# Zonal-mean MSLP along 40S and 65S: select each latitude, then average over
# longitude. These two series are the inputs to the SAM index.
P40S, P65S = (P_mon_ds.sel(lat=lat_deg).mean(dim='lon') for lat_deg in (-40, -65))
P65S

In [13]:
# normalisation period: first and last year, given as strings
start_date, end_date = '1971', '2000'

In [14]:
# Anomaly of each zonal-mean series, i.e. the (x - mean) part of (x - mean)/std.
# NOTE(review): func.monthly_anomaly is defined outside this notebook; presumably it
# subtracts the start_date..end_date monthly climatology — confirm against its source.
anom40, anom65 = (
    func.monthly_anomaly(zonal_mean, start_date, end_date)
    for zonal_mean in (P40S, P65S)
)
anom65

In [15]:
# Standard deviation of each anomaly series over the normalisation period,
# i.e. the denominator of (x - mean)/std.
std40, std65 = [
    anomaly.sel(time=slice(start_date, end_date)).std(dim='time')
    for anomaly in (anom40, anom65)
]
std40

In [16]:
# Normalise each latitude band: the anomaly (value minus the mean) divided by
# the standard deviation over the normalisation period.
P40S_n, P65S_n = anom40 / std40, anom65 / std65

In [17]:
# SAM index: the normalised zonal-mean pressure at 40S minus that at 65S
SAM = P40S_n - P65S_n
SAM

In [21]:
# Save only the 'prmsl' variable of the SAM dataset to netCDF, with a dask progress bar.
with dask.diagnostics.ProgressBar():
    SAM['prmsl'].to_netcdf(path='R_SAM_index.nc')

## read in Abram and Marshall SAM index data

In [3]:
# directory holding the Abram SAM data file; the output netCDF is written here too
path = "/g/data/w48/kb6999"

In [19]:
# !ls /g/data/w48/kb6999

In [5]:
# Read the tab-separated Abram (2014) SAM index table and use the
# calendar-year column (age_AD) as the index.
P_df = pd.read_csv(f'{path}/abram2014sam_data.txt', sep='\t').set_index(['age_AD'])
P_df

Unnamed: 0_level_0,SAM_Index,SAM_Index_err,RE,RE_crit,SAM_Index_70yloess,SAM_Index_+error_70yloess,SAM_Index_-error_70yloess,Marshall_SAM_Index
age_AD,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2007,-2.655296,3.217976,-0.015027,-0.237163,1.815786,4.752763,-1.119229,-0.083
2006,2.516332,3.217976,-0.015027,-0.237163,1.762162,4.682995,-1.156849,2.197
2005,0.250082,3.236963,-0.027041,-0.115012,1.708567,4.613422,-1.194603,0.987
2004,2.043958,3.236963,-0.027041,-0.115012,1.655018,4.544066,-1.232481,1.857
2003,0.834730,3.236963,-0.027041,-0.115012,1.601538,4.474956,-1.270464,1.037
...,...,...,...,...,...,...,...,...
1004,-1.813489,3.011280,0.111179,0.006600,-1.579751,1.431294,-4.590993,
1003,-1.529136,3.011280,0.111179,0.006600,-1.563468,1.447598,-4.574714,
1002,-1.007336,3.011280,0.111179,0.006600,-1.548646,1.462439,-4.559895,
1001,-1.523829,3.011280,0.111179,0.006600,-1.535509,1.475590,-4.546760,


In [6]:
# Convert the DataFrame to an xarray Dataset; the age_AD index becomes its dimension.
P_xr = xr.Dataset.from_dataframe(P_df)
P_xr

In [7]:
# Rename 'age_AD' to 'time' and sort the years from smallest to largest
# (the source table lists them newest first).
P_time = P_xr.rename(age_AD='time').sortby('time', ascending=True)
P_time

In [9]:
# select out years from 1850-2007
start_date, end_date = '1850', '2007'
# The time coordinate holds integer years (taken from age_AD), so slice with
# integer bounds; string labels on an integer index rely on implicit
# string/integer coercion in the index lookup.
P_final = P_time.sel(time=slice(int(start_date), int(end_date)))
# Replace the integer years with datetime stamps at the start of each year.
time = pd.date_range(pd.to_datetime(start_date), pd.to_datetime(end_date), freq='YS')
P_final['time'] = time
P_final

In [10]:
# Write the 1850-2007 subset to a netCDF file in the same directory as the source data.
with dask.diagnostics.ProgressBar():
    P_final.to_netcdf(path=f'{path}/SAM_data_abram_1850_2007.nc')