#### This notebook processes and analyzes daily and monthly zonal wind data (ua) from Atmospheric Model Intercomparison Project (AMIP) simulations to compute the Southern Annular Mode (SAM) index across multiple climate models. The daily SAM index will then be used to calculate the SAM persistence timescale in later notebooks.

##### import packages

In [3]:
from utils import *
import pandas as pd
import numpy as np
import xarray as xr
from sklearn.decomposition import PCA
import os
import pickle

##### define functions

In [4]:
def calculate_jet_latitude(file_name, level):
    """
    Calculate the jet latitude at every time step of a NetCDF zonal-wind file.

    The zonal wind ('ua') at one pressure level is averaged over longitude
    within the -65° to -20° latitude band, and `find_jet_lat` is applied to
    each time step of the resulting latitude profile.

    Parameters:
    file_name : str
        Path to NetCDF file containing wind data.
    level : int
        Positional index of the pressure level along the 'plev' dimension.

    Returns:
    jet_lat_pfj_zonal_mean : pandas.Series
        Jet latitude for each time step (daily or monthly, depending on the
        input file), indexed by the file's time coordinate values.
    """
    # Open the file in a context manager so the handle is released even when
    # this function is called for many model files in a loop.
    with xr.open_dataset(file_name, decode_cf=True) as dataset:
        # Select zonal wind data at the specified pressure level
        zonal_wind = dataset['ua'].isel(plev=level)

        # Zonal mean between -65° and -20° latitude.
        # NOTE(review): slice(-65, -20) presumably requires latitude to be
        # stored in ascending order - confirm for every input file.
        # .load() makes sure the values are in memory before the file closes.
        ua_pfj_zonal_mean = zonal_wind.sel(lat=slice(-65, -20)).mean(dim='lon').load()

        # Plain array of time values, the same way calculate_sam_models
        # builds its index.
        time_values = dataset['time'].values

    latitudes = ua_pfj_zonal_mean.lat.values
    num_steps = ua_pfj_zonal_mean.sizes['time']

    # NaN-initialised so that a skipped step would be visible in the output
    jet_lat_pfj_zonal_mean = np.full(num_steps, np.nan)

    # find_jet_lat returns (latitude, speed); only the latitude is kept.
    # Selecting by dimension name avoids relying on time being the first axis.
    for step in range(num_steps):
        jet_lat_pfj_zonal_mean[step], _ = find_jet_lat(
            ua_pfj_zonal_mean.isel(time=step), latitudes
        )

    # Return the jet latitude series with time index
    return pd.Series(jet_lat_pfj_zonal_mean, index=time_values)

In [5]:
def calculate_sam_models(file):
    """
    Calculate the leading principal component (PC) time series of zonal-mean
    zonal wind anomalies over the Southern Hemisphere mid-to-high latitudes,
    used here as a proxy for the Southern Annular Mode (SAM).

    Parameters:
    file : str
        Path to the NetCDF file containing 'ua' (zonal wind) data.

    Returns:
    leading_pc_timeseries : pandas.Series
        Time series of the leading principal component of the zonal wind
        anomalies, indexed by the file's time coordinate values.

    Notes:
    NOTE(review): the sign of a principal component is arbitrary, so the
    polarity of the returned index may differ between models - confirm
    whether downstream analysis depends on it.
    """

    # 1. Load dataset and subset the zonal wind (ua) field
    # Select latitudes between -70° and -20° and pressure levels between
    # 100000 and 20000 (1000 hPa to 200 hPa if plev is in Pa), then average
    # over longitude.
    # NOTE(review): these slices presumably require ascending 'lat' and
    # descending 'plev' in the file - confirm for every model.
    # The context manager closes the file once the subset is in memory.
    with xr.open_dataset(file) as dataset:
        ua_subset = (
            dataset['ua']
            .sel(lat=slice(-70, -20), plev=slice(100000, 20000))
            .mean('lon')
            .load()
        )
    print('read in done')

    # 2. Remove the seasonal cycle (de-seasonalize)
    # Climatological mean for each day of year
    climatology = ua_subset.groupby("time.dayofyear").mean("time")

    # Subtract the climatology from the data to get anomalies
    ua_anomalies = ua_subset.groupby("time.dayofyear") - climatology

    # 3. Flatten the 2D fields (plev x lat) into a 1D feature vector per time
    # step for PCA; missing values are replaced by 0 because PCA cannot
    # handle NaN.
    ua_reshaped = ua_anomalies.stack(features=("plev", "lat")).fillna(0)
    print(ua_reshaped.shape)

    # 4. Apply Principal Component Analysis (PCA)
    # Keep only the leading component (most dominant mode of variability)
    pca = PCA(n_components=1)
    leading_pc = pca.fit_transform(ua_reshaped)

    # 5. Convert the leading PC into a pandas Series with time index
    leading_pc_timeseries = pd.Series(leading_pc[:, 0], index=ua_subset.time.values)

    return leading_pc_timeseries


#### Calculating daily SAM indices for each model

In [None]:
# Daily SAM index per model, restricted to 2000-2014 and keyed by model name.
sam_models_daily = {}
#directory = 'directory that contains daily ua from amip runs for all models'
directory = '/OWC/huiyu/CMIP6/ua/amip_daily/combined'
# os.listdir returns entries in arbitrary order; sort for reproducible runs.
file_list = sorted(os.listdir(directory))
for file in file_list:
    # Same filter as the monthly loop: skip anything that is not a 'ua' file
    # (hidden files, logs, ...) instead of failing on the name parsing below.
    if not file.startswith('ua'):
        continue

    file_name = os.path.join(directory, file)
    print(f"Processing file: {file_name}")
    # The model name is the third underscore-separated field of the file name.
    model_name = file.split('_')[2]

    sam_daily = calculate_sam_models(file_name)
    if isinstance(sam_daily.index, pd.DatetimeIndex):
        print("The index is a pandas DatetimeIndex.")
    else:
        # Non-standard calendars do not decode to a DatetimeIndex; convert so
        # the string-based year slicing below works.
        sam_daily.index = cftime_to_datetime(sam_daily.index)
        print("The index is not a pandas DatetimeIndex.")

    sam_models_daily[model_name] = sam_daily.loc['2000':'2014']

### Calculating monthly SAM indices for each model

In [None]:
# Monthly SAM index and 2000-2014 mean jet latitude per model, keyed by model name.
sam_models_monthly = {}
climatological_jet_lat_models_monthly = {}
#directory = 'directory that contains monthly ua from amip runs for all models'
directory = '/OWC/huiyu/CMIP6/ua/amip_monthly/combined'
# os.listdir returns entries in arbitrary order; sort for reproducible runs.
file_list = sorted(os.listdir(directory))
# Positional index along 'plev' used for the jet latitude.
# NOTE(review): assumed to be 850 hPa; this depends on each file's level
# ordering - confirm for every model.
jet_level_index = 2
for file in file_list:
    # Only zonal-wind files are processed; everything else is skipped.
    if not file.startswith('ua'):
        continue

    file_name = os.path.join(directory, file)
    print(f"Processing file: {file_name}")
    # The model name is the third underscore-separated field of the file name.
    model_name = file.split('_')[2]

    sam_monthly = calculate_sam_models(file_name)
    jet_lat = calculate_jet_latitude(file_name, jet_level_index) # 850hPa
    if isinstance(sam_monthly.index, pd.DatetimeIndex):
        print("The index is a pandas DatetimeIndex.")
    else:
        # Both series come from the same file's time axis, so they are
        # converted together to allow the string-based year slicing below.
        sam_monthly.index = cftime_to_datetime(sam_monthly.index)
        jet_lat.index = cftime_to_datetime(jet_lat.index)
        print("The index is not a pandas DatetimeIndex.")

    sam_models_monthly[model_name] = sam_monthly.loc['2000':'2014']
    climatological_jet_lat_models_monthly[model_name] = jet_lat.loc['2000':'2014'].mean()

In [8]:
save_directory = '../data'

# Create the output folder if it is not there yet
os.makedirs(save_directory, exist_ok=True)

# Writing the daily dictionary is currently disabled:
#with open(os.path.join(save_directory, 'sam_models.pkl'), 'wb') as f:
    #pickle.dump(sam_models_daily, f)

# Each monthly result is pickled to its own file inside save_directory
pickle_targets = (
    ('sam_models_monthly_2000_2014.pkl', sam_models_monthly),
    ('climatological_jet_lat_models_monthly_2000_2014.pkl', climatological_jet_lat_models_monthly),
)
for pickle_name, payload in pickle_targets:
    with open(os.path.join(save_directory, pickle_name), 'wb') as f:
        pickle.dump(payload, f)