<a name="top"></a>
<div style="width:1000 px">

<div style="float:right; width:98 px; height:98px;">
<img src="https://cdn.miami.edu/_assets-common/images/system/um-logo-gray-bg.png" alt="Miami Logo" style="height: 98px;">
</div>

<div style="float:right; width:98 px; height:98px;">
<img src="https://media.licdn.com/dms/image/C4E0BAQFlOZSAJABP4w/company-logo_200_200/0/1548285168598?e=2147483647&v=beta&t=g4jl8rEhB7HLJuNZhU6OkJWHW4cul_y9Kj_aoD7p0_Y" alt="STI Logo" style="height: 98px;">
</div>


<h1>Compute Harmonics on the sFWRD Database</h1>
By: Tyler M. Fenske
    <br>
Last Edited: 2024-02-01
<br>
<br>    
<br>
This notebook applies harmonic analysis to various forecast and reanalysis datasets as part of the data pre-processing for future forecasting work. 
<br>    
<br>
Note: some of the datasets were too large to be processed all at once. The workaround is Split_BigData.ipynb, which takes a given model, splits each yearly variable file spatially into 4 distinct quadrant files, and rejoins them after harmonic processing. 
<br>    
<br>
<div style="clear:both"></div>
</div>

<hr style="height:2px;">

### Imports & Functions

In [1]:
import matplotlib.pyplot as plt
import numpy as np
import xarray as xr
import pandas as pd
import os
import glob
from datetime import datetime
import scipy

In [2]:
%run File_concat_mod_functions.ipynb
#include many of the existing functions to handle the NOAA S2S database

In [3]:
def model_concat(model_list, model):

    '''
    Opens a list of files and concatenates them along the time dimension.

    Inputs:

    model_list: (list of str) filenames to open, in the order they should be joined
    model: (str) one of the six available model outputs in the database (does not work for UFS);
           'CONUS404' files are joined along 'Time', every other model along 'time'

    Outputs:

    df1: (xarray dataset) every file cast to float32 and combined along the time dimension

    Raises:

    ValueError: if model_list is empty

    '''

    if len(model_list) == 0:
        raise ValueError('model_list is empty; there is nothing to concatenate')

    # CONUS404 is the only model handled here whose time dimension is capitalised
    is_conus404 = (model == 'CONUS404')
    cc_dim = 'Time' if is_conus404 else 'time'

    datasets = []
    for position, f in enumerate(model_list):
        datasets.append(xr.open_dataset(f).astype('float32')) #.chunk(get_chunk_database(model))
        # Progress output is unchanged: every file after the first is echoed,
        # except for CONUS404, which has always been silent.
        if position > 0 and not is_conus404:
            print(f)

    # A single file needs no concatenation
    if len(datasets) == 1:
        return datasets[0]

    # One concat over the whole list instead of re-copying the growing dataset per file
    return xr.concat(datasets, dim = cc_dim)

In [4]:
def open_database_abs_file_xr(model, var, year):

    '''Open one yearly "Abs" database file and return the requested variable.

       model : model name; selects the database sub-directory and is passed to get_filename
       var   : variable name; part of the filename and the key selected from the opened dataset
       year  : year of the file to open

       Returns the `var` entry of the opened dataset (times decoded).
       A model/var/year combination with no matching file or variable raises an error.'''

    directory = f'/raid60B/s2sfire/NOAA_S2S/database_files_final/{model}/'
    # get_filename is not defined in this notebook; presumably it comes from the
    # %run of File_concat_mod_functions.ipynb -- confirm there.
    file_stem = get_filename(model)
    full_path = '{}{}_{}_Abs_{}.nc'.format(directory, var, file_stem, str(year))

    dataset = xr.open_dataset(full_path, decode_times=True)
    return dataset[var]

In [5]:
# Time encoding passed as `encoding=` when the period-normal, anomaly and
# quadrant files are written with to_netcdf.
encoding = dict(
    time=dict(units='hours since 2000-01-01 0Z', calendar='standard'),
)

### Harmonics Function

In [None]:
def compute_harmonics(yt, k=10, offbyone=False):

    '''This function is based off of Section 8.4 (Frequency Domain - 1. Harmonic Analysis)
        from the textbook Statistical Methods in the Atmospheric Sciences by Wilks (2006).
        This function calculates a specified number of harmonics, or fitted cosine curves,
        for a given time series. It functions very similarly to Fourier analysis.

        Inputs:
        yt : array-like with time as the LAST axis. A plain 1-d series works, as does
             any stack of series such as (lat, lon, time). NaNs are skipped in the
             mean and in the coefficient sums.
        k  : the number of harmonics to compute (capped at n/2, n = number of time steps)

        Outputs:
        yt0  : the mean plus the sum of the k harmonics, same shape as yt
        ybar : the mean of yt over time (yt's shape without the time axis)
        C    : the amplitude of each harmonic, shape (..., k)
        w    : the phase angle of each harmonic in radians, shape (..., k);
               values lie in [-pi/2, 3*pi/2) before any offbyone shift

        KEYWORDS
        offbyone : use if your iterator should start at 1 (Python iterators start at 0)
                   (do not use if unsure; for long time series (100+), it won't matter)'''

    yt = np.asanyarray(yt)
    n = yt.shape[-1]                                   # number of time steps

    if (k > int(n/2)):                                 # an n-point series supports at most n/2 harmonics
        k = int(n/2)

    x    = np.arange(n, dtype=float)                   # time indices 0 .. n-1
    ybar = np.nanmean(yt, axis=-1, keepdims=True)
    freq = 2 * np.pi * x / n                           # fundamental angle at every time step

    harmonics_range = np.arange(1, k + 1)

    # angles[j, t] = (j + 1) * 2*pi*t/n, shape (k, n)
    angles    = harmonics_range[:, np.newaxis] * freq
    cos_terms = np.cos(angles)
    sin_terms = np.sin(angles)

    # Ellipsis indexing keeps every leading axis of yt, so 1-d, 2-d and 3-d inputs all broadcast
    A = (2 / n) * np.nansum(yt[..., np.newaxis, :] * cos_terms, axis=-1)
    B = (2 / n) * np.nansum(yt[..., np.newaxis, :] * sin_terms, axis=-1)

    C = np.sqrt(A**2 + B**2)

    # B / A is undefined where A == 0; those entries are overwritten below, so the
    # divide/invalid warnings carry no information.
    with np.errstate(divide='ignore', invalid='ignore'):
        w = np.where(A == 0, np.pi / 2, np.arctan(B / A))
    # A pure negative-sine harmonic (A == 0, B < 0) has phase -pi/2; using +pi/2
    # would flip the sign of that harmonic in the reconstruction.
    w = np.where((A == 0) & (B < 0), -np.pi / 2, w)
    # arctan only covers A > 0; shift by pi for the other half plane
    w = np.where(A < 0, w + np.pi, w)

    if (offbyone):
        # NOTE(review): the shift is (1/n) degrees converted to radians and is the
        # same for every harmonic -- confirm this is the intended correction.
        w = w + np.deg2rad(1 / n)

    yt0 = np.repeat(ybar, n, axis=-1)                  # start the reconstruction from the mean

    # cos argument has shape (..., n, k); summing the last axis adds up the harmonics
    yt0 = yt0 + np.sum(C[..., np.newaxis, :] * np.cos(angles.T - w[..., np.newaxis, :]), axis=-1)

    return (yt0, ybar.squeeze(axis=-1), C, w)

In [6]:
def compute_harmonics_xr(data_xr, k=10, large_data=False):
    '''This function is based off of Section 8.4 (Frequency Domain - 1. Harmonic Analysis)
        from the textbook Statistical Methods in the Atmospheric Sciences by Wilks (2006).
        This function calculates a specified number of harmonics, or fitted cosine curves,
        for a given time series. It functions very similarly to Fourier analysis.

        Inputs:
        data_xr : a named 3-d DataArray with dimensions time, latitude and longitude
        k       : the number of harmonics to compute (capped at n/2, n = number of time steps)


        Keywords:
        large_data : Set to true if dealing with very large data (50+ GB); used for HRRR and CONUS404.
                     Harmonics are then handled one at a time (with progress printed)
                     instead of in a single broadcast over a harmonic dimension.

        Outputs:
        period_normal        : Dataset holding '<name>_periodnorm', the time mean plus the sum of the k harmonics
        anomaly              : Dataset holding '<name>_anom', the raw input data minus the period normal data
        harmonics_parameters : Dataset with 'Constants' (amplitudes) and 'Phis' (phase angles, radians),
                               both on (harmonic, latitude, longitude)'''

    n = data_xr.time.shape[0]

    # An n-point series supports at most n/2 harmonics
    if (k > int(n/2)):
        k = int(n/2)

    x    = np.linspace(1,n,n)-1
    freq = 2*np.pi*x/n

    harmonics_range = np.arange(1,k+1)

    # base_terms[t, j] = harmonic_j * 2*pi*t/n
    base_terms = xr.DataArray(harmonics_range*freq[:,np.newaxis],
                              dims=('time','harmonic'),
                              coords={'time':data_xr.time.values,'harmonic':harmonics_range}).astype('float32')
    cos_terms  = np.cos(base_terms)
    sin_terms  = np.sin(base_terms)

    if (large_data):
        # Allocate float32 directly rather than building a float64 array and casting it
        empty_array = np.zeros((k,data_xr.latitude.shape[0],data_xr.longitude.shape[0]), dtype='float32')
        A = xr.DataArray(empty_array, dims=('harmonic','latitude','longitude'),
                         coords={'harmonic':harmonics_range,'latitude':data_xr.latitude,'longitude':data_xr.longitude})
        B = A.copy(deep=True)
        print('Harmonics coefficients computed: ', end='')
        for i in harmonics_range:
            A[i-1,:,:] = (2/n)*(cos_terms.sel(harmonic=i)*data_xr).sum(dim='time')
            B[i-1,:,:] = (2/n)*(sin_terms.sel(harmonic=i)*data_xr).sum(dim='time')
            # separator after every harmonic except the last (was hard-coded to 10)
            end = ',' if i < k else ''
            print(i, end=end)
        print(' Completed.')

    else:
        A = (2/n)*(cos_terms*data_xr).sum(dim='time')
        B = (2/n)*(sin_terms*data_xr).sum(dim='time')

    C = (A**2 + B**2)**.5

    # B/A is undefined where A == 0; those entries are overwritten, so the
    # divide/invalid warnings carry no information.
    with np.errstate(divide='ignore', invalid='ignore'):
        w = np.where(A == 0, np.pi/2, np.arctan(B/A))
    # A pure negative-sine harmonic (A == 0, B < 0) has phase -pi/2, not +pi/2
    w = np.where((A == 0) & (B < 0), -np.pi/2, w)
    # arctan only covers A > 0; shift by pi for the other half plane.
    # The result lies in [-pi/2, 3*pi/2), so no wrap at 2*pi is needed.
    w = np.where(A < 0, w + np.pi, w)

    C = xr.DataArray(C, dims=('harmonic','latitude','longitude'),
                     coords={'harmonic':harmonics_range,'latitude':data_xr.latitude,'longitude':data_xr.longitude})
    w = xr.DataArray(w, dims=('harmonic','latitude','longitude'),
                     coords={'harmonic':harmonics_range,'latitude':data_xr.latitude,'longitude':data_xr.longitude})

    if (large_data):
        print('Harmonics added to mean to compute period normal: ', end='')
        # Accumulate one harmonic at a time so no (harmonic, time, lat, lon) array is built
        period_normal = data_xr.mean(dim='time')
        for i in harmonics_range:
            period_normal = period_normal + ((C.sel(harmonic=i))*np.cos(base_terms.sel(harmonic=i) - w.sel(harmonic=i)))
            end = ',' if i < k else ''
            print(i, end=end)
        print(' Completed.')
    else:
        period_normal = data_xr.mean(dim='time') + (C*np.cos(base_terms - w)).sum(dim='harmonic')

    def _like_input(field):
        # Give a derived field the input's layout: (time, latitude, longitude) order,
        # float32, the input's time values, and the input's variable/coordinate attributes.
        field = field.transpose('time','latitude','longitude').astype('float32')
        field = field.assign_coords({'time': ('time', data_xr.time.values)})
        field.attrs = data_xr.attrs.copy()
        for coord in data_xr.coords:
            field.coords[coord].attrs = data_xr.coords[coord].attrs.copy()
        return field

    period_normal = _like_input(period_normal)
    # The anomaly is taken against the float32 period normal that is returned
    anomaly       = _like_input(data_xr - period_normal)

    period_normal        = period_normal.to_dataset(name=f'{data_xr.name}_periodnorm')
    anomaly              = anomaly.to_dataset(name=f'{data_xr.name}_anom')

    harmonics_parameters = xr.Dataset({'Constants': C, "Phis": w})

    return period_normal, anomaly, harmonics_parameters

## Application of the Harmonics Function

Organized by model, as each needs to be handled differently.

### Initial Testing

In [None]:
# raw_df = xr.open_mfdataset(f'{filepath}t2{filebase}*{suffix}', decode_times=True).chunk({'time': -1, 'latitude': 100, 'longitude': 100})

# era5_period_normal, era5_anomaly, era5_harmonics_parameters = compute_harmonics_xr(raw_df['t2'])

# era5_period_normal.attrs = raw_df.attrs.copy()
# era5_anomaly.attrs       = raw_df.attrs.copy()

# era5_period_normal.attrs.update({'File_creation_date': datetime.now().strftime("%Y-%m-%d %H:%M:%S")})
# era5_anomaly.attrs.update({'File_creation_date': datetime.now().strftime("%Y-%m-%d %H:%M:%S")})

# era5_period_normal['t2_periodnorm'].chunk({'time': 500, 'latitude': -1, 'longitude': -1})

# for year, group in era5_period_normal.groupby('time.year'):
#     filename = f'temp/era5_testdata_{year}.nc'
#     group.to_netcdf(filename)

# test  = xr.open_dataset('temp/era5_test.nc')
# test1 = xr.open_dataset('temp/era5_testdata_2011.nc')
# test2 = xr.open_dataset('temp/era5_testdata_2012.nc')

# %%time
# era5_period_normal.to_netcdf('temp/era5_test2.nc')
# test2 = xr.open_dataset('temp/era5_test2.nc')
# raw_df[:,0,0].plot(aspect=3, size=4)
# test2['__xarray_dataarray_variable__'][:,0,0].plot()
# (raw_df[:,0,0] - test2['__xarray_dataarray_variable__'][:,0,0]).plot(aspect=3, size=4)
# era5_C.to_netcdf('temp/era5_harmonics_consts.nc')
# era5_C = xr.open_dataset('temp/era5_harmonics_consts.nc')
# old = xr.open_dataset('../harmonics_parameters/ERA5/t2m_Abs_ERA5_harmonics_parameters.nc')

In [None]:
# %%time
# var = vars[9]
# datatype = 'Abs'
# raw_df = xr.open_mfdataset(f'{filepath}{var}{filebase}{datatype}*.nc', decode_times=True, engine='netcdf4').load()
# data_xr = raw_df[var]
# k = 10

# n = data_xr.time.shape[0]
# if (k > int(n/2)):
#     k = int(n/2)
# x    = np.linspace(1,n,n)-1
# freq = 2*np.pi*x/n
        
# harmonics_range = np.arange(1,k+1)
# base_terms = xr.DataArray(harmonics_range*freq[:,np.newaxis], 
#                           dims=('time','harmonic')).astype('float32')
# cos_terms  = np.cos(base_terms)
# sin_terms  = np.sin(base_terms)

### ERA5

In [None]:
# ERA5 settings read by the processing cell that follows.
k = 10                                        # number of harmonics

model    = 'ERA5'
filepath = f'../database_files_final/{model}/'
filebase = f'_{model}_REANALYSIS_'

# NOTE: `vars` shadows the Python builtin of the same name; the processing
# cell reads the list under this name.
vars = (
    'cape d2 ffwi gust hdw '
    'lspr pbl prate rh t2 '
    'tcp tp u10 v10 vpd '
    'vsm wd ws'
).split()

In [None]:
# Harmonic pre-processing for every ERA5 variable and temporal aggregate.
# Reads k, model, filepath, filebase and vars from the ERA5 settings cell, plus
# `encoding` and compute_harmonics_xr defined earlier in the notebook.
for var in vars:
    for datatype in ['Abs', 'MIN', 'MAX', 'AVG']:
        raw_df = xr.open_mfdataset(f'{filepath}{var}{filebase}{datatype}*.nc', decode_times=True, engine='netcdf4').load()

        print(f'Data loaded, applying harmonics to: {var} {datatype}... ')

        period_normal, anomaly, harmonics_parameters = compute_harmonics_xr(raw_df[var], k=k)

        # Carry over the source file's global attributes and stamp both outputs identically
        created = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        for result in (period_normal, anomaly):
            result.attrs = raw_df.attrs.copy()
            result.attrs.update({'File_creation_date': created})

        periodnorm_temp = f'temp/{model}_{var}_{datatype}_periodnorm.nc'
        anomaly_temp    = f'temp/{model}_{var}_{datatype}_anomaly.nc'

        period_normal.to_netcdf(periodnorm_temp, encoding=encoding)
        anomaly.to_netcdf(anomaly_temp, encoding=encoding)
        harmonics_parameters.to_netcdf(f'../harmonics_parameters/{model}/{var}_{model}_{datatype}_harmonics_parameters.nc')

        print('Full temp files written, splitting into yearly files... ')

        # Drop the in-memory results; the yearly split reads back from the temp files
        del period_normal, anomaly

        # Context managers close each temp file after the split. Previously the handles
        # stayed open, leaking one pair per iteration and leaving the temp files open
        # when the cell was re-run and tried to overwrite them.
        with xr.open_dataset(periodnorm_temp) as period_normal:
            for year, group in period_normal.groupby('time.year'):
                filename = f'{filepath}Period_Normal/{var}{filebase}{datatype}_periodnorm_{year}.nc'
                group.to_netcdf(filename, encoding=encoding)

        with xr.open_dataset(anomaly_temp) as anomaly:
            for year, group in anomaly.groupby('time.year'):
                filename = f'{filepath}Anomaly/{var}{filebase}{datatype}_anom_{year}.nc'
                group.to_netcdf(filename, encoding=encoding)

        print('Complete!')

### NCEP

In [None]:
# NCEP settings read by the processing cell that follows.
k = 10                                        # number of harmonics

model    = 'NCEP'
filepath = f'../database_files_final/{model}/'
filebase = f'_{model}_REANALYSIS_V2_'

# NOTE: `vars` shadows the Python builtin of the same name; the processing
# cell reads the list under this name.
vars = (
    'ffwi hdw prate rh sm '
    't2 u10 v10 vpd vsm '
    'wd ws'
).split()

In [None]:
# Harmonic pre-processing for every NCEP variable and temporal aggregate.
# Reads k, model, filepath, filebase and vars from the NCEP settings cell, plus
# `encoding` and compute_harmonics_xr defined earlier in the notebook.
for var in vars:
    for datatype in ['Abs', 'MIN', 'MAX', 'AVG']:
        raw_df = xr.open_mfdataset(f'{filepath}{var}{filebase}{datatype}*.nc', decode_times=True, engine='netcdf4').load()

        print(f'Data loaded, applying harmonics to: {var} {datatype}... ')

        period_normal, anomaly, harmonics_parameters = compute_harmonics_xr(raw_df[var], k=k)

        # Carry over the source file's global attributes and stamp both outputs identically
        created = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        for result in (period_normal, anomaly):
            result.attrs = raw_df.attrs.copy()
            result.attrs.update({'File_creation_date': created})

        periodnorm_temp = f'temp/{model}_{var}_{datatype}_periodnorm.nc'
        anomaly_temp    = f'temp/{model}_{var}_{datatype}_anomaly.nc'

        period_normal.to_netcdf(periodnorm_temp, encoding=encoding)
        anomaly.to_netcdf(anomaly_temp, encoding=encoding)
        harmonics_parameters.to_netcdf(f'../harmonics_parameters/{model}/{var}_{model}_{datatype}_harmonics_parameters.nc')

        print('Full temp files written, splitting into yearly files... ')

        # Drop the in-memory results; the yearly split reads back from the temp files
        del period_normal, anomaly

        # Context managers close each temp file after the split. Previously the handles
        # stayed open, leaking one pair per iteration and leaving the temp files open
        # when the cell was re-run and tried to overwrite them.
        with xr.open_dataset(periodnorm_temp) as period_normal:
            for year, group in period_normal.groupby('time.year'):
                filename = f'{filepath}Period_Normal/{var}{filebase}{datatype}_periodnorm_{year}.nc'
                group.to_netcdf(filename, encoding=encoding)

        with xr.open_dataset(anomaly_temp) as anomaly:
            for year, group in anomaly.groupby('time.year'):
                filename = f'{filepath}Anomaly/{var}{filebase}{datatype}_anom_{year}.nc'
                group.to_netcdf(filename, encoding=encoding)

        print('Complete!')

### NARR

In [None]:
# NARR settings read by the processing cell that follows.
k = 10                                        # number of harmonics

model    = 'NARR'
filepath = f'../database_files_final/{model}/'
filebase = f'_{model}_REANALYSIS_'

# NOTE: `vars` shadows the Python builtin of the same name; the processing
# cell reads the list under this name.
vars = (
    'ffwi hdw pbl prate rh '
    'sm t2 tp u10 v10 '
    'vpd wd ws'
).split()

In [None]:
# Harmonic pre-processing for every NARR variable and temporal aggregate.
# Reads k, model, filepath, filebase and vars from the NARR settings cell, plus
# `encoding` and compute_harmonics_xr defined earlier in the notebook.
for var in vars:
    for datatype in ['Abs', 'MIN', 'MAX', 'AVG']:
        raw_df = xr.open_mfdataset(f'{filepath}{var}{filebase}{datatype}*.nc', decode_times=True, engine='netcdf4').load()

        print(f'Data loaded, applying harmonics to: {var} {datatype}... ')

        period_normal, anomaly, harmonics_parameters = compute_harmonics_xr(raw_df[var], k=k)

        # Carry over the source file's global attributes and stamp both outputs identically
        created = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        for result in (period_normal, anomaly):
            result.attrs = raw_df.attrs.copy()
            result.attrs.update({'File_creation_date': created})

        periodnorm_temp = f'temp/{model}_{var}_{datatype}_periodnorm.nc'
        anomaly_temp    = f'temp/{model}_{var}_{datatype}_anomaly.nc'

        period_normal.to_netcdf(periodnorm_temp, encoding=encoding)
        anomaly.to_netcdf(anomaly_temp, encoding=encoding)
        harmonics_parameters.to_netcdf(f'../harmonics_parameters/{model}/{var}_{model}_{datatype}_harmonics_parameters.nc')

        print('Full temp files written, splitting into yearly files... ')

        # Drop the in-memory results; the yearly split reads back from the temp files
        del period_normal, anomaly

        # Context managers close each temp file after the split. Previously the handles
        # stayed open, leaking one pair per iteration and leaving the temp files open
        # when the cell was re-run and tried to overwrite them.
        with xr.open_dataset(periodnorm_temp) as period_normal:
            for year, group in period_normal.groupby('time.year'):
                filename = f'{filepath}Period_Normal/{var}{filebase}{datatype}_periodnorm_{year}.nc'
                group.to_netcdf(filename, encoding=encoding)

        with xr.open_dataset(anomaly_temp) as anomaly:
            for year, group in anomaly.groupby('time.year'):
                filename = f'{filepath}Anomaly/{var}{filebase}{datatype}_anom_{year}.nc'
                group.to_netcdf(filename, encoding=encoding)

        print('Complete!')

### NAM

In [None]:
# NAM settings read by the processing cell that follows.
k = 10                                        # number of harmonics

model    = 'NAM'
filepath = f'../database_files_final/{model}/'
filebase = f'_{model}_HISTORICAL_'

# NOTE: `vars` shadows the Python builtin of the same name; the processing
# cell reads the list under this name.
vars = (
    'cape ffwi gust hdw prate '
    'rh sm t2 tp u10 '
    'v10 vpd wd ws'
).split()

In [None]:
# Harmonic pre-processing for every NAM variable and temporal aggregate.
# Reads k, model, filepath, filebase and vars from the NAM settings cell, plus
# `encoding` and compute_harmonics_xr defined earlier in the notebook.
for var in vars:
    for datatype in ['Abs', 'MIN', 'MAX', 'AVG']:
        raw_df = xr.open_mfdataset(f'{filepath}{var}{filebase}{datatype}*.nc', decode_times=True, engine='netcdf4').load()

        print(f'Data loaded, applying harmonics to: {var} {datatype}... ')

        # Only the 'Abs' files take the one-harmonic-at-a-time path
        large_data = (datatype == 'Abs')

        period_normal, anomaly, harmonics_parameters = compute_harmonics_xr(raw_df[var], k=k, large_data=large_data)

        # Carry over the source file's global attributes and stamp both outputs identically
        created = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        for result in (period_normal, anomaly):
            result.attrs = raw_df.attrs.copy()
            result.attrs.update({'File_creation_date': created})

        periodnorm_temp = f'temp/{model}_{var}_{datatype}_periodnorm.nc'
        anomaly_temp    = f'temp/{model}_{var}_{datatype}_anomaly.nc'

        period_normal.to_netcdf(periodnorm_temp, encoding=encoding)
        anomaly.to_netcdf(anomaly_temp, encoding=encoding)
        harmonics_parameters.to_netcdf(f'../harmonics_parameters/{model}/{var}_{model}_{datatype}_harmonics_parameters.nc')

        print('Full temp files written, splitting into yearly files... ')

        # Drop the in-memory results; the yearly split reads back from the temp files
        del period_normal, anomaly

        # Context managers close each temp file after the split. Previously the handles
        # stayed open, leaking one pair per iteration and leaving the temp files open
        # when the cell was re-run and tried to overwrite them.
        with xr.open_dataset(periodnorm_temp) as period_normal:
            for year, group in period_normal.groupby('time.year'):
                filename = f'{filepath}Period_Normal/{var}{filebase}{datatype}_periodnorm_{year}.nc'
                group.to_netcdf(filename, encoding=encoding)

        with xr.open_dataset(anomaly_temp) as anomaly:
            for year, group in anomaly.groupby('time.year'):
                filename = f'{filepath}Anomaly/{var}{filebase}{datatype}_anom_{year}.nc'
                group.to_netcdf(filename, encoding=encoding)

        print('Complete!')

### HRRR

In [7]:
# HRRR settings read by the processing cell that follows.
k = 10                                        # number of harmonics

model    = 'HRRR'
filepath = f'../database_files_final/{model}/'
filebase = f'_{model}_HISTORICAL_'

# NOTE: `vars` shadows the Python builtin of the same name; the processing
# cell reads the list under this name.
vars = (
    'cape d2 ffwi gust hdw '
    'pbl prate rh sm t2 '
    'tp u10 v10 vpd wd '
    'ws'
).split()

In [8]:
# Harmonic pre-processing for every HRRR variable and temporal aggregate.
# Reads k, model, filepath, filebase and vars from the HRRR settings cell, plus
# `encoding` and compute_harmonics_xr defined earlier in the notebook.
for var in vars:
    for datatype in ['Abs', 'MIN', 'MAX', 'AVG']:
        raw_df = xr.open_mfdataset(f'{filepath}{var}{filebase}{datatype}*.nc', decode_times=True, engine='netcdf4').load()

        print(f'Data loaded, applying harmonics to: {var} {datatype}... ')

        # HRRR always takes the one-harmonic-at-a-time path
        period_normal, anomaly, harmonics_parameters = compute_harmonics_xr(raw_df[var], k=k, large_data=True)

        # Carry over the source file's global attributes and stamp both outputs identically
        created = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        for result in (period_normal, anomaly):
            result.attrs = raw_df.attrs.copy()
            result.attrs.update({'File_creation_date': created})

        periodnorm_temp = f'temp/{model}_{var}_{datatype}_periodnorm.nc'
        anomaly_temp    = f'temp/{model}_{var}_{datatype}_anomaly.nc'

        period_normal.to_netcdf(periodnorm_temp, encoding=encoding)
        anomaly.to_netcdf(anomaly_temp, encoding=encoding)
        harmonics_parameters.to_netcdf(f'../harmonics_parameters/{model}/{var}_{model}_{datatype}_harmonics_parameters.nc')

        print('Full temp files written, splitting into yearly files... ')

        # Drop the in-memory results; the yearly split reads back from the temp files
        del period_normal, anomaly

        # Context managers close each temp file after the split. Previously the handles
        # stayed open, leaking one pair per iteration and leaving the temp files open
        # when the cell was re-run and tried to overwrite them.
        with xr.open_dataset(periodnorm_temp) as period_normal:
            for year, group in period_normal.groupby('time.year'):
                filename = f'{filepath}Period_Normal/{var}{filebase}{datatype}_periodnorm_{year}.nc'
                group.to_netcdf(filename, encoding=encoding)

        with xr.open_dataset(anomaly_temp) as anomaly:
            for year, group in anomaly.groupby('time.year'):
                filename = f'{filepath}Anomaly/{var}{filebase}{datatype}_anom_{year}.nc'
                group.to_netcdf(filename, encoding=encoding)

        print('Complete!')

Struct() takes at most 1 argument (3 given)


Data loaded, applying harmonics to: v10 Abs... 
Harmonics coefficients computed: 1,2,3,4,5,6,7,8,9,10 Completed.
Harmonics added to mean to compute period normal: 1,2,3,4,5,6,7,8,9,10 Completed.
Full temp files written, splitting into yearly files... 
Complete!
Data loaded, applying harmonics to: v10 MIN... 
Harmonics coefficients computed: 1,2,3,4,5,6,7,8,9,10 Completed.
Harmonics added to mean to compute period normal: 1,2,3,4,5,6,7,8,9,10 Completed.
Full temp files written, splitting into yearly files... 
Complete!
Data loaded, applying harmonics to: v10 MAX... 
Harmonics coefficients computed: 1,2,3,4,5,6,7,8,9,10 Completed.
Harmonics added to mean to compute period normal: 1,2,3,4,5,6,7,8,9,10 Completed.
Full temp files written, splitting into yearly files... 
Complete!
Data loaded, applying harmonics to: v10 AVG... 
Harmonics coefficients computed: 1,2,3,4,5,6,7,8,9,10 Completed.
Harmonics added to mean to compute period normal: 1,2,3,4,5,6,7,8,9,10 Completed.
Full temp files wri

### CONUS404

In [7]:
# CONUS404 settings read by the processing cells that follow.
k = 10                                        # number of harmonics

model    = 'CONUS404'
filepath = f'../database_files_final/{model}/'
filebase = f'_{model}_ANALYSIS_'

# NOTE: `vars` shadows the Python builtin of the same name; the processing
# cells read the list under this name.
vars = (
    'd2 ffwi hdw mlcape pbl '
    'prate rh sbcape t2 tp '
    'u10 v10 vpd vsm wd '
    'ws'
).split()

In [34]:
# Harmonic pre-processing for the CONUS404 MIN/MAX/AVG aggregates ('Abs' is not in this loop).
# Reads k, model, filepath, filebase and vars from the CONUS404 settings cell, plus
# `encoding` and compute_harmonics_xr defined earlier in the notebook.
for var in vars:
    for datatype in ['MIN', 'MAX', 'AVG']:
        raw_df = xr.open_mfdataset(f'{filepath}{var}{filebase}{datatype}*.nc', decode_times=True, engine='netcdf4').load()

        print(f'Data loaded, applying harmonics to: {var} {datatype}... ')

        # CONUS404 always takes the one-harmonic-at-a-time path
        period_normal, anomaly, harmonics_parameters = compute_harmonics_xr(raw_df[var], k=k, large_data=True)

        # Carry over the source file's global attributes and stamp both outputs identically
        created = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        for result in (period_normal, anomaly):
            result.attrs = raw_df.attrs.copy()
            result.attrs.update({'File_creation_date': created})

        periodnorm_temp = f'temp/{model}_{var}_{datatype}_periodnorm.nc'
        anomaly_temp    = f'temp/{model}_{var}_{datatype}_anomaly.nc'

        period_normal.to_netcdf(periodnorm_temp, encoding=encoding)
        anomaly.to_netcdf(anomaly_temp, encoding=encoding)
        harmonics_parameters.to_netcdf(f'../harmonics_parameters/{model}/{var}_{model}_{datatype}_harmonics_parameters.nc')

        print('Full temp files written, splitting into yearly files... ')

        # Drop the in-memory results; the yearly split reads back from the temp files
        del period_normal, anomaly

        # Context managers close each temp file after the split. Previously the handles
        # stayed open, leaking one pair per iteration and leaving the temp files open
        # when the cell was re-run and tried to overwrite them.
        with xr.open_dataset(periodnorm_temp) as period_normal:
            for year, group in period_normal.groupby('time.year'):
                filename = f'{filepath}Period_Normal/{var}{filebase}{datatype}_periodnorm_{year}.nc'
                group.to_netcdf(filename, encoding=encoding)

        with xr.open_dataset(anomaly_temp) as anomaly:
            for year, group in anomaly.groupby('time.year'):
                filename = f'{filepath}Anomaly/{var}{filebase}{datatype}_anom_{year}.nc'
                group.to_netcdf(filename, encoding=encoding)

        print('Complete!')

Data loaded, applying harmonics to: wd MIN... 
Harmonics coefficients computed: 1,2,3,4,5,6,7,8,9,10 Completed.
Harmonics added to mean to compute period normal: 1,2,3,4,5,6,7,8,9,10 Completed.
Full temp files written, splitting into yearly files... 
Complete!
Data loaded, applying harmonics to: wd MAX... 
Harmonics coefficients computed: 1,2,3,4,5,6,7,8,9,10 Completed.
Harmonics added to mean to compute period normal: 1,2,3,4,5,6,7,8,9,10 Completed.
Full temp files written, splitting into yearly files... 
Complete!
Data loaded, applying harmonics to: wd AVG... 
Harmonics coefficients computed: 1,2,3,4,5,6,7,8,9,10 Completed.
Harmonics added to mean to compute period normal: 1,2,3,4,5,6,7,8,9,10 Completed.
Full temp files written, splitting into yearly files... 
Complete!
Data loaded, applying harmonics to: ws MIN... 
Harmonics coefficients computed: 1,2,3,4,5,6,7,8,9,10 Completed.
Harmonics added to mean to compute period normal: 1,2,3,4,5,6,7,8,9,10 Completed.
Full temp files written

In [7]:
# CONUS404 Abs are too large to process at once; needs to be split into 4.
# Each variable is cut at the middle latitude/longitude index and written to
# temp/ as q00, q01, q10, q11 (first digit = latitude half, second = longitude half).
for var in vars:
    for datatype in ['Abs']:
        raw_df = xr.open_mfdataset(f'{filepath}{var}{filebase}{datatype}*.nc', decode_times=True, engine='netcdf4').load()

        # .sizes is the name -> length mapping; indexing Dataset.dims this way is deprecated
        ny, nx = raw_df.sizes['latitude'], raw_df.sizes['longitude']
        i_split = ny // 2
        j_split = nx // 2

        lat_halves = (slice(0, i_split), slice(i_split, ny))
        lon_halves = (slice(0, j_split), slice(j_split, nx))

        for i, lat_slice in enumerate(lat_halves):
            for j, lon_slice in enumerate(lon_halves):
                quadrant = raw_df.isel(latitude=lat_slice, longitude=lon_slice)
                quadrant.to_netcdf(f'temp/{model}_{var}_{datatype}_q{i}{j}.nc', encoding=encoding)
                del quadrant
        
        

Struct() takes at most 1 argument (3 given)


In [11]:
# Harmonic pre-processing of the CONUS404 'Abs' quadrant files written to temp/.
# Period normal, anomaly and harmonics parameters are written back to temp/ per quadrant.
for var in vars:
    for q in ['00','01','10','11']:
        # load_dataset already reads the file into memory and closes it; no extra .load() needed
        raw_df = xr.load_dataset(f'temp/{model}_{var}_Abs_q{q}.nc', decode_times=True, engine='netcdf4')

        print(f'Data loaded, applying harmonics to: {model} {var} Abs q{q}... ')

        period_normal, anomaly, harmonics_parameters = compute_harmonics_xr(raw_df[var], k=k, large_data=True)

        # Carry over the quadrant file's global attributes and stamp both outputs identically
        created = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        for result in (period_normal, anomaly):
            result.attrs = raw_df.attrs.copy()
            result.attrs.update({'File_creation_date': created})

        period_normal.to_netcdf(f'temp/{model}_{var}_Abs_q{q}_periodnorm.nc', encoding=encoding)
        anomaly.to_netcdf(f'temp/{model}_{var}_Abs_q{q}_anomaly.nc', encoding=encoding)
        harmonics_parameters.to_netcdf(f'temp/{var}_{model}_Abs_q{q}_harmonics_parameters.nc')

        print('Full temp files written.')

Data loaded, applying harmonics to: CONUS404 d2 Abs q00... 
Harmonics coefficients computed: 1,2,3,4,5,8,9,10 Completed.
Harmonics added to mean to compute period normal: 1,2,3,4,5,6,7,8,9,10 Completed.
Full temp files written.
Data loaded, applying harmonics to: CONUS404 d2 Abs q01... 
Harmonics coefficients computed: 1,2,3,4,5,6,7,8,9,10 Completed.
Harmonics added to mean to compute period normal: 1,2,3,4,5,6,7,8,9,10 Completed.
Full temp files written.
Data loaded, applying harmonics to: CONUS404 d2 Abs q10... 
Harmonics coefficients computed: 1,2,3,4,5,6,7,8,9,10 Completed.
Harmonics added to mean to compute period normal: 1,2,3,4,5,6,7,8,9,10 Completed.
Full temp files written.
Data loaded, applying harmonics to: CONUS404 d2 Abs q11... 
Harmonics coefficients computed: 1,2,3,4,5,6,7,8,9,10 Completed.
Harmonics added to mean to compute period normal: 1,2,3,4,5,6,7,8,9,10 Completed.
Full temp files written.
Data loaded, applying harmonics to: CONUS404 ffwi Abs q00... 
Harmonics coef

In [15]:
# Stitch the four quadrant harmonic-parameter files of each variable back
# together and write the combined file to ../harmonics_parameters/{model}/.
for var in vars:
    # load_dataset reads each file into memory and closes it right away;
    # open_dataset would leave four file handles open per variable.
    ds_00 = xr.load_dataset(f'temp/{var}_{model}_Abs_q00_harmonics_parameters.nc')
    ds_01 = xr.load_dataset(f'temp/{var}_{model}_Abs_q01_harmonics_parameters.nc')
    ds_10 = xr.load_dataset(f'temp/{var}_{model}_Abs_q10_harmonics_parameters.nc')
    ds_11 = xr.load_dataset(f'temp/{var}_{model}_Abs_q11_harmonics_parameters.nc')

    # Outer list is concatenated along latitude, inner lists along longitude.
    ds_full = xr.combine_nested([[ds_00, ds_01], [ds_10, ds_11]],
                                concat_dim=['latitude', 'longitude'])

    ds_full.to_netcdf(f'../harmonics_parameters/{model}/{var}_{model}_Abs_harmonics_parameters.nc')

    print(f'CONUS404 {var} harmonics parameters put back together.')

    # Free the pieces and the combined dataset before the next variable,
    # as the other re-join cells do.
    del ds_00, ds_01, ds_10, ds_11, ds_full

In [9]:
# Reassemble the quadrant period normals of each variable and split the
# combined field into one NetCDF file per calendar year.
for var in vars:

    # 2x2 grid of quadrant datasets, loaded in the order q00, q01, q10, q11.
    # The first digit selects the latitude block, the second the longitude block.
    quadrants = [
        [xr.load_dataset(f'temp/{model}_{var}_Abs_q{row}{col}_periodnorm.nc') for col in '01']
        for row in '01'
    ]

    merged = xr.combine_nested(quadrants, concat_dim=['latitude', 'longitude'])

    # The pieces are no longer needed once the full field exists.
    del quadrants

    for yr, yearly in merged.groupby('time.year'):
        out_path = f'{filepath}Period_Normal/{var}{filebase}Abs_periodnorm_{yr}.nc'
        yearly.to_netcdf(out_path, encoding=encoding)

    print(f'CONUS404 {var} period normals put back together and yearly files written.')

    del merged

CONUS404 d2 period normals put back together and yearly files written.
CONUS404 ffwi period normals put back together and yearly files written.
CONUS404 hdw period normals put back together and yearly files written.
CONUS404 mlcape period normals put back together and yearly files written.
CONUS404 pbl period normals put back together and yearly files written.
CONUS404 prate period normals put back together and yearly files written.
CONUS404 rh period normals put back together and yearly files written.
CONUS404 sbcape period normals put back together and yearly files written.
CONUS404 t2 period normals put back together and yearly files written.
CONUS404 tp period normals put back together and yearly files written.
CONUS404 u10 period normals put back together and yearly files written.
CONUS404 v10 period normals put back together and yearly files written.
CONUS404 vpd period normals put back together and yearly files written.
CONUS404 vsm period normals put back together and yearly fi

In [10]:
# Reassemble the quadrant anomalies of each variable and split the combined
# field into one NetCDF file per calendar year.
for var in vars:

    # 2x2 grid of quadrant datasets, loaded in the order q00, q01, q10, q11.
    # The first digit selects the latitude block, the second the longitude block.
    quadrants = [
        [xr.load_dataset(f'temp/{model}_{var}_Abs_q{row}{col}_anomaly.nc') for col in '01']
        for row in '01'
    ]

    merged = xr.combine_nested(quadrants, concat_dim=['latitude', 'longitude'])

    # The pieces are no longer needed once the full field exists.
    del quadrants

    for yr, yearly in merged.groupby('time.year'):
        out_path = f'{filepath}Anomaly/{var}{filebase}Abs_anom_{yr}.nc'
        yearly.to_netcdf(out_path, encoding=encoding)

    print(f'CONUS404 {var} anomalies put back together and yearly files written.')

    del merged


CONUS404 d2 anomalies put back together and yearly files written.
CONUS404 ffwi anomalies put back together and yearly files written.
CONUS404 hdw anomalies put back together and yearly files written.
CONUS404 mlcape anomalies put back together and yearly files written.
CONUS404 pbl anomalies put back together and yearly files written.
CONUS404 prate anomalies put back together and yearly files written.
CONUS404 rh anomalies put back together and yearly files written.
CONUS404 sbcape anomalies put back together and yearly files written.
CONUS404 t2 anomalies put back together and yearly files written.
CONUS404 tp anomalies put back together and yearly files written.
CONUS404 u10 anomalies put back together and yearly files written.
CONUS404 v10 anomalies put back together and yearly files written.
CONUS404 vpd anomalies put back together and yearly files written.
CONUS404 vsm anomalies put back together and yearly files written.
CONUS404 wd anomalies put back together and yearly files 

### UFS_S2S Lead

In [None]:
# ---- Configuration for the UFS_S2S lead-based forecast files ----

# Model name; also embedded in the input directory and in the file-name stem.
model    = 'UFS_S2S'
filepath = f'../database_files_final/{model}/LEAD/'
filebase = f'_{model}_FORECAST_'

# Passed as `k` to compute_harmonics_xr in the processing loop.
k = 10

# Variables to process (one input file per variable / datatype / lead).
vars = [
    'cape', 'ffwi', 'gust',
    'hdw',  'prate', 'rh',
    't2',   'u10',  'v10',
    'vpd',  'vsm',  'wd',
    'ws',
]

# Sub-directories and file-name components iterated over by the processing loop.
prototypes = ['5', '6', '7', '8', 'MPM']
datatypes  = ['Abs', 'AVG', 'MIN', 'MAX']
leadtypes  = ['day', 'week']

In [None]:
# Apply the harmonic analysis to every UFS_S2S lead file
# (prototype x lead type x variable x lead x datatype) and write the period
# normal, anomaly and harmonic parameters. Combinations whose outputs already
# exist are skipped, so the cell can be re-run after an interruption.

# Number of lead steps per lead type: daily leads 00-35, weekly leads 00-05.
lead_counts = {'day': 36, 'week': 6}

for prototype in prototypes:
    for leadtype in leadtypes:
        # The lookup raises KeyError for an unexpected lead type instead of
        # silently reusing the previous iteration's `leads`.
        leads = [f'{i:02d}' for i in range(lead_counts[leadtype])]
        for var in vars:
            for lead in leads:
                for datatype in datatypes:
                    file_stem = f'{var}{filebase}{datatype}_lead{lead}'
                    periodnorm_file = f'{filepath}Period_Normal/{prototype}/{leadtype}/{file_stem}_periodnorm.nc'
                    anomaly_file    = f'{filepath}Anomaly/{prototype}/{leadtype}/{file_stem}_anom.nc'
                    params_file     = f'../harmonics_parameters/{model}/LEAD/{prototype}/{leadtype}/{var}_{model}_{datatype}_lead{lead}_harmonics_parameters.nc'

                    # Skip only when all three outputs exist: a run interrupted
                    # after the period normal was written would otherwise never
                    # produce the missing anomaly / parameter files.
                    if all(os.path.isfile(out_file) for out_file in (periodnorm_file, anomaly_file, params_file)):
                        print(f'{model} Prototype {prototype} {var} {leadtype}{lead} {datatype} already written. Skipping...')
                    else:
                        raw_df = xr.load_dataset(f'{filepath}{prototype}/{leadtype}/{file_stem}.nc', engine='netcdf4')

                        print(f'Data loaded, applying harmonics to: {model} Prototype {prototype} {var} {leadtype}{lead} {datatype}... ')

                        period_normal, anomaly, harmonics_parameters = compute_harmonics_xr(raw_df[var], k=k)

                        # One timestamp so both products carry the same creation date.
                        creation_stamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

                        period_normal.attrs = raw_df.attrs.copy()
                        anomaly.attrs       = raw_df.attrs.copy()

                        period_normal.attrs.update({'File_creation_date': creation_stamp})
                        anomaly.attrs.update({'File_creation_date': creation_stamp})

                        period_normal.to_netcdf(periodnorm_file, encoding=encoding)
                        anomaly.to_netcdf(anomaly_file, encoding=encoding)
                        harmonics_parameters.to_netcdf(params_file)

                        print('Files written. Complete!')