<a name="top"></a>
<div style="width:1000 px">

<div style="float:right; width:98 px; height:98px;">
<img src="https://cdn.miami.edu/_assets-common/images/system/um-logo-gray-bg.png" alt="Miami Logo" style="height: 98px;">
</div>

<div style="float:right; width:98 px; height:98px;">
<img src="https://media.licdn.com/dms/image/C4E0BAQFlOZSAJABP4w/company-logo_200_200/0/1548285168598?e=2147483647&v=beta&t=g4jl8rEhB7HLJuNZhU6OkJWHW4cul_y9Kj_aoD7p0_Y" alt="STI Logo" style="height: 98px;">
</div>


<h1>Compute Harmonics on the sFWRD Database</h1>
By: Tyler M. Fenske
    <br>
Last Edited: 2024-02-01
<br>
<br>    
<br>
This notebook splits each yearly variable file into 4 distinct quadrant files, because the original files are too large to concatenate together and apply the harmonics to.<br>    
<br>
Note: This notebook should be used in conjunction with Harmonics_Applications.ipynb. This notebook is needed for that one to run properly without memory errors.
<br>    
<br>
<div style="clear:both"></div>
</div>

<hr style="height:2px;">

### Imports & Functions

In [2]:
import matplotlib.pyplot as plt
import numpy as np
import xarray as xr
import pandas as pd
import os
import glob

In [3]:
%run File_concat_mod_functions.ipynb
#include many of the existing functions to handle the NOAA S2S database

In [4]:
def open_database_abs_file_xr(model, var, year):
    '''Open one yearly "Abs" database file and return the requested variable.
       model : one of the six available model outputs in the database (does not work for UFS);
               selects the sub-directory and, through get_filename, the base of the file name
       var   : any var present in the database; used in the file name and to pick the
               variable out of the opened dataset
       year  : year of the file to open (converted with str)

       Returns the xarray object stored under `var` (not a pandas dataframe).'''

    directory = f'/raid60B/s2sfire/NOAA_S2S/database_files/{model}/'
    filename  = f'{var}_{get_filename(model)}_Abs_{str(year)}.nc'

    dataset = xr.open_dataset(directory + filename, decode_times=True)
    return dataset[var]

In [5]:
def open_database_abs_split_file_xr(model, var, year, q):
    '''Open one quadrant file from a model's SplitFiles directory and return the requested variable.
       model : one of the six available model outputs in the database (does not work for UFS);
               selects the sub-directory and, through get_filename, the base of the file name
       var   : any var present in the database; used in the file name and to pick the
               variable out of the opened dataset
       year  : year of the file to open (converted with str)
       q     : quadrant label placed after "quad" in the file name

       Returns the xarray object stored under `var` (not a pandas dataframe).'''

    directory = f'/raid60B/s2sfire/NOAA_S2S/database_files/{model}/SplitFiles/'
    filename  = f'{var}_{get_filename(model)}_Abs_{str(year)}_quad{q}.nc'

    dataset = xr.open_dataset(directory + filename, decode_times=True)
    return dataset[var]

In [6]:
def open_database_file_xr(model, var, year, stat):
    '''Open one yearly "Daily" statistic file and return the requested variable.
       model : one of the six available model outputs in the database (does not work for UFS);
               selects the sub-directory and, through get_filename, the base of the file name
       var   : any var present in the database; used in the file name and to pick the
               variable out of the opened dataset
       year  : year of the file to open (converted with str)
       stat  : statistic label; it is appended directly to var in the file name,
               with no separator between the two

       Returns the xarray object stored under `var` (not a pandas dataframe).'''

    directory = f'/raid60B/s2sfire/NOAA_S2S/database_files/{model}/'
    filename  = f'{var}{stat}_{get_filename(model)}_Daily_{str(year)}.nc'

    dataset = xr.open_dataset(directory + filename, decode_times=True)
    return dataset[var]

In [7]:
def open_conus404_twicesplit_file(var, type, quad):
    '''Open one twice-split CONUS404 full-period file as a float32 dataset.
       var  : variable name at the start of the file name
       type : sub-directory name (this notebook passes 'Anoms' or 'Climos');
              its lower-cased form also appears in the file name
       quad : quadrant label placed after "quad" in the file name

       Returns the whole opened dataset after astype('float32').'''

    folder   = f'../database_files/CONUS404/SplitFiles/TwiceSplit/{type}/'
    filename = f'{var}_CONUS404_ANALYSIS_Abs_{type.lower()}_full_period_quad{quad}.nc'

    return xr.open_dataset(folder + filename, decode_times=True).astype('float32')

def open_conus404_split_stat_file(var, type, stat, quad):
    '''Open one split CONUS404 daily-statistic full-period file as a float32 dataset.
       var  : variable name at the start of the file name
       type : sub-directory name (this notebook passes 'Climos');
              its lower-cased form also appears in the file name
       stat : statistic label placed after var in the file name
       quad : quadrant label placed after "quad" in the file name

       Returns the whole opened dataset after astype('float32').'''

    folder   = f'../database_files/CONUS404/SplitStatFiles/{type}/'
    filename = f'{var}_{stat}_CONUS404_ANALYSIS_Daily_{type.lower()}_full_period_quad{quad}.nc'

    return xr.open_dataset(folder + filename, decode_times=True).astype('float32')

In [8]:
def open_hrrr_twicesplit_file(var, type, quad):
    '''Open one twice-split HRRR full-period file as a float32 dataset.
       var  : variable name at the start of the file name
       type : sub-directory name (this notebook passes 'Anoms' or 'Climos');
              its lower-cased form also appears in the file name
       quad : quadrant label placed after "quad" in the file name

       latitude and longitude are replaced by their [0, :, :] slice, i.e. only
       index 0 along their first dimension is kept.
       Returns the dataset after astype('float32').'''

    folder   = f'../database_files/HRRR/SplitFiles/TwiceSplit/{type}/'
    filename = f'{var}_HRRR_HISTORICAL_Abs_{type.lower()}_full_period_quad{quad}.nc'

    dataset = xr.open_dataset(folder + filename, decode_times=True).astype('float32')
    for coord_name in ('latitude', 'longitude'):
        dataset[coord_name] = dataset[coord_name][0, :, :]
    return dataset.astype('float32')

def open_hrrr_split_stat_file(var, type, stat, quad):
    '''Open one split HRRR daily-statistic full-period file as a float32 dataset.
       var  : variable name at the start of the file name
       type : sub-directory name (this notebook passes 'Anoms' or 'Climos');
              its lower-cased form also appears in the file name
       stat : statistic label placed after var in the file name
       quad : quadrant label placed after "quad" in the file name

       latitude and longitude are replaced by their [0, :, :] slice, i.e. only
       index 0 along their first dimension is kept.
       Returns the dataset after astype('float32').'''

    folder   = f'../database_files/HRRR/SplitStatFiles/{type}/'
    filename = f'{var}_{stat}_HRRR_HISTORICAL_Daily_{type.lower()}_full_period_quad{quad}.nc'

    dataset = xr.open_dataset(folder + filename, decode_times=True).astype('float32')
    for coord_name in ('latitude', 'longitude'):
        dataset[coord_name] = dataset[coord_name][0, :, :]
    return dataset.astype('float32')

### CONUS404 File Splitting

In [25]:
# Years, variable names, and paths needed by the CONUS404 splitting cells.
conus404_years = np.arange(2011, 2019).astype(str)   # '2011' through '2018'
conus404_var_names = [
    'ffwi',
    'hdwi',
    'MLCAPE',
    'PBLH',
    'PREC_ACC_NC',
    'rh',
    'SBCAPE',
    'SMOIS',
    'T2',
    'TD2',
    'U10',
    'V10',
    'vpd',
    'wdir',
    'wspeed',
]

# Directory that receives the first-level quadrant files.
outpath  = '/raid60B/s2sfire/NOAA_S2S/database_files/CONUS404/SplitFiles/'
# File-name base for CONUS404, as returned by get_filename.
filebase = get_filename('CONUS404')

In [None]:
# Iterate through each CONUS404 year and var file and split it into 4 quadrant files.
for var in conus404_var_names:
    for year in conus404_years:
        field = open_database_abs_file_xr('CONUS404', var, year)
        if var == 'ffwi':
            field = field.astype('float32')
        if var == 'SMOIS':
            # keep only the entry selected by soil_layers_stag=0
            field = field.sel(soil_layers_stag=0)

        # Integer midpoints of the second and third dimensions.
        mid1 = field.shape[1] // 2
        mid2 = field.shape[2] // 2

        # quad1/quad2 hold the first half of dimension 1, quad3/quad4 the second half;
        # odd-numbered quadrants hold the first half of dimension 2, even-numbered the second.
        quadrant_slices = (
            (1, slice(None, mid1), slice(None, mid2)),
            (2, slice(None, mid1), slice(mid2, None)),
            (3, slice(mid1, None), slice(None, mid2)),
            (4, slice(mid1, None), slice(mid2, None)),
        )
        for number, dim1_part, dim2_part in quadrant_slices:
            field[:, dim1_part, dim2_part].to_netcdf(
                f'{outpath}{var}_{filebase}_Abs_{str(year)}_quad{number}.nc')

In [None]:
# Directory that receives the second-level (twice-split) quadrant files.
outpath  = '/raid60B/s2sfire/NOAA_S2S/database_files/CONUS404/SplitFiles/TwiceSplit/'

# Iterate through each CONUS404 quadrant file and split it into 4 further sub files,
# labelled quad{q}1 .. quad{q}4 where q is the parent quadrant.
for var in conus404_var_names:
    for year in conus404_years:
        for q in np.arange(1, 5).astype('str'):
            field = open_database_abs_split_file_xr('CONUS404', var, year, q)

            # Integer midpoints of the second and third dimensions.
            mid1 = field.shape[1] // 2
            mid2 = field.shape[2] // 2

            quadrant_slices = (
                (1, slice(None, mid1), slice(None, mid2)),
                (2, slice(None, mid1), slice(mid2, None)),
                (3, slice(mid1, None), slice(None, mid2)),
                (4, slice(mid1, None), slice(mid2, None)),
            )
            for number, dim1_part, dim2_part in quadrant_slices:
                field[:, dim1_part, dim2_part].to_netcdf(
                    f'{outpath}{var}_{filebase}_Abs_{str(year)}_quad{q}{number}.nc')

In [29]:
# Directory that receives the quadrant files of the daily-statistic data.
outpath  = '../database_files/CONUS404/SplitStatFiles/'

# Split each CONUS404 daily-statistic file into 4 quadrant files.
# NOTE(review): the loop over conus404_var_names had been commented out in favour of
# ['SMOIS'] only -- confirm whether the full variable list should be restored.
for stat in ['AVG','MAX','MIN']:
    for var in ['SMOIS']:
        for year in conus404_years:
            field = open_database_file_xr('CONUS404', var, year, stat).astype('float32')
            if var == 'SMOIS':
                # keep only the entry selected by soil_layers_stag=0, then drop length-1 dims
                field = field.sel(soil_layers_stag=0).squeeze()

            # Integer midpoints of the second and third dimensions.
            mid1 = field.shape[1] // 2
            mid2 = field.shape[2] // 2

            quadrant_slices = (
                (1, slice(None, mid1), slice(None, mid2)),
                (2, slice(None, mid1), slice(mid2, None)),
                (3, slice(mid1, None), slice(None, mid2)),
                (4, slice(mid1, None), slice(mid2, None)),
            )
            for number, dim1_part, dim2_part in quadrant_slices:
                field[:, dim1_part, dim2_part].to_netcdf(
                    f'{outpath}{var}_{stat}_{filebase}_Daily_{str(year)}_quad{number}.nc', mode='w')

            # Drop the reference to the yearly field before the next file is opened.
            del field, quadrant_slices

### HRRR File Splitting

In [None]:
# Years, variable names, and paths needed by the HRRR splitting cells.
hrrr_years = np.arange(2014, 2019).astype(str)   # '2014' through '2018'
hrrr_var_names = [
    'blh',
    'cape',
    'd2m',
    'ffwi',
    'gust',
    'hdwi',
    'mstav',
    'prate',
    'rh',
    't2m',
    'tp',
    'u10',
    'v10',
    'vpd',
    'wdir',
    'wspeed',
]

# Directory that receives the first-level quadrant files.
outpath  = '/raid60B/s2sfire/NOAA_S2S/database_files/HRRR/SplitFiles/'
# File-name base for HRRR, as returned by get_filename.
filebase = get_filename('HRRR')

In [None]:
# Iterate through each HRRR year and var file and split it into 4 quadrant files.
for var in hrrr_var_names:
    for year in hrrr_years:
        field = open_database_abs_file_xr('HRRR', var, year)
        if var == 'ffwi':
            field = field.astype('float32')

        # Integer midpoints of the second and third dimensions.
        mid1 = field.shape[1] // 2
        mid2 = field.shape[2] // 2

        # quad1/quad2 hold the first half of dimension 1, quad3/quad4 the second half;
        # odd-numbered quadrants hold the first half of dimension 2, even-numbered the second.
        quadrant_slices = (
            (1, slice(None, mid1), slice(None, mid2)),
            (2, slice(None, mid1), slice(mid2, None)),
            (3, slice(mid1, None), slice(None, mid2)),
            (4, slice(mid1, None), slice(mid2, None)),
        )
        for number, dim1_part, dim2_part in quadrant_slices:
            field[:, dim1_part, dim2_part].to_netcdf(
                f'{outpath}{var}_{filebase}_Abs_{str(year)}_quad{number}.nc')

In [None]:
# Directory that receives the second-level (twice-split) quadrant files.
outpath  = '/raid60B/s2sfire/NOAA_S2S/database_files/HRRR/SplitFiles/TwiceSplit/'

# Iterate through each HRRR quadrant file and split it into 4 further sub files,
# labelled quad{q}1 .. quad{q}4 where q is the parent quadrant.
for var in hrrr_var_names:
    for year in hrrr_years:
        for q in np.arange(1, 5).astype('str'):
            field = open_database_abs_split_file_xr('HRRR', var, year, q)

            # Integer midpoints of the second and third dimensions.
            mid1 = field.shape[1] // 2
            mid2 = field.shape[2] // 2

            quadrant_slices = (
                (1, slice(None, mid1), slice(None, mid2)),
                (2, slice(None, mid1), slice(mid2, None)),
                (3, slice(mid1, None), slice(None, mid2)),
                (4, slice(mid1, None), slice(mid2, None)),
            )
            for number, dim1_part, dim2_part in quadrant_slices:
                field[:, dim1_part, dim2_part].to_netcdf(
                    f'{outpath}{var}_{filebase}_Abs_{str(year)}_quad{q}{number}.nc')

In [None]:
# Directory that receives the quadrant files of the daily-statistic data.
outpath  = '/raid60B/s2sfire/NOAA_S2S/database_files/HRRR/SplitStatFiles/'

# Split each HRRR daily-statistic file into 4 quadrant files.
for stat in ['AVG','MAX','MIN']:
    for var in hrrr_var_names:
        for year in hrrr_years:
            field = open_database_file_xr('HRRR', var, year, stat)

            # Integer midpoints of the second and third dimensions.
            mid1 = field.shape[1] // 2
            mid2 = field.shape[2] // 2

            quadrant_slices = (
                (1, slice(None, mid1), slice(None, mid2)),
                (2, slice(None, mid1), slice(mid2, None)),
                (3, slice(mid1, None), slice(None, mid2)),
                (4, slice(mid1, None), slice(mid2, None)),
            )
            for number, dim1_part, dim2_part in quadrant_slices:
                field[:, dim1_part, dim2_part].to_netcdf(
                    f'{outpath}{var}_{stat}_{filebase}_Daily_{str(year)}_quad{number}.nc')

            # Drop the reference to the yearly field before the next file is opened.
            del field, quadrant_slices

### CONUS404 Re-Merging

In [9]:
# Variable names used by the CONUS404 re-merging cells. This repeats the list from
# the splitting section, presumably so this section can be run on its own.
conus404_var_names = [
    'ffwi',
    'hdwi',
    'MLCAPE',
    'PBLH',
    'PREC_ACC_NC',
    'rh',
    'SBCAPE',
    'SMOIS',
    'T2',
    'TD2',
    'U10',
    'V10',
    'vpd',
    'wdir',
    'wspeed',
]

In [13]:
# Re-merge the 16 twice-split CONUS404 tiles of every variable into one full-period file.
# The loop variable is named `kind` rather than `type`: at notebook (global) scope a loop
# over `type` rebinds the builtin and breaks any later type(...) call in the kernel.
for kind in ['Anoms','Climos']:
    for var in conus404_var_names:

        # Rebuild each first-level quadrant (1-4) from its four second-level tiles.
        quadrants = []
        for outer in '1234':
            tiles = [open_conus404_twicesplit_file(var, kind, outer + inner) for inner in '1234']
            # tiles 1+2 and 3+4 are joined along west_east, then the two strips along south_north
            strip_a = xr.concat(tiles[:2], dim='west_east')
            strip_b = xr.concat(tiles[2:], dim='west_east')
            quadrants.append(xr.concat([strip_a, strip_b], dim='south_north'))
            # release the tiles before the next quadrant is loaded
            del tiles, strip_a, strip_b

        # Same layout one level up: quadrants 1+2 and 3+4 along west_east, then south_north.
        final_q12 = xr.concat(quadrants[:2], dim='west_east').astype('float32')
        final_q34 = xr.concat(quadrants[2:], dim='west_east').astype('float32')

        final_q1234 = xr.concat([final_q12, final_q34], dim='south_north')
        final_q1234.to_netcdf(
            f'../database_files/CONUS404/{kind}/{var}_CONUS404_ANALYSIS_Abs_{kind.lower()}_full_period.nc')

        del quadrants, final_q12, final_q34, final_q1234

In [14]:
# Re-merge the four quadrant files of each CONUS404 daily-statistic variable.
# The loop variable is named `kind` rather than `type`: at notebook (global) scope a loop
# over `type` rebinds the builtin and breaks any later type(...) call in the kernel.
# NOTE(review): only 'Climos' and 9 of the 15 CONUS404 variables are processed here
# (SMOIS, U10, V10, vpd, wdir and wspeed are absent) -- confirm this is intentional.
for kind in ['Climos']:
    for stat in ['AVG','MIN','MAX']:
        for var in ['ffwi','hdwi','MLCAPE','PBLH','PREC_ACC_NC','rh','SBCAPE','T2','TD2']:

            q1, q2, q3, q4 = [open_conus404_split_stat_file(var, kind, stat, label) for label in '1234']

            # quadrants 1+2 and 3+4 are joined along west_east, then the two strips along south_north
            q12 = xr.concat([q1, q2], dim='west_east')
            q34 = xr.concat([q3, q4], dim='west_east')

            q1234 = xr.concat([q12, q34], dim='south_north')

            q1234.to_netcdf(
                f'../database_files/CONUS404/{kind}/{var}_{stat}_CONUS404_ANALYSIS_Daily_{kind.lower()}_full_period.nc')

            # release everything before the next variable is loaded
            del q1, q2, q3, q4, q12, q34, q1234

### HRRR Re-Merging

In [11]:
# Variable names used by the HRRR re-merging cells. This repeats the list from
# the splitting section, presumably so this section can be run on its own.
hrrr_var_names = [
    'blh',
    'cape',
    'd2m',
    'ffwi',
    'gust',
    'hdwi',
    'mstav',
    'prate',
    'rh',
    't2m',
    'tp',
    'u10',
    'v10',
    'vpd',
    'wdir',
    'wspeed',
]

In [13]:
# Re-merge the 16 twice-split HRRR tiles of every variable into two half-domain files.
# The loop variable is named `kind` rather than `type`: at notebook (global) scope a loop
# over `type` rebinds the builtin and breaks any later type(...) call in the kernel.
for kind in ['Anoms','Climos']:
    for var in hrrr_var_names:

        # Rebuild each first-level quadrant (1-4) from its four second-level tiles.
        quadrants = []
        for outer in '1234':
            tiles = [open_hrrr_twicesplit_file(var, kind, outer + inner) for inner in '1234']
            # tiles 1+2 and 3+4 are joined along x, then the two strips along y
            strip_a = xr.concat(tiles[:2], dim='x').astype('float32')
            strip_b = xr.concat(tiles[2:], dim='x').astype('float32')
            quadrants.append(xr.concat([strip_a, strip_b], dim='y').astype('float32'))
            # release the tiles before the next quadrant is loaded
            del tiles, strip_a, strip_b

        final_q12 = xr.concat(quadrants[:2], dim='x').astype('float32')
        final_q34 = xr.concat(quadrants[2:], dim='x').astype('float32')

        # The two halves are written separately instead of being concatenated along y here.
        # NOTE(review): each half spans the full x extent and the halves are later joined
        # along 'y', so the 'western_US'/'eastern_US' labels may be misnomers -- confirm.
        stem = f'../database_files/HRRR/{kind}/{var}_HRRR_HISTORICAL_Abs_{kind.lower()}_full_period'
        final_q12.to_netcdf(f'{stem}_western_US.nc')
        final_q34.to_netcdf(f'{stem}_eastern_US.nc')
        del quadrants, final_q12, final_q34

In [16]:
# Join the two half-domain HRRR files of every variable along y into one full-period file.
# The loop variable is named `kind` rather than `type`: at notebook (global) scope a loop
# over `type` rebinds the builtin and breaks any later type(...) call in the kernel.
for kind in ['Anoms','Climos']:
    for var in hrrr_var_names:

        stem = f'../database_files/HRRR/{kind}/{var}_HRRR_HISTORICAL_Abs_{kind.lower()}_full_period'
        west = xr.open_dataset(f'{stem}_western_US.nc', decode_times=True).astype('float32')
        east = xr.open_dataset(f'{stem}_eastern_US.nc', decode_times=True).astype('float32')

        final = xr.concat([west, east], dim='y').astype('float32')
        final.to_netcdf(f'{stem}.nc')

        # Release inside the inner loop so the previous variable's data is not kept alive
        # while the next one loads (the del used to sit outside this loop, which also
        # raised NameError when hrrr_var_names was empty).
        del west, east, final

In [12]:
# Re-merge the four quadrant files of each HRRR daily-statistic variable.
# The loop variable is named `kind` rather than `type`: at notebook (global) scope a loop
# over `type` rebinds the builtin and breaks any later type(...) call in the kernel.
for kind in ['Anoms','Climos']:
    for stat in ['AVG','MIN','MAX']:
        for var in hrrr_var_names:

            q1, q2, q3, q4 = [open_hrrr_split_stat_file(var, kind, stat, label) for label in '1234']

            # quadrants 1+2 and 3+4 are joined along x, then the two strips along y
            q12 = xr.concat([q1, q2], dim='x')
            q34 = xr.concat([q3, q4], dim='x')

            q1234 = xr.concat([q12, q34], dim='y')

            q1234.to_netcdf(
                f'../database_files/HRRR/{kind}/{var}_{stat}_HRRR_HISTORICAL_Daily_{kind.lower()}_full_period.nc')

            # release everything before the next variable is loaded
            del q1, q2, q3, q4, q12, q34, q1234