<a name="top"></a>
<div style="width:1000px">

<div style="float:right; width:98px; height:98px;">
<img src="https://cdn.miami.edu/_assets-common/images/system/um-logo-gray-bg.png" alt="Miami Logo" style="height: 98px;">
</div>

<div style="float:right; width:98px; height:98px;">
<img src="https://media.licdn.com/dms/image/C4E0BAQFlOZSAJABP4w/company-logo_200_200/0/1548285168598?e=2147483647&v=beta&t=g4jl8rEhB7HLJuNZhU6OkJWHW4cul_y9Kj_aoD7p0_Y" alt="STI Logo" style="height: 98px;">
</div>


<h1>Calculate the Vapor Pressure Deficit for Each Model and Timestep</h1>
By: Kayla Besong, PhD
    <br>
Last Edited: 11/22/23
<br>
<br>    
<br>
Takes the downloaded models/variables and calculates the vapor pressure deficit. The vapor pressure deficit calculation is in File_concat_mod_functions.ipynb, as is the function that computes the 24HR AVG, MIN, MAX outputs. 
<br>
<br>
RH is also calculated and saved for models that only had dewpoint. 
<div style="clear:both"></div>
</div>

<hr style="height:2px;">

## Import needed libraries, etc.

In [1]:
import matplotlib.pyplot as plt
import numpy as np
import xarray as xr
import pandas as pd
from dask.distributed import Client, LocalCluster
import dask.array as da
import os
import glob
from metpy.calc import relative_humidity_from_dewpoint, saturation_vapor_pressure
from metpy.units import units
import math

In [2]:
import warnings

# Disable pandas' chained-assignment check for this session.
pd.set_option('mode.chained_assignment', None)

# Suppress every warning category for the rest of the session.
# NOTE(review): this also hides deprecation and numerical warnings from
# xarray/MetPy; consider narrowing the filter if results look suspicious.
warnings.filterwarnings('ignore')
warnings.simplefilter('ignore')

## OPTIONAL: Establish a dask client. This is a lot of data.

In [None]:
# Local dask cluster: 8 worker processes with 4 threads each.
# NOTE(review): memory_limit looks like it is applied per worker, so this may
# request up to 8 x 30GB in total -- confirm against the host before running.
Cluster = LocalCluster(
    n_workers=8,
    threads_per_worker=4,
    memory_limit='30GB',
    processes=True,
)
# Alternative that lets dask choose its own defaults:
#Cluster = LocalCluster()

In [None]:
# Attach a distributed client to the cluster built in the previous cell; the
# bare `client` expression displays its summary as the cell output.
client = Client(Cluster)
client

### The integral notebook of functions to run

In [3]:
# Execute the helper notebook in this kernel. Per the notebook header it holds
# the vapor pressure deficit calculation and the 24HR AVG/MIN/MAX function;
# the other helpers called below (return_concat, get_filename, ...) are not
# defined in this notebook and presumably come from it as well.
%run File_concat_mod_functions.ipynb

The main function to run is:

    return_concat(model, variable, prototype = None)

The `prototype` argument is only needed when the model is UFS_S2S. 

# These are the available options the functions are designed for

Not all variables will be available from all models but most are. 

In [None]:
# Model names the helper functions are designed for.
model_options = [
    'CONUS404',
    'ERA5',
    'HRRR',
    'NAM',
    'NARR',
    'NCEP',
    'UFS_S2S',
]

# Variable keys the helper functions are designed for (not every model
# provides every variable).
variable_options = [
    'PBL',
    'CAPE',
    'SOILM',
    'WIND',
    'PRECIP',
    'TEMP',
    'RH',
]

In [None]:
# Root directory (relative to the working directory) that is passed to
# resampler_regular_vars and searched for the per-model input files below.
output_dir = 'database_files'

## CONUS404

In [None]:
%%time

conus404_d2m = return_concat('CONUS404', 'RH'))
conus404_t2m = return_concat('CONUS404', 'TEMP'))

In [None]:
# Extract the T2 / TD2 fields and subtract 273.15 (presumably Kelvin ->
# degrees Celsius; the results are tagged as degC in the next cell).
# Run once per load: each name is rebound from the loaded dataset to the
# extracted field, so re-running this cell without reloading will fail.
conus404_t2m = conus404_t2m['T2'] - 273.15
conus404_d2m = conus404_d2m['TD2'] - 273.15

In [None]:
# Relative humidity from temperature and dewpoint (both tagged as degC), with
# the pint units stripped again and the result wrapped in a Dataset whose
# single variable is named 'rh'.
conus404_rh = (
    relative_humidity_from_dewpoint(
        conus404_t2m * units.degC,
        conus404_d2m * units.degC,
    )
    .metpy.dequantify()
    .to_dataset(name='rh')
)
conus404_rh

In [None]:
# Vapor pressure deficit from the temperature field and the 'rh' variable of
# the Dataset built in the previous cell.
conus404_vpd = vapor_pressure_deficit(conus404_t2m, conus404_rh['rh'])

Get min, max, absolute for the new variable

In [None]:
%%time
# Pass the VPD field, labelled 'vpd', to resampler_regular_vars for CONUS404.
# Presumably this is the 24HR AVG/MIN/MAX step described in the header and
# writes under output_dir -- confirm in File_concat_mod_functions.ipynb.
resampler_regular_vars('vpd', conus404_vpd, output_dir, 'CONUS404')

In [None]:
# Same resampling call for the derived relative humidity Dataset.
resampler_regular_vars('rh', conus404_rh, output_dir, 'CONUS404')

## ERA5
expected timesteps = 1460

In [None]:
%%time

era_d2m = return_concat('ERA5', 'RH'))
era_t2m = return_concat('ERA5', 'TEMP'))

era_d2m = era_d2m.d2m - 273.15
era_t2m = era_t2m.t2m - 273.15

In [None]:
# Relative humidity from temperature and dewpoint (both tagged as degC), with
# the pint units stripped again and the result wrapped in a Dataset whose
# single variable is named 'rh'.
era_rh = (
    relative_humidity_from_dewpoint(
        era_t2m * units.degC,
        era_d2m * units.degC,
    )
    .metpy.dequantify()
    .to_dataset(name='rh')
)
era_rh

In [None]:
# era_rh was already converted to a Dataset in the previous cell. xarray
# Datasets have no to_dataset method, so repeating the conversion here raised
# AttributeError; this cell now only displays the Dataset.
era_rh

In [None]:
# Vapor pressure deficit from the temperature field and the 'rh' variable.
# era_rh is a Dataset, so select its 'rh' DataArray explicitly; this matches
# how the CONUS404 and HRRR sections call vapor_pressure_deficit.
vpd_era = vapor_pressure_deficit(era_t2m, era_rh['rh'])

Get min, max, absolute for the new variable

In [None]:
%%time
# Pass the VPD field, labelled 'vpd', to resampler_regular_vars for ERA5.
# Presumably this is the 24HR AVG/MIN/MAX step described in the header and
# writes under output_dir -- confirm in File_concat_mod_functions.ipynb.
resampler_regular_vars('vpd', vpd_era, output_dir, 'ERA5')

In [None]:
# Same resampling call for the derived relative humidity Dataset.
resampler_regular_vars('rh', era_rh, output_dir, 'ERA5')

## HRRR
expected timesteps = 1460

In [None]:
# Model key used by the HRRR setup cells below (directory and file lookup).
model = 'HRRR'

In [None]:
# model_list collects one sorted list of file paths per variable.
model_list = []
# Directory that is searched for this model's files.
parent_dir = f'{output_dir}/{model}'

In [None]:
# Variable keys needed for VPD, in the order the main loop expects:
# index 0 = temperature files, index 1 = humidity/dewpoint files.
vpd_variable_options = ['TEMP', 'RH']
# Filled by the next cell with the names returned by get_model_var_database.
model_vars = []

In [None]:
# For each variable key, look up the name returned by get_model_var_database
# and gather the matching '*_Abs_*.nc' files, sorted by path.
# NOTE: this cell appends to model_vars/model_list, so re-run the two
# initialisation cells above before executing it a second time.
for v in vpd_variable_options:
    v = get_model_var_database(model, v)
    model_vars.append(v)

    pattern = os.path.join(parent_dir, f'{v}_{get_filename(model)}_Abs_*.nc')
    matches = glob.glob(pattern)
    model_list.append(sorted(matches))


In [None]:
# Display the variable names collected for this model.
model_vars

In [None]:
# Sanity check: every variable should have the same number of files.
file_counts = {len(files) for files in model_list}
if len(file_counts) > 1:
    print('the number of years for each variable are not the same')

In [None]:
# Display the per-variable file lists for a visual check.
model_list

In [None]:
%%time

for temp,rh in zip(model_list[0], model_list[1]):
    if int(temp[-7:-3]) != int(rh[-7:-3]):
        print('the years for each variable are not aligned, rh')
    else:
        print(temp, rh)

        tempt = xr.open_dataset(temp)-273.15
        d2m = xr.open_dataset(rh)-273.15
        
        print('starting RH')
        hrrr_rh = relative_humidity_from_dewpoint(tempt.t2m*units.degC, d2m.d2m*units.degC).metpy.dequantify()
        hrrr_rh = hrrr_rh.to_dataset(name = 'rh')
        
        print('starting vpd')
        hrrr_vpd = vapor_pressure_deficit(tempt.t2m, hrrr_rh.rh)

        print('starting resample')        
        resampler_regular_vars('vpd', hrrr_vpd.chunk(get_chunk_database('HRRR')), output_dir, 'HRRR')

## NAM
expected timesteps = 1460

In [None]:
# Model key used by the NAM setup cells below (directory and file lookup).
model = 'NAM'

In [None]:
# model_list collects one sorted list of file paths per variable.
model_list = []
# Directory that is searched for this model's files.
parent_dir = f'{output_dir}/{model}'

In [None]:
# Variable keys needed for VPD, in the order the main loop expects:
# index 0 = temperature files, index 1 = relative humidity files.
vpd_variable_options = ['TEMP', 'RH']
# Filled by the next cell with the names returned by get_model_var_database.
model_vars = []

In [None]:
# For each variable key, look up the name returned by get_model_var_database
# and gather the matching '*_Abs_*.nc' files, sorted by path.
# NOTE: this cell appends to model_vars/model_list, so re-run the two
# initialisation cells above before executing it a second time.
for v in vpd_variable_options:
    v = get_model_var_database(model, v)
    model_vars.append(v)

    pattern = os.path.join(parent_dir, f'{v}_{get_filename(model)}_Abs_*.nc')
    matches = glob.glob(pattern)
    model_list.append(sorted(matches))


In [None]:
# Display the variable names collected for this model.
model_vars

In [None]:
# Sanity check: every variable should have the same number of files.
file_counts = {len(files) for files in model_list}
if len(file_counts) > 1:
    print('the number of years for each variable are not the same')

In [None]:
%%time

for temp,rh in zip(model_list[0], model_list[1]):
    if int(temp[-7:-3]) != int(rh[-7:-3]):
        print('the years for each variable are not aligned, rh')
    else:
        print(temp, rh)

        tempt = xr.open_dataset(temp)-273.15
        rht = xr.open_dataset(rh)/100.

        rh_times = rht.time.values
        matching_indices_1 = [i for i, t in enumerate(tempt.time.values) if t in rh_times]

        tempt = tempt.isel(time=matching_indices_1)
        
        print('starting vpd')
        nam_vpd = vapor_pressure_deficit(tempt.t2m, rht.r)

        print('starting resample')
        
        resampler_regular_vars('vpd', nam_vpd, output_dir, 'NAM')

## NARR
expected timesteps = 1460

In [None]:
%%time

narr_d2m = return_concat('NARR', 'RH'))
narr_t2m = return_concat('NARR', 'TEMP'))

In [None]:
# Temperature at the 2.0 level of height_above_ground, minus 273.15
# (presumably Kelvin -> degrees Celsius).
# Run once per load: narr_t2m is rebound from the loaded dataset to the field.
narr_t2m = (
    narr_t2m['Temperature_height_above_ground']
    .sel(height_above_ground=2.0)
    - 273.15
)

# Relative humidity at the first height_above_ground1 level, divided by 100
# (presumably percent -> fraction).
narr_rh = (
    narr_d2m['Relative_humidity_height_above_ground']
    .isel(height_above_ground1=0)
    / 100.
)

In [None]:
# Vapor pressure deficit from the temperature and relative humidity fields
# selected in the previous cell.
narr_vpd = vapor_pressure_deficit(narr_t2m, narr_rh)

Get min, max, absolute for the new variable

In [None]:
%%time
# Pass the VPD field, labelled 'vpd', to resampler_regular_vars for NARR.
# Presumably this is the 24HR AVG/MIN/MAX step described in the header and
# writes under output_dir -- confirm in File_concat_mod_functions.ipynb.
resampler_regular_vars('vpd', narr_vpd, output_dir, 'NARR')

## NCEP
expected timesteps = 1460

In [None]:
%%time

ncep_d2m = return_concat('NCEP', 'RH')
ncep_t2m = return_concat('NCEP', 'TEMP')

In [None]:
# Air temperature minus 273.15 (presumably Kelvin -> degrees Celsius).
# Run once per load: ncep_t2m is rebound from the loaded dataset to the field.
ncep_t2m = ncep_t2m['air'] - 273.15

# Relative humidity divided by 100 (presumably percent -> fraction).
ncep_rh = ncep_d2m['rhum'] / 100.

In [None]:
# Vapor pressure deficit from the temperature and relative humidity fields
# prepared in the previous cell.
ncep_vpd = vapor_pressure_deficit(ncep_t2m, ncep_rh)

Get min, max, absolute for the new variable

In [None]:
%%time
# Pass the VPD field, labelled 'vpd', to resampler_regular_vars for NCEP.
# Presumably this is the 24HR AVG/MIN/MAX step described in the header and
# writes under output_dir -- confirm in File_concat_mod_functions.ipynb.
resampler_regular_vars('vpd', ncep_vpd, output_dir, 'NCEP')