# Data Reader and Converter

## Imports

In [1]:
from pathlib import Path

import numpy as np
import xarray as xr
from scipy.interpolate import griddata
from netCDF4 import Dataset

## Settings

In [2]:
# Input and output folders, resolved against the current working directory.
PATH = Path.cwd() / '../Data/NOAA/Atmospherical_Conditions'
PRESSURE_PATH = Path.cwd() / '../Data/NOAA/Pressure_Conditions'
INTERPOLATIONS_PATH = Path.cwd() / '../Data/Interpolations'

# Year that appears in the NOAA file names.
YEAR = '2020'

# NOAA input files, one per variable prefix.
FILES = [f'{prefix}.{YEAR}.nc' for prefix in ('uwnd', 'vwnd', 'air', 'rhum')]

In [6]:
# Number of grid points per axis for SPEEDY and NOAA
SPEEDY_LON = 96
SPEEDY_LAT = 48
SPEEDY_LVL = 7

NOAA_LON = 144
NOAA_LAT = 73

# NOAA default pressure levels
PRESSURE_LEVELS_VALUES = [925, 850, 700, 500, 300, 200, 100]

# SPEEDY grid: evenly spaced longitudes (3.75 degree step) and an explicit
# list of latitudes, written from south to north.
X_speedy_lon = np.linspace(0, 360 - 3.75, SPEEDY_LON)
Y_speedy_lat = np.array(
    (
        "-87.159 -83.479 -79.777 -76.070 -72.362 -68.652 -64.942 -61.232 "
        "-57.521 -53.810 -50.099 -46.389 -42.678 -38.967 -35.256 -31.545 "
        "-27.833 -24.122 -20.411 -16.700 -12.989 -9.278 -5.567 -1.856 "
        "1.856 5.567 9.278 12.989 16.700 20.411 24.122 27.833 "
        "31.545 35.256 38.967 42.678 46.389 50.099 53.810 57.521 "
        "61.232 64.942 68.652 72.362 76.070 79.777 83.479 87.159"
    ).split(" ")
).astype(np.float32)

# NOAA grid: evenly spaced longitudes (2.5 degree step); latitude goes from
# north to south.
X_noaa_lon = np.linspace(0, 360 - 2.5, NOAA_LON)
Y_noaa_lat = np.linspace(90, -90, NOAA_LAT)
X_grid_noaa, Y_grid_noaa = np.meshgrid(X_noaa_lon, Y_noaa_lat)

# One interpolated variable per NOAA input file
INTERPOLATION_VARIABLES = len(FILES)

# Temporal setting. TIME is HH:MM:SS in 24-hour format.
DATE = '2020-07-01'
TIME = '00:00:00'
DATETIME = f'{DATE}T{TIME}'
# File-name stem: the date without dashes followed by the two-digit hour
FILENAME = ''.join(DATE.split('-')) + TIME[:2]

# If True, relative humidity is converted to specific humidity.
# If False, relative humidity is given as is.
IS_CONVERTION_REQUIRED = True

# If True, data is also converted into GRD format, in addition to the netCDF
# files created. The atmospherical variables go in one netCDF file and the
# pressure in another.
SAVE_AS_GRD = True

# NOAA conversion

Takes NOAA's per-variable files and joins them into a single netCDF file.

In [13]:
def read_data(variable, file):
    '''
    Opens a NOAA nc file and selects one variable at the configured
    pressure levels and timestamp.

    Parameters
    ----------
        variable : name of the variable to extract from the file.
        file : name of the file, located inside PATH, that holds the variable.

    Returns
    -------
        selection : xarray object for `variable`, restricted to the levels in
            PRESSURE_LEVELS_VALUES and to the time DATETIME.
    '''
    dataset = xr.open_dataset(PATH / file)
    selection = {'level': PRESSURE_LEVELS_VALUES, 'time': DATETIME}
    return dataset[variable].sel(**selection)


def relative2specific(T, RH, p):
    '''
    Converts relative humidity to specific humidity.

    The saturation vapour pressure is computed with the Arden Buck equation,
    the vapour pressure is scaled by the relative humidity, and the resulting
    mixing ratio w is turned into specific humidity w/(w+1).

    The inputs are never modified: all unit conversions are done on new
    objects, so arrays passed by the caller keep their original values and
    integer arrays are accepted.

    Parameters
    ----------
        T : Temperature in K.
        RH : Relative humidity in percentage [0,100].
        p : Pressure in mbar.

    Returns
    -------
        specific humidity (dimensionless)

    '''
    # Plain Python sequences do not support element-wise arithmetic
    # (list * 100 repeats the list), so turn them into arrays first.
    # Other inputs (scalars, numpy arrays, xarray objects) are used as given.
    T, RH, p = (
        np.asarray(x, dtype=float) if isinstance(x, (list, tuple)) else x
        for x in (T, RH, p)
    )

    # Unit conversions on fresh objects (no in-place operators).
    T_celsius = T - 273.15   # K -> degrees Celsius
    p_pa = p * 100           # mbar -> Pa
    RH_fraction = RH / 100   # percentage -> fraction

    # Buck equation for saturation vapour pressure in Pa; the first
    # coefficient inside the exponential is 18.678.
    e_s = 611.21*np.exp(
        (18.678 - T_celsius/234.5)*(T_celsius/(T_celsius + 257.14)))
    e = e_s*RH_fraction
    # Mixing ratio; 287.058/461.5 is the ratio of the gas constants of dry
    # air and water vapour.
    w = 287.058/461.5*e/(p_pa - e)
    return w/(w + 1)

In [8]:
# Maps each NOAA variable name to a (level, lat, lon) array of its values.
atmospherical_variables = {}

for file in FILES:
    # The variable name is the part of the file name before the first dot.
    variable = file.partition(".")[0]
    variable_array = read_data(variable, file)

    # Copy the selected data into a plain numpy array, one level at a time.
    variable_values_by_level = np.zeros((SPEEDY_LVL, NOAA_LAT, NOAA_LON))
    for index_pressure_level, pressure in enumerate(PRESSURE_LEVELS_VALUES):
        level_values = variable_array.sel(level=pressure).values
        variable_values_by_level[index_pressure_level] = level_values

    atmospherical_variables[variable] = variable_values_by_level

In [None]:
pressure_to_netcdf = {}

# Pair every variable array with its dimension names, as xr.Dataset expects.
atmospherical_variables_to_netcdf = {
    name: (("level", "lat", "lon"), values)
    for name, values in atmospherical_variables.items()
}

## Save Dataset

The datasets (atmospheric variables and pressure) are stored separately.

In [None]:
# NOAA variables on the NOAA grid, with the pressure levels as a coordinate.
atmospherical_dataset = xr.Dataset(
    data_vars=atmospherical_variables_to_netcdf,
    coords={
        "level": PRESSURE_LEVELS_VALUES,
        "lat": Y_noaa_lat,
        "lon": X_noaa_lon,
    },
    attrs={
        'long_name': '6-Hourly Sample',
        'Levels': 7,
        'dataset': 'NCEP/DOE AMIP-II Reanalysis (Reanalysis-2)',
        'level_desc': 'Surface',
        'statistic': 'Individual Obs',
    },
)

In [None]:
# Write the NOAA dataset next to the other interpolation outputs.
atmospherical_dataset.to_netcdf(
    INTERPOLATIONS_PATH / f"NOAA-{FILENAME}-atmospherical_dataset.nc")

# SPEEDY conversion

In [None]:
def read_grd(filename, shape=None):
    '''
    Reads a .grd file of big-endian 32-bit floats and splits it into fields.

    The file is read as one flat sequence. The first four consecutive chunks
    of nlev*nlat*nlon values become the 3-D fields U, V, T and SH (level
    varying slowest, longitude fastest); the following nlat*nlon values
    become the 2-D field P. Any values after that are ignored.

    Parameters
    ----------
        filename : path of the .grd file to read.
        shape : optional (nlev, nlat, nlon) tuple. When omitted, the
            module-level names nlev, nlat and nlon are used.

    Returns
    -------
        U, V, T, SH : float64 arrays of shape (nlev, nlat, nlon).
        P : float64 array of shape (nlat, nlon).

    Raises
    ------
        IndexError : if the file holds fewer values than the fields require.
    '''
    if shape is None:
        # Fall back to the grid sizes defined at notebook level.
        shape = (nlev, nlat, nlon)
    n_lev, n_lat, n_lon = shape

    # The context manager guarantees the file handle is closed.
    with open(filename, "rb") as infile:
        data = np.fromfile(infile, '>f4')

    size_3d = n_lev * n_lat * n_lon
    size_2d = n_lat * n_lon
    expected = 4 * size_3d + size_2d
    if data.size < expected:
        raise IndexError(
            f"{filename} holds {data.size} values, "
            f"but {expected} are needed for shape {tuple(shape)}")

    # Slice and reshape instead of copying element by element; the row-major
    # reshape matches the level/lat/lon loop order of the file layout.
    # astype(float64) yields independent, native-endian arrays.
    U, V, T, SH = (
        data[k * size_3d:(k + 1) * size_3d]
        .reshape(n_lev, n_lat, n_lon)
        .astype(np.float64)
        for k in range(4)
    )
    P = data[4 * size_3d:expected].reshape(n_lat, n_lon).astype(np.float64)
    return U, V, T, SH, P

In [None]:
# Folder with the SPEEDY .grd files; only the bare file names are kept.
# NOTE(review): rglob also descends into sub-folders and its order is not
# sorted here, so a name found in a sub-folder would not resolve against
# FORECASTED_PATH and index 2 may vary between runs. Confirm this is intended.
FORECASTED_PATH = Path.cwd() / '../Data/SPEEDY'
FORECASTED_FILES = [grd_file.name for grd_file in FORECASTED_PATH.rglob('*.grd')]

# Grid sizes that read_grd picks up as globals.
nlon, nlat, nlev = 96, 48, 7

# Read the third file found and unpack its five fields.
U_SPEEDY, V_SPEEDY, T_SPEEDY, SH_SPEEDY, P_SPEEDY = read_grd(
    FORECASTED_PATH / FORECASTED_FILES[2])

variables_SPEEDY = {
    'uwnd': U_SPEEDY,
    'vwnd': V_SPEEDY,
    'temperature': T_SPEEDY,
    'shum': SH_SPEEDY,
    'pres': P_SPEEDY,
}

In [None]:
# Take the 2-D pressure field out; the entries left are the 3-D fields.
pressure = variables_SPEEDY.pop('pres', None)

# Pair every remaining field with its dimension names, as xr.Dataset expects.
SPEEDY_atmospherical_variables_to_netcdf = {
    name: (("level", "lat", "lon"), field)
    for name, field in variables_SPEEDY.items()
}

In [None]:
# SPEEDY 3-D fields on the SPEEDY grid, with the pressure levels as a coordinate.
speedy_atmospherical_dataset = xr.Dataset(
    data_vars=SPEEDY_atmospherical_variables_to_netcdf,
    coords={
        "level": PRESSURE_LEVELS_VALUES,
        "lat": Y_speedy_lat,
        "lon": X_speedy_lon,
    },
    attrs={
        'long_name': '6-Hourly Sample',
        'Levels': 7,
        'dataset': 'NCEP/DOE AMIP-II Reanalysis (Reanalysis-2)',
        'level_desc': 'Surface',
        'statistic': 'Individual Obs',
    },
)

In [None]:
# Bare expression: in a notebook cell this displays the dataset summary.
speedy_atmospherical_dataset

In [None]:
# The pressure field is 2-D (lat, lon), so it gets a dataset of its own.
SPEEDY_pressure_to_netcdf = {'pres': (("lat", "lon"), pressure)}

SPEEDY_pressure_dataset = xr.Dataset(
    data_vars=SPEEDY_pressure_to_netcdf,
    coords={
        "lat": Y_speedy_lat,
        "lon": X_speedy_lon,
    },
    attrs={
        'long_name': '6-Hourly Pressure at Surface',
        'Levels': 1,
        'units': 'Pascals',
        'precision': -1,
        'GRIB_id': 1,
        'GRIB_name': 'PRES',
        'var_desc': 'Pressure',
        'dataset': 'NCEP/DOE AMIP-II Reanalysis (Reanalysis-2)',
        'level_desc': 'Surface',
        'statistic': 'Individual Obs',
        'parent_stat': 'Other',
        'standard_name': 'pressure',
    },
)

In [None]:
# Bare expression: in a notebook cell this displays the dataset summary.
SPEEDY_pressure_dataset

In [None]:
# Write the two SPEEDY datasets as separate netCDF files.
speedy_atmospherical_dataset.to_netcdf(
    INTERPOLATIONS_PATH / f"SPEEDY-{FILENAME}-atmospherical_dataset.nc")
SPEEDY_pressure_dataset.to_netcdf(
    INTERPOLATIONS_PATH / f"SPEEDY-{FILENAME}-pressure_dataset.nc")