In [1]:
import os

from IPython.core.display import HTML, display
import numpy as np
import pandas as pd

In [2]:
# Inject a CSS rule so the notebook's .container element is 90% wide.
display(HTML("<style>.container {width:90% !important}</style>"))

Extracting and Organizing Training Data

In [3]:
# Load the four training arrays from ./TrainingData.
# Column water vapor (cm): 361 lat (0.5 degrees) x 576 lon (0.625 degrees).
cwv_merra = np.load('./TrainingData/CWV_merra.npy')
# delta_pressure (atm): 19 vertical layers x 361 x 576; layer 0 is the surface.
# NOTE(review): the original comment never said what layer 18 is -- presumably
# the top of the atmosphere; confirm.
dp_merra = np.load('./TrainingData/DP_merra.npy')
# Volume mixing ratio: 19 x 361 x 576.
# NOTE(review): units were left blank in the original comment -- confirm.
vmrm_merra = np.load('./TrainingData/VMRM_merra.npy')
# MODISA/MERRA water vapor transmittance (unitless): 16 x 361 x 576.
h20_g_trans = np.load('./TrainingData/MODISA_MERRA_wv_tbl.npy')

In [10]:
# Sanity check: show the shape of every loaded array, in load order.
tuple(arr.shape for arr in (cwv_merra, dp_merra, vmrm_merra, h20_g_trans))

((361, 576), (19, 361, 576), (19, 361, 576), (16, 361, 576))

In [15]:
# Grid axes and labels shared by the training arrays.
# Latitude: 361 evenly spaced points from -90 to 90 (0.5-degree steps).
lat = np.linspace(start=-90, stop=90, num=361)
# Longitude: 576 evenly spaced points from -179.375 to 180 (0.625-degree steps).
# NOTE(review): confirm this matches the longitude convention of the source
# grid -- an axis starting at -180 would be shifted by one cell from this one.
lon = np.linspace(start=-179.375, stop=180, num=576)
# One label per vertical layer: 'vl_0' ... 'vl_18'.
layers = [f'vl_{i}' for i in range(19)]
# One label per transmittance band (16 in total).
wavelengths = [
    '412', '443', '469', '488',
    '531', '547', '555', '645',
    '667', '678', '748', '859',
    '869', '1240', '1640', '2130',
]

In [5]:
# Expand the 1-D axes into 2-D coordinate grids. With 'xy' indexing (the
# default) both grids have shape (len(lat), len(lon)): latitude varies along
# rows, longitude along columns.
lon_mesh, lat_mesh = np.meshgrid(lon, lat, indexing='xy')

In [6]:
# Flatten both coordinate grids to 1-D (row-major): one entry per grid cell.
lon_mesh, lat_mesh = lon_mesh.reshape(-1), lat_mesh.reshape(-1)

In [7]:
# Flatten the spatial axes so that every grid cell becomes one row.
# The 2-D field simply takes on the shape of the (already flattened) lon_mesh.
cwv_merra_ = np.reshape(cwv_merra, lon_mesh.shape)
# Each stacked field is collapsed to (stack_depth, n_cells) and transposed,
# giving one row per grid cell and one column per layer / band.
dp_merra_, vmrm_merra_, h20_g_trans_ = (
    np.reshape(stack, (-1, lon_mesh.size)).T
    for stack in (dp_merra, vmrm_merra, h20_g_trans)
)

In [8]:
# Check the flattened transmittance table: one row per grid cell, one column per band.
h20_g_trans_.shape

(207936, 16)

In [9]:
# Check the flattened delta-pressure table: one row per grid cell, one column per layer.
dp_merra_.shape

(207936, 19)

(207936, 57)

In [18]:
# Check the flattened column water vapor: a single value per grid cell.
cwv_merra_.shape

(207936,)

In [23]:
# Column labels for the assembled table, grouped by the array they describe.
lon_lat_cwv_cols = ['lon', 'lat', 'cwv']
# One delta-pressure label per vertical layer, e.g. 'dp_vl_0'.
dp_cols = [f'dp_{vli}' for vli in layers]
# One volume-mixing-ratio label per vertical layer, e.g. 'vmr_vl_0'.
vmrm_cols = [f'vmr_{vli}' for vli in layers]
# One water-vapor-transmittance label per band, e.g. 'wvt_412'.
wvt_cols = [f'wvt_{wvi}' for wvi in wavelengths]

In [24]:
# Assemble one row per grid cell: coordinates and CWV first, then the per-layer
# and per-band columns, in the same order as the label lists.
df = pd.DataFrame(
    data=np.column_stack((lon_mesh, lat_mesh, cwv_merra_,
                          dp_merra_, vmrm_merra_, h20_g_trans_)),
    columns=[*lon_lat_cwv_cols, *dp_cols, *vmrm_cols, *wvt_cols],
)

In [25]:
# Preview the first rows, transposed so that each column of the table reads as a row.
df.head().T

Unnamed: 0,0,1,2,3,4
lon,-179.375,-178.75,-178.125,-177.5,-176.875
lat,-90.0,-90.0,-90.0,-90.0,-90.0
cwv,0.087462,0.087462,0.087462,0.087462,0.087462
dp_vl_0,,,,,
dp_vl_1,,,,,
dp_vl_2,,,,,
dp_vl_3,,,,,
dp_vl_4,,,,,
dp_vl_5,,,,,
dp_vl_6,,,,,


In [29]:
# Keep only the grid cells for which every column is populated.
# Rebinding the name (instead of dropna(inplace=True)) yields the same `df`
# without mutating the frame object that earlier cells already displayed, and
# the cell stays safe to re-run.
df = df.dropna()

In [30]:
# Summarize the cleaned frame: index range, column names, non-null counts and dtypes.
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 138927 entries, 7523 to 207935
Data columns (total 57 columns):
lon          138927 non-null float64
lat          138927 non-null float64
cwv          138927 non-null float64
dp_vl_0      138927 non-null float64
dp_vl_1      138927 non-null float64
dp_vl_2      138927 non-null float64
dp_vl_3      138927 non-null float64
dp_vl_4      138927 non-null float64
dp_vl_5      138927 non-null float64
dp_vl_6      138927 non-null float64
dp_vl_7      138927 non-null float64
dp_vl_8      138927 non-null float64
dp_vl_9      138927 non-null float64
dp_vl_10     138927 non-null float64
dp_vl_11     138927 non-null float64
dp_vl_12     138927 non-null float64
dp_vl_13     138927 non-null float64
dp_vl_14     138927 non-null float64
dp_vl_15     138927 non-null float64
dp_vl_16     138927 non-null float64
dp_vl_17     138927 non-null float64
dp_vl_18     138927 non-null float64
vmr_vl_0     138927 non-null float64
vmr_vl_1     138927 non-null float6

In [31]:
# Persist the cleaned training table.
# Create the output directory first: on a fresh checkout ./pickleJar may not
# exist yet, and to_pickle does not create missing parent directories.
os.makedirs('./pickleJar', exist_ok=True)
df.to_pickle('./pickleJar/training.pkl')