In [1]:
# ERA5 DATASET INTERPOLATION
# Subhatra Sivam, Eli Lichtblau

# import packages
from math import radians
import netCDF4 as nc
import numpy as np
import pandas as pd
from typing import Tuple
import time

# functions
# pretty ones from Eli
def solution(X1: np.ndarray, X2: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
    """
    Params:
        X1: (1D array_like)
        X2: (1D array_like)
    Returns:
        X1_indices where value exists in X2 as well
        X2_indices where value exists in X1 as well
    Note: the returned indices array are ordered smallest to greatest. by the value they correspond to
    that is to say X1[X1_indices] is a sorted list, u could do X1[X1_indices.sort()] to get the values in 
    the order they appear in the orignal X1
    
    """
    inter = np.intersect1d(X1, X2)
    def helper(inter: np.ndarray, x: np.ndarray):
        sorter = np.argsort(x)
        searchsorted_left = np.searchsorted(x, inter, sorter=sorter,side='left')
        searchsorted_right = np.searchsorted(x, inter, sorter=sorter,side='right')
        values = vrange(searchsorted_left, searchsorted_right) 
        return sorter[values] # optional to sort this if u care?
        

    return helper(inter, X1), helper(inter, X2)
def vrange(starts: np.ndarray, stops: np.ndarray):
    """Create concatenated ranges of integers for multiple start/stop

    Parameters:
        starts (1-D array_like): starts for each range
        stops (1-D array_like): stops for each range (same shape as starts)

    Returns:
        numpy.ndarray: concatenated ranges

    For example:

        >>> starts = [1, 3, 4, 6]
        >>> stops  = [1, 5, 7, 6]
        >>> vrange(starts, stops)
        array([3, 4, 4, 5, 6])

    """
    stops = np.asarray(stops)
    l = stops - starts # Lengths of each range.
    return np.repeat(stops - l.cumsum(), l) + np.arange(l.sum())
def latLongL2(original, secondary, k=4):
    o_lat = original[:,1]
    o_long = original[:,0]
    s_lat = secondary[:,1]
    s_long = secondary[:,0]
    
    diffs = (o_lat[:, None] - s_lat[None, :])**2 + (o_long[:, None] - s_long[None, :])**2
    indices = np.argpartition(diffs, k, axis=1)[:, :k]
    return indices
# ugly ones from me
def find_nearest(array, value): # finds closest value
    array = np.asarray(array)
    idx = (np.abs(array - value)).argmin()
    return idx
def square(list): # squares something in a list because it wasn't working for me
    return [i ** 2 for i in list]
def squared(list):
    return [i ** 1/2 for i in list]

ERA5 INPUT VARIABLES: time, longitude, latitude, d2m, t2m, sst, u10, v10, slhf, sshf, sp

In [8]:
# 2017 data import and organization

# let's do the entire thing by year. this way, we can just copy, paste, and adjust with year.
# also makes naming variables easier lol

sd = pd.read_csv('/Users/subhatrasivam/Documents/Internships/NOAA/Code/Saildrone/SD2017E5avghr.csv')
time_sd = sd['hours'].values
lon_sd = sd['lon'].values
lat_sd = sd['lat'].values

print('start ERA5 import for 2017')

t0 = time.perf_counter()

filename_prod = '/Users/subhatrasivam/Documents/Internships/NOAA/Code/ERA5/ERA5_2017.nc'
f = nc.Dataset(filename_prod,mode='r')
time_prod = f.variables['time'][:] - 1028496 # hours since 05/01/2017 00:30:00
lon_prod = f.variables['longitude'][:] # degrees
lat_prod = f.variables['latitude'][:] # degrees
dewtemp_prod = f.variables['d2m'][:] # kg/kg
airtemp_prod = f.variables['t2m'][:] # K
skntemp_prod = f.variables['sst'][:] # K
uwind_prod = f.variables['u10'][:] # m*s^-1
vwind_prod = f.variables['v10'][:] # m*s^-1
eflux_prod = f.variables['slhf'][:] # W*m^-2
hflux_prod = f.variables['sshf'][:] # W*m^-2
pressure_prod = f.variables['sp'][:] # Pa

t1 = time.perf_counter()
delt = str("{:.2f}".format(t1-t0))

print('completed ERA5 2017 import: ' + delt +' s')

filepath = '/Users/subhatrasivam/Documents/Internships/NOAA/Code/ERA5/2017ERA5interp.csv'

start ERA5 import for 2017
completed ERA5 2017 import: 31.77 s


In [41]:
# 2018 data import and organization

sd = pd.read_csv('/Users/subhatrasivam/Documents/Internships/NOAA/Code/Saildrone/SD2018E5avghr.csv')
time_sd = sd['hours'].values
lon_sd = sd['lon'].values
lat_sd = sd['lat'].values

print('start ERA5 import for 2018')

t0 = time.perf_counter()

filename_prod = '/Users/subhatrasivam/Documents/Internships/NOAA/Code/ERA5/ERA5_2018.nc'
f = nc.Dataset(filename_prod,mode='r')
time_prod = f.variables['time'][:] - 1037256# hours since 05/01/2017 00:30:00
lon_prod = f.variables['longitude'][:] # degrees
lat_prod = f.variables['latitude'][:] # degrees
dewtemp_prod = f.variables['d2m'][:] # kg/kg
airtemp_prod = f.variables['t2m'][:] # K
skntemp_prod = f.variables['sst'][:] # K
uwind_prod = f.variables['u10'][:] # m*s^-1
vwind_prod = f.variables['v10'][:] # m*s^-1
eflux_prod = f.variables['slhf'][:] # W*m^-2
hflux_prod = f.variables['sshf'][:] # W*m^-2
pressure_prod = f.variables['sp'][:] # Pa

t1 = time.perf_counter()
delt = str("{:.2f}".format(t1-t0))

print('completed ERA5 2018 import: ' + delt +' s')

filepath = '/Users/subhatrasivam/Documents/Internships/NOAA/Code/ERA5/2018ERA5interp.csv'

start ERA5 import for 2018
completed ERA5 2018 import: 23.23 s


In [2]:
# 2019 data import and organization

sd = pd.read_csv('/Users/subhatrasivam/Documents/Internships/NOAA/Code/Saildrone/SD2019E5avghr.csv')
time_sd = sd['hours'].values
lon_sd = sd['lon'].values
lat_sd = sd['lat'].values

print('start ERA5 import for 2019')

t0 = time.perf_counter()

filename_prod = '/Users/subhatrasivam/Documents/Internships/NOAA/Code/ERA5/ERA5_2019.nc'
f = nc.Dataset(filename_prod,mode='r')
time_prod = f.variables['time'][:] - 1046016 # hours since 05/01/2017 00:30:00
lon_prod = f.variables['longitude'][:] # degrees
lat_prod = f.variables['latitude'][:] # degrees
dewtemp_prod = f.variables['d2m'][:] # kg/kg
airtemp_prod = f.variables['t2m'][:] # K
skntemp_prod = f.variables['sst'][:] # K
uwind_prod = f.variables['u10'][:] # m*s^-1
vwind_prod = f.variables['v10'][:] # m*s^-1
eflux_prod = f.variables['slhf'][:] # W*m^-2
hflux_prod = f.variables['sshf'][:] # W*m^-2
pressure_prod = f.variables['sp'][:] # Pa

t1 = time.perf_counter()
delt = str("{:.2f}".format(t1-t0))

print('completed ERA5 2019 import: ' + delt +' s')

filepath = '/Users/subhatrasivam/Documents/Internships/NOAA/Code/ERA5/2019ERA5interp.csv'

start ERA5 import for 2019
completed ERA5 2019 import: 19.41 s


In [3]:
# interpolation and calculations

t0 = time.perf_counter()

# organize coordinates
location_sd = np.stack((lon_sd,lat_sd),axis=1) # set saildrone coordinates

gridlocation = [] # set MERRA2 coordinates
grididx = []
for idx_lon, lon in enumerate(lon_prod):
    for idx_lat,lat in enumerate(lat_prod):
        gridlocation.append([lon,lat])
        grididx.append([idx_lon,idx_lat])
gridlocation = np.asarray(gridlocation)
grididx = np.asarray(grididx)

close4 = latLongL2(location_sd,gridlocation)

# organize times
[idx_prod,idx_sd] = solution(time_prod,time_sd)
idx_prod = np.array(idx_prod)
timeidx_prod = np.copy(time_prod[idx_prod])
efluxidx_prod = np.copy(eflux_prod[idx_prod])
hfluxidx_prod = np.copy(hflux_prod[idx_prod])
airtempidx_prod = np.copy(airtemp_prod[idx_prod])
skntempidx_prod = np.copy(skntemp_prod[idx_prod])
dewtempidx_prod = np.copy(dewtemp_prod[idx_prod])
uwindidx_prod = np.copy(uwind_prod[idx_prod])
vwindidx_prod = np.copy(vwind_prod[idx_prod])
pressureidx_prod = np.copy(pressure_prod[idx_prod])

t1 = time.perf_counter()
delt = str("{:.2f}".format(t1-t0))

print('organized times: ' + delt +' s')

t0 = time.perf_counter()

# get distance estimations for MERRA2 (haversine)
percentdist = []
for grid_idx, point in enumerate(location_sd):
    ind_close4 = close4[grid_idx]
    ind_gridlocations = []
    distancegrid = []
    for ind in ind_close4:
        gridpoint = gridlocation[ind]
        ind_gridlocations.append(gridpoint)
        lata, lona, latb, lonb, R = radians(gridpoint[1]),radians(gridpoint[0]), radians(point[1]), radians(point[0]), 6378.0
        lat_diff = latb-lata
        lon_diff = lonb-lona
        a = np.sin(lat_diff/2)**2+np.cos(latb)*np.cos(lata)*np.sin(lon_diff/2)**2
        c = 2*np.arctan2(a**(1/2),(1-a)**(1/2))
        distancepix = R * c
        distancegrid.append(np.divide(1,distancepix))
    sumdist = np.sum(distancegrid)
    percentdist.append(distancegrid/sumdist)

# get distance estimations for MERRA2 (pixel distance formula)
"""
percentdist = []
for grid_idx, point in enumerate(location_sd):
    ind_close4 = close4[grid_idx]
    ind_gridlocations = []
    distancegrid = []
    for ind in ind_close4:
        gridpoint = gridlocation[ind]
        ind_gridlocations.append(gridpoint)
        distancepix = ((point[0] - gridpoint[0])**2 + (point[1] - gridpoint[1])**2)**1/2
        distancegrid.append(np.divide(1,distancepix))
    sumdist = np.sum(distancegrid)
    percentdist.append(distancegrid/sumdist)
"""

percentdist = np.array(percentdist)

t1 = time.perf_counter()
delt = str("{:.2f}".format(t1-t0))

print('get inverse distance weights: ' + delt +' s')

organized times: 66.70 s
get inverse distance weights: 4.50 s


In [4]:
# to save my poor computer with no disk space
dewtemp_prod = []
airtemp_prod = []
skntemp_prod = []
uwind_prod = []
vwind_prod = []
eflux_prod = []
hflux_prod = []
pressure_prod = []

t0 = time.perf_counter()

# interpolate MERRA2 variables

collect_idx = []
for time_s in time_sd:
    for idx_time,time_m in enumerate(timeidx_prod):
        if time_s == time_m:
            collect_idx.append(idx_time)

eflux_prod_withsd = np.copy(efluxidx_prod[collect_idx])
hflux_prod_withsd = np.copy(hfluxidx_prod[collect_idx])
airtemp_prod_withsd = np.copy(airtempidx_prod[collect_idx])
dewtemp_prod_withsd = np.copy(skntempidx_prod[collect_idx])
skntemp_prod_withsd = np.copy(dewtempidx_prod[collect_idx])
uwind_prod_withsd = np.copy(uwindidx_prod[collect_idx])
vwind_prod_withsd = np.copy(vwindidx_prod[collect_idx])
pressure_prod_withsd = np.copy(pressureidx_prod[collect_idx])

t1 = time.perf_counter()
delt = str("{:.2f}".format(t1-t0))

print('get interpolated values: ' + delt +' s')

t0 = time.perf_counter()

# get 4 closest grid points
timeidx = np.arange(0,len(time_sd),1)
lat1 = []
lat2 = []
lat3 = []
lat4 = []
lon1 = []
lon2 = []
lon3 = []
lon4 = []

for k in close4:
    lat1.append(gridlocation[k[0]][1])
    lon1.append(gridlocation[k[0]][0])
    lat2.append(gridlocation[k[1]][1])
    lon2.append(gridlocation[k[1]][0])
    lat3.append(gridlocation[k[2]][1])
    lon3.append(gridlocation[k[2]][0])
    lat4.append(gridlocation[k[3]][1])
    lon4.append(gridlocation[k[3]][0])

arraylon1 = []
arraylon2 = []
arraylon3 = []
arraylon4 = []
arraylat1 = []
arraylat2 = []
arraylat3 = []
arraylat4 = []

for idx,lon in enumerate(lon1):
    arraylon1.append(find_nearest(lon_prod,lon))
    arraylon2.append(find_nearest(lon_prod,lon2[idx]))
    arraylon3.append(find_nearest(lon_prod,lon3[idx]))
    arraylon4.append(find_nearest(lon_prod,lon4[idx]))
    arraylat1.append(find_nearest(lat_prod,lat1[idx]))
    arraylat2.append(find_nearest(lat_prod,lat2[idx]))
    arraylat3.append(find_nearest(lat_prod,lat3[idx]))
    arraylat4.append(find_nearest(lat_prod,lat4[idx]))  

latlon1 = np.stack((arraylat1,arraylon1),axis=1)
latlon2 = np.stack((arraylat2,arraylon2),axis=1)   
latlon3 = np.stack((arraylat3,arraylon3),axis=1)   
latlon4 = np.stack((arraylat4,arraylon4),axis=1)

timeidx = np.arange(0,len(time_sd),1)

efluxlatlon1 = []
hfluxlatlon1 = []
airtemplatlon1 = []
skntemplatlot1 = []
dewtemplatlon1 = []
uwindtemplatlot1 = []
vwindtemplatlot1 = []
pressurelatlon1 = []

efluxlatlon2 = []
hfluxlatlon2 = []
airtemplatlon2 = []
skntemplatlot2 = []
dewtemplatlon2 = []
uwindtemplatlot2 = []
vwindtemplatlot2 = []
pressurelatlon2 = []

efluxlatlon3 = []
hfluxlatlon3 = []
airtemplatlon3 = []
skntemplatlot3 = []
dewtemplatlon3 = []
uwindtemplatlot3 = []
vwindtemplatlot3 = []
pressurelatlon3 = []

efluxlatlon4 = []
hfluxlatlon4 = []
airtemplatlon4 = []
skntemplatlot4 = []
dewtemplatlon4 = []
uwindtemplatlot4 = []
vwindtemplatlot4 = []
pressurelatlon4 = []

for j in timeidx:
    # first closest
    efluxlatlon1.append(eflux_prod_withsd[j,latlon1[j,0],latlon1[j,1]])
    hfluxlatlon1.append(hflux_prod_withsd[j,latlon1[j,0],latlon1[j,1]])
    dewtemplatlon1.append(dewtemp_prod_withsd[j,latlon1[j,0],latlon1[j,1]])
    airtemplatlon1.append(airtemp_prod_withsd[j,latlon1[j,0],latlon1[j,1]])
    skntemplatlot1.append(skntemp_prod_withsd[j,latlon1[j,0],latlon1[j,1]])
    uwindtemplatlot1.append(uwind_prod_withsd[j,latlon1[j,0],latlon1[j,1]])
    vwindtemplatlot1.append(vwind_prod_withsd[j,latlon1[j,0],latlon1[j,1]])
    pressurelatlon1.append(pressure_prod_withsd[j,latlon1[j,0],latlon1[j,1]])
    # second closest
    efluxlatlon2.append(eflux_prod_withsd[j,latlon2[j,0],latlon2[j,1]])
    hfluxlatlon2.append(hflux_prod_withsd[j,latlon2[j,0],latlon2[j,1]])
    dewtemplatlon2.append(dewtemp_prod_withsd[j,latlon2[j,0],latlon2[j,1]])
    airtemplatlon2.append(airtemp_prod_withsd[j,latlon2[j,0],latlon2[j,1]])
    skntemplatlot2.append(skntemp_prod_withsd[j,latlon2[j,0],latlon2[j,1]])
    uwindtemplatlot2.append(uwind_prod_withsd[j,latlon2[j,0],latlon2[j,1]])
    vwindtemplatlot2.append(vwind_prod_withsd[j,latlon2[j,0],latlon2[j,1]])
    pressurelatlon2.append(pressure_prod_withsd[j,latlon1[j,0],latlon1[j,1]])
    # third closest
    efluxlatlon3.append(eflux_prod_withsd[j,latlon3[j,0],latlon3[j,1]])
    hfluxlatlon3.append(hflux_prod_withsd[j,latlon3[j,0],latlon3[j,1]])
    dewtemplatlon3.append(dewtemp_prod_withsd[j,latlon3[j,0],latlon3[j,1]])
    airtemplatlon3.append(airtemp_prod_withsd[j,latlon3[j,0],latlon3[j,1]])
    skntemplatlot3.append(skntemp_prod_withsd[j,latlon3[j,0],latlon3[j,1]])
    uwindtemplatlot3.append(uwind_prod_withsd[j,latlon3[j,0],latlon3[j,1]])
    vwindtemplatlot3.append(vwind_prod_withsd[j,latlon3[j,0],latlon3[j,1]])
    pressurelatlon3.append(pressure_prod_withsd[j,latlon1[j,0],latlon1[j,1]])
    # furthest
    efluxlatlon4.append(eflux_prod_withsd[j,latlon4[j,0],latlon4[j,1]])
    hfluxlatlon4.append(hflux_prod_withsd[j,latlon4[j,0],latlon4[j,1]])
    dewtemplatlon4.append(dewtemp_prod_withsd[j,latlon4[j,0],latlon4[j,1]])
    airtemplatlon4.append(airtemp_prod_withsd[j,latlon4[j,0],latlon4[j,1]])
    skntemplatlot4.append(skntemp_prod_withsd[j,latlon4[j,0],latlon4[j,1]])
    uwindtemplatlot4.append(uwind_prod_withsd[j,latlon4[j,0],latlon4[j,1]])
    vwindtemplatlot4.append(vwind_prod_withsd[j,latlon4[j,0],latlon4[j,1]])
    pressurelatlon4.append(pressure_prod_withsd[j,latlon1[j,0],latlon1[j,1]])

efluxlatlon1 = np.array(efluxlatlon1)
hfluxlatlon1 = np.array(hfluxlatlon1)
dewtemplatlon1 = np.array(dewtemplatlon1)
airtemplatlon1 = np.array(airtemplatlon1)
skntemplatlot1 = np.array(skntemplatlot1)
uwindtemplatlot1 = np.array(uwindtemplatlot1)
vwindtemplatlot1 = np.array(vwindtemplatlot1)
pressurelatlon1 = np.array(pressurelatlon1)

efluxlatlon2 = np.array(efluxlatlon2)
hfluxlatlon2 = np.array(hfluxlatlon2)
dewtemplatlon2 = np.array(dewtemplatlon2)
airtemplatlon2 = np.array(airtemplatlon2)
skntemplatlot2 = np.array(skntemplatlot2)
uwindtemplatlot2 = np.array(uwindtemplatlot2)
vwindtemplatlot2 = np.array(vwindtemplatlot2)
pressurelatlon2 = np.array(pressurelatlon2)

efluxlatlon3 = np.array(efluxlatlon3)
hfluxlatlon3 = np.array(hfluxlatlon3)
dewtemplatlon3 = np.array(dewtemplatlon3)
airtemplatlon3 = np.array(airtemplatlon3)
skntemplatlot3 = np.array(skntemplatlot3)
uwindtemplatlot3 = np.array(uwindtemplatlot3)
vwindtemplatlot3 = np.array(vwindtemplatlot3)
pressurelatlon3 = np.array(pressurelatlon3)

efluxlatlon4 = np.array(efluxlatlon4)
hfluxlatlon4 = np.array(hfluxlatlon4)
dewtemplatlon4 = np.array(dewtemplatlon4)
airtemplatlon4 = np.array(airtemplatlon4)
skntemplatlot4 = np.array(skntemplatlot4)
uwindtemplatlot4 = np.array(uwindtemplatlot4)
vwindtemplatlot4 = np.array(vwindtemplatlot4)
pressurelatlon4 = np.array(pressurelatlon4)

corr_percent = np.stack([timeidx,percentdist[:,0],percentdist[:,1],percentdist[:,2],percentdist[:,3]],axis=1)
for_df = pd.DataFrame([timeidx,efluxlatlon1,hfluxlatlon1,airtemplatlon1,skntemplatlot1,dewtemplatlon1,uwindtemplatlot1,vwindtemplatlot1,pressurelatlon1,efluxlatlon2,hfluxlatlon2,airtemplatlon2,skntemplatlot2,dewtemplatlon2,uwindtemplatlot2,vwindtemplatlot2, pressurelatlon2,efluxlatlon3,hfluxlatlon3,airtemplatlon3,skntemplatlot3,dewtemplatlon3,uwindtemplatlot3,vwindtemplatlot3, pressurelatlon3,efluxlatlon4,hfluxlatlon4,airtemplatlon4,skntemplatlot4,dewtemplatlon4,uwindtemplatlot4,vwindtemplatlot4, pressurelatlon4])
for_df = for_df.T
for_df = np.ma.masked_where(for_df ==  -32767, for_df)
for_df = np.ma.compress_rows(for_df)
df = pd.DataFrame(for_df, columns=['time','eflux1','hflux1','airtemp1','skntemp1','dewtemp1','uwind1','vwind1','pressure1','eflux2','hflux2','airtemp2','skntemp2','dewtemp2','uwind2','vwind2','pressure2','eflux3','hflux3','airtemp3','skntemp3','dewtemp3','uwind3','vwind3','pressure3','eflux4','hflux4','airtemp4','skntemp4','dewtemp4','uwind4','vwind4','pressure4'])

time1 = df['time']
efluxlatlon1 = df['eflux1'].values
hfluxlatlon1 = df['hflux1'].values
airtemplatlon1 = df['airtemp1'].values
skntemplatlot1 = df['skntemp1'].values
dewtemplatlon1 = df['dewtemp1'].values
uwindtemplatlot1 = df['uwind1'].values
vwindtemplatlot1 = df['vwind1'].values
pressure1 = df['pressure1'].values

efluxlatlon1 = np.array(efluxlatlon1)
hfluxlatlon1 = np.array(hfluxlatlon1)
dewtemplatlon1 = np.array(dewtemplatlon1)
airtemplatlon1 = np.array(airtemplatlon1)
skntemplatlot1 = np.array(skntemplatlot1)
uwindtemplatlot1 = np.array(uwindtemplatlot1)
vwindtemplatlot1 = np.array(vwindtemplatlot1)
pressure1 = np.array(pressure1)

efluxlatlon2 = df['eflux2'].values
hfluxlatlon2 = df['hflux2'].values
airtemplatlon2 = df['airtemp2'].values
skntemplatlot2 = df['skntemp2'].values
dewtemplatlon2 = df['dewtemp2'].values
uwindtemplatlot2 = df['uwind2'].values
vwindtemplatlot2 = df['vwind2'].values
pressure2 = df['pressure2'].values

efluxlatlon2 = np.array(efluxlatlon2)
hfluxlatlon2 = np.array(hfluxlatlon2)
dewtemplatlon2 = np.array(dewtemplatlon2)
airtemplatlon2 = np.array(airtemplatlon2)
skntemplatlot2 = np.array(skntemplatlot2)
uwindtemplatlot2 = np.array(uwindtemplatlot2)
vwindtemplatlot2 = np.array(vwindtemplatlot2)
pressure2 = np.array(pressure2)

efluxlatlon3 = df['eflux3'].values
hfluxlatlon3 = df['hflux3'].values
airtemplatlon3 = df['airtemp3'].values
skntemplatlot3 = df['skntemp3'].values
dewtemplatlon3 = df['dewtemp3'].values
uwindtemplatlot3 = df['uwind3'].values
vwindtemplatlot3 = df['vwind3'].values
pressure3 = df['pressure3'].values

efluxlatlon3 = np.array(efluxlatlon3)
hfluxlatlon3 = np.array(hfluxlatlon3)
dewtemplatlon3 = np.array(dewtemplatlon3)
airtemplatlon3 = np.array(airtemplatlon3)
skntemplatlot3 = np.array(skntemplatlot3)
uwindtemplatlot3 = np.array(uwindtemplatlot3)
vwindtemplatlot3 = np.array(vwindtemplatlot3)
pressure3 = np.array(pressure3)

efluxlatlon4 = df['eflux4'].values
hfluxlatlon4 = df['hflux4'].values
airtemplatlon4 = df['airtemp4'].values
skntemplatlot4 = df['skntemp4'].values
dewtemplatlon4 = df['dewtemp4'].values
uwindtemplatlot4 = df['uwind4'].values
vwindtemplatlot4 = df['vwind4'].values
pressure4 = df['pressure4'].values

efluxlatlon4 = np.array(efluxlatlon4)
hfluxlatlon4 = np.array(hfluxlatlon4)
dewtemplatlon4 = np.array(dewtemplatlon4)
airtemplatlon4 = np.array(airtemplatlon4)
skntemplatlot4 = np.array(skntemplatlot4)
uwindtemplatlot4 = np.array(uwindtemplatlot4)
vwindtemplatlot4 = np.array(vwindtemplatlot4)
pressure4 = np.array(pressure4)

percent1 = []
percent2 = []
percent3 = []
percent4 = []
timefin_sd = []

for idx in time1:
    k = find_nearest(corr_percent[:,0],idx)
    timefin_sd.append(time_sd[k])
    percent1.append(corr_percent[:,1][k])
    percent2.append(corr_percent[:,2][k])
    percent3.append(corr_percent[:,3][k])
    percent4.append(corr_percent[:,4][k])


t1 = time.perf_counter()
delt = str("{:.2f}".format(t1-t0))

print('get 4 closest variable values: ' + delt +' s')

# get weighted average
eflux = efluxlatlon1*percent1 + efluxlatlon2*percent2 + efluxlatlon3*percent3 + efluxlatlon4*percent4
hflux = hfluxlatlon1*percent1 + hfluxlatlon2*percent2 + hfluxlatlon3*percent3 + hfluxlatlon4*percent4
airtemp = airtemplatlon1*percent1 + airtemplatlon2*percent2 + airtemplatlon3*percent3 + airtemplatlon4*percent4
skntemp = skntemplatlot1*percent1 + skntemplatlot2*percent2 + skntemplatlot3*percent3 + skntemplatlot4*percent4
dewtemp = dewtemplatlon1*percent1 + dewtemplatlon2*percent2 + dewtemplatlon3*percent3 + dewtemplatlon4*percent4
uwind = uwindtemplatlot1*percent1 + uwindtemplatlot2*percent2 + uwindtemplatlot3*percent3 + uwindtemplatlot4*percent4
vwind = vwindtemplatlot1*percent1 + vwindtemplatlot2*percent2 + vwindtemplatlot3*percent3 + vwindtemplatlot4*percent4
pressure = pressure1*percent1 + pressure2*percent2 + pressure3*percent3 + pressure4*percent4

t0 = time.perf_counter()

# calculate variables
airtemp = np.array(np.subtract(airtemp,273.15)) # K -> C
skntemp = np.array(np.subtract(skntemp,273.15)) # K -> C
dewtemp = np.array(np.subtract(dewtemp,273.15)) # K -> C

pressure = np.array(np.divide(pressure,100))

eflux = np.array(np.divide(eflux,-3600))
hflux = np.array(np.divide(hflux,-3600))

def square(list):
    return [i ** 2 for i in list]
def squared(list):
    return [i ** 1/2 for i in list]
usq = square(uwind)
vsq = square(vwind)
sum  = np.add(usq,vsq)
windidx_prod = squared(sum) # u and v -> total magnitude
wind = np.array(windidx_prod)

eair = 6.11*10**((7.5*dewtemp)/(237.7+dewtemp))
qair = 1000*0.622*eair/(pressure-eair)

es_tempfin_prod = 6.11*2.71828**(5420*(1/273-1/(airtemp+273.15))) # air saturation vapor pressure OK
es_sknfin_prod = 6.11*2.71828**(5420*(1/273-1/(skntemp+273.15))) # sea saturation vapor pressure OK

rh = 100*eair/es_tempfin_prod

esea = es_sknfin_prod*rh/100

qskn = 1000*0.622*esea/(pressure-esea)

t1 = time.perf_counter()
delt = str("{:.2f}".format(t1-t0))

print('calculated variables: ' + delt +' s')

for_df = np.stack([timefin_sd,eflux,hflux,qair,qskn,airtemp,skntemp,wind],axis=1)
df = pd.DataFrame(for_df,columns=['time','eflux','hflux','qair','qskn','airtemp','skntemp','wind'])
df.to_csv(filepath)

get interpolated values: 174.88 s
get 4 closest variable values: 79.42 s
calculated variables: 0.03 s


In [None]:
for_df = np.stack([timefin_sd,eflux,hflux,qair,qskn,airtemp,skntemp,wind],axis=1)
df = pd.DataFrame(for_df,columns=['time','eflux','hflux','qair','qskn','airtemp','skntemp','wind'])
df.to_csv(filepath)