In [1]:
from sklearn.cluster import KMeans
import numpy as np
import matplotlib.pyplot as plt

from matplotlib import cm
import matplotlib.cbook as cbook
import matplotlib.colors as colors

import pandas as pd
import xarray as xr
from netCDF4 import Dataset, num2date, date2num
from datetime import datetime, timedelta
import os

import seaborn as sns
import cmocean as cmocn

%matplotlib inline

In [4]:
ds= xr.open_dataset('/srv/data2/srai_poseidon/srai_poseidon/observation/SatelliteVsBuoy/downloads/Buoy/extractedGZ/WINDS/T_09N_140W_xrr_MatchUp_720_mins_2000.nc')
ds

In [70]:
#list(ds.keys())

In [78]:
statVars = []
for timeWindow in range(20,730,10):
    statVarList= [f'mean WSPD {timeWindow}min',
             f'std. dev. WSPD {timeWindow}min',
             f'mean WSPD_10N {timeWindow}min',
             f'std. dev. WSPD_10N {timeWindow}min',
             f'mean Ux_10N {timeWindow}min',
             f'std. dev. Ux_10N {timeWindow}min',
             f'mean Vy_10N {timeWindow}min',
             f'std. dev. Vy_10N {timeWindow}min',
             f'mean WDIR {timeWindow}min',
             f'std. dev. WDIR {timeWindow}min',
             f'mean cosWDIR {timeWindow}min',
             f'std. dev. cosWDIR {timeWindow}min',
             f'mean sinWDIR {timeWindow}min',
             f'std. dev. sinWDIR {timeWindow}min']
    statVars = statVars + statVarList

In [81]:
def getSubDF(ds, LAT, LON):
    subds = xr.Dataset()

    taoTimeVar = ['TAO_TIME']
    
    windVars = ['WDIR',
         'WDIR_QC',
         'WDIR_DM',
         'WSPD',
         'WSPD_QC',
         'WSPD_DM',
         'UWND',
         'VWND']

    sstVars = ['SST',
             'SST_QC',
             'SST_DM']
    
    airtVars = ['AIRT',
             'AIRT_QC',
             'AIRT_DM']
    
    rhVars = ['RELH',
             'RELH_QC',
             'RELH_DM']
    
    u10Vars =['WSPD_10N',
             'WSPD_10']

    statVars = []
    for timeWindow in range(20,730,10):
        statVarList= [f'mean WSPD {timeWindow}min',
                 f'std. dev. WSPD {timeWindow}min',
                 f'mean WSPD_10N {timeWindow}min',
                 f'std. dev. WSPD_10N {timeWindow}min',
                 f'mean Ux_10N {timeWindow}min',
                 f'std. dev. Ux_10N {timeWindow}min',
                 f'mean Vy_10N {timeWindow}min',
                 f'std. dev. Vy_10N {timeWindow}min',
                 f'mean WDIR {timeWindow}min',
                 f'std. dev. WDIR {timeWindow}min',
                 f'mean cosWDIR {timeWindow}min',
                 f'std. dev. cosWDIR {timeWindow}min',
                 f'mean sinWDIR {timeWindow}min',
                 f'std. dev. sinWDIR {timeWindow}min']
        statVars = statVars + statVarList
        
    
    satVars = ['retrieved_wind_speed',
             'retrieved_wind_direction',
             'rain_impact',
             'flags',
             'eflags',
             'nudge_wind_speed',
             'nudge_wind_direction',
             'retrieved_wind_speed_uncorrected',
             'cross_track_wind_speed_bias',
             'atmospheric_speed_bias',
             'wind_obj',
             'ambiguity_speed',
             'ambiguity_direction',
             'ambiguity_obj',
             'number_in_fore',
             'number_in_aft',
             'number_out_fore',
             'number_out_aft',
             'gmf_sst',
             'distance_from_coast',
             'exp_bias_wrt_oceanward_neighbors']

     
    
    for windVar in windVars:
        subds[windVar] = ds[windVar].sel(HEIGHT=4)
    subds.drop_vars('HEIGHT')
    for sstVar in sstVars:
        subds[sstVar] = ds[sstVar].sel(DEPTH=1)
    subds.drop_vars('DEPTH')
    for rhVar in rhVars:
        subds[rhVar] = ds[rhVar].sel(HEIGHT=3)
    subds.drop_vars('HEIGHT')
    for airtVar in airtVars:
        subds[airtVar] = ds[airtVar].sel(HEIGHT=3)
    subds.drop_vars('HEIGHT')
    for u10Var in u10Vars:
        subds[u10Var] = ds[u10Var].sel(HEIGHT=10)
    subds.drop_vars('HEIGHT')

    for statVar in statVars:
        subds[statVar] = ds[statVar]

    subds['LATITUDE'] = xr.DataArray(LAT * np.ones(len(subds['QS_TIME']), dtype = float), dims=['QS_TIME'])
    subds['LONGITUDE'] = xr.DataArray(LON * np.ones(len(subds['QS_TIME']), dtype = float), dims=['QS_TIME'])
    
    subds['TAO_TIME'] = ds['TAO_TIME']

    deployFileName = f'../../downloads/Buoy/extractedGZ/WINDS/T_{lat:02d}{latUnits}_{lon:03d}{lonUnits}/T_{lat:02d}{latUnits}_{lon:03d}{lonUnits}_DeploymentDates.nc'
    ds2 = xr.open_dataset(deployFileName)
    startDates = np.array(ds2['startDate'])
    endDates = np.array(ds2['endDate'])

    depNum = np.zeros((len(subds['QS_TIME'])), dtype=int)
    c = 1
    for i in range(1,len(startDates)):
        mask = subds['QS_TIME'] >= startDates[i]
        mask *= subds['QS_TIME'] <= endDates[i]
        depNum[mask] = c
        c = c+1

    subds['Deployment index'] = xr.DataArray(depNum, dims=['QS_TIME'])
    
    
    satDS = ds[satVars]
    satDS = satDS.isel(cross_track=0, ambiguities=0)

    
    
    returnDS = xr.merge((subds, satDS))

    
    
    return returnDS

In [82]:
latList = [-9, -8, -5, -2, 0, 2, 5, 8, 9]
lonList = [-95, -110, -125, -140, -155, -170, -180, 165]

ylen = len(latList)
xlen = len(lonList)

taskList = []

for latId  in range(ylen):
    for lonId in range(xlen):
        taskList.append([latList[latId], lonList[lonId]])

ntasks = len(taskList)


allDS = xr.Dataset()
dataCount = 0
fileCount = 0
for task in taskList:
    lat = task[0]
    lon = task[1]

    LAT = lat
    LON = lon

    if lat < 0:
        latUnits = 'S'
    else:
        latUnits = 'N'

    if lon < 0:
        lonUnits = 'W'
    else:
        lonUnits = 'E'

    LON = (LON+360)%360
    lat=abs(lat)
    lon=abs(lon)

    matchFname = f'../../downloads/Buoy/extractedGZ/WINDS/T_{lat:02d}{latUnits}_{lon:03d}{lonUnits}_xrr_MatchUp_720_mins_2000.nc'
    #deployFileName = f'../../downloads/Buoy/extractedGZ/WINDS/T_{lat:02d}{latUnits}_{lon:03d}{lonUnits}/T_{lat:02d}{latUnits}_{lon:03d}{lonUnits}_DeploymentDates.nc'
    
    if os.path.isfile(matchFname):
        print(f'T_{lat:02d}{latUnits}_{lon:03d}{lonUnits}_xrr_MatchUp_2000.nc')
        rds = xr.open_dataset(matchFname)
        ds = getSubDF(rds, LAT, LON)
        QS_TIME = ds['QS_TIME'].to_numpy()
        ds = ds.rename({'QS_TIME':'count'})
        dataLen = len(ds['count'])
        print(dataCount, dataCount+dataLen)
        ds['count'] = xr.DataArray(pd.Series(np.arange(dataCount, dataCount+dataLen)), 
                                   dims = ['count'])
        ds['QS_TIME'] = xr.DataArray(QS_TIME, 
                                   dims = ['count'])
        dataCount += dataLen
        if fileCount == 0:
            allDS = ds
        else:
            allDS = xr.concat((allDS, ds), dim='count')
        fileCount += 1
        

T_08S_095W_xrr_MatchUp_2000.nc
0 4091
T_08S_110W_xrr_MatchUp_2000.nc
4091 7839


  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))
  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))


T_08S_125W_xrr_MatchUp_2000.nc
7839 12311


  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))
  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))


T_08S_155W_xrr_MatchUp_2000.nc
12311 15908


  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))
  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))


T_08S_170W_xrr_MatchUp_2000.nc
15908 20370


  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))
  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))


T_08S_165E_xrr_MatchUp_2000.nc
20370 24539


  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))
  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))


T_05S_095W_xrr_MatchUp_2000.nc
24539 28725


  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))
  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))


T_05S_110W_xrr_MatchUp_2000.nc
28725 34054


  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))
  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))


T_05S_125W_xrr_MatchUp_2000.nc
34054 39648


  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))
  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))


T_05S_140W_xrr_MatchUp_2000.nc
39648 45275


  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))
  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))


T_05S_155W_xrr_MatchUp_2000.nc
45275 48785


  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))
  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))


T_05S_170W_xrr_MatchUp_2000.nc
48785 53848


  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))
  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))


T_05S_165E_xrr_MatchUp_2000.nc
53848 59856


  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))
  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))


T_02S_095W_xrr_MatchUp_2000.nc
59856 64292


  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))
  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))


T_02S_110W_xrr_MatchUp_2000.nc
64292 67094


  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))
  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))


T_02S_125W_xrr_MatchUp_2000.nc
67094 71725


  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))
  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))


T_02S_140W_xrr_MatchUp_2000.nc
71725 77312


  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))
  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))


T_02S_155W_xrr_MatchUp_2000.nc
77312 82069


  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))
  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))


T_02S_170W_xrr_MatchUp_2000.nc
82069 85687


  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))
  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))


T_02S_180W_xrr_MatchUp_2000.nc
85687 90853


  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))
  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))


T_02S_165E_xrr_MatchUp_2000.nc
90853 95040


  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))
  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))


T_00N_095W_xrr_MatchUp_2000.nc
95040 99931


  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))
  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))


T_00N_110W_xrr_MatchUp_2000.nc
99931 104995


  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))
  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))


T_00N_125W_xrr_MatchUp_2000.nc
104995 108710


  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))
  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))


T_00N_140W_xrr_MatchUp_2000.nc
108710 114350


  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))
  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))


T_00N_155W_xrr_MatchUp_2000.nc
114350 119650


  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))
  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))


T_00N_170W_xrr_MatchUp_2000.nc
119650 125190


  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))
  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))


T_00N_180W_xrr_MatchUp_2000.nc
125190 129800


  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))
  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))


T_00N_165E_xrr_MatchUp_2000.nc
129800 133084


  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))
  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))


T_02N_095W_xrr_MatchUp_2000.nc
133084 137302


  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))
  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))


T_02N_110W_xrr_MatchUp_2000.nc
137302 141950


  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))
  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))


T_02N_125W_xrr_MatchUp_2000.nc
141950 146687


  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))
  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))


T_02N_140W_xrr_MatchUp_2000.nc
146687 152372


  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))
  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))


T_02N_155W_xrr_MatchUp_2000.nc
152372 157486


  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))
  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))


T_02N_170W_xrr_MatchUp_2000.nc
157486 161208


  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))
  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))


T_02N_180W_xrr_MatchUp_2000.nc
161208 165857


  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))
  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))


T_02N_165E_xrr_MatchUp_2000.nc
165857 171281


  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))
  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))


T_05N_095W_xrr_MatchUp_2000.nc
171281 175525


  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))
  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))


T_05N_110W_xrr_MatchUp_2000.nc
175525 180990


  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))
  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))


T_05N_125W_xrr_MatchUp_2000.nc
180990 185923


  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))
  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))


T_05N_140W_xrr_MatchUp_2000.nc
185923 191669


  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))
  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))


T_05N_155W_xrr_MatchUp_2000.nc
191669 196601


  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))
  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))


T_05N_170W_xrr_MatchUp_2000.nc
196601 201836


  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))
  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))


T_05N_165E_xrr_MatchUp_2000.nc
201836 207245


  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))
  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))


T_08N_095W_xrr_MatchUp_2000.nc
207245 211404


  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))
  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))


T_08N_110W_xrr_MatchUp_2000.nc
211404 217644


  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))
  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))


T_08N_155W_xrr_MatchUp_2000.nc
217644 222896


  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))
  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))


T_08N_170W_xrr_MatchUp_2000.nc
222896 227845


  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))
  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))


T_08N_180W_xrr_MatchUp_2000.nc
227845 233017


  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))
  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))


T_08N_165E_xrr_MatchUp_2000.nc
233017 238343


  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))
  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))


T_09N_140W_xrr_MatchUp_2000.nc
238343 244387


  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))
  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))


In [100]:
allDS = allDS.drop_vars(['HEIGHT','DEPTH','cross_track','ambiguities'])

ValueError: These variables cannot be found in this dataset: ['ambiguities', 'HEIGHT', 'cross_track', 'DEPTH']

In [101]:
allDS

In [102]:
extended_flag_meaning = ['rain_correction_not_applied_flag',
'correction_produced_negative_spd_flag',
'all_ambiguities_contribute_to_nudging_flag',
'large_rain_correction_flag',
'coastal_processing_applied_flag',
'lake_winds_flag',
'rain_nearby_flag',
'ice_nearby_flag',
'significant_rain_correction_flag',
'rain_correction_applied_flag',
'wind_retrieval_possibly_corrupted_flag']

extended_flags = np.array(allDS['eflags'].to_numpy(), dtype=int)
eflags = np.array(['{:016b}'.format(val) for val in extended_flags])
rainCorrectionNotApplied = np.array([val[15] == '0' for val in eflags])
rainCorrectionApplied = np.array([val[4] == '1' for val in eflags])
significantRainCorrectionApplied = np.array([val[5] == '1' for val in eflags])
largeRainCorrectionApplied = np.array([val[12] == '1' for val in eflags])
rainMask = np.logical_or(rainCorrectionApplied, significantRainCorrectionApplied)
rainMask = np.logical_or(rainMask, largeRainCorrectionApplied)
rainMask = np.logical_or(rainMask, rainCorrectionNotApplied)

In [103]:
noRainMask = ~rainMask
mask1 = allDS.sel(TAO_TIME_INDEX=0)['WSPD_QC'].isin([1,2]).to_numpy()
mask2 = allDS.sel(TAO_TIME_INDEX=0)['WDIR_QC'].isin([1,2]).to_numpy()
mask3 = allDS.sel(TAO_TIME_INDEX=0)['SST_QC'].isin([1,2]).to_numpy()
mask4 = allDS.sel(TAO_TIME_INDEX=0)['RELH_QC'].isin([1,2]).to_numpy()
mask5 = allDS.sel(TAO_TIME_INDEX=0)['AIRT_QC'].isin([1,2]).to_numpy()

In [104]:
selectMask = np.logical_and(noRainMask, mask1)
selectMask = np.logical_and(selectMask, mask2)
selectMask = np.logical_and(selectMask, mask3)
selectMask = np.logical_and(selectMask, mask4)
selectMask = np.logical_and(selectMask, mask5)

In [105]:
selectMask

array([ True,  True,  True, ...,  True,  True,  True])

In [106]:
indices = selectMask.nonzero()[0]

In [107]:
goodDataSet = allDS.sel(count=indices)

In [108]:
goodDataSet = goodDataSet.rename_vars({'WSPD':'Wind Speed (TAO)',
                  'WDIR':'Wind Direction (TAO)',
                  'UWND': 'Zonal Wind Speed (TAO)',
                  'VWND': 'Meridional Wind Speed (TAO)',
                  'SST': 'Sea Surface Temperature (TAO)',
                  'RELH': 'Relative Humidity (TAO)',
                  'AIRT': 'Air Temperature (TAO)',
                  'WSPD_10N': 'Neutral Wind Speed at 10m (TAO)',
                  'WSPD_10': 'Wind Speed at 10m (TAO)',
                  'retrieved_wind_speed': 'Wind Speed (QuikSCAT)',
                  'retrieved_wind_direction': 'Wind Direction (QuikSCAT)'})

In [118]:
goodDataSet.to_netcdf('goodData_QSwithTAO.nc')

In [110]:
centerData = allDS.sel(TAO_TIME_INDEX = 0)
centerData = centerData.drop('TAO_TIME_INDEX')
indices = list(np.nonzero(~rainMask)[0])

goodData = centerData.isel(count=~rainMask)
goodData = goodData.isel(count = goodData["WSPD_QC"].isin([1,2]))
goodData = goodData.isel(count = goodData["WDIR_QC"].isin([1,2]))
goodData = goodData.isel(count = goodData["SST_QC"].isin([1,2]))
goodData = goodData.isel(count = goodData["RELH_QC"].isin([1,2]))
goodData = goodData.isel(count = goodData["AIRT_QC"].isin([1,2]))


print(len(goodData['count']))




156953


In [111]:
mask = np.logical_or(goodData['WSPD'] > 1000, goodData['WDIR'] > 3600)
#mask = np.logical_or(mask, abs(goodData['WSPD_10N'].to_numpy()) < 0.1)
mask = np.logical_or(mask, np.isnan(goodData['WSPD'].to_numpy()))
mask = np.logical_or(mask, np.isnan(goodData['WDIR'].to_numpy()))
mask = np.logical_or(mask, np.isnan(goodData['retrieved_wind_speed'].to_numpy()))
mask = np.logical_or(mask, np.isnan(goodData['retrieved_wind_direction'].to_numpy()))
print(np.sum(mask))
df = goodData.isel(count = ~mask)
#df = df.to_dataframe()

<xarray.DataArray 'WSPD' ()>
array(8226)


In [112]:
df

In [113]:
df = df.rename_vars({'WSPD':'Wind Speed (TAO)',
                  'WDIR':'Wind Direction (TAO)',
                  'UWND': 'Zonal Wind Speed (TAO)',
                  'VWND': 'Meridional Wind Speed (TAO)',
                  'SST': 'Sea Surface Temperature (TAO)',
                  'RELH': 'Relative Humidity (TAO)',
                  'AIRT': 'Air Temperature (TAO)',
                  'WSPD_10N': 'Neutral Wind Speed at 10m (TAO)',
                  'WSPD_10': 'Wind Speed at 10m (TAO)',
                  'retrieved_wind_speed': 'Wind Speed (QuikSCAT)',
                  'retrieved_wind_direction': 'Wind Direction (QuikSCAT)'})

In [114]:
df['Speed Difference (QuikSCAT - TAO)'] = df['Wind Speed (QuikSCAT)'] - df['Neutral Wind Speed at 10m (TAO)']

## change true degrees to degrees where 0 deg mean east and 90 deg mean north 
df['Wind Direction (TAO)'] = (-(df['Wind Direction (TAO)'] - 90.0) + 360)%360
df['Wind Direction (QuikSCAT)'] = (-(df['Wind Direction (QuikSCAT)'] - 90.0) + 360)%360

## calculate zonal and meridional winds 
df['Zonal Neutral Wind Speed (TAO)'] = df['Neutral Wind Speed at 10m (TAO)'] * np.cos(np.deg2rad(df['Wind Direction (TAO)']))
df['Meridional Neutral Wind Speed (TAO)'] = df['Neutral Wind Speed at 10m (TAO)'] * np.sin(np.deg2rad(df['Wind Direction (TAO)']))

df['Zonal Wind Speed (QuikSCAT)'] = df['Wind Speed (QuikSCAT)'] * np.cos(np.deg2rad(df['Wind Direction (QuikSCAT)']))
df['Meridional Wind Speed (QuikSCAT)'] = df['Wind Speed (QuikSCAT)'] * np.sin(np.deg2rad(df['Wind Direction (QuikSCAT)']))

## direction diff in range (-180,180)
dirDiff = ((df['Wind Direction (QuikSCAT)'] - df['Wind Direction (TAO)'])+360)%360
dirDiff = xr.where(dirDiff > 180, dirDiff-360, dirDiff)
df['Direction Difference (QuikSCAT - TAO)'] = dirDiff

df['Zonal Wind Speed Difference (QuikSCAT - TAO)'] = df['Zonal Wind Speed (QuikSCAT)'] - df['Zonal Neutral Wind Speed (TAO)']
df['Meridional Wind Speed Difference (QuikSCAT - TAO)'] = df['Meridional Wind Speed (QuikSCAT)'] - df['Meridional Neutral Wind Speed (TAO)']

for timeWindow in range(20,730,10):
    df[f'Speed Difference (QuikSCAT - TAO {timeWindow} min mean)'] = df['Wind Speed (QuikSCAT)'] - df[f'mean WSPD_10N {timeWindow}min']

    ## change true degrees to degrees where 0 deg mean east and 90 deg mean north 
    df[f'mean WDIR {timeWindow}min'] = (-(df[f'mean WDIR {timeWindow}min'] - 90.0) + 360)%360

    ## direction diff in range (-180,180)
    dirDiff = ((df['Wind Direction (QuikSCAT)'] - df[f'mean WDIR {timeWindow}min'])+360)%360
    dirDiff = xr.where(dirDiff > 180, dirDiff-360, dirDiff)
    df[f'Direction Difference (QuikSCAT - TAO {timeWindow} min mean)'] = dirDiff

    ### 
    df[f'Zonal Wind Speed Difference (QuikSCAT - TAO {timeWindow} min mean)'] = df['Zonal Wind Speed (QuikSCAT)'] - df[f'mean Ux_10N {timeWindow}min']
    df[f'Meridional Wind Speed Difference (QuikSCAT - TAO {timeWindow} min mean)'] = df['Zonal Wind Speed (QuikSCAT)'] - df[f'mean Vy_10N {timeWindow}min']

In [115]:
df

In [116]:
df.to_netcdf('rainFlagRemovedBuoyDataBadQualityRemovedMatchup.nc')

In [None]:
list(df.variables.keys())

In [17]:
len(df['count'])

148727

In [18]:
selAllDS = allDS.sel(count=df['count'])

In [19]:
selAllDS = selAllDS.rename_vars({'WSPD':'Wind Speed (TAO)',
                  'WDIR':'Wind Direction (TAO)',
                  'UWND': 'Zonal Wind Speed (TAO)',
                  'VWND': 'Meridional Wind Speed (TAO)',
                  'SST': 'Sea Surface Temperature (TAO)',
                  'RELH': 'Relative Humidity (TAO)',
                  'AIRT': 'Air Temperature (TAO)',
                  'WSPD_10N': 'Neutral Wind Speed at 10m (TAO)',
                  'WSPD_10': 'Wind Speed at 10m (TAO)',
                  'retrieved_wind_speed': 'Wind Speed (QuikSCAT)',
                  'retrieved_wind_direction': 'Wind Direction (QuikSCAT)'})

In [20]:
selAllDS['Speed Difference (QuikSCAT - TAO)'] = selAllDS['Wind Speed (QuikSCAT)'] - selAllDS['Wind Speed (TAO)']

selAllDS['Wind Direction (TAO)'] = (-(selAllDS['Wind Direction (TAO)'] - 90.0) + 360)%360
selAllDS['Wind Direction (QuikSCAT)'] = (-(selAllDS['Wind Direction (QuikSCAT)'] - 90.0) + 360)%360

selAllDS['Direction Difference (QuikSCAT - TAO 30 min mean)'] = selAllDS['Wind Direction (QuikSCAT)'] - selAllDS['mean WSPD 30min']
selAllDS['Direction Difference (QuikSCAT - TAO 30 min mean)'] = selAllDS['Wind Direction (QuikSCAT)'] - selAllDS['mean WDIR 30min']

selAllDS['Direction Difference (QuikSCAT - TAO 1 hr mean)'] = selAllDS['Wind Direction (QuikSCAT)'] - selAllDS['mean WSPD 60min']
selAllDS['Direction Difference (QuikSCAT - TAO 1 hr mean)'] = selAllDS['Wind Direction (QuikSCAT)'] - selAllDS['mean WDIR 60min']

selAllDS['Direction Difference (QuikSCAT - TAO 2 hr mean)'] = selAllDS['Wind Direction (QuikSCAT)'] - selAllDS['mean WSPD 120min']
selAllDS['Direction Difference (QuikSCAT - TAO 2 hr mean)'] = selAllDS['Wind Direction (QuikSCAT)'] - selAllDS['mean WDIR 120min']


selAllDS['Zonal Neutral Wind Speed at 10m (TAO)'] = selAllDS['Neutral Wind Speed at 10m (TAO)']*np.cos(np.deg2rad(selAllDS['Wind Direction (TAO)']))
selAllDS['Meridional Neutral Wind Speed at 10m (TAO)'] = selAllDS['Neutral Wind Speed at 10m (TAO)']*np.sin(np.deg2rad(selAllDS['Wind Direction (TAO)']))

selAllDS['Zonal Neutral Wind Speed at 10m (QuikSCAT)'] = selAllDS['Wind Speed (QuikSCAT)']*np.cos(np.deg2rad(selAllDS['Wind Direction (QuikSCAT)']))
selAllDS['Meridional Neutral Wind Speed at 10m (QuikSCAT)'] = selAllDS['Wind Speed (QuikSCAT)']*np.sin(np.deg2rad(selAllDS['Wind Direction (QuikSCAT)']))

selAllDS['Zonal Wind Speed Difference (QuikSCAT - TAO)'] = selAllDS['Zonal Neutral Wind Speed at 10m (QuikSCAT)'] - selAllDS['Zonal Neutral Wind Speed at 10m (TAO)']
selAllDS['Meridional Wind Speed Difference (QuikSCAT - TAO)'] = selAllDS['Meridional Neutral Wind Speed at 10m (QuikSCAT)'] - selAllDS['Meridional Neutral Wind Speed at 10m (TAO)']


In [21]:
selAllDS