In [26]:
from sklearn.cluster import KMeans
import numpy as np
import matplotlib.pyplot as plt

from matplotlib import cm
import matplotlib.cbook as cbook
import matplotlib.colors as colors

import pandas as pd
import xarray as xr
from netCDF4 import Dataset, num2date, date2num
from datetime import datetime, timedelta
import os

import seaborn as sns
import cmocean as cmocn

%matplotlib inline

In [27]:
# Open a single buoy/QuikSCAT matchup file (8N, 180W) and display it to inspect its contents.
# NOTE(review): the path is absolute and machine-specific, while the loading loop further
# down uses relative '../../../downloads/...' paths — confirm which location is canonical.
ds= xr.open_dataset('/srv/data2/srai_poseidon/srai_poseidon/observation/SatelliteVsBuoy/downloads/Buoy/extractedGZ/WINDS/T_08N_180WCOARE3p5_2000_2hrMeanVar_QS_Matchup.nc')
ds

In [28]:
# Release the file handle of the sample dataset opened above.
ds.close()

In [29]:
# List the data-variable names of the sample dataset.
list(ds.keys())

['retrieved_wind_speed',
 'retrieved_wind_direction',
 'rain_impact',
 'flags',
 'eflags',
 'nudge_wind_speed',
 'nudge_wind_direction',
 'retrieved_wind_speed_uncorrected',
 'cross_track_wind_speed_bias',
 'atmospheric_speed_bias',
 'wind_obj',
 'ambiguity_speed',
 'ambiguity_direction',
 'ambiguity_obj',
 'number_in_fore',
 'number_in_aft',
 'number_out_fore',
 'number_out_aft',
 'gmf_sst',
 'distance_from_coast',
 'exp_bias_wrt_oceanward_neighbors',
 'WDIR',
 'WDIR_QC',
 'WDIR_DM',
 'WSPD',
 'WSPD_QC',
 'WSPD_DM',
 'UWND',
 'VWND',
 'SST',
 'SST_QC',
 'SST_DM',
 'AIRT',
 'AIRT_QC',
 'AIRT_DM',
 'RELH',
 'RELH_QC',
 'RELH_DM',
 'WSPD_10N',
 'WSPD_10',
 'SST - AIRT',
 'cosWDIR',
 'sinWDIR',
 'U10N_x',
 'U10N_y',
 'mean_WSPD',
 'std_WSPD',
 'mean_cosWDIR',
 'std_cosWDIR',
 'mean_sinWDIR',
 'std_sinWDIR',
 'mean_WSPD_10N',
 'std_WSPD_10N',
 'mean_U10N_x',
 'std_U10N_x',
 'mean_U10N_y',
 'std_U10N_y',
 'mean_SST',
 'std_SST',
 'mean_AIRT',
 'std_AIRT',
 'mean_RELH',
 'std_RELH',
 'mean_S

In [30]:
# Names of the mean/std statistics variables: one 'mean_<base>' followed by
# one 'std_<base>' for every base quantity, in the order listed below.
statVars = [
    f'{stat}_{base}'
    for base in ('WSPD',
                 'WSPD_10N',
                 'SST',
                 'AIRT',
                 'SST - AIRT',
                 'RELH',
                 'U10N_x',
                 'U10N_y',
                 'cosWDIR',
                 'sinWDIR')
    for stat in ('mean', 'std')
]

In [7]:
def _stationTag(LAT, LON):
    """Build the buoy tag used in file names (e.g. ``T_08S_095W``) from a signed latitude and a longitude in degrees."""
    latDeg = int(round(abs(LAT)))
    latHemi = 'S' if LAT < 0 else 'N'
    lonEast = LON % 360  # accepts both signed (-180..180) and 0..360 longitudes
    if lonEast >= 180:
        # 180 itself is labelled 'W', matching the T_xx_180W file names.
        lonDeg, lonHemi = int(round(360 - lonEast)), 'W'
    else:
        lonDeg, lonHemi = int(round(lonEast)), 'E'
    return f'T_{latDeg:02d}{latHemi}_{lonDeg:03d}{lonHemi}'


def getSubDF(ds, LAT, LON):
    """Extract the per-station subset of a buoy/QuikSCAT matchup dataset.

    Parameters
    ----------
    ds : xarray.Dataset
        Matchup dataset for one buoy. Must contain the buoy, statistics and
        satellite variables named below, plus ``QS_TIME`` and ``TAO_TIME``.
    LAT : number
        Signed buoy latitude in degrees (negative = south).
    LON : number
        Buoy longitude in degrees; it is stored as given in ``LONGITUDE`` and
        is also used (modulo 360) to locate the deployment-dates file.

    Returns
    -------
    xarray.Dataset
        Merge of (a) buoy variables selected at fixed HEIGHT/DEPTH levels,
        (b) the mean/std statistics variables, (c) constant ``LATITUDE`` and
        ``LONGITUDE`` arrays along ``QS_TIME``, (d) ``TAO_TIME``, (e) a
        ``Deployment index`` per ``QS_TIME`` sample and (f) the satellite
        variables at ``cross_track=0, ambiguities=0``.

    Notes
    -----
    The deployment-dates file is located from ``LAT``/``LON`` rather than from
    notebook-level globals, so the function no longer depends on loop state.
    """
    windVars = ['WDIR',
                'WDIR_QC',
                'WDIR_DM',
                'WSPD',
                'WSPD_QC',
                'WSPD_DM',
                'UWND',
                'VWND']

    sstVars = ['SST',
               'SST_QC',
               'SST_DM']

    airtVars = ['AIRT',
                'AIRT_QC',
                'AIRT_DM']

    rhVars = ['RELH',
              'RELH_QC',
              'RELH_DM']

    u10Vars = ['WSPD_10N',
               'WSPD_10']

    statVars = ['mean_WSPD',
                'std_WSPD',
                'mean_WSPD_10N',
                'std_WSPD_10N',
                'mean_SST',
                'std_SST',
                'mean_AIRT',
                'std_AIRT',
                'mean_SST - AIRT',
                'std_SST - AIRT',
                'mean_RELH',
                'std_RELH',
                'mean_U10N_x',
                'std_U10N_x',
                'mean_U10N_y',
                'std_U10N_y',
                'mean_cosWDIR',
                'std_cosWDIR',
                'mean_sinWDIR',
                'std_sinWDIR']

    satVars = ['retrieved_wind_speed',
               'retrieved_wind_direction',
               'rain_impact',
               'flags',
               'eflags',
               'nudge_wind_speed',
               'nudge_wind_direction',
               'retrieved_wind_speed_uncorrected',
               'cross_track_wind_speed_bias',
               'atmospheric_speed_bias',
               'wind_obj',
               'ambiguity_speed',
               'ambiguity_direction',
               'ambiguity_obj',
               'number_in_fore',
               'number_in_aft',
               'number_out_fore',
               'number_out_aft',
               'gmf_sst',
               'distance_from_coast',
               'exp_bias_wrt_oceanward_neighbors']

    # (variable group, vertical coordinate, level to select), in insertion order.
    levelSelections = [(windVars, 'HEIGHT', 4),
                       (sstVars, 'DEPTH', 1),
                       (rhVars, 'HEIGHT', 3),
                       (airtVars, 'HEIGHT', 3),
                       (u10Vars, 'HEIGHT', 10)]

    subds = xr.Dataset()
    for varNames, levelDim, level in levelSelections:
        for varName in varNames:
            subds[varName] = ds[varName].sel({levelDim: level})
    # The scalar HEIGHT/DEPTH coordinates left behind by .sel() are deliberately
    # NOT removed here. The previous `subds.drop_vars(...)` calls discarded their
    # return value (drop_vars returns a new Dataset), so they never had any
    # effect; the notebook removes these coordinates later on the combined data.

    for statVar in statVars:
        subds[statVar] = ds[statVar]

    qsTime = subds['QS_TIME'].to_numpy()
    nObs = len(qsTime)

    subds['LATITUDE'] = xr.DataArray(np.full(nObs, LAT, dtype=float), dims=['QS_TIME'])
    subds['LONGITUDE'] = xr.DataArray(np.full(nObs, LON, dtype=float), dims=['QS_TIME'])

    subds['TAO_TIME'] = ds['TAO_TIME']

    tag = _stationTag(LAT, LON)
    deployFileName = f'../../../downloads/Buoy/extractedGZ/WINDS/{tag}/{tag}_DeploymentDates.nc'
    # Copy the dates into memory and close the file right away.
    with xr.open_dataset(deployFileName) as ds2:
        startDates = np.array(ds2['startDate'])
        endDates = np.array(ds2['endDate'])

    # Samples default to index 0; deployment i (i >= 1) gets index i.
    # NOTE(review): the loop starts at 1, so deployment 0 shares index 0 with
    # samples that fall in no deployment window — confirm this is intended.
    depNum = np.zeros(nObs, dtype=int)
    for i in range(1, len(startDates)):
        inDeployment = (qsTime >= startDates[i]) & (qsTime <= endDates[i])
        depNum[inDeployment] = i

    subds['Deployment index'] = xr.DataArray(depNum, dims=['QS_TIME'])

    # Keep only the first cross-track cell and the first ambiguity.
    satDS = ds[satVars].isel(cross_track=0, ambiguities=0)

    return xr.merge((subds, satDS))

In [8]:
# Buoy grid: signed latitudes (negative = south) and longitudes (negative = west).
latList = [-9, -8, -5, -2, 0, 2, 5, 8, 9]
lonList = [-95, -110, -125, -140, -155, -170, -180, 165]

ylen = len(latList)
xlen = len(lonList)

# Every (lat, lon) combination, latitude-major.
taskList = [[latVal, lonVal] for latVal in latList for lonVal in lonList]

ntasks = len(taskList)

# Per-station datasets are collected here and concatenated once after the loop,
# instead of re-concatenating the growing result for every file.
stationDatasets = []
dataCount = 0
fileCount = 0
for task in taskList:
    lat = task[0]
    lon = task[1]

    LAT = lat
    LON = lon

    latUnits = 'S' if lat < 0 else 'N'
    lonUnits = 'W' if lon < 0 else 'E'

    # LON is passed on as degrees east in [0, 360); lat/lon become the
    # unsigned magnitudes used in the file names.
    LON = (LON+360)%360
    lat = abs(lat)
    lon = abs(lon)

    matchFname = f'../../../downloads/Buoy/extractedGZ/WINDS/T_{lat:02d}{latUnits}_{lon:03d}{lonUnits}COARE3p5_2000_2hrMeanVar_QS_Matchup.nc'

    # Grid points without a matchup file are silently skipped.
    if not os.path.isfile(matchFname):
        continue

    print(f'T_{lat:02d}{latUnits}_{lon:03d}{lonUnits}_xrr_MatchUp_2000.nc')
    # Load the subset into memory inside the `with` so the file can be closed
    # immediately; otherwise every matchup file stays open for the whole session.
    with xr.open_dataset(matchFname) as rds:
        stationDS = getSubDF(rds, LAT, LON).load()

    # Replace the per-station time index by a running sample counter that is
    # unique across stations, and keep the original times as a plain variable.
    QS_TIME = stationDS['QS_TIME'].to_numpy()
    stationDS = stationDS.rename({'QS_TIME':'count'})
    dataLen = len(stationDS['count'])
    print(dataCount, dataCount+dataLen)
    stationDS['count'] = xr.DataArray(pd.Series(np.arange(dataCount, dataCount+dataLen)),
                                      dims = ['count'])
    stationDS['QS_TIME'] = xr.DataArray(QS_TIME,
                                        dims = ['count'])
    dataCount += dataLen
    stationDatasets.append(stationDS)
    fileCount += 1

if len(stationDatasets) == 0:
    allDS = xr.Dataset()
elif len(stationDatasets) == 1:
    allDS = stationDatasets[0]
else:
    allDS = xr.concat(stationDatasets, dim='count')
        

T_08S_095W_xrr_MatchUp_2000.nc
0 4033
T_08S_110W_xrr_MatchUp_2000.nc
4033 6764


  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))
  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))


T_08S_125W_xrr_MatchUp_2000.nc
6764 9758


  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))
  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))


T_08S_155W_xrr_MatchUp_2000.nc
9758 12737


  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))
  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))


T_08S_170W_xrr_MatchUp_2000.nc
12737 16617


  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))
  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))


T_08S_165E_xrr_MatchUp_2000.nc
16617 19615


  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))
  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))


T_05S_095W_xrr_MatchUp_2000.nc
19615 22560


  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))
  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))


T_05S_110W_xrr_MatchUp_2000.nc
22560 27032


  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))
  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))


T_05S_125W_xrr_MatchUp_2000.nc
27032 31411


  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))
  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))


T_05S_140W_xrr_MatchUp_2000.nc
31411 36134


  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))
  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))


T_05S_155W_xrr_MatchUp_2000.nc
36134 38694


  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))
  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))


T_05S_170W_xrr_MatchUp_2000.nc
38694 42723


  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))
  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))


T_05S_165E_xrr_MatchUp_2000.nc
42723 47407


  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))
  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))


T_02S_095W_xrr_MatchUp_2000.nc
47407 48639


  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))
  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))


T_02S_110W_xrr_MatchUp_2000.nc
48639 50581


  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))
  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))


T_02S_125W_xrr_MatchUp_2000.nc
50581 54805


  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))
  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))


T_02S_140W_xrr_MatchUp_2000.nc
54805 59259


  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))
  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))


T_02S_155W_xrr_MatchUp_2000.nc
59259 63139


  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))
  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))


T_02S_170W_xrr_MatchUp_2000.nc
63139 65593


  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))
  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))


T_02S_180W_xrr_MatchUp_2000.nc
65593 70429


  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))
  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))


T_02S_165E_xrr_MatchUp_2000.nc
70429 72271


  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))
  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))


T_00N_095W_xrr_MatchUp_2000.nc
72271 75460


  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))
  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))


T_00N_110W_xrr_MatchUp_2000.nc
75460 79607


  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))
  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))


T_00N_125W_xrr_MatchUp_2000.nc
79607 82583


  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))
  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))


T_00N_140W_xrr_MatchUp_2000.nc
82583 87857


  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))
  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))


T_00N_155W_xrr_MatchUp_2000.nc
87857 92792


  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))
  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))


T_00N_170W_xrr_MatchUp_2000.nc
92792 97030


  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))
  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))


T_00N_180W_xrr_MatchUp_2000.nc
97030 99893


  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))
  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))


T_00N_165E_xrr_MatchUp_2000.nc
99893 101719


  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))
  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))


T_02N_095W_xrr_MatchUp_2000.nc
101719 104264


  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))
  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))


T_02N_110W_xrr_MatchUp_2000.nc
104264 108119


  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))
  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))


T_02N_125W_xrr_MatchUp_2000.nc
108119 111404


  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))
  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))


T_02N_140W_xrr_MatchUp_2000.nc
111404 116862


  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))
  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))


T_02N_155W_xrr_MatchUp_2000.nc
116862 121972


  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))
  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))


T_02N_170W_xrr_MatchUp_2000.nc
121972 123663


  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))
  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))


T_02N_180W_xrr_MatchUp_2000.nc
123663 127456


  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))
  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))


T_02N_165E_xrr_MatchUp_2000.nc
127456 130512


  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))
  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))


T_05N_095W_xrr_MatchUp_2000.nc
130512 134174


  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))
  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))


T_05N_110W_xrr_MatchUp_2000.nc
134174 138104


  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))
  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))


T_05N_125W_xrr_MatchUp_2000.nc
138104 141610


  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))
  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))


T_05N_140W_xrr_MatchUp_2000.nc
141610 146862


  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))
  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))


T_05N_155W_xrr_MatchUp_2000.nc
146862 149844


  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))
  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))


T_05N_170W_xrr_MatchUp_2000.nc
149844 153490


  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))
  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))


T_05N_165E_xrr_MatchUp_2000.nc
153490 156406


  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))
  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))


T_08N_095W_xrr_MatchUp_2000.nc
156406 158825


  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))
  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))


T_08N_110W_xrr_MatchUp_2000.nc
158825 163936


  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))
  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))


T_08N_155W_xrr_MatchUp_2000.nc
163936 167377


  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))
  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))


T_08N_170W_xrr_MatchUp_2000.nc
167377 172162


  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))
  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))


T_08N_180W_xrr_MatchUp_2000.nc
172162 175198


  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))
  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))


T_08N_165E_xrr_MatchUp_2000.nc
175198 179330


  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))
  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))


T_09N_140W_xrr_MatchUp_2000.nc
179330 184319


  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))
  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))


In [9]:
# Remove the leftover scalar level/selection coordinates from the combined dataset.
# errors='ignore' keeps this cell re-runnable: without it a second execution
# raises because the names have already been dropped.
allDS = allDS.drop_vars(['HEIGHT','DEPTH','cross_track','ambiguities'], errors='ignore')

In [10]:
# Display the combined dataset.
allDS

In [11]:
# Names of the extended quality flags (kept for reference; not used in the
# computation below).
extended_flag_meaning = [
    'rain_correction_not_applied_flag',
    'correction_produced_negative_spd_flag',
    'all_ambiguities_contribute_to_nudging_flag',
    'large_rain_correction_flag',
    'coastal_processing_applied_flag',
    'lake_winds_flag',
    'rain_nearby_flag',
    'ice_nearby_flag',
    'significant_rain_correction_flag',
    'rain_correction_applied_flag',
    'wind_retrieval_possibly_corrupted_flag',
]


def _flagCharIs(bitStrings, position, char):
    """Boolean array: True where character `position` of each bit string equals `char`."""
    return np.array([bits[position] == char for bits in bitStrings])


# Render every eflags value as a zero-padded 16-character binary string;
# string index 15 is the least-significant bit, index 0 the most-significant.
extended_flags = np.array(allDS['eflags'].to_numpy(), dtype=int)
eflags = np.array(['{:016b}'.format(flagValue) for flagValue in extended_flags])

# NOTE(review): this is True where the last bit is '0' (i.e. the bit is NOT set),
# although the name suggests the opposite polarity — confirm against the
# eflags documentation.
rainCorrectionNotApplied = _flagCharIs(eflags, 15, '0')
# NOTE(review): string indices 4 and 5 are bits 11 and 10, while the flag list
# above has only 11 entries — confirm these positions against the variable's
# flag_masks attribute.
rainCorrectionApplied = _flagCharIs(eflags, 4, '1')
significantRainCorrectionApplied = _flagCharIs(eflags, 5, '1')
largeRainCorrectionApplied = _flagCharIs(eflags, 12, '1')

# A sample is rain-flagged when any of the four conditions holds.
rainMask = np.logical_or.reduce([rainCorrectionApplied,
                                 significantRainCorrectionApplied,
                                 largeRainCorrectionApplied,
                                 rainCorrectionNotApplied])

In [12]:
# Number of samples flagged by rainMask.
np.sum(rainMask)

27364

In [13]:
# Samples that are not rain-flagged.
noRainMask = np.logical_not(rainMask)

# mask1 is True where any of these variables is NaN.
# The mean_WDIR / mean_SST / mean_RELH / mean_AIRT checks were left disabled
# in the original cell and are not applied here either.
mask1 = np.logical_or.reduce([
    np.isnan(allDS[varName].to_numpy())
    for varName in ('mean_WSPD',
                    'WSPD_10N',
                    'WDIR',
                    'retrieved_wind_speed',
                    'retrieved_wind_direction')
])

In [14]:
# Keep samples that are rain-free and have none of the screened values missing.
selectMask = noRainMask & ~mask1

In [15]:
# Sample counts: rain-free samples, and samples passing both the rain and the NaN screens.
np.sum(noRainMask), np.sum(selectMask)

(156955, 148300)

In [16]:
# Positions of the selected samples along the sample axis.
indices = np.flatnonzero(selectMask)

In [17]:
# `indices` are positions obtained from the boolean mask, so select by position.
# Label-based .sel() only gave the right rows as long as the 'count' labels
# happened to equal 0..N-1; .isel() is correct regardless of the labels.
goodDataSet = allDS.isel(count=indices)

In [18]:
# Standard-deviation variables whose NaN entries are replaced by 0.0.
stdLabels = ['std_WSPD', 'std_cosWDIR', 'std_sinWDIR', 'std_SST', 'std_AIRT', 'std_SST - AIRT', 'std_RELH']
for label in stdLabels:
    stdValues = goodDataSet[label]
    mask = np.isnan(stdValues.to_numpy())
    # Report how many entries are being filled for this variable.
    print(label, mask.sum())
    goodDataSet[label] = xr.where(mask, 0.0, stdValues)

std_WSPD 0
std_cosWDIR 5
std_sinWDIR 0
std_SST 972
std_AIRT 0
std_SST - AIRT 0
std_RELH 5


In [19]:
# Short file variable names -> descriptive names used in the rest of the notebook.
descriptiveNames = {
    'WSPD': 'Wind Speed (TAO)',
    'WDIR': 'Wind Direction (TAO)',
    'UWND': 'Zonal Wind Speed (TAO)',
    'VWND': 'Meridional Wind Speed (TAO)',
    'SST': 'Sea Surface Temperature (TAO)',
    'RELH': 'Relative Humidity (TAO)',
    'AIRT': 'Air Temperature (TAO)',
    'WSPD_10N': 'Neutral Wind Speed at 10m (TAO)',
    'WSPD_10': 'Wind Speed at 10m (TAO)',
    'retrieved_wind_speed': 'Wind Speed (QuikSCAT)',
    'retrieved_wind_direction': 'Wind Direction (QuikSCAT)',
}
goodDataSet = goodDataSet.rename_vars(descriptiveNames)

In [20]:
# Work on a copy; the derived variables below are added to df.
df = goodDataSet.copy()

In [21]:
# Scalar speed difference against the instantaneous 10 m neutral buoy wind.
df['Speed Difference (QuikSCAT - TAO)'] = df['Wind Speed (QuikSCAT)'] - df['Neutral Wind Speed at 10m (TAO)']

# NOTE: the 'cos(Wind Direction (...)' / 'sin(Wind Direction (...)' names lack a
# closing parenthesis; they are kept as-is because they end up in the saved
# file and may be read by name elsewhere.

## TAO wind direction already fixed (0 deg = east, 90 deg = north), so it is used as stored
taoDirRad = np.deg2rad(df['Wind Direction (TAO)'])
df['cos(Wind Direction (TAO)'] = np.cos(taoDirRad)
df['sin(Wind Direction (TAO)'] = np.sin(taoDirRad)

## change true degrees to degrees where 0 deg mean east and 90 deg mean north
# Read the unconverted direction from goodDataSet, not from df: the conversion
# is its own inverse, so converting df's value in place would silently flip
# the convention back whenever this cell is executed a second time.
df['Wind Direction (QuikSCAT)'] = (-(goodDataSet['Wind Direction (QuikSCAT)'] - 90.0) + 360)%360
qsDirRad = np.deg2rad(df['Wind Direction (QuikSCAT)'])
df['cos(Wind Direction (QuikSCAT)'] = np.cos(qsDirRad)
df['sin(Wind Direction (QuikSCAT)'] = np.sin(qsDirRad)

## calculate zonal and meridional winds
df['Zonal Neutral Wind Speed (TAO)'] = df['Neutral Wind Speed at 10m (TAO)'] * np.cos(taoDirRad)
df['Meridional Neutral Wind Speed (TAO)'] = df['Neutral Wind Speed at 10m (TAO)'] * np.sin(taoDirRad)

df['Zonal Wind Speed (QuikSCAT)'] = df['Wind Speed (QuikSCAT)'] * np.cos(qsDirRad)
df['Meridional Wind Speed (QuikSCAT)'] = df['Wind Speed (QuikSCAT)'] * np.sin(qsDirRad)

## direction diff in range (-180,180]
dirDiff = ((df['Wind Direction (QuikSCAT)'] - df['Wind Direction (TAO)'])+360)%360
dirDiff = xr.where(dirDiff > 180, dirDiff-360, dirDiff)
df['Direction Difference (QuikSCAT - TAO)'] = dirDiff
dirDiffRad = np.deg2rad(dirDiff)
df['cos(Direction Difference (QuikSCAT - TAO))'] = np.cos(dirDiffRad)
df['sin(Direction Difference (QuikSCAT - TAO))'] = np.sin(dirDiffRad)

## component-wise differences
df['Zonal Wind Speed Difference (QuikSCAT - TAO)'] = df['Zonal Wind Speed (QuikSCAT)'] - df['Zonal Neutral Wind Speed (TAO)']
df['Meridional Wind Speed Difference (QuikSCAT - TAO)'] = df['Meridional Wind Speed (QuikSCAT)'] - df['Meridional Neutral Wind Speed (TAO)']

In [22]:
# Magnitude of the vector formed by the mean_U10N_x / mean_U10N_y components.
uMean = df['mean_U10N_x']
vMean = df['mean_U10N_y']
df['mean Vector Neutral Wind Speed (TAO)'] = np.sqrt(uMean**2 + vMean**2)

In [23]:
# List the variable names now present in df.
list(df.keys())

['Wind Direction (TAO)',
 'WDIR_QC',
 'WDIR_DM',
 'Wind Speed (TAO)',
 'WSPD_QC',
 'WSPD_DM',
 'Zonal Wind Speed (TAO)',
 'Meridional Wind Speed (TAO)',
 'Sea Surface Temperature (TAO)',
 'SST_QC',
 'SST_DM',
 'Relative Humidity (TAO)',
 'RELH_QC',
 'RELH_DM',
 'Air Temperature (TAO)',
 'AIRT_QC',
 'AIRT_DM',
 'Neutral Wind Speed at 10m (TAO)',
 'Wind Speed at 10m (TAO)',
 'mean_WSPD',
 'std_WSPD',
 'mean_WSPD_10N',
 'std_WSPD_10N',
 'mean_SST',
 'std_SST',
 'mean_AIRT',
 'std_AIRT',
 'mean_SST - AIRT',
 'std_SST - AIRT',
 'mean_RELH',
 'std_RELH',
 'mean_U10N_x',
 'std_U10N_x',
 'mean_U10N_y',
 'std_U10N_y',
 'mean_cosWDIR',
 'std_cosWDIR',
 'mean_sinWDIR',
 'std_sinWDIR',
 'LATITUDE',
 'LONGITUDE',
 'TAO_TIME',
 'Deployment index',
 'Wind Speed (QuikSCAT)',
 'Wind Direction (QuikSCAT)',
 'rain_impact',
 'flags',
 'eflags',
 'nudge_wind_speed',
 'nudge_wind_direction',
 'retrieved_wind_speed_uncorrected',
 'cross_track_wind_speed_bias',
 'atmospheric_speed_bias',
 'wind_obj',
 'ambigu

In [24]:
# Differences between QuikSCAT and the mean_* buoy statistics.
qsDirection = df['Wind Direction (QuikSCAT)']
meanUx = df['mean_U10N_x']
meanUy = df['mean_U10N_y']

df['Speed Difference (QuikSCAT - TAO 2hr mean)'] = df['Wind Speed (QuikSCAT)'] - df['mean_WSPD_10N']

# Direction of the mean vector wind; arctan2 yields degrees in [-180, 180].
df['Wind Direction (TAO 2hr mean)'] = np.rad2deg(np.arctan2(meanUy, meanUx))

# Raw difference, then fold values above 180 degrees down by a full turn.
rawDirDiff = qsDirection - df['Wind Direction (TAO 2hr mean)']
dirDiff = xr.where(rawDirDiff > 180, rawDirDiff-360, rawDirDiff)
df['Direction Difference (QuikSCAT - TAO 2hr mean)'] = dirDiff

# cos/sin are taken from the unfolded difference.
rawDirDiffRad = np.deg2rad(rawDirDiff)
df['cos(Direction Difference (QuikSCAT - TAO 2hr mean))'] = np.cos(rawDirDiffRad)
df['sin(Direction Difference (QuikSCAT - TAO 2hr mean))'] = np.sin(rawDirDiffRad)

df['Zonal Wind Speed Difference (QuikSCAT - TAO 2hr mean)'] = df['Zonal Wind Speed (QuikSCAT)'] - meanUx
df['Meridional Wind Speed Difference (QuikSCAT - TAO 2hr mean)'] = df['Meridional Wind Speed (QuikSCAT)'] - meanUy

In [25]:
# Write the screened dataset with all derived variables to a netCDF file in the
# current working directory.
df.to_netcdf('goodData_QSwithTAO_with2hrStatsConv.nc')