In [1]:
from sklearn.cluster import KMeans
import numpy as np
import matplotlib.pyplot as plt

from matplotlib import cm
import matplotlib.cbook as cbook
import matplotlib.colors as colors

import pandas as pd
import xarray as xr
from netCDF4 import Dataset, num2date, date2num
from datetime import datetime, timedelta
import os

import seaborn as sns
import cmocean as cmocn

%matplotlib inline

In [3]:
ds= xr.open_dataset('/srv/data2/srai_poseidon/srai_poseidon/observation/SatelliteVsBuoy/downloads/Buoy/extractedGZ2/WINDS/T_09N_140W_xrr_MatchUp.nc')
ds

In [4]:
list(ds.keys())

['wvc_index',
 'model_speed',
 'model_dir',
 'ice_prob',
 'ice_age',
 'wvc_quality_flag',
 'wind_speed',
 'wind_dir',
 'bs_distance',
 'UWND',
 'VWND',
 'WDIR',
 'WDIR_DM',
 'WDIR_QC',
 'WSPD',
 'WSPD_DM',
 'WSPD_QC',
 'SST',
 'SST_DM',
 'SST_QC',
 'AIRT',
 'AIRT_DM',
 'AIRT_QC',
 'RELH',
 'RELH_DM',
 'RELH_QC',
 'WSPD_10N',
 'WSPD_10',
 'TAO_TIME',
 'mean WSPD 20min',
 'std. dev. WSPD 20min',
 'mean WSPD_10N 20min',
 'std. dev. WSPD_10N 20min',
 'mean Ux_10N 20min',
 'std. dev. Ux_10N 20min',
 'mean Vy_10N 20min',
 'std. dev. Vy_10N 20min',
 'mean WDIR 20min',
 'std. dev. WDIR 20min',
 'mean WSPD 30min',
 'std. dev. WSPD 30min',
 'mean WSPD_10N 30min',
 'std. dev. WSPD_10N 30min',
 'mean Ux_10N 30min',
 'std. dev. Ux_10N 30min',
 'mean Vy_10N 30min',
 'std. dev. Vy_10N 30min',
 'mean WDIR 30min',
 'std. dev. WDIR 30min',
 'mean WSPD 40min',
 'std. dev. WSPD 40min',
 'mean WSPD_10N 40min',
 'std. dev. WSPD_10N 40min',
 'mean Ux_10N 40min',
 'std. dev. Ux_10N 40min',
 'mean Vy_10N 40min

In [15]:
def getSubDF(ds, LAT, LON):
    subds = xr.Dataset()

    taoTimeVar = ['TAO_TIME']
    
    windVars = ['WDIR',
         'WDIR_QC',
         'WDIR_DM',
         'WSPD',
         'WSPD_QC',
         'WSPD_DM',
         'UWND',
         'VWND']

    sstVars = ['SST',
             'SST_QC',
             'SST_DM']
    
    airtVars = ['AIRT',
             'AIRT_QC',
             'AIRT_DM']
    
    rhVars = ['RELH',
             'RELH_QC',
             'RELH_DM']
    
    u10Vars =['WSPD_10N',
             'WSPD_10']

    statVars = ['mean WSPD 20min',
                 'std. dev. WSPD 20min',
                 'mean WSPD_10N 20min',
                 'std. dev. WSPD_10N 20min',
                 'mean Ux_10N 20min',
                 'std. dev. Ux_10N 20min',
                 'mean Vy_10N 20min',
                 'std. dev. Vy_10N 20min',
                 'mean WDIR 20min',
                 'std. dev. WDIR 20min',
                 'mean WSPD 30min',
                 'std. dev. WSPD 30min',
                 'mean WSPD_10N 30min',
                 'std. dev. WSPD_10N 30min',
                 'mean Ux_10N 30min',
                 'std. dev. Ux_10N 30min',
                 'mean Vy_10N 30min',
                 'std. dev. Vy_10N 30min',
                 'mean WDIR 30min',
                 'std. dev. WDIR 30min',
                 'mean WSPD 40min',
                 'std. dev. WSPD 40min',
                 'mean WSPD_10N 40min',
                 'std. dev. WSPD_10N 40min',
                 'mean Ux_10N 40min',
                 'std. dev. Ux_10N 40min',
                 'mean Vy_10N 40min',
                 'std. dev. Vy_10N 40min',
                 'mean WDIR 40min',
                 'std. dev. WDIR 40min',
                 'mean WSPD 50min',
                 'std. dev. WSPD 50min',
                 'mean WSPD_10N 50min',
                 'std. dev. WSPD_10N 50min',
                 'mean Ux_10N 50min',
                 'std. dev. Ux_10N 50min',
                 'mean Vy_10N 50min',
                 'std. dev. Vy_10N 50min',
                 'mean WDIR 50min',
                 'std. dev. WDIR 50min',
                 'mean WSPD 60min',
                 'std. dev. WSPD 60min',
                 'mean WSPD_10N 60min',
                 'std. dev. WSPD_10N 60min',
                 'mean Ux_10N 60min',
                 'std. dev. Ux_10N 60min',
                 'mean Vy_10N 60min',
                 'std. dev. Vy_10N 60min',
                 'mean WDIR 60min',
                 'std. dev. WDIR 60min',
                 'mean WSPD 70min',
                 'std. dev. WSPD 70min',
                 'mean WSPD_10N 70min',
                 'std. dev. WSPD_10N 70min',
                 'mean Ux_10N 70min',
                 'std. dev. Ux_10N 70min',
                 'mean Vy_10N 70min',
                 'std. dev. Vy_10N 70min',
                 'mean WDIR 70min',
                 'std. dev. WDIR 70min',
                 'mean WSPD 80min',
                 'std. dev. WSPD 80min',
                 'mean WSPD_10N 80min',
                 'std. dev. WSPD_10N 80min',
                 'mean Ux_10N 80min',
                 'std. dev. Ux_10N 80min',
                 'mean Vy_10N 80min',
                 'std. dev. Vy_10N 80min',
                 'mean WDIR 80min',
                 'std. dev. WDIR 80min',
                 'mean WSPD 90min',
                 'std. dev. WSPD 90min',
                 'mean WSPD_10N 90min',
                 'std. dev. WSPD_10N 90min',
                 'mean Ux_10N 90min',
                 'std. dev. Ux_10N 90min',
                 'mean Vy_10N 90min',
                 'std. dev. Vy_10N 90min',
                 'mean WDIR 90min',
                 'std. dev. WDIR 90min',
                 'mean WSPD 100min',
                 'std. dev. WSPD 100min',
                 'mean WSPD_10N 100min',
                 'std. dev. WSPD_10N 100min',
                 'mean Ux_10N 100min',
                 'std. dev. Ux_10N 100min',
                 'mean Vy_10N 100min',
                 'std. dev. Vy_10N 100min',
                 'mean WDIR 100min',
                 'std. dev. WDIR 100min',
                 'mean WSPD 110min',
                 'std. dev. WSPD 110min',
                 'mean WSPD_10N 110min',
                 'std. dev. WSPD_10N 110min',
                 'mean Ux_10N 110min',
                 'std. dev. Ux_10N 110min',
                 'mean Vy_10N 110min',
                 'std. dev. Vy_10N 110min',
                 'mean WDIR 110min',
                 'std. dev. WDIR 110min',
                 'mean WSPD 120min',
                 'std. dev. WSPD 120min',
                 'mean WSPD_10N 120min',
                 'std. dev. WSPD_10N 120min',
                 'mean Ux_10N 120min',
                 'std. dev. Ux_10N 120min',
                 'mean Vy_10N 120min',
                 'std. dev. Vy_10N 120min',
                 'mean WDIR 120min',
                 'std. dev. WDIR 120min']
    
    
    satVars = ['wvc_index',
             'model_speed',
             'model_dir',
             'ice_prob',
             'ice_age',
             'wvc_quality_flag',
             'wind_speed',
             'wind_dir',
             'bs_distance']

     
    
    for windVar in windVars:
        subds[windVar] = ds[windVar].sel(HEIGHT=4)
    subds.drop_vars('HEIGHT')
    for sstVar in sstVars:
        subds[sstVar] = ds[sstVar].sel(DEPTH=1)
    subds.drop_vars('DEPTH')
    for rhVar in rhVars:
        subds[rhVar] = ds[rhVar].sel(HEIGHT=3)
    subds.drop_vars('HEIGHT')
    for airtVar in airtVars:
        subds[airtVar] = ds[airtVar].sel(HEIGHT=3)
    subds.drop_vars('HEIGHT')
    for u10Var in u10Vars:
        subds[u10Var] = ds[u10Var].sel(HEIGHT=10)
    subds.drop_vars('HEIGHT')

    for statVar in statVars:
        subds[statVar] = ds[statVar]

    subds['LATITUDE'] = xr.DataArray(LAT * np.ones(len(subds['AS_TIME']), dtype = float), dims=['AS_TIME'])
    subds['LONGITUDE'] = xr.DataArray(LON * np.ones(len(subds['AS_TIME']), dtype = float), dims=['AS_TIME'])
    
    subds['TAO_TIME'] = ds['TAO_TIME']

    deployFileName = f'../../downloads/Buoy/extractedGZ2/WINDS/T_{lat:02d}{latUnits}_{lon:03d}{lonUnits}/T_{lat:02d}{latUnits}_{lon:03d}{lonUnits}_DeploymentDates.nc'
    ds2 = xr.open_dataset(deployFileName)
    startDates = np.array(ds2['startDate'])
    endDates = np.array(ds2['endDate'])

    depNum = np.zeros((len(subds['AS_TIME'])), dtype=int)
    c = 1
    for i in range(1,len(startDates)):
        mask = subds['AS_TIME'] >= startDates[i]
        mask *= subds['AS_TIME'] <= endDates[i]
        depNum[mask] = c
        c = c+1

    subds['Deployment index'] = xr.DataArray(depNum, dims=['AS_TIME'])
    
    
    satDS = ds[satVars]
    satDS = satDS.isel(NUMCELLS=0)

    
    
    returnDS = xr.merge((subds, satDS))

    
    
    return returnDS

In [16]:
latList = [-9, -8, -5, -2, 0, 2, 5, 8, 9]
lonList = [-95, -110, -125, -140, -155, -170, -180, 165]

ylen = len(latList)
xlen = len(lonList)

taskList = []

for latId  in range(ylen):
    for lonId in range(xlen):
        taskList.append([latList[latId], lonList[lonId]])

ntasks = len(taskList)


allDS = xr.Dataset()
dataCount = 0
fileCount = 0
for task in taskList:
    lat = task[0]
    lon = task[1]

    LAT = lat
    LON = lon

    if lat < 0:
        latUnits = 'S'
    else:
        latUnits = 'N'

    if lon < 0:
        lonUnits = 'W'
    else:
        lonUnits = 'E'

    LON = (LON+360)%360
    lat=abs(lat)
    lon=abs(lon)

    matchFname = f'../../downloads/Buoy/extractedGZ2/WINDS/T_{lat:02d}{latUnits}_{lon:03d}{lonUnits}_xrr_MatchUp.nc'
    #deployFileName = f'../../downloads/Buoy/extractedGZ/WINDS/T_{lat:02d}{latUnits}_{lon:03d}{lonUnits}/T_{lat:02d}{latUnits}_{lon:03d}{lonUnits}_DeploymentDates.nc'
    
    if os.path.isfile(matchFname):
        print(f'T_{lat:02d}{latUnits}_{lon:03d}{lonUnits}_xrr_MatchUp_2000.nc')
        rds = xr.open_dataset(matchFname)
        ds = getSubDF(rds, LAT, LON)
        AS_TIME = ds['AS_TIME'].to_numpy()
        ds = ds.rename({'AS_TIME':'count'})
        dataLen = len(ds['count'])
        print(dataCount, dataCount+dataLen)
        ds['count'] = xr.DataArray(pd.Series(np.arange(dataCount, dataCount+dataLen)), 
                                   dims = ['count'])
        ds['AS_TIME'] = xr.DataArray(AS_TIME, 
                                   dims = ['count'])
        dataCount += dataLen
        if fileCount == 0:
            allDS = ds
        else:
            allDS = xr.concat((allDS, ds), dim='count')
        fileCount += 1
        

T_08S_095W_xrr_MatchUp_2000.nc


ValueError: Dimensions {'NUM_CELLS'} do not exist. Expected one or more of Frozen({'AS_TIME': 1261, 'NUMCELLS': 3})

In [14]:
allDS = allDS.drop_vars(['HEIGHT','DEPTH','cross_track','ambiguities'])

ValueError: These variables cannot be found in this dataset: ['ambiguities', 'HEIGHT', 'DEPTH', 'cross_track']

In [7]:
allDS

In [8]:
extended_flag_meaning = ['rain_correction_not_applied_flag',
'correction_produced_negative_spd_flag',
'all_ambiguities_contribute_to_nudging_flag',
'large_rain_correction_flag',
'coastal_processing_applied_flag',
'lake_winds_flag',
'rain_nearby_flag',
'ice_nearby_flag',
'significant_rain_correction_flag',
'rain_correction_applied_flag',
'wind_retrieval_possibly_corrupted_flag']

extended_flags = np.array(allDS['eflags'].to_numpy(), dtype=int)
eflags = np.array(['{:016b}'.format(val) for val in extended_flags])
rainCorrectionNotApplied = np.array([val[15] == '0' for val in eflags])
rainCorrectionApplied = np.array([val[4] == '1' for val in eflags])
significantRainCorrectionApplied = np.array([val[5] == '1' for val in eflags])
largeRainCorrectionApplied = np.array([val[12] == '1' for val in eflags])
rainMask = np.logical_or(rainCorrectionApplied, significantRainCorrectionApplied)
rainMask = np.logical_or(rainMask, largeRainCorrectionApplied)
rainMask = np.logical_or(rainMask, rainCorrectionNotApplied)

In [9]:
np.sum(largeRainCorrectionApplied), np.sum(significantRainCorrectionApplied), np.sum(rainCorrectionApplied)

(7545, 7565, 7567)

In [10]:
centerData = allDS.sel(TAO_TIME_INDEX = 0)
centerData = centerData.drop('TAO_TIME_INDEX')
indices = list(np.nonzero(~rainMask)[0])

goodData = centerData.isel(count=~rainMask)
goodData = goodData.isel(count = goodData["WSPD_QC"].isin([1,2]))
goodData = goodData.isel(count = goodData["WDIR_QC"].isin([1,2]))
goodData = goodData.isel(count = goodData["SST_QC"].isin([1,2]))
goodData = goodData.isel(count = goodData["RELH_QC"].isin([1,2]))
goodData = goodData.isel(count = goodData["AIRT_QC"].isin([1,2]))


print(len(goodData['count']))




156953


In [11]:
mask = np.logical_or(goodData['WSPD'] > 1000, goodData['WDIR'] > 3600)
#mask = np.logical_or(mask, abs(goodData['WSPD_10N'].to_numpy()) < 0.1)
mask = np.logical_or(mask, np.isnan(goodData['WSPD'].to_numpy()))
mask = np.logical_or(mask, np.isnan(goodData['WDIR'].to_numpy()))
mask = np.logical_or(mask, np.isnan(goodData['retrieved_wind_speed'].to_numpy()))
mask = np.logical_or(mask, np.isnan(goodData['retrieved_wind_direction'].to_numpy()))
print(np.sum(mask))
df = goodData.isel(count = ~mask)
#df = df.to_dataframe()

<xarray.DataArray 'WSPD' ()>
array(8226)


In [12]:
df

In [13]:
df = df.rename_vars({'WSPD':'Wind Speed (TAO)',
                  'WDIR':'Wind Direction (TAO)',
                  'UWND': 'Zonal Wind Speed (TAO)',
                  'VWND': 'Meridional Wind Speed (TAO)',
                  'SST': 'Sea Surface Temperature (TAO)',
                  'RELH': 'Relative Humidity (TAO)',
                  'AIRT': 'Air Temperature (TAO)',
                  'WSPD_10N': 'Neutral Wind Speed at 10m (TAO)',
                  'WSPD_10': 'Wind Speed at 10m (TAO)',
                  'retrieved_wind_speed': 'Wind Speed (QuikSCAT)',
                  'retrieved_wind_direction': 'Wind Direction (QuikSCAT)'})

In [14]:
df['Speed Difference (QuikSCAT - TAO)'] = df['Wind Speed (QuikSCAT)'] - df['Neutral Wind Speed at 10m (TAO)']

## change true degrees to degrees where 0 deg mean east and 90 deg mean north 
df['Wind Direction (TAO)'] = (-(df['Wind Direction (TAO)'] - 90.0) + 360)%360
df['Wind Direction (QuikSCAT)'] = (-(df['Wind Direction (QuikSCAT)'] - 90.0) + 360)%360

## calculate zonal and meridional winds 
df['Zonal Neutral Wind Speed (TAO)'] = df['Neutral Wind Speed at 10m (TAO)'] * np.cos(np.deg2rad(df['Wind Direction (TAO)']))
df['Meridional Neutral Wind Speed (TAO)'] = df['Neutral Wind Speed at 10m (TAO)'] * np.sin(np.deg2rad(df['Wind Direction (TAO)']))

df['Zonal Wind Speed (QuikSCAT)'] = df['Wind Speed (QuikSCAT)'] * np.cos(np.deg2rad(df['Wind Direction (QuikSCAT)']))
df['Meridional Wind Speed (QuikSCAT)'] = df['Wind Speed (QuikSCAT)'] * np.sin(np.deg2rad(df['Wind Direction (QuikSCAT)']))

## direction diff in range (-180,180)
dirDiff = ((df['Wind Direction (QuikSCAT)'] - df['Wind Direction (TAO)'])+360)%360
dirDiff = xr.where(dirDiff > 180, dirDiff-360, dirDiff)
df['Direction Difference (QuikSCAT - TAO)'] = dirDiff

df['Zonal Wind Speed Difference (QuikSCAT - TAO)'] = df['Zonal Wind Speed (QuikSCAT)'] - df['Zonal Neutral Wind Speed (TAO)']
df['Meridional Wind Speed Difference (QuikSCAT - TAO)'] = df['Meridional Wind Speed (QuikSCAT)'] - df['Meridional Neutral Wind Speed (TAO)']

for timeWindow in range(20,130,10):
    df[f'Speed Difference (QuikSCAT - TAO {timeWindow} min mean)'] = df['Wind Speed (QuikSCAT)'] - df[f'mean WSPD_10N {timeWindow}min']

    ## change true degrees to degrees where 0 deg mean east and 90 deg mean north 
    df[f'mean WDIR {timeWindow}min'] = (-(df[f'mean WDIR {timeWindow}min'] - 90.0) + 360)%360

    ## direction diff in range (-180,180)
    dirDiff = ((df['Wind Direction (QuikSCAT)'] - df[f'mean WDIR {timeWindow}min'])+360)%360
    dirDiff = xr.where(dirDiff > 180, dirDiff-360, dirDiff)
    df[f'Direction Difference (QuikSCAT - TAO {timeWindow} min mean)'] = dirDiff

    ### 
    df[f'Zonal Wind Speed Difference (QuikSCAT - TAO {timeWindow} min mean)'] = df['Zonal Wind Speed (QuikSCAT)'] - df[f'mean Ux_10N {timeWindow}min']
    df[f'Meridional Wind Speed Difference (QuikSCAT - TAO {timeWindow} min mean)'] = df['Zonal Wind Speed (QuikSCAT)'] - df[f'mean Vy_10N {timeWindow}min']

In [15]:
df

In [16]:
df.to_netcdf('rainFlagRemovedBuoyDataBadQualityRemovedMatchup.nc')

In [None]:
len(df['count'])

In [None]:
selAllDS = allDS.sel(count=df['count'])

In [None]:
selAllDS = selAllDS.rename_vars({'WSPD':'Wind Speed (TAO)',
                  'WDIR':'Wind Direction (TAO)',
                  'UWND': 'Zonal Wind Speed (TAO)',
                  'VWND': 'Meridional Wind Speed (TAO)',
                  'SST': 'Sea Surface Temperature (TAO)',
                  'RELH': 'Relative Humidity (TAO)',
                  'AIRT': 'Air Temperature (TAO)',
                  'WSPD_10N': 'Neutral Wind Speed at 10m (TAO)',
                  'WSPD_10': 'Wind Speed at 10m (TAO)',
                  'retrieved_wind_speed': 'Wind Speed (QuikSCAT)',
                  'retrieved_wind_direction': 'Wind Direction (QuikSCAT)'})

In [None]:
selAllDS['Speed Difference (QuikSCAT - TAO)'] = selAllDS['Wind Speed (QuikSCAT)'] - selAllDS['Wind Speed (TAO)']

selAllDS['Wind Direction (TAO)'] = (-(selAllDS['Wind Direction (TAO)'] - 90.0) + 360)%360
selAllDS['Wind Direction (QuikSCAT)'] = (-(selAllDS['Wind Direction (QuikSCAT)'] - 90.0) + 360)%360

selAllDS['Direction Difference (QuikSCAT - TAO 30 min mean)'] = selAllDS['Wind Direction (QuikSCAT)'] - selAllDS['mean WSPD 30min']
selAllDS['Direction Difference (QuikSCAT - TAO 30 min mean)'] = selAllDS['Wind Direction (QuikSCAT)'] - selAllDS['mean WDIR 30min']

selAllDS['Direction Difference (QuikSCAT - TAO 1 hr mean)'] = selAllDS['Wind Direction (QuikSCAT)'] - selAllDS['mean WSPD 60min']
selAllDS['Direction Difference (QuikSCAT - TAO 1 hr mean)'] = selAllDS['Wind Direction (QuikSCAT)'] - selAllDS['mean WDIR 60min']

selAllDS['Direction Difference (QuikSCAT - TAO 2 hr mean)'] = selAllDS['Wind Direction (QuikSCAT)'] - selAllDS['mean WSPD 120min']
selAllDS['Direction Difference (QuikSCAT - TAO 2 hr mean)'] = selAllDS['Wind Direction (QuikSCAT)'] - selAllDS['mean WDIR 120min']


selAllDS['Zonal Neutral Wind Speed at 10m (TAO)'] = selAllDS['Neutral Wind Speed at 10m (TAO)']*np.cos(np.deg2rad(selAllDS['Wind Direction (TAO)']))
selAllDS['Meridional Neutral Wind Speed at 10m (TAO)'] = selAllDS['Neutral Wind Speed at 10m (TAO)']*np.sin(np.deg2rad(selAllDS['Wind Direction (TAO)']))

selAllDS['Zonal Neutral Wind Speed at 10m (QuikSCAT)'] = selAllDS['Wind Speed (QuikSCAT)']*np.cos(np.deg2rad(selAllDS['Wind Direction (QuikSCAT)']))
selAllDS['Meridional Neutral Wind Speed at 10m (QuikSCAT)'] = selAllDS['Wind Speed (QuikSCAT)']*np.sin(np.deg2rad(selAllDS['Wind Direction (QuikSCAT)']))

selAllDS['Zonal Wind Speed Difference (QuikSCAT - TAO)'] = selAllDS['Zonal Neutral Wind Speed at 10m (QuikSCAT)'] - selAllDS['Zonal Neutral Wind Speed at 10m (TAO)']
selAllDS['Meridional Wind Speed Difference (QuikSCAT - TAO)'] = selAllDS['Meridional Neutral Wind Speed at 10m (QuikSCAT)'] - selAllDS['Meridional Neutral Wind Speed at 10m (TAO)']


In [None]:
selAllDS