In [1]:
from sklearn.cluster import KMeans
import numpy as np
import matplotlib.pyplot as plt

from matplotlib import cm
import matplotlib.cbook as cbook
import matplotlib.colors as colors

import pandas as pd
import xarray as xr
from netCDF4 import Dataset, num2date, date2num
from datetime import datetime, timedelta
import os

import seaborn as sns
import cmocean as cmocn

%matplotlib inline

In [2]:
# Open a single match-up file (buoy 09N 140W) so its structure can be inspected.
# NOTE(review): this is an absolute, machine-specific path, whereas the loading
# loop further down builds relative '../../downloads/...' paths — consider unifying.
fname = '/srv/data2/srai_poseidon/srai_poseidon/observation/SatelliteVsBuoy/downloads/CCMP/atBuoyLocs/CCMP_and_Buoy_09N_140W_xrr_MatchUp_120_mins_2000.nc'
testDs = xr.open_dataset(fname)

In [3]:
# Display the summary (dimensions, coordinates, variables) of the sample file.
testDs

In [8]:
# Names of the statistics variables that getSubDF copies unchanged from each
# match-up file: one (mean, std) pair per base quantity, in that order.
statVars = [
    f'{stat}_{base}'
    for base in (
        'WSPD',
        'WSPD_10N',
        'U10N_x',
        'U10N_y',
        'cosWDIR',
        'sinWDIR',
        'SST',
        'AIRT',
        'RELH',
        'SST - AIRT',
    )
    for stat in ('mean', 'std')
]

In [26]:
def getSubDF(ds, LAT, LON):
    """Build the analysis dataset for one buoy from its CCMP/buoy match-up file.

    Despite the name, this returns an ``xarray.Dataset`` (not a DataFrame).

    The result contains, all along the ``CC_TIME`` dimension:

    * the buoy wind, SST, relative-humidity, air-temperature and 10 m wind
      variables, each selected at one fixed ``HEIGHT`` / ``DEPTH`` level,
    * ``'SST - AIRT'`` and every variable listed in the notebook-level
      ``statVars`` list, copied unchanged,
    * constant ``LATITUDE`` / ``LONGITUDE`` variables filled with ``LAT`` / ``LON``,
    * ``TAO_TIME``,
    * ``'Deployment index'``, derived from the buoy's deployment-dates file,
    * the satellite variables ``uwnd``, ``vwnd``, ``ws`` and ``nobs``.

    The ``HEIGHT`` / ``DEPTH`` scalar coordinates produced by the level
    selection are left on the result; a later notebook cell drops them.

    Parameters
    ----------
    ds : xarray.Dataset
        Match-up dataset as opened from a ``CCMP_and_Buoy_*`` file.
    LAT : int
        Signed buoy latitude in degrees (negative = south).
    LON : int
        Buoy longitude in degrees; either signed or in the 0-360 range.
        A longitude of exactly 180 is treated as ``180W``.

    Returns
    -------
    xarray.Dataset
        Merge of the buoy subset and the satellite variables.

    Notes
    -----
    The deployment-dates file name is now derived from ``LAT`` / ``LON``
    instead of from the notebook-level ``lat`` / ``lon`` / ``latUnits`` /
    ``lonUnits`` loop variables, so the function no longer depends on
    whatever those names happen to hold when it is called.

    NOTE(review): the recorded run of the loading loop stops with
    ``KeyError: "'HEIGHT' is not a valid dimension or coordinate"`` on one
    file; presumably a variable in that file has no ``HEIGHT`` dimension.
    That behavior is left unchanged here — confirm how such files should
    be handled.
    """
    # (variable names, level dimension, level value), in the original copy order.
    levelGroups = [
        (['WDIR', 'WDIR_QC', 'WDIR_DM',
          'WSPD', 'WSPD_QC', 'WSPD_DM',
          'UWND', 'VWND'], 'HEIGHT', 4),
        (['SST', 'SST_QC', 'SST_DM'], 'DEPTH', 1),
        (['RELH', 'RELH_QC', 'RELH_DM'], 'HEIGHT', 3),
        (['AIRT', 'AIRT_QC', 'AIRT_DM'], 'HEIGHT', 3),
        (['WSPD_10N', 'WSPD_10'], 'HEIGHT', 10),
    ]
    otherVars = ['SST - AIRT']
    satVars = ['uwnd', 'vwnd', 'ws', 'nobs']

    subds = xr.Dataset()
    for names, levelDim, level in levelGroups:
        for name in names:
            subds[name] = ds[name].sel({levelDim: level})
    # The original called subds.drop_vars('HEIGHT' / 'DEPTH') after each group
    # but discarded the returned dataset, so those calls had no effect and
    # have been removed.

    # Variables copied as they are (statVars comes from an earlier cell).
    for name in otherVars + list(statVars):
        subds[name] = ds[name]

    nTimes = len(subds['CC_TIME'])
    subds['LATITUDE'] = xr.DataArray(np.full(nTimes, LAT, dtype=float), dims=['CC_TIME'])
    subds['LONGITUDE'] = xr.DataArray(np.full(nTimes, LON, dtype=float), dims=['CC_TIME'])

    subds['TAO_TIME'] = ds['TAO_TIME']

    # Hemisphere tags and unsigned degrees for the deployment file name,
    # computed from the arguments (local names shadow any notebook globals).
    latUnits = 'S' if LAT < 0 else 'N'
    lat = int(round(abs(LAT)))
    lonWrapped = LON % 360
    if lonWrapped >= 180:
        lonUnits = 'W'
        lon = int(round(360 - lonWrapped))
    else:
        lonUnits = 'E'
        lon = int(round(lonWrapped))

    deployFileName = f'../../downloads/Buoy/extractedGZ/WINDS/T_{lat:02d}{latUnits}_{lon:03d}{lonUnits}/T_{lat:02d}{latUnits}_{lon:03d}{lonUnits}_DeploymentDates.nc'
    # Read the dates eagerly and close the file right away.
    with xr.open_dataset(deployFileName) as ds2:
        startDates = np.array(ds2['startDate'])
        endDates = np.array(ds2['endDate'])

    # Tag each time stamp with the number of the deployment that contains it.
    # NOTE(review): the loop starts at 1, so deployment 0 is never written and
    # its rows keep the initial value 0 — the same value as rows that fall
    # outside every deployment. Kept as in the original; confirm this is intended.
    ccTime = subds['CC_TIME']
    depNum = np.zeros(nTimes, dtype=int)
    for i in range(1, len(startDates)):
        inDeployment = ((ccTime >= startDates[i]) & (ccTime <= endDates[i])).to_numpy()
        depNum[inDeployment] = i

    subds['Deployment index'] = xr.DataArray(depNum, dims=['CC_TIME'])

    return xr.merge((subds, ds[satVars]))

In [27]:
# Buoy grid: every latitude is paired with every longitude
# (signed degrees; negative means south / west).
latList = [-9, -8, -5, -2, 0, 2, 5, 8, 9]
lonList = [-95, -110, -125, -140, -155, -170, -180, 165]

ylen = len(latList)
xlen = len(lonList)

# One [lat, lon] task per grid point, latitude-major.
taskList = [[gridLat, gridLon] for gridLat in latList for gridLon in lonList]
ntasks = len(taskList)

allDS = xr.Dataset()   # stays empty when no match-up file is found
perFileDS = []         # per-buoy datasets, concatenated once after the loop
dataCount = 0          # running row offset used for the global 'count' index
fileCount = 0
for task in taskList:
    # lat / lon / latUnits / lonUnits are deliberately kept as notebook-level
    # names (not hidden inside a helper) so that any code reading them from
    # the notebook namespace keeps working.
    lat, lon = task

    # Signed latitude and 0-360 longitude handed to getSubDF.
    LAT = lat
    LON = (lon + 360) % 360

    latUnits = 'S' if lat < 0 else 'N'
    lonUnits = 'W' if lon < 0 else 'E'
    lat = abs(lat)
    lon = abs(lon)

    matchFname = f'../../downloads/CCMP/atBuoyLocs/CCMP_and_Buoy_{lat:02d}{latUnits}_{lon:03d}{lonUnits}_xrr_MatchUp_120_mins_2000.nc'

    # Grid points without a match-up file are skipped silently.
    if not os.path.isfile(matchFname):
        continue

    print(f'T_{lat:02d}{latUnits}_{lon:03d}{lonUnits}_xrr_MatchUp_2000.nc')
    # rds is intentionally left open: the variables returned by getSubDF may
    # still be lazily backed by this file.
    rds = xr.open_dataset(matchFname)
    ds = getSubDF(rds, LAT, LON)

    # Replace the per-file time axis by a global running index 'count' and
    # keep the original time stamps as an ordinary variable.
    CC_TIME = ds['CC_TIME'].to_numpy()
    ds = ds.rename({'CC_TIME':'count'})
    dataLen = len(ds['count'])
    print(dataCount, dataCount+dataLen)
    ds['count'] = xr.DataArray(pd.Series(np.arange(dataCount, dataCount+dataLen)), 
                               dims = ['count'])
    ds['CC_TIME'] = xr.DataArray(CC_TIME, 
                               dims = ['count'])
    dataCount += dataLen

    perFileDS.append(ds)
    fileCount += 1

# A single concatenation instead of one per file: concatenating inside the
# loop copies the accumulated data again for every new file.
if len(perFileDS) == 1:
    allDS = perFileDS[0]
elif perFileDS:
    allDS = xr.concat(perFileDS, dim='count')


T_08S_095W_xrr_MatchUp_2000.nc


KeyError: "'HEIGHT' is not a valid dimension or coordinate"

In [11]:
# Drop the HEIGHT / DEPTH entries left over from the fixed-level selection.
# errors='ignore' keeps the cell re-runnable: without it a second run raises
# because the variables have already been removed.
allDS = allDS.drop_vars(['HEIGHT','DEPTH'], errors='ignore')

In [12]:
# Short variable names from the match-up files -> descriptive display names.
# NOTE(review): goodDataSet is not read by the later cells visible here; they
# start again from allDS.
displayNames = {
    'WSPD': 'Wind Speed (TAO)',
    'WDIR': 'Wind Direction (TAO)',
    'UWND': 'Zonal Wind Speed (TAO)',
    'VWND': 'Meridional Wind Speed (TAO)',
    'SST': 'Sea Surface Temperature (TAO)',
    'RELH': 'Relative Humidity (TAO)',
    'AIRT': 'Air Temperature (TAO)',
    'WSPD_10N': 'Neutral Wind Speed at 10m (TAO)',
    'WSPD_10': 'Wind Speed at 10m (TAO)',
    'uwnd': 'Zonal Wind Speed (CCMP)',
    'vwnd': 'Meridional Wind Speed (CCMP)',
    'ws': 'Wind Speed (CCMP)',
}
goodDataSet = allDS.rename_vars(displayNames)

In [14]:
# Keep only the rows whose quality flag is 1 or 2 for every buoy quantity,
# applying the filters one after another.
# (Presumably 1 and 2 are the "good" flag values — confirm against the buoy
# QC convention.)
goodData = allDS
for qcVar in ("WSPD_QC", "WDIR_QC", "SST_QC", "RELH_QC", "AIRT_QC"):
    goodData = goodData.isel(count = goodData[qcVar].isin([1,2]))

# Number of rows that survive all five filters.
print(len(goodData['count']))



283881


In [15]:
# A row is rejected when any of the buoy / CCMP wind fields is NaN ...
nanChecked = ('WSPD', 'WDIR', 'ws', 'uwnd', 'vwnd')
hasNan = np.logical_or.reduce(
    [np.isnan(goodData[name].to_numpy()) for name in nanChecked]
)

# ... or when the buoy speed / direction exceeds the thresholds below.
# The DataArray operand goes first so that mask ends up as a DataArray
# carrying the 'WSPD' name, as in the printed count.
# NOTE(review): 3600 for a direction looks like it may have been meant as
# 360 — confirm the intended threshold.
mask = np.logical_or(goodData['WSPD'] > 1000, hasNan)
mask = np.logical_or(mask, goodData['WDIR'] > 3600)

# Number of rejected rows, then the dataset without them.
# Note: df is an xarray Dataset here, not a pandas DataFrame.
print(np.sum(mask))
df = goodData.isel(count = ~mask)

<xarray.DataArray 'WSPD' ()>
array(108)


In [16]:
# Display the rejection mask (True marks a row that was removed from df).
mask

In [17]:
# Display the filtered data; despite its name, df is an xarray Dataset
# (the result of goodData.isel(...)).
df

In [19]:
# Give the filtered variables descriptive names; the cells below refer to
# the data by these long names.
longNames = {
    'WSPD': 'Wind Speed (TAO)',
    'WDIR': 'Wind Direction (TAO)',
    'UWND': 'Zonal Wind Speed (TAO)',
    'VWND': 'Meridional Wind Speed (TAO)',
    'SST': 'Sea Surface Temperature (TAO)',
    'RELH': 'Relative Humidity (TAO)',
    'AIRT': 'Air Temperature (TAO)',
    'WSPD_10N': 'Neutral Wind Speed at 10m (TAO)',
    'WSPD_10': 'Wind Speed at 10m (TAO)',
    'uwnd': 'Zonal Wind Speed (CCMP)',
    'vwnd': 'Meridional Wind Speed (CCMP)',
    'ws': 'Wind Speed (CCMP)',
}
df = df.rename_vars(longNames)

In [20]:
# Inputs that are used several times below.
ccmpU = df['Zonal Wind Speed (CCMP)']
ccmpV = df['Meridional Wind Speed (CCMP)']
taoSpeed10N = df['Neutral Wind Speed at 10m (TAO)']
taoDir = df['Wind Direction (TAO)']

# CCMP direction in degrees within [0, 360), from arctan2(v, u)
# (0 deg = east, 90 deg = north).
df['Wind Direction (CCMP)'] = np.rad2deg(np.arctan2(ccmpV, ccmpU)) % 360
df['Speed Difference (CCMP - TAO)'] = df['Wind Speed (CCMP)'] - taoSpeed10N

# The TAO direction is used as it is: the conversion from true degrees to
# "0 deg = east, 90 deg = north" is noted as already done upstream. For
# reference, that conversion would be:
# df['Wind Direction (TAO)'] = (-(df['Wind Direction (TAO)'] - 90.0) + 360)%360

# Zonal and meridional components of the TAO neutral 10 m wind.
taoDirRad = np.deg2rad(taoDir)
df['Zonal Neutral Wind Speed (TAO)'] = taoSpeed10N * np.cos(taoDirRad)
df['Meridional Neutral Wind Speed (TAO)'] = taoSpeed10N * np.sin(taoDirRad)

# Direction difference wrapped into (-180, 180].
dirDiff = ((df['Wind Direction (CCMP)'] - taoDir) + 360) % 360
dirDiff = xr.where(dirDiff > 180, dirDiff - 360, dirDiff)
df['Direction Difference (CCMP - TAO)'] = dirDiff

# Component-wise differences between CCMP and the TAO neutral wind.
df['Zonal Wind Speed Difference (CCMP - TAO)'] = ccmpU - df['Zonal Neutral Wind Speed (TAO)']
df['Meridional Wind Speed Difference (CCMP - TAO)'] = ccmpV - df['Meridional Neutral Wind Speed (TAO)']

# for timeWindow in range(20,130,10):
#     df[f'Speed Difference (CCMP - TAO {timeWindow} min mean)'] = df['Wind Speed (CCMP)'] - df[f'mean WSPD_10N {timeWindow}min']

#     ## change true degrees to degrees where 0 deg mean east and 90 deg mean north 
#     df[f'mean WDIR {timeWindow}min'] = (-(df[f'mean WDIR {timeWindow}min'] - 90.0) + 360)%360

#     ## direction diff in range (-180,180)
#     dirDiff = ((df['Wind Direction (CCMP)'] - df[f'mean WDIR {timeWindow}min'])+360)%360
#     dirDiff = xr.where(dirDiff > 180, dirDiff-360, dirDiff)
#     df[f'Direction Difference (CCMP - TAO {timeWindow} min mean)'] = dirDiff

#     ### 
#     df[f'Zonal Wind Speed Difference (CCMP - TAO {timeWindow} min mean)'] = df['Zonal Wind Speed (CCMP)'] - df[f'mean Ux_10N {timeWindow}min']
#     df[f'Meridional Wind Speed Difference (CCMP - TAO {timeWindow} min mean)'] = df['Zonal Wind Speed (CCMP)'] - df[f'mean Vy_10N {timeWindow}min']

In [22]:
# Write the filtered match-up data, including the derived difference
# variables, to a NetCDF file in the current working directory.
df.to_netcdf('CCMPandBuoyDataBadQualityRemovedMatchup.nc')