In [2]:
from sklearn.cluster import KMeans
import numpy as np
import matplotlib.pyplot as plt

from matplotlib import cm
import matplotlib.cbook as cbook
import matplotlib.colors as colors

import pandas as pd
import xarray as xr
from netCDF4 import Dataset, num2date, date2num
from datetime import datetime, timedelta
import os

import seaborn as sns
#import cmocean as cmocn

%matplotlib inline

In [3]:
# Open one match-up file (buoy position 09N 140W) to inspect its contents.
samplePath = '/srv/data2/srai_poseidon/srai_poseidon/observation/SatelliteVsBuoy/downloads/TRMM_data/TRMM_byTAOpos/T_09N_140W_xrr_MatchUp_TRMM_2000.nc'
ds = xr.open_dataset(samplePath)
ds

In [4]:
# List the variable names stored in the sample match-up file opened above.
list(ds.keys())

['nearSurfRain',
 'e_SurfRain',
 'rainAve',
 'rainFlag',
 'rainType',
 'rain',
 'Latitude',
 'Longitude',
 'WDIR',
 'WDIR_QC',
 'WDIR_DM',
 'WSPD',
 'WSPD_QC',
 'WSPD_DM',
 'UWND',
 'VWND',
 'SST',
 'SST_QC',
 'SST_DM',
 'AIRT',
 'AIRT_QC',
 'AIRT_DM',
 'RELH',
 'RELH_QC',
 'RELH_DM',
 'WSPD_10N',
 'WSPD_10',
 'TAO_TIME',
 'mean WSPD 20min',
 'std. dev. WSPD 20min',
 'mean WSPD_10N 20min',
 'std. dev. WSPD_10N 20min',
 'mean Ux_10N 20min',
 'std. dev. Ux_10N 20min',
 'mean Vy_10N 20min',
 'std. dev. Vy_10N 20min',
 'mean WDIR 20min',
 'std. dev. WDIR 20min',
 'mean cosWDIR 20min',
 'std. dev. cosWDIR 20min',
 'mean sinWDIR 20min',
 'std. dev. sinWDIR 20min',
 'mean WSPD 30min',
 'std. dev. WSPD 30min',
 'mean WSPD_10N 30min',
 'std. dev. WSPD_10N 30min',
 'mean Ux_10N 30min',
 'std. dev. Ux_10N 30min',
 'mean Vy_10N 30min',
 'std. dev. Vy_10N 30min',
 'mean WDIR 30min',
 'std. dev. WDIR 30min',
 'mean cosWDIR 30min',
 'std. dev. cosWDIR 30min',
 'mean sinWDIR 30min',
 'std. dev. sinWDI

In [5]:
# Names of the windowed buoy statistics: for every averaging window
# (20, 30, ..., 110 minutes) and every quantity there is a 'mean ...' and a
# 'std. dev. ...' variable, in that order.
statQuantities = ('WSPD', 'WSPD_10N', 'Ux_10N', 'Vy_10N', 'WDIR', 'cosWDIR', 'sinWDIR')

statVars = []
for timeWindow in range(20, 120, 10):
    statVarList = [f'{stat} {quantity} {timeWindow}min'
                   for quantity in statQuantities
                   for stat in ('mean', 'std. dev.')]
    statVars.extend(statVarList)

In [9]:
def getSubDF(ds, LAT, LON):
    """Reduce one buoy/TRMM match-up file to the variables used in this notebook.

    Parameters
    ----------
    ds : xarray.Dataset
        Match-up dataset of a single buoy position. It must provide the buoy
        variables selected below (with HEIGHT / DEPTH levels), the windowed
        statistics for the 20..110 minute windows, ``TAO_TIME``, the
        ``TRMM_TIME`` dimension and the satellite variables ``nearSurfRain``
        and ``e_SurfRain`` (with an ``nray`` dimension).
    LAT : int or float
        Signed buoy latitude in degrees (negative = south).
    LON : int or float
        Buoy longitude in degrees, either signed (negative = west) or wrapped
        to [0, 360).

    Returns
    -------
    xarray.Dataset
        Merge of the selected buoy variables, the constant ``LATITUDE`` /
        ``LONGITUDE`` fields, ``TAO_TIME``, a ``Deployment index`` per
        ``TRMM_TIME`` sample and the satellite variables at ``nray=0``.

    Notes
    -----
    The deployment-dates file is located from ``LAT`` / ``LON`` alone, so the
    function no longer depends on module-level ``lat`` / ``lon`` /
    ``latUnits`` / ``lonUnits`` being set by the caller.
    """
    subds = xr.Dataset()

    # (variable names, vertical coordinate, level to select), in the order in
    # which the variables are added to the result.
    levelGroups = [
        (['WDIR', 'WDIR_QC', 'WDIR_DM',
          'WSPD', 'WSPD_QC', 'WSPD_DM',
          'UWND', 'VWND'], 'HEIGHT', 4),
        (['SST', 'SST_QC', 'SST_DM'], 'DEPTH', 1),
        (['RELH', 'RELH_QC', 'RELH_DM'], 'HEIGHT', 3),
        (['AIRT', 'AIRT_QC', 'AIRT_DM'], 'HEIGHT', 3),
        (['WSPD_10N', 'WSPD_10'], 'HEIGHT', 10),
    ]
    # NOTE: the scalar HEIGHT / DEPTH coordinates produced by .sel() stay on
    # the result. drop_vars() returns a new object, so calling it without
    # keeping the return value (as was done here before) never removed them.
    for varNames, levelDim, level in levelGroups:
        for varName in varNames:
            subds[varName] = ds[varName].sel({levelDim: level})

    # Windowed statistics: mean and standard deviation of every quantity for
    # the 20, 30, ..., 110 minute windows.
    statQuantities = ('WSPD', 'WSPD_10N', 'Ux_10N', 'Vy_10N',
                      'WDIR', 'cosWDIR', 'sinWDIR')
    statVars = [f'{stat} {quantity} {timeWindow}min'
                for timeWindow in range(20, 120, 10)
                for quantity in statQuantities
                for stat in ('mean', 'std. dev.')]
    for statVar in statVars:
        subds[statVar] = ds[statVar]

    # Constant position fields, one value per TRMM_TIME sample.
    nTimes = len(subds['TRMM_TIME'])
    subds['LATITUDE'] = xr.DataArray(LAT * np.ones(nTimes, dtype=float), dims=['TRMM_TIME'])
    subds['LONGITUDE'] = xr.DataArray(LON * np.ones(nTimes, dtype=float), dims=['TRMM_TIME'])

    subds['TAO_TIME'] = ds['TAO_TIME']

    # Deployment periods of this buoy; the file is closed again as soon as the
    # two date arrays are loaded into memory.
    stationName = _stationName(LAT, LON)
    deployFileName = f'../../downloads/Buoy/extractedGZ/WINDS/{stationName}/{stationName}_DeploymentDates.nc'
    with xr.open_dataset(deployFileName) as ds2:
        startDates = np.array(ds2['startDate'])
        endDates = np.array(ds2['endDate'])

    # Tag every sample with the index of the deployment it falls into.
    # NOTE(review): the loop starts at index 1, so samples inside the first
    # deployment (index 0) share the value 0 with samples outside every
    # deployment - confirm that this is intended.
    trmmTime = subds['TRMM_TIME']
    depNum = np.zeros(nTimes, dtype=int)
    for i in range(1, len(startDates)):
        inDeployment = (trmmTime >= startDates[i]) & (trmmTime <= endDates[i])
        depNum[inDeployment.to_numpy()] = i

    subds['Deployment index'] = xr.DataArray(depNum, dims=['TRMM_TIME'])

    # Satellite variables, first ray only.
    satVars = ['nearSurfRain',
               'e_SurfRain']
    satDS = ds[satVars].isel(nray=0)

    return xr.merge((subds, satDS))


def _stationName(LAT, LON):
    """Build the buoy directory/file stem, e.g. ``T_08S_095W``, from a position.

    LON may be signed or wrapped to [0, 360); 180 degrees is written as
    ``180W``, matching the match-up file names used in this notebook.
    """
    latUnits = 'S' if LAT < 0 else 'N'
    signedLon = ((LON + 180) % 360) - 180   # wrap into [-180, 180)
    lonUnits = 'W' if signedLon < 0 else 'E'
    latDeg = int(round(abs(LAT)))
    lonDeg = int(round(abs(signedLon)))
    return f'T_{latDeg:02d}{latUnits}_{lonDeg:03d}{lonUnits}'

In [10]:
# Nominal buoy positions (signed degrees; negative = south / west).
latList = [-9, -8, -5, -2, 0, 2, 5, 8, 9]
lonList = [-95, -110, -125, -140, -155, -170, -180, 165]

ylen = len(latList)
xlen = len(lonList)

# Every (latitude, longitude) combination, latitude-major.
taskList = [[taskLat, taskLon] for taskLat in latList for taskLon in lonList]
ntasks = len(taskList)

# Per-buoy datasets are collected here and concatenated once after the loop;
# concatenating inside the loop re-copies the accumulated data on every file.
dsParts = []
dataCount = 0
fileCount = 0
for task in taskList:
    lat, lon = task

    # Signed latitude and longitude wrapped to [0, 360) are stored as data.
    LAT = lat
    LON = (lon + 360) % 360

    # Hemisphere letters and unsigned degrees are used in the file names.
    # NOTE(review): these four names are deliberately kept at module scope,
    # because helper code such as getSubDF may read them from there.
    latUnits = 'S' if lat < 0 else 'N'
    lonUnits = 'W' if lon < 0 else 'E'
    lat = abs(lat)
    lon = abs(lon)

    matchFname = f'../../downloads/TRMM_data/TRMM_byTAOpos/T_{lat:02d}{latUnits}_{lon:03d}{lonUnits}_xrr_MatchUp_TRMM_2000.nc'

    # Not every position of the grid has a match-up file.
    if not os.path.isfile(matchFname):
        continue

    print(f'T_{lat:02d}{latUnits}_{lon:03d}{lonUnits}_xrr_MatchUp_2000.nc')
    # rds is left open on purpose: the returned dataset may still load its
    # values lazily from the file.
    rds = xr.open_dataset(matchFname)
    ds = getSubDF(rds, LAT, LON)

    # Replace the time dimension by a running sample counter that is unique
    # across all files, and keep the time stamps as an ordinary variable.
    TRMM_TIME = ds['TRMM_TIME'].to_numpy()
    ds = ds.rename({'TRMM_TIME':'count'})
    dataLen = len(ds['count'])
    print(dataCount, dataCount+dataLen)
    ds['count'] = xr.DataArray(pd.Series(np.arange(dataCount, dataCount+dataLen)),
                               dims = ['count'])
    ds['TRMM_TIME'] = xr.DataArray(TRMM_TIME,
                               dims = ['count'])
    dataCount += dataLen

    dsParts.append(ds)
    fileCount += 1

# An empty Dataset is kept as the result when no match-up file was found.
allDS = xr.concat(dsParts, dim='count') if dsParts else xr.Dataset()
        

T_08S_095W_xrr_MatchUp_2000.nc
0 1099
T_08S_110W_xrr_MatchUp_2000.nc
1099 2125
T_08S_125W_xrr_MatchUp_2000.nc
2125 3363
T_08S_155W_xrr_MatchUp_2000.nc
3363 4351
T_08S_170W_xrr_MatchUp_2000.nc
4351 5584
T_08S_165E_xrr_MatchUp_2000.nc
5584 6700
T_05S_095W_xrr_MatchUp_2000.nc
6700 7807
T_05S_110W_xrr_MatchUp_2000.nc
7807 9236
T_05S_125W_xrr_MatchUp_2000.nc
9236 10740
T_05S_140W_xrr_MatchUp_2000.nc
10740 12250
T_05S_155W_xrr_MatchUp_2000.nc
12250 13206
T_05S_170W_xrr_MatchUp_2000.nc
13206 14582
T_05S_165E_xrr_MatchUp_2000.nc
14582 16206
T_02S_095W_xrr_MatchUp_2000.nc
16206 17369
T_02S_110W_xrr_MatchUp_2000.nc
17369 18117
T_02S_125W_xrr_MatchUp_2000.nc
18117 19383
T_02S_140W_xrr_MatchUp_2000.nc
19383 20872
T_02S_155W_xrr_MatchUp_2000.nc
20872 22152
T_02S_170W_xrr_MatchUp_2000.nc
22152 23138
T_02S_180W_xrr_MatchUp_2000.nc
23138 24525
T_02S_165E_xrr_MatchUp_2000.nc
24525 25617
T_00N_095W_xrr_MatchUp_2000.nc
25617 26914
T_00N_110W_xrr_MatchUp_2000.nc
26914 28233
T_00N_125W_xrr_MatchUp_2000.nc


In [11]:
# Display the dataset assembled from all match-up files in the previous cell.
allDS

In [12]:
# Remove the HEIGHT, DEPTH and nray entries from the combined dataset
# (presumably the scalar coordinates left over from the level / ray selection).
# errors='ignore' keeps this cell re-runnable: without it a second execution
# raises because the names have already been dropped.
allDS = allDS.drop_vars(['HEIGHT','DEPTH','nray'], errors='ignore') #,'ambiguities'])

In [13]:
# Display allDS again, now without the variables dropped in the previous cell.
allDS

In [14]:
# Quality masks at TAO_TIME_INDEX 0: True where the buoy QC flag is 1 or 2.
centerQC = allDS.sel(TAO_TIME_INDEX=0)
acceptedFlags = [1,2]
mask1, mask2, mask3, mask4, mask5 = [
    centerQC[qcName].isin(acceptedFlags).to_numpy()
    for qcName in ('WSPD_QC', 'WDIR_QC', 'SST_QC', 'RELH_QC', 'AIRT_QC')
]

In [15]:
# A sample is kept only when all five buoy QC masks accept it.
# (An earlier variant also AND-ed a no-rain mask; it is not applied here.)
selectMask = mask1 & mask2 & mask3 & mask4 & mask5

In [16]:
# Display the combined boolean quality mask.
selectMask

array([ True,  True,  True, ...,  True,  True,  True])

In [17]:
# Positions of the samples accepted by the quality mask.
indices = np.nonzero(selectMask)[0]

In [18]:
# Keep only the accepted samples; 'count' labels are selected by value.
goodDataSet = allDS.sel({'count': indices})

In [19]:
# Display the quality-filtered dataset.
goodDataSet

In [20]:
# Give the buoy variables descriptive names before writing the file.
goodDataSetNames = {
    'WSPD': 'Wind Speed (TAO)',
    'WDIR': 'Wind Direction (TAO)',
    'UWND': 'Zonal Wind Speed (TAO)',
    'VWND': 'Meridional Wind Speed (TAO)',
    'SST': 'Sea Surface Temperature (TAO)',
    'RELH': 'Relative Humidity (TAO)',
    'AIRT': 'Air Temperature (TAO)',
    'WSPD_10N': 'Neutral Wind Speed at 10m (TAO)',
    'WSPD_10': 'Wind Speed at 10m (TAO)',
}
goodDataSet = goodDataSet.rename_vars(goodDataSetNames)

In [21]:
# Write the quality-filtered, renamed dataset to the working directory.
goodDataFile = 'goodData_TRMMwithTAO.nc'
goodDataSet.to_netcdf(goodDataFile)

In [110]:
# Keep the TAO_TIME_INDEX 0 slice and drop the now-scalar coordinate.
# drop_vars replaces the deprecated Dataset.drop.
centerData = allDS.sel(TAO_TIME_INDEX = 0)
centerData = centerData.drop_vars('TAO_TIME_INDEX')

# NOTE(review): rainMask is not defined in any visible cell of this notebook;
# it has to exist (a boolean array along 'count') before this cell is run.
indices = list(np.nonzero(~rainMask)[0])

# Remove the samples flagged by rainMask, then keep only samples whose buoy
# QC flags are 1 or 2, one variable after the other.
goodData = centerData.isel(count=~rainMask)
for qcName in ("WSPD_QC", "WDIR_QC", "SST_QC", "RELH_QC", "AIRT_QC"):
    goodData = goodData.isel(count = goodData[qcName].isin([1,2]))


print(len(goodData['count']))




156953


In [111]:
# Flag samples with implausibly large buoy wind values
# (presumably fill values - TODO confirm the thresholds) ...
tooLarge = np.logical_or(goodData['WSPD'] > 1000, goodData['WDIR'] > 3600)

# ... or with a missing buoy / satellite wind speed or direction.
# (A near-zero WSPD_10N check was tried earlier and is not applied.)
mask = tooLarge
for windName in ('WSPD', 'WDIR', 'retrieved_wind_speed', 'retrieved_wind_direction'):
    missing = np.isnan(goodData[windName].to_numpy())
    mask = np.logical_or(mask, missing)

print(np.sum(mask))
# df stays an xarray Dataset; it is not converted to a DataFrame.
df = goodData.isel(count = ~mask)

<xarray.DataArray 'WSPD' ()>
array(8226)


In [112]:
# Display the dataset left after removing the masked samples.
df

In [113]:
# Descriptive names for the buoy (TAO) and satellite (QuikSCAT) variables.
dfDisplayNames = {
    'WSPD': 'Wind Speed (TAO)',
    'WDIR': 'Wind Direction (TAO)',
    'UWND': 'Zonal Wind Speed (TAO)',
    'VWND': 'Meridional Wind Speed (TAO)',
    'SST': 'Sea Surface Temperature (TAO)',
    'RELH': 'Relative Humidity (TAO)',
    'AIRT': 'Air Temperature (TAO)',
    'WSPD_10N': 'Neutral Wind Speed at 10m (TAO)',
    'WSPD_10': 'Wind Speed at 10m (TAO)',
    'retrieved_wind_speed': 'Wind Speed (QuikSCAT)',
    'retrieved_wind_direction': 'Wind Direction (QuikSCAT)',
}
df = df.rename_vars(dfDisplayNames)

In [114]:
def _toMathAngle(direction):
    """Change true degrees to degrees where 0 deg means east and 90 deg means north."""
    return (-(direction - 90.0) + 360)%360


def _signedAngleDiff(angleA, angleB):
    """Return angleA - angleB in degrees, wrapped into (-180, 180]."""
    diff = ((angleA - angleB)+360)%360
    return xr.where(diff > 180, diff-360, diff)


# NOTE(review): this cell overwrites the direction variables in place, so
# running it a second time converts the already converted angles again.

df['Speed Difference (QuikSCAT - TAO)'] = df['Wind Speed (QuikSCAT)'] - df['Neutral Wind Speed at 10m (TAO)']

## change true degrees to degrees where 0 deg mean east and 90 deg mean north
df['Wind Direction (TAO)'] = _toMathAngle(df['Wind Direction (TAO)'])
df['Wind Direction (QuikSCAT)'] = _toMathAngle(df['Wind Direction (QuikSCAT)'])

## calculate zonal and meridional winds
taoDirRad = np.deg2rad(df['Wind Direction (TAO)'])
df['Zonal Neutral Wind Speed (TAO)'] = df['Neutral Wind Speed at 10m (TAO)'] * np.cos(taoDirRad)
df['Meridional Neutral Wind Speed (TAO)'] = df['Neutral Wind Speed at 10m (TAO)'] * np.sin(taoDirRad)

satDirRad = np.deg2rad(df['Wind Direction (QuikSCAT)'])
df['Zonal Wind Speed (QuikSCAT)'] = df['Wind Speed (QuikSCAT)'] * np.cos(satDirRad)
df['Meridional Wind Speed (QuikSCAT)'] = df['Wind Speed (QuikSCAT)'] * np.sin(satDirRad)

## direction diff in range (-180,180]
dirDiff = _signedAngleDiff(df['Wind Direction (QuikSCAT)'], df['Wind Direction (TAO)'])
df['Direction Difference (QuikSCAT - TAO)'] = dirDiff

df['Zonal Wind Speed Difference (QuikSCAT - TAO)'] = df['Zonal Wind Speed (QuikSCAT)'] - df['Zonal Neutral Wind Speed (TAO)']
df['Meridional Wind Speed Difference (QuikSCAT - TAO)'] = df['Meridional Wind Speed (QuikSCAT)'] - df['Meridional Neutral Wind Speed (TAO)']

# Same differences against the windowed buoy means.
# NOTE(review): the windows run up to 720 min here, while getSubDF in this
# notebook only copies the 20..110 min statistics - confirm that df really
# carries all of these variables.
for timeWindow in range(20,730,10):
    df[f'Speed Difference (QuikSCAT - TAO {timeWindow} min mean)'] = df['Wind Speed (QuikSCAT)'] - df[f'mean WSPD_10N {timeWindow}min']

    ## change true degrees to degrees where 0 deg mean east and 90 deg mean north
    df[f'mean WDIR {timeWindow}min'] = _toMathAngle(df[f'mean WDIR {timeWindow}min'])

    ## direction diff in range (-180,180]
    dirDiff = _signedAngleDiff(df['Wind Direction (QuikSCAT)'], df[f'mean WDIR {timeWindow}min'])
    df[f'Direction Difference (QuikSCAT - TAO {timeWindow} min mean)'] = dirDiff

    ## component differences; the meridional one uses the meridional QuikSCAT
    ## component (it was previously computed from the zonal component)
    df[f'Zonal Wind Speed Difference (QuikSCAT - TAO {timeWindow} min mean)'] = df['Zonal Wind Speed (QuikSCAT)'] - df[f'mean Ux_10N {timeWindow}min']
    df[f'Meridional Wind Speed Difference (QuikSCAT - TAO {timeWindow} min mean)'] = df['Meridional Wind Speed (QuikSCAT)'] - df[f'mean Vy_10N {timeWindow}min']

In [115]:
# Display df including the difference variables added in the previous cell.
df

In [116]:
# Write the filtered match-up dataset with all derived differences.
matchupFile = 'rainFlagRemovedBuoyDataBadQualityRemovedMatchup.nc'
df.to_netcdf(matchupFile)

In [None]:
# Names of all variables (data variables and coordinates) in df.
list(df.variables)

In [17]:
# Number of samples along the 'count' dimension.
df.sizes['count']

148727

In [18]:
# Pick from the full dataset the samples whose 'count' labels survived in df.
keptCounts = df['count']
selAllDS = allDS.sel({'count': keptCounts})

In [19]:
# Descriptive names for the buoy (TAO) and satellite (QuikSCAT) variables.
selAllDSDisplayNames = {
    'WSPD': 'Wind Speed (TAO)',
    'WDIR': 'Wind Direction (TAO)',
    'UWND': 'Zonal Wind Speed (TAO)',
    'VWND': 'Meridional Wind Speed (TAO)',
    'SST': 'Sea Surface Temperature (TAO)',
    'RELH': 'Relative Humidity (TAO)',
    'AIRT': 'Air Temperature (TAO)',
    'WSPD_10N': 'Neutral Wind Speed at 10m (TAO)',
    'WSPD_10': 'Wind Speed at 10m (TAO)',
    'retrieved_wind_speed': 'Wind Speed (QuikSCAT)',
    'retrieved_wind_direction': 'Wind Direction (QuikSCAT)',
}
selAllDS = selAllDS.rename_vars(selAllDSDisplayNames)

In [20]:
# NOTE(review): unlike the df computation, the speed difference here uses the
# measured buoy wind speed and not the 10 m neutral wind speed - confirm.
selAllDS['Speed Difference (QuikSCAT - TAO)'] = selAllDS['Wind Speed (QuikSCAT)'] - selAllDS['Wind Speed (TAO)']

## change true degrees to degrees where 0 deg mean east and 90 deg mean north
selAllDS['Wind Direction (TAO)'] = (-(selAllDS['Wind Direction (TAO)'] - 90.0) + 360)%360
selAllDS['Wind Direction (QuikSCAT)'] = (-(selAllDS['Wind Direction (QuikSCAT)'] - 90.0) + 360)%360

# Direction difference against the windowed buoy mean directions.
# The mean direction is converted to the same angle convention as the
# QuikSCAT direction before subtracting, and the result is wrapped into
# (-180, 180], consistent with the direction differences computed for df.
# The earlier statements that subtracted a mean wind *speed* from the
# direction were overwritten immediately and have been removed.
# NOTE(review): getSubDF in this notebook only copies the 20..110 min
# statistics - confirm that 'mean WDIR 120min' exists in selAllDS.
for windowLabel, windowMinutes in (('30 min', 30), ('1 hr', 60), ('2 hr', 120)):
    meanDirection = (-(selAllDS[f'mean WDIR {windowMinutes}min'] - 90.0) + 360)%360
    windowDirDiff = ((selAllDS['Wind Direction (QuikSCAT)'] - meanDirection) + 360)%360
    selAllDS[f'Direction Difference (QuikSCAT - TAO {windowLabel} mean)'] = xr.where(
        windowDirDiff > 180, windowDirDiff - 360, windowDirDiff)


## zonal and meridional components of the buoy and satellite winds
taoDirRad = np.deg2rad(selAllDS['Wind Direction (TAO)'])
selAllDS['Zonal Neutral Wind Speed at 10m (TAO)'] = selAllDS['Neutral Wind Speed at 10m (TAO)']*np.cos(taoDirRad)
selAllDS['Meridional Neutral Wind Speed at 10m (TAO)'] = selAllDS['Neutral Wind Speed at 10m (TAO)']*np.sin(taoDirRad)

satDirRad = np.deg2rad(selAllDS['Wind Direction (QuikSCAT)'])
selAllDS['Zonal Neutral Wind Speed at 10m (QuikSCAT)'] = selAllDS['Wind Speed (QuikSCAT)']*np.cos(satDirRad)
selAllDS['Meridional Neutral Wind Speed at 10m (QuikSCAT)'] = selAllDS['Wind Speed (QuikSCAT)']*np.sin(satDirRad)

selAllDS['Zonal Wind Speed Difference (QuikSCAT - TAO)'] = selAllDS['Zonal Neutral Wind Speed at 10m (QuikSCAT)'] - selAllDS['Zonal Neutral Wind Speed at 10m (TAO)']
selAllDS['Meridional Wind Speed Difference (QuikSCAT - TAO)'] = selAllDS['Meridional Neutral Wind Speed at 10m (QuikSCAT)'] - selAllDS['Meridional Neutral Wind Speed at 10m (TAO)']


In [21]:
# Display the selected dataset with the derived difference variables.
selAllDS