In [1]:
from sklearn.cluster import DBSCAN
from sklearn.preprocessing import normalize
from sklearn.linear_model import LinearRegression, RANSACRegressor
from scipy import stats

import joblib

import numpy as np
import xarray as xr

import matplotlib.pyplot as plt
from matplotlib import cm
import matplotlib.cbook as cbook
import matplotlib.colors as colors

from netCDF4 import Dataset, num2date, date2num
import os

import seaborn as sns
import cmocean as cmocn

%matplotlib inline

In [2]:
def getLeftRightCenterConv(array, kernelSize):
    """Return trailing, centred and leading boxcar moving averages of ``array``.

    A kernel of ``kernelSize`` equal weights summing to 1 is convolved with
    ``array`` using ``np.convolve(..., mode='full')``; three windows of
    ``len(array)`` samples are then cut out of that result.

    Parameters
    ----------
    array : 1-D array-like of numbers
        Must be non-empty (``np.convolve`` raises ``ValueError`` otherwise).
    kernelSize : int
        Window length. An even value is increased by one so the window has a
        well-defined centre sample.

    Returns
    -------
    (left_edged, centered, right_edged) : tuple of three ndarrays
        Each has ``len(array)`` elements. With ``k`` the (odd) window length
        and ``h = k // 2``:
        ``left_edged[i]``  averages ``array[i-k+1 .. i]``,
        ``centered[i]``    averages ``array[i-h .. i+h]``,
        ``right_edged[i]`` averages ``array[i .. i+k-1]``.

    Notes
    -----
    'full' mode treats samples outside ``array`` as zero while the kernel is
    always normalised by its full length, so values within one window length
    of either end are pulled toward zero.
    """
    if kernelSize % 2 == 0:
        kernelSize += 1
    n = len(array)
    kernel = np.ones(kernelSize, dtype=float)
    kernel = kernel / np.sum(kernel)
    halfKSize = kernelSize // 2
    # Length of the full convolution is n + kernelSize - 1.
    array_bar = np.convolve(array, kernel, mode='full')
    # Slice by explicit length: the previous `[halfKSize:-halfKSize]` form
    # degenerated to `[0:-0]` (an empty array) for a window of one sample.
    centered = array_bar[halfKSize:halfKSize + n]
    left_edged = array_bar[:n]
    right_edged = array_bar[kernelSize - 1:]
    return left_edged, centered, right_edged

In [3]:
# Path of the matchup NetCDF file to analyse; the commented-out line below is
# an alternative input file that can be swapped in.
fname = '../testMatchups/rainFlagRemovedBuoyDataBadQualityRemovedMatchupAmbuguitiesAdded_waveAndGlorysAdded.nc'
#fname = '../testMatchups/rainFlagRemovedBuoyDataBadQualityRemovedMatchup_MetOP_AB_waveAndGlorysAdded.nc'
# Open the file as an xarray Dataset; later cells read it through `ds`.
ds = xr.open_dataset(fname)

In [4]:
# Convert the xarray Dataset to a pandas DataFrame, then work on a copy (`df`)
# so the converted frame `data` stays unmodified.
data = ds.to_dataframe()
df = data.copy()

In [5]:
# Show the Dataset's repr as this cell's output.
ds

In [6]:
def plotDiff(SUBDF, lat, lon, sat='QuikSCAT', sat_time='QS_TIME', suffix='all', kernelSize = 30):
    """Plot satellite-minus-TAO wind differences for one buoy position and save a PNG.

    Rows of ``SUBDF`` whose 'LATITUDE' equals ``lat`` and whose 'LONGITUDE'
    equals ``(lon + 360) % 360`` are selected and split by 'Deployment index'.
    For each deployment with at least ``kernelSize`` rows a six-row figure shows:

    * rows 0-1: 'Speed Difference ({sat} - TAO)' and
      'Direction Difference ({sat} - TAO)' against the ``sat_time`` column;
    * rows 2-3: their left/centre/right moving averages from
      ``getLeftRightCenterConv``;
    * rows 4-5: the matching moving variances, computed as
      mean(x**2) - mean(x)**2.

    The time span of every deployment (including ones too short to plot) is
    shaded on rows 0-1 in that deployment's line colour.

    NOTE(review): the moving statistics use row order, so rows are presumably
    expected to be sorted by ``sat_time`` within a deployment - confirm.

    Parameters
    ----------
    SUBDF : pandas.DataFrame holding the columns named above.
    lat, lon : int
        Buoy position; negative means south / west. Used both for row
        selection and, via integer formatting, for the file name.
    sat : str
        Satellite name used in the column names and the output directory.
    sat_time : str
        Name of the time column used as the x axis.
    suffix : str
        Tag appended to the output file name.
    kernelSize : int
        Moving-window length; deployments with fewer rows are not plotted.

    Returns
    -------
    None. When rows match, the figure is written to
    ``images_{sat}/T_{lat}{N|S}_{lon}{E|W}_{suffix}.png`` (the directory is
    created if missing); when no rows match nothing is done.
    """
    subDF = SUBDF.loc[SUBDF['LATITUDE'] == lat]
    subDF = subDF.loc[subDF['LONGITUDE'] == (lon + 360) % 360]

    if len(subDF.index) < 1:
        return

    dep_labels = np.unique(np.array(subDF['Deployment index']))
    startDates = []
    endDates = []

    # One line style per kind of curve, shared by all deployments.
    line_styles = {
        'raw': '-',
        'left': '--',
        'center': '-.',
        'right': ':'
    }

    fig, axes = plt.subplots(nrows=6, ncols=1, figsize=(20, 18))
    try:
        # One colour per deployment (cycled every 10). Named `dep_colors` so the
        # notebook-level `matplotlib.colors as colors` import is not shadowed.
        cmap = plt.get_cmap('tab10')
        dep_colors = [cmap(i % 10) for i in range(len(dep_labels))]

        # (axis index, curve name) pairs that already own a legend entry, so
        # each curve is labelled once rather than once per deployment.
        labelled = set()

        for i, dep in enumerate(dep_labels):
            ssubDF = subDF.loc[subDF['Deployment index'] == dep]
            times = ssubDF[sat_time]

            # Spans are recorded before the length check on purpose: short
            # deployments are still shaded even though no curve is drawn.
            startDates.append(times.min())
            endDates.append(times.max())

            if len(ssubDF.index) < kernelSize:
                continue

            y1 = np.array(ssubDF[f'Speed Difference ({sat} - TAO)'])
            y2 = np.array(ssubDF[f'Direction Difference ({sat} - TAO)'])

            left_y1, center_y1, right_y1 = getLeftRightCenterConv(y1, kernelSize)
            left_y1sq, center_y1sq, right_y1sq = getLeftRightCenterConv(y1 ** 2, kernelSize)

            left_y2, center_y2, right_y2 = getLeftRightCenterConv(y2, kernelSize)
            left_y2sq, center_y2sq, right_y2sq = getLeftRightCenterConv(y2 ** 2, kernelSize)

            # Every curve of this deployment as (axis index, values, name, style key).
            curves = [
                (0, y1, 'mean_wspd_diff', 'raw'),
                (1, y2, 'mean_wdir_diff', 'raw'),
            ]
            for ax_idx, y_vals, base_name in [
                (2, [left_y1, center_y1, right_y1], 'mean_wspd_diff'),
                (3, [left_y2, center_y2, right_y2], 'mean_wdir_diff'),
                (4, [left_y1sq - left_y1 * left_y1,
                     center_y1sq - center_y1 * center_y1,
                     right_y1sq - right_y1 * right_y1], 'var_wspd_diff'),
                (5, [left_y2sq - left_y2 * left_y2,
                     center_y2sq - center_y2 * center_y2,
                     right_y2sq - right_y2 * right_y2], 'var_wdir_diff')
            ]:
                # `edge` (not `suffix`): reusing the parameter name here used to
                # overwrite it, so files were always saved as "..._right.png".
                for y_val, edge in zip(y_vals, ['left', 'center', 'right']):
                    curves.append((ax_idx, y_val, f'{base_name}_{edge}', edge))

            for ax_idx, y_val, var_name, style in curves:
                key = (ax_idx, var_name)
                label = None if key in labelled else var_name
                axes[ax_idx].plot(times, y_val, label=label,
                                  color=dep_colors[i], linestyle=line_styles[style])
                labelled.add(key)

        for ax in axes:
            ax.legend(fontsize=8, loc='best')
            ax.grid()

        # Shade each deployment's time span in its own line colour; this keeps
        # the figure reproducible (no unseeded random colours).
        for start, end, shade in zip(startDates, endDates, dep_colors):
            axes[0].axvspan(start, end, alpha=0.05, color=shade)
            axes[1].axvspan(start, end, alpha=0.05, color=shade)

        latUnits = 'S' if lat < 0 else 'N'
        lonUnits = 'W' if lon < 0 else 'E'

        out_dir = f'images_{sat}'
        # savefig does not create missing directories.
        os.makedirs(out_dir, exist_ok=True)
        title = f'{out_dir}/T_{abs(lat):02d}{latUnits}_{abs(lon):03d}{lonUnits}_{suffix}.png'
        fig.tight_layout()
        fig.savefig(title, dpi=100)
    finally:
        # Always release this figure, even if plotting or saving failed.
        plt.close(fig)


In [7]:
def plotDiff_ABS(SUBDF, lat, lon, sat='QuikSCAT', sat_time='QS_TIME', suffix='all', kernelSize = 30):
    """Plot "absolute" satellite-minus-TAO wind differences for one buoy and save a PNG.

    Rows of ``SUBDF`` whose 'LATITUDE' equals ``lat`` and whose 'LONGITUDE'
    equals ``(lon + 360) % 360`` are selected and split by 'Deployment index'.
    For each deployment with at least ``kernelSize`` rows two series are built:

    * speed difference: ``sqrt(u**2 + v**2)`` of
      'Zonal Wind Speed ({sat}) absolute' and
      'Meridional Wind Speed ({sat}) absolute', minus
      'Neutral Wind Speed at 10m (TAO)';
    * direction difference: 'Direction Difference ({sat} - TAO) absolute'.

    The six-row figure shows the raw series (rows 0-1), their
    left/centre/right moving averages from ``getLeftRightCenterConv``
    (rows 2-3) and the matching moving variances, computed as
    mean(x**2) - mean(x)**2 (rows 4-5). The time span of every deployment
    (including ones too short to plot) is shaded on rows 0-1 in that
    deployment's line colour.

    NOTE(review): the moving statistics use row order, so rows are presumably
    expected to be sorted by ``sat_time`` within a deployment - confirm.

    Parameters
    ----------
    SUBDF : pandas.DataFrame holding the columns named above.
    lat, lon : int
        Buoy position; negative means south / west. Used both for row
        selection and, via integer formatting, for the file name.
    sat : str
        Satellite name used in the column names and the output directory.
    sat_time : str
        Name of the time column used as the x axis.
    suffix : str
        Tag appended to the output file name.
    kernelSize : int
        Moving-window length; deployments with fewer rows are not plotted.

    Returns
    -------
    None. When rows match, the figure is written to
    ``images_{sat}/T_{lat}{N|S}_{lon}{E|W}_{suffix}_absolute.png`` (the
    directory is created if missing); when no rows match nothing is done.
    """
    subDF = SUBDF.loc[SUBDF['LATITUDE'] == lat]
    subDF = subDF.loc[subDF['LONGITUDE'] == (lon + 360) % 360]

    if len(subDF.index) < 1:
        return

    dep_labels = np.unique(np.array(subDF['Deployment index']))
    startDates = []
    endDates = []

    # One line style per kind of curve, shared by all deployments.
    line_styles = {
        'raw': '-',
        'left': '--',
        'center': '-.',
        'right': ':'
    }

    fig, axes = plt.subplots(nrows=6, ncols=1, figsize=(20, 18))
    try:
        # One colour per deployment (cycled every 10). Named `dep_colors` so the
        # notebook-level `matplotlib.colors as colors` import is not shadowed.
        cmap = plt.get_cmap('tab10')
        dep_colors = [cmap(i % 10) for i in range(len(dep_labels))]

        # (axis index, curve name) pairs that already own a legend entry, so
        # each curve is labelled once rather than once per deployment.
        labelled = set()

        for i, dep in enumerate(dep_labels):
            ssubDF = subDF.loc[subDF['Deployment index'] == dep]
            times = ssubDF[sat_time]

            # Spans are recorded before the length check on purpose: short
            # deployments are still shaded even though no curve is drawn.
            startDates.append(times.min())
            endDates.append(times.max())

            if len(ssubDF.index) < kernelSize:
                continue

            xx = np.array(ssubDF[f'Zonal Wind Speed ({sat}) absolute'])
            yy = np.array(ssubDF[f'Meridional Wind Speed ({sat}) absolute'])
            # Magnitude of the satellite wind vector from its two components.
            rr = np.sqrt(xx**2 + yy**2)
            y1 = rr - np.array(ssubDF['Neutral Wind Speed at 10m (TAO)'])
            y2 = np.array(ssubDF[f'Direction Difference ({sat} - TAO) absolute'])

            left_y1, center_y1, right_y1 = getLeftRightCenterConv(y1, kernelSize)
            left_y1sq, center_y1sq, right_y1sq = getLeftRightCenterConv(y1 ** 2, kernelSize)

            left_y2, center_y2, right_y2 = getLeftRightCenterConv(y2, kernelSize)
            left_y2sq, center_y2sq, right_y2sq = getLeftRightCenterConv(y2 ** 2, kernelSize)

            # Every curve of this deployment as (axis index, values, name, style key).
            curves = [
                (0, y1, 'mean_wspd_diff', 'raw'),
                (1, y2, 'mean_wdir_diff', 'raw'),
            ]
            for ax_idx, y_vals, base_name in [
                (2, [left_y1, center_y1, right_y1], 'mean_wspd_diff'),
                (3, [left_y2, center_y2, right_y2], 'mean_wdir_diff'),
                (4, [left_y1sq - left_y1 * left_y1,
                     center_y1sq - center_y1 * center_y1,
                     right_y1sq - right_y1 * right_y1], 'var_wspd_diff'),
                (5, [left_y2sq - left_y2 * left_y2,
                     center_y2sq - center_y2 * center_y2,
                     right_y2sq - right_y2 * right_y2], 'var_wdir_diff')
            ]:
                # `edge` (not `suffix`): reusing the parameter name here used to
                # overwrite it, so files were always saved as
                # "..._right_absolute.png".
                for y_val, edge in zip(y_vals, ['left', 'center', 'right']):
                    curves.append((ax_idx, y_val, f'{base_name}_{edge}', edge))

            for ax_idx, y_val, var_name, style in curves:
                key = (ax_idx, var_name)
                label = None if key in labelled else var_name
                axes[ax_idx].plot(times, y_val, label=label,
                                  color=dep_colors[i], linestyle=line_styles[style])
                labelled.add(key)

        for ax in axes:
            ax.legend(fontsize=8, loc='best')
            ax.grid()

        # Shade each deployment's time span in its own line colour; this keeps
        # the figure reproducible (no unseeded random colours).
        for start, end, shade in zip(startDates, endDates, dep_colors):
            axes[0].axvspan(start, end, alpha=0.05, color=shade)
            axes[1].axvspan(start, end, alpha=0.05, color=shade)

        latUnits = 'S' if lat < 0 else 'N'
        lonUnits = 'W' if lon < 0 else 'E'

        out_dir = f'images_{sat}'
        # savefig does not create missing directories.
        os.makedirs(out_dir, exist_ok=True)
        title = f'{out_dir}/T_{abs(lat):02d}{latUnits}_{abs(lon):03d}{lonUnits}_{suffix}_absolute.png'
        fig.tight_layout()
        fig.savefig(title, dpi=100)
    finally:
        # Always release this figure, even if plotting or saving failed.
        plt.close(fig)


In [8]:
# Latitudes and longitudes (negative = south / west) of the positions to plot.
latList = [-9, -8, -5, -2, 0, 2, 5, 8, 9]
lonList = [-95, -110, -125, -140, -155, -170, -180, 165]

ylen = len(latList)
xlen = len(lonList)

# One [lat, lon] task per combination, latitude-major (all longitudes for the
# first latitude, then the next latitude, ...).
taskList = [[lat, lon] for lat in latList for lon in lonList]

ntasks = len(taskList)

In [9]:
# Save the "absolute" difference figure for every [lat, lon] task.
for i, (lat, lon) in enumerate(taskList):
    plotDiff_ABS(df, lat, lon, sat='QuikSCAT', sat_time='QS_TIME', suffix='all')

In [11]:
# Save the satellite-minus-TAO difference figure for every [lat, lon] task.
for i, (lat, lon) in enumerate(taskList):
    plotDiff(df, lat, lon, sat='QuikSCAT', sat_time='QS_TIME', suffix='all')

In [10]:
# List every column name of the matchup DataFrame as this cell's output.
list(df.columns)

['Wind Direction (TAO)',
 'WDIR_QC',
 'WDIR_DM',
 'Wind Speed (TAO)',
 'WSPD_QC',
 'WSPD_DM',
 'Zonal Wind Speed (TAO)',
 'Meridional Wind Speed (TAO)',
 'Sea Surface Temperature (TAO)',
 'SST_QC',
 'SST_DM',
 'Relative Humidity (TAO)',
 'RELH_QC',
 'RELH_DM',
 'Air Temperature (TAO)',
 'AIRT_QC',
 'AIRT_DM',
 'Neutral Wind Speed at 10m (TAO)',
 'Wind Speed at 10m (TAO)',
 'VHM0',
 'VHM0_SW1',
 'VHM0_SW2',
 'VHM0_WW',
 'VMDR',
 'VMDR_SW1',
 'VMDR_SW2',
 'VMDR_WW',
 'VPED',
 'VSDX',
 'VSDY',
 'VTM01_SW1',
 'VTM01_SW2',
 'VTM01_WW',
 'VTM02',
 'VTM10',
 'VTPK',
 'WAVE_TIME',
 'vo',
 'zos',
 'uo',
 'thetao',
 'so',
 'GLORYS_TIME',
 'mean_WSPD',
 'std_WSPD',
 'mean_WSPD_10N',
 'std_WSPD_10N',
 'mean_U10N_x',
 'std_U10N_x',
 'mean_U10N_y',
 'std_U10N_y',
 'mean_WDIR',
 'std_WDIR',
 'mean_cosWDIR',
 'std_cosWDIR',
 'mean_sinWDIR',
 'std_sinWDIR',
 'mean_SST',
 'std_SST',
 'mean_AIRT',
 'std_AIRT',
 'mean_SST - AIRT',
 'std_SST - AIRT',
 'mean_RELH',
 'std_RELH',
 'LATITUDE',
 'LONGITUDE',
 '