In [4]:
from sklearn.cluster import DBSCAN
from sklearn.preprocessing import normalize
from sklearn.linear_model import LinearRegression, RANSACRegressor
from scipy import stats

import joblib

import numpy as np
import xarray as xr

import matplotlib.pyplot as plt
from matplotlib import cm
import matplotlib.cbook as cbook
import matplotlib.colors as colors
import matplotlib.dates as mdates

from netCDF4 import Dataset, num2date, date2num
import os

import seaborn as sns
import cmocean as cmocn

%matplotlib inline

In [5]:
def getLeftRightCenterConv(array, kernelSize):
    """Boxcar moving average of a 1-D series in three window alignments.

    The series is convolved (``mode='full'``) with a uniform kernel whose
    weights sum to 1, and three slices of the result, each as long as
    ``array``, are returned.

    Parameters
    ----------
    array : 1-D array-like of numbers (must be non-empty; ``np.convolve``
        raises ``ValueError`` on empty input).
    kernelSize : int
        Window length in samples. An even value is bumped to the next odd
        number so the centered window is symmetric.

    Returns
    -------
    left_edged, centered, right_edged : np.ndarray
        ``left_edged[j]``  averages the window that ends at sample ``j``;
        ``centered[j]``    averages the window centered on sample ``j``;
        ``right_edged[j]`` averages the window that starts at sample ``j``.
        Where a window reaches past either end of the series the missing
        samples count as zeros (the divisor stays ``kernelSize``), so values
        near the edges are pulled toward zero.
    """
    if kernelSize % 2 == 0:
        kernelSize += 1
    nSamples = len(array)
    kernel = np.ones(kernelSize, dtype=float)
    kernel = kernel / np.sum(kernel)
    halfKSize = kernelSize // 2
    array_bar = np.convolve(array, kernel, mode='full')
    # Slice by explicit length: `[halfKSize:-halfKSize]` collapses to an empty
    # array when halfKSize == 0 (kernel of one sample).
    centered = array_bar[halfKSize:halfKSize + nSamples]
    left_edged = array_bar[:nSamples]
    right_edged = array_bar[kernelSize - 1:]
    return left_edged, centered, right_edged

In [6]:
# Matchup file to analyse. The two commented-out assignments are alternative
# input files kept for reference; only the active `fname` below is opened.
#fname = '../testMatchups/rainFlagRemovedBuoyDataBadQualityRemovedMatchup_MetOP_AB_waveAndGlorysAdded_randomForestClassification'
#fname = '../testMatchups/rainFlagRemovedBuoyDataBadQualityRemovedMatchupAmbuguitiesAdded_waveAndGlorysAdded_manualRemovedSomeData.nc'
fname = '../testMatchups/rainFlagRemovedBuoyDataBadQualityRemovedMatchup_MetOP_AB_waveAndGlorysAdded.nc'
# Open the file as an xarray Dataset (path is relative to the notebook's working directory).
ds = xr.open_dataset(fname)

In [7]:
# Flatten the Dataset into a pandas DataFrame. `df` is a separate copy of
# `data`; the plotting cells below are called with `df`.
data = ds.to_dataframe()
df = data.copy()

In [8]:
# Show the Dataset's repr as this cell's output.
ds

In [9]:
def plotDiff(SUBDF, lat, lon, sat='ASCAT', sat_time='AS_TIME', suffix='all', kernelSize = 30):
    """Plot satellite-vs-TAO wind time series for one buoy position and save a PNG.

    Rows of ``SUBDF`` with ``LATITUDE == lat`` and
    ``LONGITUDE == (lon + 360) % 360`` are selected and processed one
    ``Deployment index`` at a time. Eight stacked panels share the time axis:

    0. ``WSPD_10N (TAO)`` and ``WSPD_10N ({sat})``
    1. ``WDIR (TAO)`` and ``WDIR ({sat})``
    2. ``WSPD_DIFF ({sat} - TAO)``
    3. ``WDIR_DIFF ({sat} - TAO)``
    4. moving mean of the speed difference (left / center / right windows)
    5. moving mean of the direction difference
    6. moving standard deviation of the speed difference
    7. moving standard deviation of the direction difference

    Deployments with fewer than ``kernelSize`` rows are not drawn, but their
    time span is still shaded in panels 0 and 1.

    Parameters
    ----------
    SUBDF : pandas.DataFrame
        Must contain the columns named above plus ``LATITUDE``, ``LONGITUDE``,
        ``Deployment index`` and the ``sat_time`` column.
    lat, lon : int
        Buoy position; negative values mean south / west. They are formatted
        with the ``d`` format code, so they must be integers.
    sat : str
        Satellite tag used in column names and in the output directory name.
    sat_time : str
        Name of the time column used for the x axis.
    suffix : str
        Tag appended to the output file name.
    kernelSize : int
        Moving-window length in samples, passed to ``getLeftRightCenterConv``.

    Returns
    -------
    None. When at least one row matches, the figure is written to
    ``images_{sat}/T_{lat}{N|S}_{lon}{E|W}_{suffix}.png`` (the directory is
    created if missing) and closed. When no row matches, nothing is drawn.

    NOTE(review): the moving statistics of the direction difference are plain
    arithmetic averages; if that column can wrap around (e.g. at +/-180 deg)
    they are biased near the wrap -- confirm how the column is produced.
    """
    subDF = SUBDF.loc[SUBDF['LATITUDE'] == lat]
    subDF = subDF.loc[subDF['LONGITUDE'] == (lon + 360) % 360]

    if len(subDF.index) < 1:
        return

    def rollingStd(meanOfSquares, mean):
        # sqrt(E[y^2] - E[y]^2). Rounding can leave the difference slightly
        # below zero, which sqrt would turn into NaN, so clamp at 0 first.
        return np.sqrt(np.clip(meanOfSquares - mean * mean, 0.0, None))

    timeCol = f'{sat_time}'
    dep_labels = np.unique(np.array(subDF['Deployment index']))
    startDates = []
    endDates = []

    fig, axes = plt.subplots(nrows=8, ncols=1, sharex=True, figsize=(20, 18))

    # One tab10 colour per deployment (cycled after ten). Not called `colors`
    # so the notebook's `matplotlib.colors as colors` import is not shadowed.
    cmap = plt.get_cmap('tab10')
    depColors = [cmap(i % 10) for i in range(len(dep_labels))]

    # Consistent line styles: raw series and the three window alignments.
    line_styles = {
        'raw': '-',
        'left': '--',
        'center': '-.',
        'right': ':'
    }

    # Legend labels already used on each panel, so that every series name
    # appears once in the legend no matter how many deployments are drawn.
    label_done = {ax_idx: set() for ax_idx in range(8)}

    for i, dep in enumerate(dep_labels):
        ssubDF = subDF.loc[subDF['Deployment index'] == dep]
        times = ssubDF[timeCol]

        # Recorded before the length check: short deployments are still shaded.
        startDates.append(np.min(times))
        endDates.append(np.max(times))

        if len(ssubDF.index) < kernelSize:
            continue

        AS_WSPD = np.array(ssubDF[f'WSPD_10N ({sat})'])
        TAO_WSPD = np.array(ssubDF['WSPD_10N (TAO)'])
        AS_WDIR = np.array(ssubDF[f'WDIR ({sat})'])
        TAO_WDIR = np.array(ssubDF['WDIR (TAO)'])
        y1 = np.array(ssubDF[f'WSPD_DIFF ({sat} - TAO)'])
        y2 = np.array(ssubDF[f'WDIR_DIFF ({sat} - TAO)'])

        left_y1, center_y1, right_y1 = getLeftRightCenterConv(y1, kernelSize)
        left_y1sq, center_y1sq, right_y1sq = getLeftRightCenterConv(y1 ** 2, kernelSize)

        left_y2, center_y2, right_y2 = getLeftRightCenterConv(y2, kernelSize)
        left_y2sq, center_y2sq, right_y2sq = getLeftRightCenterConv(y2 ** 2, kernelSize)

        color = depColors[i]

        # Panels 0-1: buoy and satellite series in fixed colours.
        for ax_idx, var_name, taoVals, satVals in [
            (0, 'WSPD_10N', TAO_WSPD, AS_WSPD),
            (1, 'WDIR', TAO_WDIR, AS_WDIR),
        ]:
            if var_name not in label_done[ax_idx]:
                labels = [f'TAO_{var_name}', f'{sat}_{var_name}']
            else:
                labels = [None, None]
            axes[ax_idx].plot(times, taoVals, label=labels[0], color='tomato',
                              linestyle=line_styles['raw'], alpha=0.5)
            axes[ax_idx].plot(times, satVals, label=labels[1], color='blue',
                              linestyle=line_styles['right'])
            label_done[ax_idx].add(var_name)

        # Panels 2-3: raw differences, coloured per deployment.
        for ax_idx, var_name, diffVals in [
            (2, 'raw_wspd_diff', y1),
            (3, 'raw_wdir_diff', y2),
        ]:
            label = var_name if var_name not in label_done[ax_idx] else None
            axes[ax_idx].plot(times, diffVals, label=label, color=color,
                              linestyle=line_styles['raw'])
            label_done[ax_idx].add(var_name)

        # Panels 4-7: moving means and moving standard deviations.
        movingStats = [
            (4, [left_y1, center_y1, right_y1], 'mean_of_wspd_diff'),
            (5, [left_y2, center_y2, right_y2], 'mean_of_wdir_diff'),
            (6, [rollingStd(left_y1sq, left_y1),
                 rollingStd(center_y1sq, center_y1),
                 rollingStd(right_y1sq, right_y1)], 'std_of_wspd_diff'),
            (7, [rollingStd(left_y2sq, left_y2),
                 rollingStd(center_y2sq, center_y2),
                 rollingStd(right_y2sq, right_y2)], 'std_of_wdir_diff')
        ]
        for ax_idx, y_vals, base_name in movingStats:
            # The loop variable is `edge`, not `suffix`: reusing `suffix` here
            # would overwrite the argument that goes into the file name.
            for y_val, edge in zip(y_vals, ['left', 'center', 'right']):
                var_name = f'{base_name}_{edge}'
                label = var_name if var_name not in label_done[ax_idx] else None
                axes[ax_idx].plot(times, y_val, label=label,
                                  color=color, linestyle=line_styles[edge])
                label_done[ax_idx].add(var_name)

    # Legends, yearly major / monthly minor ticks and grids on every panel.
    for ax in axes:
        handles, _ = ax.get_legend_handles_labels()
        if handles:  # nothing to list when every deployment was skipped
            ax.legend(fontsize=8, loc='upper right')
        ax.xaxis.set_major_locator(mdates.YearLocator())
        ax.xaxis.set_minor_locator(mdates.MonthLocator())
        ax.grid(which='major', axis='both', lw=1)
        ax.grid(which='minor', axis='both', lw=0.2, alpha=0.5)

    # Shade each deployment's time span in the two top panels, using the
    # deployment's own line colour so the figure is reproducible run to run.
    for i in range(len(startDates)):
        axes[0].axvspan(startDates[i], endDates[i], alpha=0.05, color=depColors[i])
        axes[1].axvspan(startDates[i], endDates[i], alpha=0.05, color=depColors[i])

    # Hemisphere letters for the title and the file name.
    if lat < 0:
        latUnits = 'S'
        lat = abs(lat)
    else:
        latUnits = 'N'

    if lon < 0:
        lonUnits = 'W'
        lon = abs(lon)
    else:
        lonUnits = 'E'

    fig.suptitle(f' {lat:2d}{latUnits} {lon:3d}{lonUnits}', fontsize=16)

    # savefig does not create missing directories.
    outDir = f'images_{sat}'
    os.makedirs(outDir, exist_ok=True)
    title = f'{outDir}/T_{lat:02d}{latUnits}_{lon:03d}{lonUnits}_{suffix}.png'
    fig.tight_layout()
    fig.savefig(title, dpi=100)
    plt.close(fig)


In [10]:
def plotDiff_ABS(SUBDF, lat, lon, sat='ASCAT', sat_time='AS_TIME', suffix='all', kernelSize = 30):
    """Plot "absolute" satellite-vs-TAO wind time series for one buoy and save a PNG.

    Same layout as ``plotDiff`` but the satellite wind is rebuilt from the
    ``UWND ({sat}) absolute`` / ``VWND ({sat}) absolute`` components, and the
    direction difference is read from ``WDIR_DIFF ({sat} - TAO) absolute``.
    Rows of ``SUBDF`` with ``LATITUDE == lat`` and
    ``LONGITUDE == (lon + 360) % 360`` are selected and processed one
    ``Deployment index`` at a time. Eight stacked panels share the time axis:

    0. ``WSPD_10N (TAO)`` and the satellite speed ``sqrt(U**2 + V**2)``
    1. ``WDIR (TAO)`` and the satellite angle ``atan2(V, U)`` wrapped to [0, 360)
    2. satellite speed minus ``WSPD_10N (TAO)``
    3. ``WDIR_DIFF ({sat} - TAO) absolute``
    4. moving mean of the speed difference (left / center / right windows)
    5. moving mean of the direction difference
    6. moving standard deviation of the speed difference
    7. moving standard deviation of the direction difference

    Deployments with fewer than ``kernelSize`` rows are not drawn, but their
    time span is still shaded in panels 0 and 1.

    Parameters
    ----------
    SUBDF : pandas.DataFrame
        Must contain the columns named above plus ``LATITUDE``, ``LONGITUDE``,
        ``Deployment index`` and the ``sat_time`` column.
    lat, lon : int
        Buoy position; negative values mean south / west. They are formatted
        with the ``d`` format code, so they must be integers.
    sat : str
        Satellite tag used in column names and in the output directory name.
    sat_time : str
        Name of the time column used for the x axis.
    suffix : str
        Tag appended to the output file name (before ``_absolute``).
    kernelSize : int
        Moving-window length in samples, passed to ``getLeftRightCenterConv``.

    Returns
    -------
    None. When at least one row matches, the figure is written to
    ``images_{sat}/T_{lat}{N|S}_{lon}{E|W}_{suffix}_absolute.png`` (the
    directory is created if missing) and closed.

    NOTE(review): the satellite angle is measured counter-clockwise from the
    +U axis; confirm this is the same convention as ``WDIR (TAO)`` before
    comparing the two curves in panel 1.
    NOTE(review): the moving statistics of the direction difference are plain
    arithmetic averages; if that column can wrap around they are biased near
    the wrap -- confirm how the column is produced.
    """
    subDF = SUBDF.loc[SUBDF['LATITUDE'] == lat]
    subDF = subDF.loc[subDF['LONGITUDE'] == (lon + 360) % 360]

    if len(subDF.index) < 1:
        return

    def rollingStd(meanOfSquares, mean):
        # sqrt(E[y^2] - E[y]^2). Rounding can leave the difference slightly
        # below zero, which sqrt would turn into NaN, so clamp at 0 first.
        return np.sqrt(np.clip(meanOfSquares - mean * mean, 0.0, None))

    timeCol = f'{sat_time}'
    dep_labels = np.unique(np.array(subDF['Deployment index']))
    startDates = []
    endDates = []

    fig, axes = plt.subplots(nrows=8, ncols=1, sharex=True, figsize=(20, 18))

    # One tab10 colour per deployment (cycled after ten). Not called `colors`
    # so the notebook's `matplotlib.colors as colors` import is not shadowed.
    cmap = plt.get_cmap('tab10')
    depColors = [cmap(i % 10) for i in range(len(dep_labels))]

    # Consistent line styles: raw series and the three window alignments.
    line_styles = {
        'raw': '-',
        'left': '--',
        'center': '-.',
        'right': ':'
    }

    # Legend labels already used on each panel, so that every series name
    # appears once in the legend no matter how many deployments are drawn.
    label_done = {ax_idx: set() for ax_idx in range(8)}

    for i, dep in enumerate(dep_labels):
        ssubDF = subDF.loc[subDF['Deployment index'] == dep]
        times = ssubDF[timeCol]

        # Recorded before the length check: short deployments are still shaded.
        startDates.append(np.min(times))
        endDates.append(np.max(times))

        if len(ssubDF.index) < kernelSize:
            continue

        TAO_WSPD = np.array(ssubDF['WSPD_10N (TAO)'])
        TAO_WDIR = np.array(ssubDF['WDIR (TAO)'])

        # Speed and angle rebuilt from the "absolute" wind components.
        uAbs = np.array(ssubDF[f'UWND ({sat}) absolute'])
        vAbs = np.array(ssubDF[f'VWND ({sat}) absolute'])
        AS_WSPD = np.sqrt(uAbs**2 + vAbs**2)
        AS_WDIR = (np.rad2deg(np.arctan2(vAbs, uAbs)) + 360) % 360

        y1 = AS_WSPD - TAO_WSPD
        y2 = np.array(ssubDF[f'WDIR_DIFF ({sat} - TAO) absolute'])

        left_y1, center_y1, right_y1 = getLeftRightCenterConv(y1, kernelSize)
        left_y1sq, center_y1sq, right_y1sq = getLeftRightCenterConv(y1 ** 2, kernelSize)

        left_y2, center_y2, right_y2 = getLeftRightCenterConv(y2, kernelSize)
        left_y2sq, center_y2sq, right_y2sq = getLeftRightCenterConv(y2 ** 2, kernelSize)

        color = depColors[i]

        # Panels 0-1: buoy and satellite series in fixed colours.
        for ax_idx, var_name, taoVals, satVals in [
            (0, 'WSPD_10N', TAO_WSPD, AS_WSPD),
            (1, 'WDIR', TAO_WDIR, AS_WDIR),
        ]:
            if var_name not in label_done[ax_idx]:
                labels = [f'TAO_{var_name}', f'{sat}_{var_name}']
            else:
                labels = [None, None]
            axes[ax_idx].plot(times, taoVals, label=labels[0], color='tomato',
                              linestyle=line_styles['raw'], alpha=0.5)
            axes[ax_idx].plot(times, satVals, label=labels[1], color='blue',
                              linestyle=line_styles['right'])
            label_done[ax_idx].add(var_name)

        # Panels 2-3: raw differences, coloured per deployment.
        for ax_idx, var_name, diffVals in [
            (2, 'raw_wspd_diff', y1),
            (3, 'raw_wdir_diff', y2),
        ]:
            label = var_name if var_name not in label_done[ax_idx] else None
            axes[ax_idx].plot(times, diffVals, label=label, color=color,
                              linestyle=line_styles['raw'])
            label_done[ax_idx].add(var_name)

        # Panels 4-7: moving means and moving standard deviations.
        movingStats = [
            (4, [left_y1, center_y1, right_y1], 'mean_of_wspd_diff'),
            (5, [left_y2, center_y2, right_y2], 'mean_of_wdir_diff'),
            (6, [rollingStd(left_y1sq, left_y1),
                 rollingStd(center_y1sq, center_y1),
                 rollingStd(right_y1sq, right_y1)], 'std_of_wspd_diff'),
            (7, [rollingStd(left_y2sq, left_y2),
                 rollingStd(center_y2sq, center_y2),
                 rollingStd(right_y2sq, right_y2)], 'std_of_wdir_diff')
        ]
        for ax_idx, y_vals, base_name in movingStats:
            # The loop variable is `edge`, not `suffix`: reusing `suffix` here
            # would overwrite the argument that goes into the file name.
            for y_val, edge in zip(y_vals, ['left', 'center', 'right']):
                var_name = f'{base_name}_{edge}'
                label = var_name if var_name not in label_done[ax_idx] else None
                axes[ax_idx].plot(times, y_val, label=label,
                                  color=color, linestyle=line_styles[edge])
                label_done[ax_idx].add(var_name)

    # Legends, yearly major / monthly minor ticks and grids on every panel.
    for ax in axes:
        handles, _ = ax.get_legend_handles_labels()
        if handles:  # nothing to list when every deployment was skipped
            ax.legend(fontsize=8, loc='upper right')
        ax.xaxis.set_major_locator(mdates.YearLocator())
        ax.xaxis.set_minor_locator(mdates.MonthLocator())
        ax.grid(which='major', axis='both', lw=1)
        ax.grid(which='minor', axis='both', lw=0.2, alpha=0.5)

    # Shade each deployment's time span in the two top panels, using the
    # deployment's own line colour so the figure is reproducible run to run.
    for i in range(len(startDates)):
        axes[0].axvspan(startDates[i], endDates[i], alpha=0.05, color=depColors[i])
        axes[1].axvspan(startDates[i], endDates[i], alpha=0.05, color=depColors[i])

    # Hemisphere letters for the title and the file name.
    if lat < 0:
        latUnits = 'S'
        lat = abs(lat)
    else:
        latUnits = 'N'

    if lon < 0:
        lonUnits = 'W'
        lon = abs(lon)
    else:
        lonUnits = 'E'

    fig.suptitle(f' {lat:2d}{latUnits} {lon:3d}{lonUnits}', fontsize=16)

    # savefig does not create missing directories.
    outDir = f'images_{sat}'
    os.makedirs(outDir, exist_ok=True)
    title = f'{outDir}/T_{lat:02d}{latUnits}_{lon:03d}{lonUnits}_{suffix}_absolute.png'
    fig.tight_layout()
    fig.savefig(title, dpi=100)
    plt.close(fig)


In [11]:
# Buoy grid to process: latitudes and longitudes in degrees, with negative
# values meaning south / west (the sign convention the plotting functions use).
latList = [-9, -8, -5, -2, 0, 2, 5, 8, 9]
lonList = [-95, -110, -125, -140, -155, -170, -180, 165]

ylen, xlen = len(latList), len(lonList)

# One [lat, lon] task per grid point, latitude-major: every longitude is
# visited for the first latitude before moving on to the next latitude.
taskList = [[latValue, lonValue] for latValue in latList for lonValue in lonList]

ntasks = len(taskList)

In [12]:
# Render and save the relative-wind comparison figure for every buoy position.
for lat, lon in taskList:
    plotDiff(df, lat, lon, sat='ASCAT', sat_time='AS_TIME', suffix='all', kernelSize=60)

In [None]:
# Render and save the "absolute"-wind comparison figure for every buoy position.
for lat, lon in taskList:
    plotDiff_ABS(df, lat, lon, sat='ASCAT', sat_time='AS_TIME', suffix='all', kernelSize=60)