In [1]:
from sklearn.cluster import DBSCAN
from sklearn.preprocessing import normalize
from sklearn.linear_model import LinearRegression, RANSACRegressor

from scipy import stats

import joblib

import numpy as np
import xarray as xr

import matplotlib.pyplot as plt
from matplotlib import cm
import matplotlib.cbook as cbook
import matplotlib.colors as colors
import matplotlib.dates as mdates

from netCDF4 import Dataset, num2date, date2num
import os

import seaborn as sns
import cmocean as cmocn

%matplotlib inline

In [2]:
# Path of the matchup NetCDF file, relative to the notebook's working directory.
matchupFile = '../testMatchups/rainFlagRemovedBuoyDataBadQualityRemovedMatchupAmbuguitiesAdded_waveAndGlorysAdded_withoutManualRemovedSomeData.nc'

# Open the file with xarray, then flatten it into a pandas DataFrame so rows
# can be filtered with boolean masks.
ds = xr.open_dataset(matchupFile)
df = ds.to_dataframe()

# Keep the dataset as the cell's last expression so its summary is displayed.
ds

In [18]:
def robust_regression_ransac(x, y):
    """Fit a straight line to (x, y) with RANSAC; return its slope and fitted values.

    A ``LinearRegression`` is wrapped in a ``RANSACRegressor`` whose
    ``random_state`` is fixed at 42.

    Parameters
    ----------
    x : array-like
        Predictor values. They are reshaped into a single feature column,
        i.e. an array of shape (n, 1).
    y : array-like
        Target values, passed to ``fit`` as given (after ``np.asarray``).
        Both a 1-D vector and an (n, 1) column are accepted.

    Returns
    -------
    slope : numpy scalar
        Coefficient of the single feature, read from ``model.estimator_``.
    Y : numpy.ndarray
        ``model.predict(x)`` evaluated on every input sample.
    """
    x = np.asarray(x).reshape(-1, 1)
    y = np.asarray(y)

    model = RANSACRegressor(LinearRegression(), random_state=42)
    model.fit(x, y)

    Y = model.predict(x)

    # LinearRegression stores coef_ as (n_features,) for a 1-D target and as
    # (n_targets, n_features) for a 2-D target. Flattening first lets the same
    # lookup serve both layouts; indexing twice would fail for a 1-D target.
    slope = np.ravel(model.estimator_.coef_)[0]

    return slope, Y
    
def plotQS(thisDF, axes):
    """Plot per-deployment QuikSCAT-minus-TAO differences with RANSAC trend lines.

    Rows in which either difference column is NaN are dropped. For every
    distinct 'Deployment index' that still has at least two rows, each panel
    receives a shaded band from the deployment's earliest to latest 'QS_TIME',
    the RANSAC-fitted line, and a scatter of the raw differences. Deployments
    whose time span exceeds 180 days are additionally annotated with the
    fitted slope.

    Parameters
    ----------
    thisDF : pandas.DataFrame
        Must contain the columns 'WSPD_DIFF (QuikSCAT - TAO)',
        'WDIR_DIFF (QuikSCAT - TAO)', 'Deployment index' and 'QS_TIME'.
        'QS_TIME' has an ``np.datetime64`` epoch subtracted from it, so it is
        expected to hold datetime64 values.
    axes : sequence of matplotlib Axes
        ``axes[0]`` is used for wind speed and ``axes[1]`` for wind direction.
        Both are modified in place.

    Returns
    -------
    None
    """
    speedCol = 'WSPD_DIFF (QuikSCAT - TAO)'
    dirCol = 'WDIR_DIFF (QuikSCAT - TAO)'

    # Only deployments spanning more than this many days get a slope label.
    minLabelSpanDays = 180

    # One entry per panel:
    #   (axis, data column, slope multiplier, label y-position, label units).
    # The speed slope is multiplied by 100 to match its cm/s-per-day label.
    panels = [
        (axes[0], speedCol, 100, 3, r'$cm.s^{-1}.day^{-1}$'),
        (axes[1], dirCol, 1, 80, r'$\degree.day^{-1}$'),
    ]

    mask = np.logical_or(np.isnan(np.array(thisDF[speedCol])),
                         np.isnan(np.array(thisDF[dirCol])))
    thisDF = thisDF.loc[~mask]

    # np.unique already returns its result sorted.
    dep_labels = np.unique(np.array(thisDF['Deployment index']))

    # Loop-invariant helpers: the time origin for the regression's x axis
    # (days since 2000-01-01) and the colour cycle.
    epoch = np.datetime64('2000-01-01T00:00:00')
    cmap = plt.get_cmap('Dark2')

    for i, dep in enumerate(dep_labels):
        # Colour is tied to the deployment's position in the sorted label
        # list, so skipped deployments still consume a colour slot.
        colorR = tuple(np.array(cmap(i % cmap.N)))

        ssubDF = thisDF.loc[thisDF['Deployment index'] == dep]
        if len(ssubDF) < 2:
            # A line fit needs at least two points.
            continue

        qsTime = ssubDF['QS_TIME']
        minTime = np.min(qsTime)
        maxTime = np.max(qsTime)

        x = ((np.array(qsTime) - epoch) / np.timedelta64(1, 'D')).reshape(-1, 1)

        slopes = []
        for ax, column, _, _, _ in panels:
            y = np.array(ssubDF[column]).reshape(-1, 1)
            slope, Y = robust_regression_ransac(x, y)
            slopes.append(slope)

            ax.axvspan(minTime, maxTime, alpha=0.05, color=colorR)
            ax.plot(qsTime, Y, color=colorR)
            ax.scatter(qsTime, ssubDF[column], color=colorR, s=0.2)

        # Use the true time extent (max - min) rather than last-minus-first
        # row, which is only the span when the rows happen to be time-sorted.
        if x.max() - x.min() > minLabelSpanDays:
            xpos = np.mean(qsTime)
            for (ax, _, scale, ypos, units), slope in zip(panels, slopes):
                ax.text(xpos, ypos, f'{slope * scale:5.2f} \n' + units,
                        color=colorR, ha='center')

    axes[0].set_title('Difference in wind speed (QS - TAO)')
    axes[1].set_title('Difference in wind direction (QS - TAO)')

    axes[0].set_ylim([-4, 4])
    axes[1].set_ylim([-90, 90])

    # Same time-axis ticks and grid on both panels: yearly major ticks,
    # monthly minor ticks, with a fainter minor grid.
    for ax in (axes[0], axes[1]):
        ax.xaxis.set_major_locator(mdates.YearLocator())
        ax.xaxis.set_minor_locator(mdates.MonthLocator())
        ax.grid(which='major', axis='both', lw=1)
        ax.grid(which='minor', axis='both', lw=0.2, alpha=0.5)

In [19]:
# Site grid: every latitude below is paired with every longitude below.
latList = [-9, -8, -5, -2, 0, 2, 5, 8, 9]
lonList = [-95, -110, -125, -140, -155, -170, -180, 165]

ylen, xlen = len(latList), len(lonList)

# One [lat, lon] task per site, latitude-major: all longitudes for the first
# latitude, then all longitudes for the next latitude, and so on.
taskList = [[siteLat, siteLon] for siteLat in latList for siteLon in lonList]

ntasks = len(taskList)

In [20]:
# Render one two-panel figure per [lat, lon] site in taskList and save it as a PNG.
SUBDF = df.copy()

# savefig does not create missing directories, so make sure the output
# folder exists before the first figure is written.
outDir = 'siteWiseLinearRegressionImages'
os.makedirs(outDir, exist_ok=True)

for lat, lon in taskList:
    # LONGITUDE is compared against the 0-360 form of the site longitude.
    atSite = (SUBDF['LATITUDE'] == lat) & (SUBDF['LONGITUDE'] == (lon + 360) % 360)
    subDF = SUBDF.loc[atSite]

    # No rows for this site: report the (zero) row count and move on.
    if len(subDF['QS_TIME']) < 1:
        print(len(subDF['QS_TIME']))
        continue

    fig, axes = plt.subplots(nrows=2, ncols=1, sharex=True, figsize=(20, 8))

    plotQS(subDF, axes)

    # Hemisphere suffixes for the file name; the magnitudes are formatted
    # separately so the loop variables keep their signed values.
    latUnits = 'S' if lat < 0 else 'N'
    lonUnits = 'W' if lon < 0 else 'E'

    title = f'{outDir}/T_{abs(lat):02d}{latUnits}_{abs(lon):03d}{lonUnits}_QuikSCAT.png'

    # Work through the explicit figure handle and close it afterwards so
    # figures do not accumulate in memory across sites.
    fig.tight_layout()
    fig.savefig(title, dpi=100)
    plt.close(fig)

0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
