# TS Clustering Project

William Gilmore

In [None]:
#import necessary packages
import numpy as np
import pandas as pd
import xarray as xr
import matplotlib.pyplot as plt 
import matplotlib.dates as mdates
import seaborn as sns
import datetime
from datetime import date
import warnings 

#Warnings
warnings.simplefilter('ignore') 

#load map packages
from cartopy.mpl.ticker import LongitudeFormatter, LatitudeFormatter
import cartopy.feature as cfeature
import cartopy.crs as ccrs
from calendar import month_abbr


## Functions

Compile Function

In [None]:
#Author: Paul Fornage
#Compiles data from multiple cruises into one dataset
#sail has been changed to ds to match my code

def Compile_Datasets(ddir, fn_list):
    """Open several cruise files and combine them into one dataset along ``time``.

    Parameters
    ----------
    ddir : str
        'Root' directory for the data. Each file name is appended to it
        directly, so it has to end with a path separator.
    fn_list : list of str
        File names of the datasets to combine.

    Returns
    -------
    xarray.Dataset
        The cruises concatenated along ``time``, with three added variables:
        ``relativeID`` (position of the source file in ``fn_list``),
        ``duration`` (last minus first time stamp of the source file) and
        ``date`` (the day of each time stamp as a matplotlib date number).

    Raises
    ------
    ValueError
        If ``fn_list`` is empty.
    """
    if not fn_list:
        raise ValueError("fn_list must contain at least one file name")

    cruises = []
    try:
        for relative_id, fn in enumerate(fn_list):
            cruise = xr.open_dataset(ddir + fn)
            cruises.append(cruise)
            # give every dataset a relative ID so the cruises can be differentiated
            cruise["relativeID"] = relative_id
            # time span covered by this cruise
            cruise["duration"] = cruise["time"][-1] - cruise["time"][0]

        # one concat over all cruises instead of re-copying the growing
        # result once per file
        if len(cruises) == 1:
            ds = cruises[0]
        else:
            ds = xr.concat(cruises, dim="time")
    finally:
        # The first dataset stays open: it is the returned object when only
        # one file is given, and it was never closed before either.
        for cruise in cruises[1:]:
            cruise.close()

    # reformat dates
    # NOTE(review): assigned without explicit dimension names - confirm which
    # dimension xarray attaches 'date' to.
    ds['date'] = mdates.date2num(ds['time'].dt.date)

    return ds

West Coast Selection Function

In [None]:
#Author: William Gilmore
#Isolates data on the westcoast
def westcoast(upperLat, lowerLat, data=None):
    """Mask every sample that is not part of the west-coast selection.

    A sample is kept only if it is within 300 km of shore, outside both
    San Francisco Bay boxes, and strictly between the two latitude bounds.
    Samples failing any test are masked by ``where`` (set to missing) rather
    than removed, so the result has the same shape as the input and
    NaN-aware reductions should be used on it.

    upperLat: Upper acceptable latitude
    lowerLat: Lower acceptable latitude
      -the two bounds are sorted first, so either order gives the same result
    data: dataset to filter (needs dist_land, lat and lon);
          defaults to the notebook-level ``ds``
    returns: the masked copy; the input itself is not modified
    """
    if data is None:
        # backward compatible with the original two-argument call
        data = ds

    south, north = sorted((lowerLat, upperLat))

    #Keeps data that is within 300 km of shore
    near_shore = data.dist_land <= 300

    #The two boxes that together cover SF bay
    in_northern_box = ((data.lon > -122.5938) & (data.lat > 37.72783)
                       & (data.lon < -122.2506620424831) & (data.lat < 38.094658646550556))
    in_southern_box = ((data.lon > -122.38678630116495) & (data.lat > 37.430464705762226)
                       & (data.lon < -121.99799777841487) & (data.lat < 37.81408437558721))
    # a sample is bay data if it falls in EITHER box
    in_sf_bay = in_northern_box | in_southern_box

    #Keeps data between the latitude bounds
    in_lat_band = (data.lat > south) & (data.lat < north)

    # `where` returns a new object, so its result has to be returned
    return data.where(near_shore & ~in_sf_bay & in_lat_band)

## Access and Filter Data

Function Calls

In [None]:
#list of westcoast cruises
fnlist = ["saildrone_west_coast_survey_2018_sd1024_JPLv5.0_8dy_20210613norep_20210613_20211116.nc", 
          "saildrone_west_coast_survey_2018_sd1025_JPLv5.0_8dy_20210613norep_20210613_20211116.nc",
          "saildrone_west_coast_survey_2018_sd1026_JPLv5.0_8dy_20210613norep_20210613_20211116.nc",
          "saildrone_west_coast_survey_2018_sd1027_JPLv5.0_8dy_20210613norep_20210613_20211116.nc",
          "saildrone_west_coast_survey_2018_sd1028_JPLv5.0_8dy_20210613norep_20210613_20211116.nc",
          "saildrone_west_coast_survey_2019_sd1038_JPLv5.0_8dy_20210613norep_20210613_20211116.nc",
          "saildrone_west_coast_survey_2019_sd1039_JPLv5.0_8dy_20210613norep_20210613_20211116.nc",
          "saildrone_west_coast_survey_2019_sd1040_JPLv5.0_8dy_20210613norep_20210613_20211116.nc",
          "saildrone_west_coast_survey_2019_sd1043_JPLv5.0_8dy_20210613norep_20210613_20211116.nc",
          "saildrone_west_coast_survey_2019_sd1044_JPLv5.0_8dy_20210613norep_20210613_20211116.nc",
          "saildrone_west_coast_survey_2019_sd1045_JPLv5.0_8dy_20210613norep_20210613_20211116.nc",
          "saildrone_west_coast_survey_2019_sd1046_JPLv5.0_8dy_20210613norep_20210613_20211116.nc",
          "saildrone_west_coast_survey_2019_sd1047_JPLv5.0_8dy_20210613norep_20210613_20211116.nc"]


#combine all cruises into one dataset
ds = Compile_Datasets("../saildrone_data/", fnlist)

#westcoast takes (upperLat, lowerLat) and returns the dataset to keep working with,
#so the upper bound (45) goes first and the result is stored back in ds
ds = westcoast(45, 25)

Filter Data

In [None]:
#mask salinity values below 32 with NaN (values of exactly 32 are kept)
salinity = ds['SAL_CTD_MEAN']
ds['SAL_CTD_MEAN'] = salinity.where(salinity >= 32, np.nan)

#resample the data by hour (currently disabled)
#ds = ds.resample(time="H").mean(skipna=True)

#add date column but store it as a matplotlib date format
sample_days = ds['time'].dt.date
ds['date'] = mdates.date2num(sample_days)

#create function for calculating windspeed
def windspeed(u, v):
    """Return the wind speed, i.e. the magnitude of the (u, v) wind vector.

    u, v: the two wind components (scalars or array-likes that NumPy
          ufuncs accept); they are combined element-wise.
    returns: sqrt(u**2 + v**2), computed with np.hypot so that squaring
             large components cannot overflow.
    """
    return np.hypot(u, v)

#calculate windspeed using function
ds['wind_speed'] = windspeed(ds['UWND_MEAN'], ds['VWND_MEAN'])

#filter data
#'date' is removed before the conversion and added back as a plain column afterwards
#(drop_vars replaces the deprecated Dataset.drop)
ds_filtered = ds.drop_vars('date').to_dataframe()
ds_filtered['date'] = mdates.date2num(ds['time'].dt.date)

#keep only rows with salinity >= 32 and temperature >= 6, and only the columns used for plotting
keep_rows = (ds_filtered['SAL_CTD_MEAN'] >= 32) & (ds_filtered['TEMP_CTD_MEAN'] >= 6)
ds_filtered = ds_filtered.loc[keep_rows,
                              ['date','SAL_CTD_MEAN','TEMP_CTD_MEAN','lat','lon']]

#specify salinity values
xaxis_val = [32, 32.5, 33, 33.5, 34]

## Salinity vs Temp with Date

In [None]:
#scatter of salinity against temperature, colored by sampling date
fig, ax = plt.subplots(figsize=(7.6,4))
sc = ax.scatter(
    x = ds_filtered['SAL_CTD_MEAN'],
    y = ds_filtered['TEMP_CTD_MEAN'],
    c = ds_filtered['date'],
    cmap='jet',
    alpha = 0.5,
)

#colorbar ticks are placed and formatted as dates
loc = mdates.AutoDateLocator()
date_format = mdates.AutoDateFormatter(loc)
clb = fig.colorbar(sc, ticks=loc, format=date_format)

ax.set_title('Salinity versus Temperature with Date', fontdict = {'fontsize' : 16})
ax.set_xlabel("Salinity [PPT]")
ax.set_ylabel("Temperature [C]")
ax.set_xticks(xaxis_val)
ax.grid()
plt.show()

## Sampling Track 

In [None]:
#create list of ticks for the x axis
#selects out about 10 evenly spaced sample indices from the data
#(not used by this plot any more; kept in case other cells rely on it)
n_samples = len(ds['time'])
default_x_ticks = range(0, n_samples, max(1, n_samples // 10))

#define latitude and longitude boundaries
#.min()/.max() skip NaN values and avoid iterating the arrays element by element
latr = [float(ds['lat'].min()), float(ds['lat'].max())]
lonr = [float(ds['lon'].max()), float(ds['lon'].min())]

# Select a region of our data, giving it a margin
margin = 0.5
region = np.array([[latr[0]-margin,latr[1]+margin],[lonr[0]+margin,lonr[1]-margin]])

#add state outlines
states_provinces = cfeature.NaturalEarthFeature(
        category='cultural',
        name='admin_1_states_provinces_lines',
        scale='50m',
        facecolor='none')

# Create and set the figure context
fig = plt.figure(figsize=(16,10), dpi = 72)
ax = plt.axes(projection=ccrs.PlateCarree())
ax.coastlines(resolution='10m',linewidth=1,color='black')
ax.add_feature(cfeature.LAND, color='grey', alpha=0.3)
ax.add_feature(states_provinces, linewidth = 0.5)
ax.add_feature(cfeature.BORDERS)
ax.set_extent([region[1,0],region[1,1],region[0,0],region[0,1]],crs=ccrs.PlateCarree())
ax.set_xticks(np.round([*np.arange(region[1,1],region[1,0]+1,2)][::-1],0), crs=ccrs.PlateCarree())
ax.set_yticks(np.round([*np.arange(np.floor(region[0,0]),region[0,1]+1,1.5)],1), crs=ccrs.PlateCarree())
ax.xaxis.set_major_formatter(LongitudeFormatter(zero_direction_label=True))
ax.yaxis.set_major_formatter(LatitudeFormatter())
ax.gridlines(linestyle = '--', linewidth = 0.5)

# Plot track data, color by date
sc = plt.scatter(x = ds['lon'], y = ds['lat'], c = ds['date'], cmap='jet')
#plt.plot(ds['lon'], ds['lat'], ls = ':', c = 'navy', alpha = 0.4)

# 11 colorbar ticks evenly spaced over the date range; each label is derived
# from its own tick value, so labels and tick positions always agree
date_ticks = np.linspace(float(ds['date'].min()), float(ds['date'].max()), 11)
clb = fig.colorbar(sc, ticks=date_ticks)
clb.ax.set_title('Date')
clb.set_ticklabels([mdates.num2date(tick).date().isoformat() for tick in date_ticks])
plt.title('Sampling Track for Cruise', fontdict = {'fontsize' : 16})
plt.show()