In [3]:
############
### 2019 ###
############

# written by Subhatra Sivam: ssivam@terpmail.umd.edu
# took me 19.6 seconds to run #

# import python packages
import cartopy.crs as ccrs
from datetime import datetime
from matplotlib.image import imread
import matplotlib.pyplot as plt
import os
import pandas as pd

# one hourly-averaged DataFrame per saildrone; stacked into sdfull after the loop
sdhourly = []

# set saildrone color :) (only referenced by the optional trajectory maps below)
sdcolor = '#eb633e'

# layout of the 'time' strings in the flux and observation files
timefmt = '%Y-%m-%dT%H:%M:%SZ'

# every average is taken over one (calendar date, hour of day) bin
hourkeys = ['date', 'hour']

# observation column in the raw file -> column name in the output file
obscols = {
    'TEMP_AIR_MEAN': 'T',
    'TEMP_CTD_RBR_MEAN': 'SST',
    'RH_MEAN': 'RH',
    'wind_speed': 'V',
    'BARO_PRES_MEAN': 'p',
}

# loop through saildrones
sdname = ['1033','1034','1035','1036','1037','1041']
for name in sdname:
    # load flux data
    filepath = 'raw-data/sd/flux/sd2019-' + name + '.csv'
    sd = pd.read_csv(filepath)

    # store and remove unit row (row 0); the first column is dropped as well
    units = sd.iloc[0,:]
    sdid = sd['ID'][1]
    # .copy() so the column assignments below write to an independent frame
    sd = sd.iloc[1:,1:].copy()

    # get saildrone locations and fluxes as numbers
    # (positions must parse; unparseable flux values become NaN)
    sd['longitude'] = sd['longitude'].astype(float)
    sd['latitude'] = sd['latitude'].astype(float)
    sd['QL'] = pd.to_numeric(sd['QL'], errors='coerce')
    sd['QS'] = pd.to_numeric(sd['QS'], errors='coerce')

    ### uncomment below for trajectory maps ###
    # # set axis dimensions
    # plt.figure()
    # ax = plt.axes(projection=ccrs.PlateCarree())
    # fname = os.path.join('/Users/ssiv/Documents/', 'clean-geo-base.png')
    # ax.imshow(imread(fname), origin='upper', transform=ccrs.PlateCarree(), 
    #         extent=[-180, 180, -90, 90])
    # ax.set_extent([-180, -145, 50, 80], crs=ccrs.PlateCarree())
    # ax.gridlines(draw_labels=True, dms=True, x_inline=False, y_inline=False,
    #                 linewidth=1, color='k', alpha=0.5)

    # # plot locations
    # plt.scatter(sd['longitude'],sd['latitude'],transform=ccrs.Geodetic(),s=1,c=sdcolor)
    # plt.title(sdid)
    # path = 'maps/sd-track/' + sdid + '.png'
    # plt.savefig(path)

    # assessing time: parse the text stamps, then split into the date and hour bin keys
    sd['time'] = [datetime.strptime(stamp, timefmt) for stamp in sd['time']]
    sd['date'] = sd['time'].dt.date
    sd['hour'] = sd['time'].dt.hour

    # hourly mean of position and fluxes (mean() ignores NaN values);
    # a single grouped mean keeps the numeric dtypes of the columns
    sdflux = sd.groupby(hourkeys)[['longitude', 'latitude', 'QL', 'QS']].mean().reset_index()

    # load observation data
    filepath = 'raw-data/sd/obs/sd2019-' + name + '.csv'
    sd = pd.read_csv(filepath,low_memory=False)
    units = sd.iloc[0,:]
    sdid = sd['trajectory'][1]
    sd = sd.iloc[1:,1:].copy()

    # assessing time
    sd['time'] = [datetime.strptime(stamp, timefmt) for stamp in sd['time']]
    sd['date'] = sd['time'].dt.date
    sd['hour'] = sd['time'].dt.hour

    # convert the observation columns to numbers under their short names
    for rawcol, shortcol in obscols.items():
        sd[shortcol] = pd.to_numeric(sd[rawcol], errors='coerce')

    # hourly mean of the observations (mean() ignores NaN values).
    # 'p' is included here: it was averaged before but never stored, which left
    # the 2019 file without the pressure column that the 2018 and 2017 files have
    sdobs = sd.groupby(hourkeys)[list(obscols.values())].mean().reset_index()

    # merge data: keep only the hours present in both the flux and observation files
    sd = pd.merge(sdflux, sdobs, on=hourkeys, how='inner')
    sdhourly.append(sd)

# stack all saildrones (each saildrone keeps its own 0-based row index)
sdfull = pd.concat(sdhourly)

# text stamp for the middle of each hourly bin: '<date> <hour>:30:00'
midhour = sdfull['date'].astype(str) + ' ' + sdfull['hour'].astype(int).astype(str) + ':30:00'

# parse the text and express it as a POSIX timestamp (seconds)
epoch = pd.to_datetime(midhour).map(lambda stamp: stamp.timestamp())

# replace the 'date'/'hour' keys with the numeric 'datetime' column,
# then discard every row that still has a missing value
sdfull = sdfull.assign(datetime=epoch).drop(['date', 'hour'], axis=1).dropna()

# save file
sdfull.to_csv('data/sd/avg-at30-2019.csv', encoding='utf-8', index=False)

In [4]:
############
### 2018 ###
############

# written by Subhatra Sivam: ssivam@terpmail.umd.edu
# took me 8.4 seconds to run #

# import python packages
import cartopy.crs as ccrs
from datetime import datetime
from matplotlib.image import imread
import matplotlib.pyplot as plt
import os
import pandas as pd

# one hourly-averaged DataFrame per saildrone; stacked into sdfull after the loop
sdhourly = []

# set saildrone color :) (only referenced by the optional trajectory maps below)
sdcolor = '#eb633e'

# layout of the 'time' strings in the flux and observation files
timefmt = '%Y-%m-%dT%H:%M:%SZ'

# every average is taken over one (calendar date, hour of day) bin
hourkeys = ['date', 'hour']

# observation column in the raw file -> column name in the output file
obscols = {
    'TEMP_AIR_MEAN': 'T',
    'TEMP_CTD_MEAN': 'SST',
    'RH_MEAN': 'RH',
    'wind_speed': 'V',
    'BARO_PRES_MEAN': 'p',
}

# loop through saildrones
sdname = ['1020','1021','1022','1023']
for name in sdname:
    # load flux data
    filepath = 'raw-data/sd/flux/sd2018-' + name + '.csv'
    sd = pd.read_csv(filepath)

    # store and remove unit row (row 0); the first column is dropped as well
    units = sd.iloc[0,:]
    sdid = sd['ID'][1]
    # .copy() so the column assignments below write to an independent frame
    sd = sd.iloc[1:,1:].copy()

    # get saildrone locations and fluxes as numbers
    # (positions must parse; unparseable flux values become NaN)
    sd['longitude'] = sd['longitude'].astype(float)
    sd['latitude'] = sd['latitude'].astype(float)
    sd['QL'] = pd.to_numeric(sd['QL'], errors='coerce')
    sd['QS'] = pd.to_numeric(sd['QS'], errors='coerce')

    ### uncomment below for trajectory maps ###
    # # set axis dimensions
    # plt.figure()
    # ax = plt.axes(projection=ccrs.PlateCarree())
    # fname = os.path.join('/Users/ssiv/Documents/', 'clean-geo-base.png')
    # ax.imshow(imread(fname), origin='upper', transform=ccrs.PlateCarree(), 
    #         extent=[-180, 180, -90, 90])
    # ax.set_extent([-180, -145, 50, 80], crs=ccrs.PlateCarree())
    # ax.gridlines(draw_labels=True, dms=True, x_inline=False, y_inline=False,
    #                 linewidth=1, color='k', alpha=0.5)

    # # plot locations
    # plt.scatter(sd['longitude'],sd['latitude'],transform=ccrs.Geodetic(),s=1,c=sdcolor)
    # plt.title(sdid)
    # path = 'maps/sd-track/' + sdid + '.png'
    # plt.savefig(path)

    # assessing time: parse the text stamps, then split into the date and hour bin keys
    sd['time'] = [datetime.strptime(stamp, timefmt) for stamp in sd['time']]
    sd['date'] = sd['time'].dt.date
    sd['hour'] = sd['time'].dt.hour

    # hourly mean of position and fluxes (mean() ignores NaN values);
    # a single grouped mean keeps the numeric dtypes of the columns
    sdflux = sd.groupby(hourkeys)[['longitude', 'latitude', 'QL', 'QS']].mean().reset_index()

    # load observation data
    filepath = 'raw-data/sd/obs/sd2018-' + name + '.csv'
    sd = pd.read_csv(filepath,low_memory=False)
    units = sd.iloc[0,:]
    sdid = sd['trajectory'][1]
    sd = sd.iloc[1:,1:].copy()

    # assessing time
    sd['time'] = [datetime.strptime(stamp, timefmt) for stamp in sd['time']]
    sd['date'] = sd['time'].dt.date
    sd['hour'] = sd['time'].dt.hour

    # convert the observation columns to numbers under their short names
    for rawcol, shortcol in obscols.items():
        sd[shortcol] = pd.to_numeric(sd[rawcol], errors='coerce')

    # hourly mean of the observations (mean() ignores NaN values)
    sdobs = sd.groupby(hourkeys)[list(obscols.values())].mean().reset_index()

    # merge data: keep only the hours present in both the flux and observation files
    sd = pd.merge(sdflux, sdobs, on=hourkeys, how='inner')
    sdhourly.append(sd)

# stack all saildrones (each saildrone keeps its own 0-based row index)
sdfull = pd.concat(sdhourly)

# text stamp for the middle of each hourly bin: '<date> <hour>:30:00'
midhour = sdfull['date'].astype(str) + ' ' + sdfull['hour'].astype(int).astype(str) + ':30:00'

# parse the text and express it as a POSIX timestamp (seconds)
epoch = pd.to_datetime(midhour).map(lambda stamp: stamp.timestamp())

# replace the 'date'/'hour' keys with the numeric 'datetime' column,
# then discard every row that still has a missing value
sdfull = sdfull.assign(datetime=epoch).drop(['date', 'hour'], axis=1).dropna()

# save file
sdfull.to_csv('data/sd/avg-at30-2018.csv', encoding='utf-8', index=False)

In [5]:
############
### 2017 ###
############

# written by Subhatra Sivam: ssivam@terpmail.umd.edu
# took me 3.7 seconds to run #

# import python packages
import cartopy.crs as ccrs
from datetime import datetime
from matplotlib.image import imread
import matplotlib.pyplot as plt
import netCDF4 as nc
import numpy as np
import os
import pandas as pd
from scipy.signal import argrelextrema

# collect all saildrone data
sdfull = []

# set saildrone color :)
sdcolor = '#eb633e'

# read observation data; the with-block closes the file once the arrays are read
filename = 'raw-data/sd/obs/sd2017.nc'
with nc.Dataset(filename,mode='r') as f:
    time = f.variables['time'][:]
    lat = f.variables['latitude'][:]
    lon = f.variables['longitude'][:]
    p = f.variables['BARO_PRES_MEAN'][:]
    rh = f.variables['RH_MEAN'][:]
    sst = f.variables['TEMP_CTD_MEAN'][:]
    t = f.variables['TEMP_AIR_MEAN'][:]
    V = f.variables['wind_speed'][:]

# some time values were decimals, when they should be integers.
# astype returns a new array, so the result has to be assigned to take effect
time = time.astype(np.int64)

# identify local minima to separate saildrones:
# a strict local minimum of time marks a point where the time series restarts
localmin = np.array(argrelextrema(time, np.less))

# create DataFrames for each saildrone subset
def create_saildrone_df(time, lat, lon, sst, t, rh, V, p, start, end):
    """Build a DataFrame from the samples [start, end) of each input array.

    Parameters
    ----------
    time, lat, lon, sst, t, rh, V, p : array-like
        Arrays that support slicing and ``.squeeze()``; all are sliced with
        the same ``start:end`` range along their first axis.
    start, end : int
        Slice bounds (``end`` is exclusive).

    Returns
    -------
    pandas.DataFrame
        Columns 'datetime', 'lat', 'lon', 'SST', 'T', 'RH', 'V', 'p', in that
        order, one row per sample in the slice.
    """
    def _column(values):
        # squeeze() removes length-1 axes; for a one-sample slice it returns a
        # 0-d array, which pandas would treat as a scalar and reject, so the
        # result is promoted back to one dimension
        return np.atleast_1d(values[start:end].squeeze())

    subset_df = pd.DataFrame({
        'datetime': _column(time),
        'lat': _column(lat),
        'lon': _column(lon),
        'SST': _column(sst),
        'T': _column(t),
        'RH': _column(rh),
        'V': _column(V),
        'p': _column(p)
    })
    return subset_df

# split the stacked record at the first two local minima of time,
# giving one DataFrame per saildrone
firstcut = localmin[0][0]
secondcut = localmin[0][1]
obsarrays = (time, lat, lon, sst, t, rh, V, p)
sd1001 = create_saildrone_df(*obsarrays, 0, firstcut)
sd1002 = create_saildrone_df(*obsarrays, firstcut, secondcut)
sd1003 = create_saildrone_df(*obsarrays, secondcut, len(time))

# one hourly-averaged DataFrame per saildrone; stacked into sdfull after the loop
sdhourly = []

# every average is taken over one (calendar date, hour of day) bin
hourkeys = ['date', 'hour']

# loop through saildrones; sdname and sdobsraw are listed in the same order
sdname = ['1001','1002','1003']
sdobsraw = [sd1001, sd1002, sd1003]
for name, sdobsframe in zip(sdname, sdobsraw):
    # load flux data
    filepath = 'raw-data/sd/flux/sd2017-' + name + '.csv'
    sd = pd.read_csv(filepath,low_memory=False)

    # store and remove unit row (row 0); the first column is dropped as well
    units = sd.iloc[0,:]
    sdid = sd['ID'][1]
    # .copy() so the column assignments below write to an independent frame
    sd = sd.iloc[1:,1:].copy()

    # get saildrone locations and fluxes as numbers
    # (positions must parse; unparseable flux values become NaN)
    sd['longitude'] = sd['longitude'].astype(float)
    sd['latitude'] = sd['latitude'].astype(float)
    sd['QL'] = pd.to_numeric(sd['QL'], errors='coerce')
    sd['QS'] = pd.to_numeric(sd['QS'], errors='coerce')

    ### uncomment below for trajectory maps ###
    # # set axis dimensions
    # plt.figure()
    # ax = plt.axes(projection=ccrs.PlateCarree())
    # fname = os.path.join('/Users/ssiv/Documents/', 'clean-geo-base.png')
    # ax.imshow(imread(fname), origin='upper', transform=ccrs.PlateCarree(), 
    #         extent=[-180, 180, -90, 90])
    # ax.set_extent([-180, -145, 50, 80], crs=ccrs.PlateCarree())
    # ax.gridlines(draw_labels=True, dms=True, x_inline=False, y_inline=False,
    #                 linewidth=1, color='k', alpha=0.5)

    # # plot locations
    # plt.scatter(sd['longitude'],sd['latitude'],transform=ccrs.Geodetic(),s=1,c=sdcolor)
    # plt.title(sdid)
    # path = 'maps/sd-track/' + sdid + '.png'
    # plt.savefig(path)

    # assessing time: parse the stamps, then split into the date and hour bin keys
    sd['time'] = pd.to_datetime(sd['time'])
    sd['date'] = sd['time'].dt.date
    sd['hour'] = sd['time'].dt.hour

    # hourly mean of position and fluxes (mean() ignores NaN values);
    # a single grouped mean keeps the numeric dtypes of the columns
    sdflux = sd.groupby(hourkeys)[['longitude', 'latitude', 'QL', 'QS']].mean().reset_index()

    # load observation data (the frame built from the netCDF file for this saildrone);
    # the 'time', 'date' and 'hour' columns are added to that frame itself
    sd = sdobsframe

    # assessing time: 'datetime' holds epoch seconds, read as UTC.
    # pd.to_datetime(unit='s') replaces datetime.utcfromtimestamp, which is
    # deprecated since Python 3.12
    sd['time'] = pd.to_datetime(sd['datetime'], unit='s')
    sd['date'] = sd['time'].dt.date
    sd['hour'] = sd['time'].dt.hour

    # hourly mean of the observations (mean() ignores NaN values)
    sdobs = sd.groupby(hourkeys)[['T', 'SST', 'RH', 'V', 'p']].mean().reset_index()

    # merge data: keep only the hours present in both the flux and observation data
    sd = pd.merge(sdflux, sdobs, on=hourkeys, how='inner')
    sdhourly.append(sd)

# stack all saildrones (each saildrone keeps its own 0-based row index)
sdfull = pd.concat(sdhourly)

# text stamp for the middle of each hourly bin: '<date> <hour>:30:00'
midhour = sdfull['date'].astype(str) + ' ' + sdfull['hour'].astype(int).astype(str) + ':30:00'

# parse the text and express it as a POSIX timestamp (seconds)
epoch = pd.to_datetime(midhour).map(lambda stamp: stamp.timestamp())

# replace the 'date'/'hour' keys with the numeric 'datetime' column,
# then discard every row that still has a missing value
sdfull = sdfull.assign(datetime=epoch).drop(['date', 'hour'], axis=1).dropna()

# save file
sdfull.to_csv('data/sd/avg-at30-2017.csv', encoding='utf-8', index=False)

In [None]:
# create time series #
# NOTE: this plots whichever sdfull was built last (the 2017 cell when the
# notebook is run top to bottom)

# savefig does not create missing folders, so make sure the target exists
os.makedirs('timeseries', exist_ok=True)

# 'datetime' is stored as epoch seconds; convert back for the time axis
time = pd.to_datetime(sdfull['datetime'],unit='s')
QL = sdfull['QL']
QS = sdfull['QS']

# one scatter plot per flux column, with a dotted zero line for reference
for flux, subscript, path in ((QL, 'L', 'timeseries/SD-QL.png'),
                              (QS, 'S', 'timeseries/SD-QS.png')):
    fig, ax = plt.subplots()
    ax.scatter(time,flux,c='k',s=2)
    ax.axhline(0,c='k',ls=':')
    ax.set_title('Saildrone Q$_' + subscript + '$')
    # flux is power per unit area: W/m^2 (the previous label read W*m^2)
    ax.set_ylabel('Flux ($W/m^2$)')
    fig.savefig(path)