In [None]:
# Given
import netCDF4
import numpy as np
import matplotlib.pyplot as plt
import datetime
import time
import calendar
import pandas as pd
from matplotlib import gridspec

[Link to other buoys](https://cdip.ucsd.edu/m/deployment/station_view/)

In [None]:
# Type
# User settings: edit these three values to choose the buoy and the date window.
stn = "071"               # station number as a string; it is spliced into the dataset URL
startdate = "01/01/2014"  # start of the window, written MM/DD/YYYY
enddate = "12/30/2016"    # end of the window, written MM/DD/YYYY

In [None]:
# Given
# CDIP archived-dataset URL for the chosen station (the station number appears twice in the path)
data_url = f'http://thredds.cdip.ucsd.edu/thredds/dodsC/cdip/archive/{stn}p1/{stn}p1_historic.nc'

In [None]:
# Given
# Open the archive at data_url as a netCDF4 Dataset; later cells read its variables through `data`.
# NOTE(review): the dataset is never closed in the visible cells - consider data.close() once
# everything needed has been read.
data = netCDF4.Dataset(data_url)

In [None]:
# Other variables that can be read from the dataset (uncomment the lines below to use them)
# data.variables
# Hs = data.variables['waveHs']
# Tp = data.variables['waveTp']
# Dp = data.variables['waveDp'] 

Code taken from the [CDIP product descriptions page](http://cdip.ucsd.edu/themes/?zoom=auto&tz=UTC&ll_fmt=dm&numcolorbands=100&palette=cdip_classic&high=4.572&r=7&un=1&pb=1&d2=p70&u2=s:071:st:1:v:product_descriptions)

In [None]:
# Raw SST sample times from the archive. The cells below compare these values
# against Unix timestamps, i.e. they are handled as seconds since 1970-01-01.
ncTime = data.variables['sstTime'][:]

# Convert every sample time to a naive datetime expressed in UTC.
# datetime.fromtimestamp() without a tz converts to the machine's local zone,
# which makes the result depend on where the notebook is run; pinning tz=UTC
# keeps it reproducible. tzinfo is stripped so the values stay naive datetimes.
# NOTE(review): assumes the archive stores times in UTC - confirm against the
# units attribute of sstTime.
timeall = [
    datetime.datetime.fromtimestamp(t, tz=datetime.timezone.utc).replace(tzinfo=None)
    for t in ncTime
]

# Station name as stored in the dataset metadata, intended for plot titles
Harvest_Buoy = data.variables['metaStationName'][:]

# Month name and year of the requested start date, also intended for plot titles.
# Parsing with strptime instead of slicing fixed character positions also
# accepts dates written without zero padding, e.g. "1/5/2014".
start_dt = datetime.datetime.strptime(startdate, "%m/%d/%Y")
month_name = calendar.month_name[start_dt.month]
year_num = str(start_dt.year)  # kept as a string, as before

In [None]:
def find_nearest(array, value):
    """Return the entry of ``array`` that lies closest to ``value``.

    Parameters
    ----------
    array : array-like supporting elementwise subtraction and integer indexing
        (for example a NumPy array).
    value : scalar to search for.

    Returns
    -------
    The element of ``array`` with the smallest absolute difference from
    ``value``. When several elements are equally close, the earliest one wins,
    because ``argmin`` reports the first minimum.
    """
    distances = np.abs(array - value)
    closest_position = distances.argmin()
    return array[closest_position]

In [None]:
def getUnixTimestamp(humanTime, dateFormat):
    """Convert a date/time string to an integer Unix timestamp, reading it as UTC.

    Parameters
    ----------
    humanTime : str
        The date/time text, e.g. ``"01/01/2014"``.
    dateFormat : str
        ``strptime`` format describing ``humanTime``, e.g. ``"%m/%d/%Y"``.

    Returns
    -------
    int
        Seconds since 1970-01-01 00:00:00 UTC.

    Raises
    ------
    ValueError
        If ``humanTime`` does not match ``dateFormat``.

    Notes
    -----
    ``time.mktime`` interprets its input in the machine's local time zone, so
    the same date string produced different timestamps on different machines.
    ``calendar.timegm`` treats the fields as UTC, which makes the result
    independent of where the notebook runs. ``utctimetuple()`` leaves a naive
    datetime unchanged and converts an aware one (``%z`` in the format) to UTC.
    """
    parsed = datetime.datetime.strptime(humanTime, dateFormat)
    return calendar.timegm(parsed.utctimetuple())

In [None]:
# Turn both requested dates into Unix timestamps
unixstart, unixend = (
    getUnixTimestamp(day, "%m/%d/%Y") for day in (startdate, enddate)
)

# For each timestamp, pick the recorded sample time that lies closest to it
neareststart, future = (
    find_nearest(ncTime, stamp) for stamp in (unixstart, unixend)
)

# Position of each chosen sample time within ncTime; [0][0] takes the first match
nearIndex, futureIndex = (
    np.where(ncTime == hit)[0][0] for hit in (neareststart, future)
)

In [None]:
#  ---- Potential start point -----

nc_vars = data.variables

# Handle to the SST time variable, left un-sliced so its attributes (such as
# .units) remain available to later cells
sst_time_var = nc_vars['sstTime']

# Sea surface temperature values, read in full with [:]
sst = nc_vars['sstSeaSurfaceTemperature'][:]

# TODO: add wave height, and possibly direction and/or energy

[cftime documentation](https://unidata.github.io/cftime/)

In [None]:
# Decode the raw SST time values into datetimes, using the units string stored
# on the time variable itself. only_use_cftime_datetimes=False lets num2date
# return something other than cftime objects.
sst_time = netCDF4.num2date(
    sst_time_var[:],
    units=sst_time_var.units,
    only_use_cftime_datetimes=False,
)

In [None]:
# Build the working table in one step: each SST reading next to the time it was collected
sst_df = pd.DataFrame({
    'sst': sst,
    'date_time': sst_time,
})

In [None]:
# Convert the date_time column to pandas datetimes so the .dt accessor can be used on it
parsed_times = pd.to_datetime(sst_df['date_time'])
sst_df['date_time'] = parsed_times

In [None]:
# Split each timestamp into the calendar pieces used for grouping and filtering
stamp_parts = sst_df['date_time'].dt
sst_df['date'] = stamp_parts.date    # calendar date, time of day dropped
sst_df['month'] = stamp_parts.month  # month number
sst_df['year'] = stamp_parts.year    # year number

In [None]:
# Quick look at the full raw record before any cleaning.
# The explicit fig/ax interface is used so the axes can be labelled; without a
# title and axis labels the figure cannot be read on its own.
fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(sst_df.date, sst_df.sst, linewidth=.5)
# Trailing semicolon keeps the return value of set() out of the cell output
ax.set(title='Raw SST record', xlabel='Date', ylabel='Sea surface temperature');

In [None]:
# Keep only rows dated after 1997; per the original note this dumps the missing values
is_after_1997 = sst_df['year'] > 1997
sst_df = sst_df[is_after_1997]

In [None]:
# Inspect data
# Preview the first rows of sst_df after the year filter (head() with no argument shows five rows)
sst_df.head()

In [None]:
# Collapse to one row per calendar date: average the SST readings of that date
# and carry along the date's month and year (taken from its first row)
daily_rules = {'sst': 'mean', 'month': 'first', 'year': 'first'}
sst_daily = sst_df.groupby('date').agg(daily_rules)

In [None]:
# Inspect Data
# Preview the first rows of the daily table; its index is the date used for grouping
sst_daily.head()

In [None]:
# Plot the daily mean values.
# The explicit fig/ax interface is used so the axes can be labelled; without a
# title and axis labels the figure cannot be read on its own.
fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(sst_daily.index, sst_daily.sst)
# Trailing semicolon keeps the return value of set() out of the cell output
ax.set(title='Daily mean SST', xlabel='Date', ylabel='Sea surface temperature');

In [None]:
# 30-row rolling mean of the daily table.
# The window counts rows of sst_daily (one row per date present in the data),
# so it spans exactly 30 calendar days only where no dates are missing.
# The mean is applied to every column, so month and year are averaged as well;
# only the sst column of the result is meaningful.
window_rows = 30
sst_roll = sst_daily.rolling(window_rows).mean()

In [None]:
# Plot the 30-day rolling mean (the earlier comment called this a monthly
# average, but the data plotted is the rolling mean of the daily values).
# The explicit fig/ax interface is used so the axes can be labelled; without a
# title and axis labels the figure cannot be read on its own.
fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(sst_roll.index, sst_roll.sst)
# Trailing semicolon keeps the return value of set() out of the cell output
ax.set(title='30-day rolling mean of daily SST', xlabel='Date', ylabel='Sea surface temperature');