# Scripps Automated Shore Station (SASS) Self-Calibrating SeapHOx (SCS) live data viewer
### This script scrapes .dat files from the SCCOOS dr: https://sccoos.org/dr/data/. The SCS measures pH, oxygen, salinity and temperature. The pH values plotted here are calibrated using an average of the coefficients established pre-deployment and from the automated tris calibrations. Oxygen concentration has been corrected for salinity and pressure. Before using and sharing these values, please get permission from the author. A new tab-delimited text file is produced every time the script runs; its name carries the timestamp of when the file was created. All times associated with this script are in UTC.
### Created by: Taylor Wirth twirth@ucsd.edu
### Last edit: 11 April 2022

In [62]:
# load the .py scripts
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import os
import glob
%load_ext autoreload
%autoreload 1
%aimport get_recent_dr
%aimport get_all_dr
%aimport correct_DO_with_sal
%aimport pHtris_from_T
%aimport k0int_from_Vint_pHcal
%aimport k0ext_from_Vext_pHcal
%aimport pHint_from_Vint_k0int
%aimport pHext_from_Vext_k0ext
%aimport plot_all

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [63]:
# Get all the files for the Scripps Pier SCS. get_all_dr.py looks and grabs all data files for the Scripps Pier SCS,
# then converts it into a pandas DataFrame.
# Printed below is the last hour of data. This may take a few seconds depending on last data download!
path = 'https://sccoos.org/dr/data/scripps_pier_scs/'

# look for a previously downloaded data file in the working directory
txt = glob.glob('*.txt')
result = [i for i in txt if i.startswith('SIOpierSCS_download')]

if not result: # if there is no downloaded text file, download all data
    df, col_names = get_all_dr.get_all_dr(path)
else: # or download data starting from last downloaded sample
    df, col_names = get_recent_dr.get_recent_dr(path)

# create new tab delimited file to download; the file name carries the UTC creation time
from datetime import datetime, timezone
now = datetime.now(timezone.utc) # timezone-aware replacement for the deprecated datetime.utcnow()
current_time = now.strftime('%Y%m%d%H%M%S')
new_file = 'SIOpierSCS_download_' + current_time + '.txt'
df.to_csv(new_file, header=col_names, index=None, sep='\t')

# delete the superseded download only AFTER the new file has been written, so a failed
# write cannot leave the directory without any data file. Use the same
# 'SIOpierSCS_download' match as the check above so unrelated 'SIOpierSCS*' files are safe.
if result and result[0] != new_file:
    os.remove(result[0])

# display important variables for the last six samples (fewer if the frame is shorter)
df.iloc[-6:, [7,8,2,3,4,5,6,9,10,11,12,16,17,18,21,22,23,31,33]]

Unnamed: 0,date,time,sensor_name,samp_type,samp_num,calib_num,calib_rep,vbatt,vtherm,vint,vext,press,pHint,pHext,O2con,O2sat,O2temp,SBEtemp,SBEsal
9749,2022/04/27,16:20:10,SCS003,0,2702,1,0,17.11,-1.96315,0.063558,-0.925551,4.479,8.051717,8.058586,222.115,68.209,13.475,13.646,33.5951
9750,2022/04/27,16:30:10,SCS003,0,2703,1,0,17.11,-1.95981,0.059963,-0.929303,3.046,7.994231,8.005199,228.412,69.747,13.221,12.9785,33.7788
9751,2022/04/27,16:40:10,SCS003,0,2704,1,0,17.11,-1.95981,0.059907,-0.929451,3.277,7.993267,8.001765,209.334,63.772,13.116,12.9751,33.7148
9752,2022/04/27,16:50:10,SCS003,0,2705,1,0,17.11,-1.95981,0.061251,-0.928096,3.924,8.015176,8.022067,207.326,63.305,13.219,13.1808,33.6809
9753,2022/04/27,17:00:10,SCS003,0,2706,1,0,17.11,-1.95252,0.058877,-0.930636,3.565,7.974985,7.98003,206.066,62.606,12.995,12.991,33.6701
9754,2022/04/27,17:10:10,SCS003,0,2707,1,0,17.11,-1.95981,0.059681,-0.929863,3.265,7.990544,7.997669,198.847,60.393,12.98,12.8275,33.7837


In [64]:
# Data QC
# keep only rows whose SBE salinity lies within 10 standard deviations of the mean
# (rows with missing salinity fail the comparison and are dropped as well).
# .copy() makes the filtered frame independent, so the column assignments that
# follow do not raise pandas' SettingWithCopyWarning.
sal_dev = (df['SBEsal'] - df['SBEsal'].mean()).abs()
df = df[sal_dev <= 10 * df['SBEsal'].std()].copy()

# blank out (set to NaN) pressure values more than 3 standard deviations from the mean;
# the rows themselves are kept so the other sensors' data are not lost
press_dev = (df['press'] - df['press'].mean()).abs()
df['press'] = df['press'].mask(press_dev > 3 * df['press'].std())

In [65]:
# Oxygen correction for salinity and pressure
# inputs, in order: O2 concentration, optode temperature, pressure, SBE salinity
# NOTE(review): the meaning of sal_input=0 is defined in correct_DO_with_sal.py — confirm there
o2_inputs = (df['O2con'], df['O2temp'], df['press'], df['SBEsal'])
df['O2_corr'] = correct_DO_with_sal.correct_DO_with_sal(*o2_inputs, sal_input=0)

In [66]:
# run this only once!! separates calibration data from downloaded data file
# (after this cell df no longer holds any samp_type == 1 rows, so a second run
# would overwrite caldf with an empty frame)
is_cal = df['samp_type'] == 1

# calibration samples, with the date column renamed to 'caldate'
caldf = df.loc[is_cal].rename(columns={"date": "caldate"})

# remove calibration samples from the data frame
df = df.drop(index=df.index[is_cal])

In [67]:
# calculate in situ tris pH from temperature and calibration coefficients from tris pH
pHtris = pHtris_from_T.pHtris_from_T(caldf.SBEtemp, S=35)
k0int_tris = k0int_from_Vint_pHcal.k0int_from_Vint_pHcal(caldf.vint, pHtris, caldf.SBEtemp)
k0ext_tris = k0ext_from_Vext_pHcal.k0ext_from_Vext_pHcal(caldf.vext, pHtris, caldf.SBEtemp, calsal=35)

# tank/'factory' calibration coefficients are read from a local tab delimited file.
# Fail with an explicit message rather than a bare IndexError if it is missing.
tank_files = [i for i in txt if i.startswith('SCS_tank')]
if not tank_files:
    raise FileNotFoundError("no 'SCS_tank*.txt' calibration file found in the working directory")
tankdf = pd.read_csv(tank_files[0], sep='\t')

# add k0's from tris to calibration data frame
caldf.loc[:,'k0int_tris'] = k0int_tris
caldf.loc[:,'k0ext_tris'] = k0ext_tris

# combine calibration data, then drop every raw sensor column so only the
# remaining columns (sensor name, calibration date and k0 coefficients) are kept
dfk0 = pd.concat([tankdf, caldf],ignore_index=True,sort=False)
dfk0 = dfk0.drop(columns=[
    'internet_datetime','IP','samp_type','samp_num','calib_num','calib_rep','time',
    'vbatt','vtherm','vint','vext','isobatt','contemp','pHtemp','press','pHint','pHext',
    'O2_MN','O2_SN','O2con','O2sat','O2temp','Dphase','Bphase','Rphase','Bamp','Bpot','Ramp','Raw_Temp',
    'SBEtemp','SBEcond','SBEsal','SBEday','SBEmon','SBEyear','SBEtime','O2_corr',
])


In [68]:
# SeapHOx calibration from automated tris measurements
dfk0['caldate'] = pd.to_datetime(dfk0['caldate'])

# Order the calibrations chronologically. A stable sort (mergesort) keeps rows that
# share a calibration date in their original order, so the row numbers printed
# below are reproducible from run to run. The set_index/reset_index round trip
# moves sensor_name to the first column and renumbers the rows 0..n-1.
dfk0 = (
    dfk0.set_index(['sensor_name'])
    .sort_values(by='caldate', kind='mergesort')
    .reset_index()
)
print('All calibration coefficients:')
print(dfk0)

All calibration coefficients:
   sensor_name    caldate  k0int_tank  k0int_tris  k0ext_tank  k0ext_tris
0       SCS002 2022-01-18   -0.361933         NaN   -1.358165         NaN
1       SCS002 2022-02-18         NaN   -0.363890         NaN   -1.358733
2       SCS002 2022-02-18         NaN   -0.363718         NaN   -1.358583
3       SCS002 2022-03-04         NaN   -0.363890         NaN   -1.358733
4       SCS002 2022-03-04         NaN   -0.363718         NaN   -1.358583
5       SCS002 2022-03-18         NaN   -0.363169         NaN   -1.358549
6       SCS002 2022-03-18         NaN   -0.363075         NaN   -1.358447
7       SCS002 2022-04-01         NaN   -0.380479         NaN   -1.374578
8       SCS002 2022-04-01         NaN   -0.371263         NaN   -1.365427
9       SCS003 2022-04-04   -0.373082         NaN   -1.404321         NaN
10      SCS003 2022-04-22         NaN   -0.389679         NaN   -1.417942
11      SCS003 2022-04-22         NaN   -0.390035         NaN   -1.418295


In [69]:
# drop calibrations when tris bag was empty
# The affected tris calibrations (SCS002 on 2022-04-01) are selected by sensor and
# date instead of by hard-coded row numbers, so the right rows are removed even if
# the calibration table grows or is reordered. Tank calibrations are never dropped.
empty_bag = (
    (dfk0['sensor_name'] == 'SCS002')
    & (dfk0['caldate'].dt.normalize() == pd.Timestamp('2022-04-01'))
    & dfk0['k0int_tris'].notna()
)
dropped_rows = dfk0.index[empty_bag].tolist()

# index by sensor name for the per-sensor calculations that follow
dfk0 = dfk0.loc[~empty_bag].set_index(['sensor_name'])
print('Calibrations dropped due to empty tris bag: rows ' + ' and '.join(str(r) for r in dropped_rows))
print(dfk0)

Calibrations dropped due to empty tris bag: rows 7 and 8
               caldate  k0int_tank  k0int_tris  k0ext_tank  k0ext_tris
sensor_name                                                           
SCS002      2022-01-18   -0.361933         NaN   -1.358165         NaN
SCS002      2022-02-18         NaN   -0.363890         NaN   -1.358733
SCS002      2022-02-18         NaN   -0.363718         NaN   -1.358583
SCS002      2022-03-04         NaN   -0.363890         NaN   -1.358733
SCS002      2022-03-04         NaN   -0.363718         NaN   -1.358583
SCS002      2022-03-18         NaN   -0.363169         NaN   -1.358549
SCS002      2022-03-18         NaN   -0.363075         NaN   -1.358447
SCS003      2022-04-04   -0.373082         NaN   -1.404321         NaN
SCS003      2022-04-22         NaN   -0.389679         NaN   -1.417942
SCS003      2022-04-22         NaN   -0.390035         NaN   -1.418295


In [70]:
# calculate calibration coefficients for each sensor
# k0int_mean / k0ext_mean hold one value per sensor, in the same order as unq_sen
k0int_mean = []
k0ext_mean = []
unq_sen = dfk0.index.unique()

# loop through sensor names/deployments
for sensor in unq_sen:
    # a list indexer always returns a DataFrame, even when the sensor has one row
    sensor_cal = dfk0.loc[[sensor]]
    k0int = sensor_cal['k0int_tank'].tolist()
    k0ext = sensor_cal['k0ext_tank'].tolist()

    if len(sensor_cal) == 1:
        # a single calibration row: only the tank columns are used
        label = 'tank_only'
    else:
        # append the k0's from tris to calculate the means of all k0's
        k0int.extend(sensor_cal['k0int_tris'].tolist())
        k0ext.extend(sensor_cal['k0ext_tris'].tolist())
        label = 'mean'

    # tank rows have NaN tris values and vice versa, so use the NaN-ignoring mean
    k0int_mean.append(np.nanmean(k0int))
    k0ext_mean.append(np.nanmean(k0ext))
    print(sensor + ' k0int_' + label + ' = ', k0int_mean[-1])
    print(sensor + ' kext_' + label + ' = ', k0ext_mean[-1])

SCS002 k0int_mean =  -0.363341874042181
SCS002 kext_mean =  -1.3585419207112064
SCS003 k0int_mean =  -0.3842653234537466
SCS003 kext_mean =  -1.413519324962157


In [71]:
# plot calibration coefficients: one figure per sensor, with k0int on the primary
# y axis and k0ext on the secondary y axis
figs = {} # container for figures, keyed 'fig0', 'fig1', ...
for p, sensor in enumerate(unq_sen):
    sensor_cal = dfk0.loc[[sensor]] # all calibration rows for this sensor
    fig = make_subplots(specs=[[{"secondary_y": True}]])
    fig.add_trace(go.Scatter(x=sensor_cal.caldate, y=sensor_cal.k0int_tank, mode='markers', name='k0int_tank',
        marker_symbol='star', marker_size=15, marker_color='green'), secondary_y=False)
    fig.add_trace(go.Scatter(x=sensor_cal.caldate, y=sensor_cal.k0ext_tank, mode='markers', name='k0ext_tank',
        marker_symbol='square', marker_size=10, marker_color='black'), secondary_y=True)
    fig.add_trace(go.Scatter(x=sensor_cal.caldate, y=sensor_cal.k0int_tris, mode='markers', name='k0int_tris',
        marker_symbol='star', marker_size=15, marker_color='blue'), secondary_y=False)
    fig.add_trace(go.Scatter(x=sensor_cal.caldate, y=sensor_cal.k0ext_tris, mode='markers', name='k0ext_tris',
        marker_symbol='square', marker_size=10, marker_color='red'), secondary_y=True)
    # mean k0int for all calibrations
    fig.add_hline(y=float(k0int_mean[p]), secondary_y=False, name='k0int_mean', line_color='blue',
        annotation_text="k0 means", annotation_position="top left")
    # mean k0ext for all calibrations (named k0ext_mean; it was mislabelled k0int_mean)
    fig.add_hline(y=float(k0ext_mean[p]), secondary_y=True, name='k0ext_mean', line_color='red')
    fig.update_layout(height=500, width=950, title_text=sensor + ' Calibration coefficients at 0C')
    fig['layout']['yaxis']['title']='k0_int'
    fig['layout']['yaxis2']['title']='k0_ext'
    figs['fig'+str(p)] = fig

# show the figures in sensor order
for fig in figs.values():
    fig.show()

In [72]:
# add tris pH and sensor pH during automated tris calibrations
caldf.loc[:,'pHtris'] = pHtris

# Sensor pH is computed with each sensor's OWN mean calibration coefficients
# (k0int_mean / k0ext_mean are ordered like unq_sen), instead of applying the first
# sensor's coefficients to every calibration row. Rows of a sensor without
# coefficients stay NaN.
caldf.loc[:,'pHint_tris'] = np.nan
caldf.loc[:,'pHext_tris'] = np.nan
for idx, sensor in enumerate(unq_sen):
    sensor_rows = caldf['sensor_name'] == sensor
    if not sensor_rows.any():
        continue # e.g. a sensor that only has a tank calibration
    cal = caldf.loc[sensor_rows]
    # np.asarray strips any index so values are written back in row order
    caldf.loc[sensor_rows,'pHint_tris'] = np.asarray(
        pHint_from_Vint_k0int.pHint_from_Vint_k0int(k0int_mean[idx], cal.vint, cal.SBEtemp))
    caldf.loc[sensor_rows,'pHext_tris'] = np.asarray(
        pHext_from_Vext_k0ext.pHext_from_Vext_k0ext(k0ext_mean[idx], cal.vext, cal.SBEtemp, 35))


In [73]:

# plot in situ tris pH vs sensor pH
# NOTE: this plot is currently disabled. The plotting code is wrapped in a
# triple-quoted string, so none of it executes; because the string is the last
# expression in the cell, the notebook simply echoes it as the cell output.
# The disabled code would scatter caldf.pHint_tris and caldf.pHext_tris against
# caldf.pHtris with a diagonal reference line across the plot area.
"""
fig = go.Figure()
fig.add_trace(go.Scatter(x=caldf.pHtris,y=caldf.pHint_tris,mode='markers',name='pHint_tris',\
        marker_symbol='star',marker_size=15,marker_color='blue'))
fig.add_trace(go.Scatter(x=caldf.pHtris,y=caldf.pHext_tris,mode='markers',name='pHext_tris',\
        marker_symbol='square',marker_size=15,marker_color='red'))
fig.update_layout(shapes = [{'type': 'line', 'yref': 'paper', 'xref': 'paper', 'y0': 0, 'y1': 1, 'x0': 0, 'x1': 1}])
fig.update_layout(yaxis_range=[8,8.5])
fig.update_layout(xaxis_range=[8,8.5])
fig.update_layout(height=700, width=700,title_text='pH_tris vs pH_sensor')
fig['layout']['yaxis']['title']='pH_sensor'
fig['layout']['xaxis']['title']='pH_tris'
fig.show()
"""


"\nfig = go.Figure()\nfig.add_trace(go.Scatter(x=caldf.pHtris,y=caldf.pHint_tris,mode='markers',name='pHint_tris',        marker_symbol='star',marker_size=15,marker_color='blue'))\nfig.add_trace(go.Scatter(x=caldf.pHtris,y=caldf.pHext_tris,mode='markers',name='pHext_tris',        marker_symbol='square',marker_size=15,marker_color='red'))\nfig.update_layout(shapes = [{'type': 'line', 'yref': 'paper', 'xref': 'paper', 'y0': 0, 'y1': 1, 'x0': 0, 'x1': 1}])\nfig.update_layout(yaxis_range=[8,8.5])\nfig.update_layout(xaxis_range=[8,8.5])\nfig.update_layout(height=700, width=700,title_text='pH_tris vs pH_sensor')\nfig['layout']['yaxis']['title']='pH_sensor'\nfig['layout']['xaxis']['title']='pH_tris'\nfig.show()\n"

In [74]:
# calculate calibrated pHint and pHext
df['datetime'] = list(df['date'] + ' ' + df['time']) # combine date and time to use for plots

# Calibrated pH is written back through a per-sensor row mask, so every value lands
# on the row it was computed from regardless of how df is ordered. (Concatenating
# the per-sensor results and assigning them positionally is only correct when df
# happens to be grouped by sensor in unq_sen order.) Rows belonging to a sensor
# without calibration coefficients stay NaN.
df['pHint_cal'] = np.nan
df['pHext_cal'] = np.nan

for idx, sensor in enumerate(unq_sen):
    sensor_rows = df['sensor_name'] == sensor
    if not sensor_rows.any():
        continue # calibration exists but no samples from this sensor in df
    samples = df.loc[sensor_rows]
    # k0int_mean / k0ext_mean are ordered like unq_sen; np.asarray strips any
    # index so values are written back in row order
    df.loc[sensor_rows, 'pHint_cal'] = np.asarray(
        pHint_from_Vint_k0int.pHint_from_Vint_k0int(k0int_mean[idx], samples.vint, samples.SBEtemp))
    df.loc[sensor_rows, 'pHext_cal'] = np.asarray(
        pHext_from_Vext_k0ext.pHext_from_Vext_k0ext(k0ext_mean[idx], samples.vext, samples.SBEtemp, samples.SBEsal))

In [75]:
# fun part! plot it all
# columns handed to plot_all, in the positional order its signature expects
plot_columns = ('datetime', 'pHint_cal', 'pHext_cal', 'O2_corr', 'O2sat',
                'SBEtemp', 'O2temp', 'pHtemp', 'SBEsal', 'press')
plot_all.plot_all(unq_sen, df, *(df[name] for name in plot_columns))