# Scripps Automated Shore Station (SASS) Self-Calibrating SeapHOx (SCS) live data viewer
### This script scrapes .dat files from the SCCOOS dr: https://sccoos.org/dr/data/. The SCS measures pH, oxygen, salinity, and temperature. The pH values plotted here are calibrated using an average of the coefficients established pre-deployment and from the automated tris calibrations. Oxygen concentration has been corrected for salinity and pressure. Before using or sharing these values, please get permission from the author. A new tab-delimited text file is produced every time the script runs, named with the timestamp of when the file was created. All times associated with this script are in UTC.
### Created by: Taylor Wirth twirth@ucsd.edu
### Last edit: 11 April 2022

In [1]:
# load the .py scripts
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import os
import glob
%load_ext autoreload
%autoreload 1
%aimport get_recent_dr
%aimport get_all_dr
%aimport correct_DO_with_sal
%aimport pHtris_from_T
%aimport k0int_from_Vint_pHcal
%aimport k0ext_from_Vext_pHcal
%aimport pHint_from_Vint_k0int
%aimport pHext_from_Vext_k0ext
%aimport plot_all

The last hour of data and plot of all the data will show below. Please wait!

In [2]:
# Load the Scripps Pier SCS data files from the SCCOOS dr into a pandas DataFrame,
# write a fresh tab-delimited download file, and display the last rows of data.
# This may take a few seconds depending on the last data download!
from datetime import datetime, timezone

path = 'https://sccoos.org/dr/data/scripps_pier_scs/'

# Look for an earlier download by its file-name prefix instead of counting .txt
# files: other .txt files (e.g. the 'SCS_tank*' calibration file read further down)
# live in the same directory and must not influence this decision.
txt = glob.glob('*.txt')
previous_downloads = [name for name in txt if name.startswith('SIOpierSCS')]
if previous_downloads:
    # an earlier download exists: only fetch the recent data
    # NOTE(review): presumably get_recent_dr reads the existing download itself,
    # which is why the old file is removed only after the call - confirm.
    df, col_names = get_recent_dr.get_recent_dr(path)
    # delete the superseded download file(s); a new one is written below
    for stale in previous_downloads:
        os.remove(stale)
else:
    # no txt file of downloaded data: get all of the data
    df, col_names = get_all_dr.get_all_dr(path)

# create new tab delimited file to download, stamped with the current UTC time
# (timezone-aware replacement for the deprecated datetime.utcnow(); the formatted
# string is identical)
now = datetime.now(timezone.utc)
current_time = now.strftime('%Y%m%d%H%M%S')
df.to_csv('SIOpierSCS_download_' + current_time + '.txt', header=col_names, index=None, sep='\t')

# show the last six rows (the last hour at the 10-minute spacing seen in the data)
df.tail(6)

Unnamed: 0,internet_datetime,IP,sensor_name,samp_type,samp_num,calib_num,calib_rep,date,time,vbatt,...,Bpot,Ramp,Raw_Temp,SBEtemp,SBEcond,SBEsal,SBEday,SBEmon,SBEyear,SBEtime
7732,2022-04-13T16:21:20Z,0.0.0.0,SCS003,0,686,0,0,2022/04/13,16:20:10,18.01,...,731.2,756.2,387.9,16.1031,4.23847,33.5603,13,Apr,2022,16:20:02
7733,2022-04-13T16:31:20Z,0.0.0.0,SCS003,0,687,0,0,2022/04/13,16:30:10,18.01,...,729.6,756.4,386.0,16.1395,4.24303,33.5699,13,Apr,2022,16:30:02
7734,2022-04-13T16:41:20Z,0.0.0.0,SCS003,0,688,0,0,2022/04/13,16:40:10,18.01,...,727.5,756.3,383.3,16.3221,4.25772,33.5454,13,Apr,2022,16:40:02
7735,2022-04-13T16:51:20Z,0.0.0.0,SCS003,0,689,0,0,2022/04/13,16:50:10,18.01,...,724.3,756.0,380.7,16.3934,4.26623,33.5602,13,Apr,2022,16:50:02
7736,2022-04-13T17:01:20Z,0.0.0.0,SCS003,0,690,0,0,2022/04/13,17:00:10,18.01,...,716.9,755.6,373.2,16.6947,4.29512,33.5604,13,Apr,2022,17:00:02
7737,2022-04-13T17:11:20Z,0.0.0.0,SCS003,0,691,0,0,2022/04/13,17:10:10,18.01,...,709.6,754.7,369.5,16.8693,4.30672,33.5155,13,Apr,2022,17:10:02


In [3]:
# Data QC
# Drop data rows whose SBE salinity is more than 10 standard deviations from the
# mean. Rows with missing salinity fail the comparison and are dropped as well.
sal_dev = (df['SBEsal'] - df['SBEsal'].mean()).abs()
# .copy() makes the filtered frame independent of the original, so the column
# assignments that follow do not trigger pandas' SettingWithCopyWarning.
df = df[sal_dev <= 10 * df['SBEsal'].std()].copy()

# Blank out (set to NaN) pressure outliers more than 3 standard deviations from
# the mean; the rows themselves are kept. Mean and std are taken after the
# salinity filter above.
press_dev = (df['press'] - df['press'].mean()).abs()
df['press'] = df['press'].mask(press_dev > 3 * df['press'].std())

In [4]:
# Correct the oxygen concentration for salinity and pressure; the result is
# stored in the new 'O2_corr' column.
o2_inputs = (df['O2con'], df['O2temp'], df['press'], df['SBEsal'])
df['O2_corr'] = correct_DO_with_sal.correct_DO_with_sal(*o2_inputs, sal_input=0)

In [5]:
# Split the calibration samples (samp_type == 1) out of the data.
# Run this only once!! After the first run df no longer holds any calibration
# rows, so a second run would leave caldf empty.
is_calibration = df['samp_type'] == 1
caldf = df.loc[is_calibration].rename(columns={"date": "caldate"})  # calibration samples
df = df.drop(index=df.index[is_calibration])  # remove calibration samples from the data frame

In [6]:
# Calibration coefficients (k0) from the calibration samples: compute the tris pH
# from the SBE temperature, then the internal/external k0 from the measured voltages.
cal_temp = caldf['SBEtemp']
pHtris = pHtris_from_T.pHtris_from_T(cal_temp, S=35)
k0int_tris = k0int_from_Vint_pHcal.k0int_from_Vint_pHcal(caldf['vint'], pHtris, cal_temp)
k0ext_tris = k0ext_from_Vext_pHcal.k0ext_from_Vext_pHcal(caldf['vext'], pHtris, cal_temp, calsal=35)

# Tank calibrations are read from the first local .txt file named 'SCS_tank*'
tank_files = [name for name in txt if name.startswith('SCS_tank')]
tankdf = pd.read_csv(tank_files[0], sep='\t')
caldf.loc[:, 'k0int_tris'] = k0int_tris
caldf.loc[:, 'k0ext_tris'] = k0ext_tris

# Raw measurement columns that are not needed in the calibration table
raw_columns = [
    'internet_datetime', 'IP', 'samp_type', 'samp_num', 'calib_num', 'calib_rep', 'time',
    'vbatt', 'vtherm', 'vint', 'vext', 'isobatt', 'contemp', 'pHtemp', 'press', 'pHint', 'pHext',
    'O2_MN', 'O2_SN', 'O2con', 'O2sat', 'O2temp', 'Dphase', 'Bphase', 'Rphase', 'Bamp', 'Bpot', 'Ramp', 'Raw_Temp',
    'SBEtemp', 'SBEcond', 'SBEsal', 'SBEday', 'SBEmon', 'SBEyear', 'SBEtime', 'O2_corr',
]
# Stack tank and tris calibrations into one table, then strip the raw columns
dfk0 = pd.concat([tankdf, caldf], ignore_index=True, sort=False)
dfk0 = dfk0.drop(columns=raw_columns)


In [7]:
# SeapHOx calibration from automated tris measurements:
# parse the calibration dates and order the table chronologically.
# The set_index/reset_index round trip moves sensor_name to the first column and
# renumbers the rows 0..n-1 in sorted order.
dfk0 = (
    dfk0.assign(caldate=pd.to_datetime(dfk0['caldate']))
    .set_index('sensor_name')
    .sort_values('caldate')
    .reset_index()
)
print('All calibration coefficients:', dfk0, sep='\n')

All calibration coefficients:
  sensor_name    caldate  k0int_tank  k0int_tris  k0ext_tank  k0ext_tris
0      SCS002 2022-01-18   -0.368642         NaN   -1.358165         NaN
1      SCS002 2022-02-18         NaN   -0.369635         NaN   -1.358724
2      SCS002 2022-02-18         NaN   -0.369442         NaN   -1.358583
3      SCS002 2022-03-04         NaN   -0.369091         NaN   -1.358733
4      SCS002 2022-03-04         NaN   -0.368919         NaN   -1.358583
5      SCS002 2022-03-18         NaN   -0.368863         NaN   -1.358549
6      SCS002 2022-03-18         NaN   -0.368796         NaN   -1.358447
7      SCS002 2022-04-01         NaN   -0.386176         NaN   -1.374578
8      SCS002 2022-04-01         NaN   -0.376958         NaN   -1.365427
9      SCS003 2022-04-04   -0.379604         NaN   -1.404149         NaN


In [8]:
# drop calibrations when tris bag was empty
# The affected calibrations are the SCS002 tris measurements of 2022-04-01.
# They are selected by sensor and date rather than by hard-coded row labels, so
# the right rows are removed even if the table gains, loses or reorders rows.
# Only tris rows are matched (k0int_tris present), never a tank calibration.
empty_bag = (
    (dfk0['sensor_name'] == 'SCS002')
    & (dfk0['caldate'].dt.normalize() == pd.Timestamp('2022-04-01'))
    & dfk0['k0int_tris'].notna()
)
dropped_rows = dfk0.index[empty_bag]
dfk0 = dfk0.drop(index=dropped_rows)
# index by sensor so the following cells can look up calibrations per sensor
dfk0 = dfk0.set_index(['sensor_name'])
# report the row labels that were actually removed
print('Calibrations dropped due to empty tris bag: rows ' + ' and '.join(str(row) for row in dropped_rows))
print(dfk0)

Calibrations dropped due to empty tris bag: rows 7 and 8
               caldate  k0int_tank  k0int_tris  k0ext_tank  k0ext_tris
sensor_name                                                           
SCS002      2022-01-18   -0.368642         NaN   -1.358165         NaN
SCS002      2022-02-18         NaN   -0.369635         NaN   -1.358724
SCS002      2022-02-18         NaN   -0.369442         NaN   -1.358583
SCS002      2022-03-04         NaN   -0.369091         NaN   -1.358733
SCS002      2022-03-04         NaN   -0.368919         NaN   -1.358583
SCS002      2022-03-18         NaN   -0.368863         NaN   -1.358549
SCS002      2022-03-18         NaN   -0.368796         NaN   -1.358447
SCS003      2022-04-04   -0.379604         NaN   -1.404149         NaN


In [9]:
# calculate calibration coefficients for each sensor
# For every sensor the mean k0int / k0ext is taken over all available tank and
# tris calibrations (NaN entries are ignored). The means are stored in
# k0int_mean / k0ext_mean in the same order as unq_sen.
k0int_mean = []
k0ext_mean = []
unq_sen = dfk0.index.unique()

for sensor in unq_sen:
    # A list selector always returns a DataFrame, even when the sensor has a
    # single calibration row, so no scalar special case is needed.
    sensor_cal = dfk0.loc[[sensor]]
    has_tris = sensor_cal[['k0int_tris', 'k0ext_tris']].notna().any().any()

    # tank coefficients first, then the tris coefficients
    k0int = sensor_cal['k0int_tank'].tolist() + sensor_cal['k0int_tris'].tolist()
    k0ext = sensor_cal['k0ext_tank'].tolist() + sensor_cal['k0ext_tris'].tolist()
    k0int_mean.append(np.nanmean(k0int))
    k0ext_mean.append(np.nanmean(k0ext))

    # label the printout according to whether any tris calibration contributed
    label = 'mean' if has_tris else 'tank_only'
    print(sensor + ' k0int_' + label + ' = ', k0int_mean[-1])
    print(sensor + ' kext_' + label + ' = ', k0ext_mean[-1])

SCS002 k0int_mean =  -0.3690553739850381
SCS002 kext_mean =  -1.3585406349969207
SCS003 k0int_tank_only =  -0.379604
SCS003 kext_tank_only =  -1.404149


In [10]:
# Plot the calibration coefficients of each sensor: one figure per sensor with
# k0int on the primary y axis and k0ext on the secondary y axis, plus a
# horizontal line at each mean coefficient.
figs = {}  # container for figures, keyed 'fig0', 'fig1', ...
for p, sensor in enumerate(unq_sen):
    # list selector keeps a DataFrame even for a sensor with a single row
    sensor_cal = dfk0.loc[[sensor]]
    fig = make_subplots(specs=[[{"secondary_y": True}]])
    fig.add_trace(go.Scatter(x=sensor_cal.caldate, y=sensor_cal.k0int_tank, mode='markers', name='k0int_tank',
        marker_symbol='star', marker_size=15, marker_color='green'), secondary_y=False)
    fig.add_trace(go.Scatter(x=sensor_cal.caldate, y=sensor_cal.k0ext_tank, mode='markers', name='k0ext_tank',
        marker_symbol='square', marker_size=10, marker_color='black'), secondary_y=True)
    fig.add_trace(go.Scatter(x=sensor_cal.caldate, y=sensor_cal.k0int_tris, mode='markers', name='k0int_tris',
        marker_symbol='star', marker_size=15, marker_color='blue'), secondary_y=False)
    fig.add_trace(go.Scatter(x=sensor_cal.caldate, y=sensor_cal.k0ext_tris, mode='markers', name='k0ext_tris',
        marker_symbol='square', marker_size=10, marker_color='red'), secondary_y=True)
    # mean k0int for all calibrations
    fig.add_hline(y=float(k0int_mean[p]), secondary_y=False, name='k0int_mean', line_color='blue',
        annotation_text="k0 means", annotation_position="top left")
    # mean k0ext for all calibrations (was mislabelled 'k0int_mean')
    fig.add_hline(y=float(k0ext_mean[p]), secondary_y=True, name='k0ext_mean', line_color='red')
    fig.update_layout(height=500, width=950, title_text=sensor + ' Calibration coefficients at 0C')
    fig['layout']['yaxis']['title'] = 'k0_int'
    fig['layout']['yaxis2']['title'] = 'k0_ext'
    figs['fig' + str(p)] = fig

# display the figures in the order they were created
for fig in figs.values():
    fig.show()

In [11]:
# calculate calibrated pHint and pHext
df['datetime'] = list(df['date'] + ' ' + df['time'])  # combine date and time to use for plots

# Start from NaN so rows of a sensor without calibration coefficients stay
# empty instead of breaking the column assignment.
df['pHint_cal'] = np.nan
df['pHext_cal'] = np.nan

for k, sensor in enumerate(unq_sen):
    # k0int_mean / k0ext_mean are ordered like unq_sen
    sensor_rows = df['sensor_name'] == sensor
    sensor_df = df.loc[sensor_rows]
    pHint_SBE = pHint_from_Vint_k0int.pHint_from_Vint_k0int(k0int_mean[k], sensor_df.vint, sensor_df.SBEtemp)
    pHext_SBE = pHext_from_Vext_k0ext.pHext_from_Vext_k0ext(k0ext_mean[k], sensor_df.vext, sensor_df.SBEtemp, sensor_df.SBEsal)
    # Write the results back through the sensor's own row mask, so the values
    # line up with their rows regardless of how the sensors are ordered in df.
    # np.asarray drops any index so the values are placed positionally within
    # the mask, whatever container the helper returns.
    df.loc[sensor_rows, 'pHint_cal'] = np.asarray(pHint_SBE)
    df.loc[sensor_rows, 'pHext_cal'] = np.asarray(pHext_SBE)


In [87]:
# Plot all variables for every sensor; the columns are passed positionally in
# the order listed here.
plot_columns = ['datetime', 'pHint_cal', 'pHext_cal', 'O2_corr', 'O2sat',
                'SBEtemp', 'O2temp', 'pHtemp', 'SBEsal', 'press']
plot_all.plot_all(unq_sen, df, *(df[column] for column in plot_columns))