# Scripps Automated Shore Station (SASS) Self-Calibrating SeapHOx (SCS) live data viewer
### This script scrubs .dat files from the SCCOOS dr: https://sccoos.org/dr/data/. The SCS measures pH, oxygen, salinity and temperature. The pH values plotted here are calibrated using an average of the coefficients established pre-deployment and from the  automated tris calibrations. Oxygen concentration has been corrected for salinity and pressure. Before using and sharing these values, please get permission from the author. A new tab delimited text file will be produced every time the script runs, with the current timestamp of the time the file was created. All time zones associated with this script are in UTC.
### Created by: Taylor Wirth twirth@ucsd.edu
### Last edit: 11 April 2022

In [1]:
# load the .py scripts
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import os
import glob
%load_ext autoreload
%autoreload 1
%aimport get_recent_dr
%aimport get_all_dr
%aimport correct_DO_with_sal
%aimport pHtris_from_T
%aimport k0int_from_Vint_pHcal
%aimport k0ext_from_Vext_pHcal
%aimport pHint_from_Vint_k0int
%aimport pHext_from_Vext_k0ext
%aimport plot_all

The last hour of data and plot of all the data will show below. Please wait!

In [2]:
# Get all the files for the Scripps Pier SCS. get_all_dr.py looks and grabs all data files for the Scripps Pier SCS, 
# then converts it into a pandas DataFrame. 
# Printed below is the last hour of data. This may take a few seconds depending on last data download!
path = 'https://sccoos.org/dr/data/scripps_pier_scs/'

# if no txt file of downloaded data, get all of the data:
txt = glob.glob('*.txt')
if len(txt) > 1:
    df, col_names = get_recent_dr.get_recent_dr(path)
    # delete current txt file
    os.remove(txt[1])
else:
    df, col_names = get_all_dr.get_all_dr(path)

# create new tab delimited file to download
from datetime import datetime
now = datetime.utcnow()
current_time = now.strftime('%Y%m%d%H%M%S')
df.to_csv('SIOpierSCS_download_'+current_time+'.txt', header=col_names, index=None, sep='\t')

df.tail(6)

Unnamed: 0,internet_datetime,IP,sensor_name,samp_type,samp_num,calib_num,calib_rep,date,time,vbatt,...,Bpot,Ramp,Raw_Temp,SBEtemp,SBEcond,SBEsal,SBEday,SBEmon,SBEyear,SBEtime
7615,2022-04-12T20:51:21Z,0.0.0.0,SCS003,0,569,0,0,2022/04/12,20:50:10,18.1,...,733.1,752.2,366.9,16.73,4.29432,33.5239,12,Apr,2022,20:50:03
7616,2022-04-12T21:01:21Z,0.0.0.0,SCS003,0,570,0,0,2022/04/12,21:00:10,18.1,...,718.6,752.6,352.5,17.3253,4.35524,33.5561,12,Apr,2022,21:00:03
7617,2022-04-12T21:11:21Z,0.0.0.0,SCS003,0,571,0,0,2022/04/12,21:10:10,18.1,...,715.7,752.0,353.7,17.2251,4.34376,33.5402,12,Apr,2022,21:10:03
7618,2022-04-12T21:21:21Z,0.0.0.0,SCS003,0,572,0,0,2022/04/12,21:20:10,18.1,...,711.8,751.4,349.0,17.4222,4.3643,33.5536,12,Apr,2022,21:20:03
7619,2022-04-12T21:31:21Z,0.0.0.0,SCS003,0,573,0,0,2022/04/12,21:30:10,18.1,...,713.1,751.0,345.3,17.538,4.37548,33.5538,12,Apr,2022,21:30:03
7620,2022-04-12T21:41:21Z,0.0.0.0,SCS003,0,574,0,0,2022/04/12,21:40:10,18.1,...,714.2,749.9,343.7,17.5086,4.37472,33.5716,12,Apr,2022,21:40:03


In [3]:
# Data QC
# drop data rows if SBE salinity is outside 10 standard deviations
df = df[np.abs(df['SBEsal']-df['SBEsal'].mean()) <= (10*df['SBEsal'].std())]
# remove pressure outliers outside 3 standard deviations
df['press']=df['press'][~(np.abs(df['press']-df['press'].mean()) > (3*df['press'].std()))]

In [4]:
# Oxygen correction for salinity and pressure
O2_corr = correct_DO_with_sal.correct_DO_with_sal(df.O2con, df.O2temp, df.press, df.SBEsal, sal_input=0)

In [5]:
# run this only once!!
caldf = df.loc[df['samp_type']==1] # calibration samples
caldf = caldf.rename(columns={"date": "caldate"})
df = df.drop(df[df.samp_type==1].index) # remove calibration samples from the data frame

In [132]:
pHtris = pHtris_from_T.pHtris_from_T(caldf.SBEtemp, S=35)
k0int_tris = k0int_from_Vint_pHcal.k0int_from_Vint_pHcal(caldf.vint, pHtris, caldf.SBEtemp)
k0ext_tris = k0ext_from_Vext_pHcal.k0ext_from_Vext_pHcal(caldf.vext, pHtris, caldf.SBEtemp, calsal=35)

tankdf = pd.read_csv(txt[0],sep='\t')
caldf.loc[:,'k0int_tris'] = k0int_tris
caldf.loc[:,'k0ext_tris'] = k0ext_tris
dfk0 = pd.concat([tankdf, caldf],ignore_index=True,sort=False)
dfk0 = dfk0.drop(columns=['internet_datetime','IP','samp_type','samp_num','calib_num','calib_rep','time',\
    'vbatt','vtherm','vint','vext','isobatt','contemp','pHtemp','press','pHint','pHext',\
    'O2_MN','O2_SN','O2con','O2sat','O2temp','Dphase','Bphase','Rphase','Bamp','Bpot','Ramp','Raw_Temp',\
    'SBEtemp','SBEcond','SBEsal','SBEday','SBEmon','SBEyear','SBEtime'])


In [133]:
# SeapHOx calibration from automated tris measurements
dfk0['caldate'] = pd.to_datetime(dfk0['caldate'])
dfk0 = dfk0.set_index(['sensor_name'])
dfk0 = dfk0.sort_values(by='caldate')
print(dfk0)

               caldate  k0int_tank  k0int_tris  k0ext_tank  k0ext_tris
sensor_name                                                           
SCS002      2022-01-18   -0.396167         NaN   -1.384365         NaN
SCS002      2022-02-18         NaN   -0.396167         NaN   -1.384365
SCS002      2022-02-18         NaN   -0.396967         NaN   -1.384783
SCS002      2022-03-04         NaN   -0.396616         NaN   -1.384933
SCS002      2022-03-04         NaN   -0.396444         NaN   -1.384783
SCS002      2022-03-18         NaN   -0.396388         NaN   -1.384749
SCS002      2022-03-18         NaN   -0.396321         NaN   -1.384647
SCS002      2022-04-01         NaN   -0.413701         NaN   -1.400778
SCS002      2022-04-01         NaN   -0.404483         NaN   -1.391627
SCS003      2022-04-04   -0.379604         NaN   -1.404149         NaN


In [142]:
# calculate calibration coefficients for each sensor

k0int_mean = []
k0ext_mean = []
unq_sen = dfk0.index.unique()

for i in range(0,len(unq_sen)):
    k0int = dfk0.loc[unq_sen[i]].k0int_tank.tolist()
    k0ext = dfk0.loc[unq_sen[i]].k0ext_tank.tolist()
    if isinstance(dfk0.loc[unq_sen[i]].k0int_tank.tolist(),float):
        k0int_mean.append(np.nanmean(k0int))
        k0ext_mean.append(np.nanmean(k0ext))
        print(unq_sen[i] + ' k0int_tank_only = ', k0int_mean[i])
        print(unq_sen[i] + ' kext_tank_only = ', k0ext_mean[i])
        continue

    k0int.extend(dfk0.loc[unq_sen[i]].k0int_tris.tolist())
    k0ext.extend(dfk0.loc[unq_sen[i]].k0ext_tris.tolist())
    k0int_mean.append(np.nanmean(k0int))
    k0ext_mean.append(np.nanmean(k0ext))
    print(unq_sen[i] + ' k0int_mean = ', k0int_mean[i])
    print(unq_sen[i] + ' kext_mean = ', k0ext_mean[i])

SCS002 k0int_mean =  -0.3992503781980395
SCS002 kext_mean =  -1.38722557960448
SCS003 k0int_tank_only =  -0.379604
SCS003 kext_tank_only =  -1.404149


In [144]:
fig = make_subplots(specs=[[{"secondary_y": True}]])
fig.add_trace(go.Scatter(x=dfk0.caldate, y=dfk0.k0int_tris,mode='markers',name='k0int_tris',marker_symbol='star',marker_size=15,marker_color='blue'),secondary_y=False)
fig.add_trace(go.Scatter(x=dfk0.caldate, y=dfk0.k0ext_tris,mode='markers',name='k0ext_tris',marker_symbol='square',marker_size=10,marker_color='red'),secondary_y=True)
fig.add_hline(y=k0int_mean,secondary_y=False,name='k0int_mean',line_color='blue',annotation_text="k0 means",annotation_position="top left") # mean k0int for all calibrations
fig.add_hline(y=k0ext_mean,secondary_y=True,name='k0int_mean',line_color='red') # mean k0ext for all calibrations
fig.update_layout(height=500, width=950,title_text='Calibration coefficients at 0C')
fig['layout']['yaxis']['title']='k0_int'
fig['layout']['yaxis2']['title']='k0_ext'
fig.show()

ValueError: 
    Invalid value of type 'builtins.float' received for the 'y' property of scatter
        Received value: -0.379604

    The 'y' property is an array that may be specified as a tuple,
    list, numpy array, or pandas Series

In [10]:
datetime = list(df['date'] + ' ' + df['time']) # combine date and time to use for plots
Vint = df.vint.astype(float)
Vext = df.vext.astype(float)
T_C = df.SBEtemp.astype(float)
sal = df.SBEsal.astype(float)
pHint_SBE = pHint_from_Vint_k0int.pHint_from_Vint_k0int(k0int_mean, Vint, T_C)
pHint_tris = pHint_from_Vint_k0int.pHint_from_Vint_k0int(k0int_tris,Vint, T_C)
pHext_SBE = pHext_from_Vext_k0ext.pHext_from_Vext_k0ext(k0ext_mean, Vext, T_C, sal)
pHext_tris = pHext_from_Vext_k0ext.pHext_from_Vext_k0ext(k0ext_tris, Vext, T_C, sal)

In [11]:
plot_all.plot_all(datetime,pHint_SBE,pHext_SBE,O2_corr,df.O2sat,df.SBEtemp,df.O2temp,df.pHtemp,df.SBEsal,df.press)