In [2]:
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import re
import xarray as xr

In [3]:
# Run the companion metadata notebook before anything else.
# NOTE(review): later cells use `df` and `dpath` without assigning them in this notebook;
# presumably both are created by the notebook run here -- TODO confirm.
%run create_JASADCP_metadata_df.ipynb

**All names to become 'VM-150'**:\
'VM-150', 'RD-VM150', 'VM150', 'RD-VM0150'

**All names to become 'VM-300'**:\
'VM-300', 'RD-VM300'

**All names to become 'UNCLEAR'**:\
'150' (1990-1998; assume RDI + VM + NB or BB),
'RDI' (1985-1993; some give you transmit frequency, w/ those just assume VM + either NB or BB),
'150 kHz hull mounted ADCP' (1991; assume RDI + VM + NB or BB),
'Narrowband' (1994-1996; all 153.6kHz transmit frequency; assume RDI + VM + NB or BB),
'150 kHz' (1994; assume RDI + VM + NB or BB),
'153.6 kHz hull mounted ADCP' (2002; assume RDI + maybe OS?)

In [71]:
# Display settings for browsing the metadata frame: up to 1000 rows, untruncated cell text.
# Fully qualified option names are used because the bare 'max_rows' pattern is ambiguous in
# recent pandas (it also matches 'styler.render.max_rows') and raises OptionError.
pd.set_option('display.max_rows', 1000)
# None means "no width limit"; the former -1 sentinel was deprecated in pandas 1.0 and is
# rejected by later versions.
pd.set_option('display.max_colwidth', None)
# Earlier exploratory queries, kept for the results recorded after '-->':
#df[(df['instrument_name'].str.contains('VM-')) & (df['bandwidth']=='broadband')]['bandwidth'] # --> none
#df[(df['instrument_name'].str.contains('VM-'))]
#df[(df['instrument_name'].str.contains('Workhorse')) & (df['bandwidth']=='narrowband')] # --> 2
#df[(df['hardware_model']=='150') & (df['bandwidth']=='broadband')]
#df[(df['instrument_name'].str.contains('OS-'))]['cruise_beg_date'].min() # --> '2000-08-31 00:01:35'
#df[(df['instrument_name'].str.contains('OS-'))]['cruise_beg_date'].max() # --> '2018-11-15 17:22:09'
#df[(df['instrument_name'].str.contains('VM-'))]['cruise_beg_date'].min() # --> '1986-11-17 21:07:04'
#df[(df['instrument_name'].str.contains('VM-'))]['cruise_beg_date'].max() # --> '2016-05-22 02:58:13'
# Current query: rows whose hardware_model is exactly '153.6 kHz hull mounted ADCP'
df[(df['hardware_model']=='153.6 kHz hull mounted ADCP')]
# next look at the names vm-150 and vm-300; see if you can figure out diff btwn bb and nb

Timestamp('2016-05-22 02:58:13')

# Calculate Sv for each ncfile

#### Mean volume backscattering strength eqn (Mullison 2017):  
$S_v = C + 10log((T_x + 273.16)*R^2) - L_{DBM} - P_{DBW} + 2 \alpha R + 10log(10^{k_c(E-E_r)/10} - 1)$, where:  
- C = constant combining several params specific to each instrument
- $T_x$ = temperature measured at the transducer ($^{\circ}$C)
- R = along-beam range to the measurement, taken in the last quarter of the bin for Workhorse, Long Ranger, and Quartermaster instruments, and at the midpoint of the bin for other instruments
- $L_{DBM}$ = 10log(transmit pulse length, meters)
- $P_{DBW}$ = 10log(transmit power, Watts)
- $\alpha$ = absorption coefficient of the water
- $k_c$ = RSSI slope (dB/count)
- $E_r$ = noise floor (counts)

#### Mean volume backscattering strength eqn (fst-003):  
$S_v = 10log(\frac{4.47 \times 10^{-20} K_2 K_S}{c}) + 10log((T_x + 273.16)*R^2) - 10log(P) - 10log(K_1) + 2 \alpha R + 10log(10^{k_c(E-E_r)/10} - 1)$
- $K_2$ = system noise factor (dimensionless)
- $K_S$ = system constant, depends on NBADCP frequency
- c = speed of sound at the scattering layer being measured
- P = transmit pulse length
- $K_1$ = real-time power into the water (Watts) 
- All others the same as Mullison 2017 eqn

#### NBADCP-specific values and calculations (fst-003):

$C$:  
$C = 10log[\frac{4.47 \times 10^{-20} K_2 K_S}{c}]$

$K_2$:

In [18]:
# System noise factor K2 for each NBADCP model/frequency pair (from fst-003, pg 6)
model_labels = ['VM'] * 5 + ['DR'] * 5
frequencies_khz = [75, 150, 300, 600, 1200] * 2
arrays = [model_labels, frequencies_khz]
# Pair each model label with its frequency to form the two-level row index
tuples = list(zip(model_labels, frequencies_khz))
index = pd.MultiIndex.from_tuples(
    tuples,
    names=['NBADCP Model', 'Frequency (kHz)'],
)
k2_column = {'K2': [2.5, 4.3, 4.5, 9.1, 10.5, 2.2, 3.6, 4.2, 7.1, 8.1]}
K2_values = pd.DataFrame(k2_column, index=index)
# Re-wrapping leaves the contents unchanged; both names stay available to later cells
df_K2_values = pd.DataFrame(K2_values)
df_K2_values

Unnamed: 0_level_0,Unnamed: 1_level_0,K2
NBADCP Model,Frequency (kHz),Unnamed: 2_level_1
VM,75,2.5
VM,150,4.3
VM,300,4.5
VM,600,9.1
VM,1200,10.5
DR,75,2.2
DR,150,3.6
DR,300,4.2
DR,600,7.1
DR,1200,8.1


$K_S$:

In [11]:
# From fst-003, pg 10:
# System constant Ks, one value per NBADCP transmit frequency
nbadcp_freqs_khz = [75, 150, 300, 600, 1200]
ks_by_freq = [1.09E5, 4.17E5, 7.69E5, 1.56E6, 5.65E6]
Ks_values = {'NBADCP frequency (kHz)': nbadcp_freqs_khz, 'Ks': ks_by_freq}
df_Ks_values = pd.DataFrame(data=Ks_values)
df_Ks_values

Unnamed: 0,NBADCP frequency (kHz),Ks
0,75,109000.0
1,150,417000.0
2,300,769000.0
3,600,1560000.0
4,1200,5650000.0


$K_1$:

$K_1 = [\frac{(V_s \times a)-b}{c}]^2 \times K_{1c}$

$K_{1c}$:

In [11]:
# From fst-003, pg 10.
# NOTE(review): this cell sits under the $K_{1c}$ heading but is a verbatim copy of the
# Ks table cell earlier in the notebook; presumably the K1c values were meant to go
# here -- TODO confirm and replace.
column_names = ('NBADCP frequency (kHz)', 'Ks')
column_data = (
    [75, 150, 300, 600, 1200],
    [1.09E5, 4.17E5, 7.69E5, 1.56E6, 5.65E6],
)
Ks_values = dict(zip(column_names, column_data))
df_Ks_values = pd.DataFrame(Ks_values)
df_Ks_values

Unnamed: 0,NBADCP frequency (kHz),Ks
0,75,109000.0
1,150,417000.0
2,300,769000.0
3,600,1560000.0
4,1200,5650000.0


$R$:

$R = \frac{B + |(P-D)/2| + (N \times D) + (D/4)}{cos(\theta)} \times \frac{c'}{1475.1}$

$K_c$:

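For E < 200 counts, $K_c = \frac{127.3}{T_e+271}$ (**TODO:** verify the constant 271 against fst-003; Deines (1999) gives $127.3/(T_e+273)$ with $T_e$ in $^{\circ}$C).  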
If E is 200-230 counts, calibration of $K_c$ must be done at RDI. If E > 230 counts, $K_c$ can't be calibrated.  
$T_e$ = "temperature of system electronics - $T_e$ is used to calculate $K_c$ and $E_r$. $E_r$...is particularly sensitive to changes in $T_e$, so it is crucial to obtain an accurate record of $T_e$."  
"For DR-NBADCPs in which both system electronics and transducer assembly are immersed, $T_x$ can be substituted for $T_e$."  
"The ambient temp for the system electronics will generally differ from the temp recorded at the transducer. Therefore, you must independently measure and record the temperature of the system electronics."

$E_r$:

#### For each ncfile, save the following variables along with Sv:
- Tx (temp at transducer, dims = time, units = degC)
- R (range, NBADCP samples in the last quarter of each depth cell/OS at midpt, dims = depth, units = m)
- Kc (RSSI slope, constant, units = dB/count)
- E (RSSI, dims = depth x time, units = counts)
- Er (noise floor, constant, units = counts) 
- P (transmit pulse length, 10log(P) = LDBM, constant, units = m)
- K1 (assumed or measured transmit power, 10log(K1) = PDBW, dims = time, units = Watts)
- alpha (attenuation coefficient)
- c (speed of sound, dims = depth x time, units = m/s)
- theta (angle of the transducer beams to vertical, constant, units = degrees)
- D (depth cell length, constant, units = m)
- Ks (if applicable - only for NBADCP?)
- K2 (if applicable - only for NBADCP?)
- K1c (if applicable - only for NBADCP?)
- B (blank beyond transmit, if applicable - only for NBADCP?, constant, units = m)

Metadata should include:
- Instrument type
- Instrument SN
- Transmit frequency

### Narrow df down to Pacific region, OS instruments for now

In [5]:
# Narrow the metadata frame in stages, printing the row count after each one:
# missing-region count, total rows, rows with a region, Pacific rows, OS rows.
print(df['geo_region'].isnull().sum())
print(len(df))

# Stage 1: keep only rows that have a geo_region value
has_region = df['geo_region'].notna()
dfnow = df.loc[has_region]
print(len(dfnow))

# Stage 2: 'acific' matches the region name whether or not the P is capitalised
in_pacific = dfnow['geo_region'].str.contains('acific')
dfnow = dfnow.loc[in_pacific]
print(len(dfnow))

# Stage 3: instrument names containing 'OS'
is_os = dfnow['instrument_name'].str.contains('OS')
dfnow = dfnow.loc[is_os]
print(len(dfnow))

# Stage 4: drop the two OS-II instrument names (row count not printed for this stage)
excluded_instruments = ['OS-II-38', 'OS-II-75']
dfnow = dfnow.loc[~dfnow['instrument_name'].isin(excluded_instruments)]

33
2254
2221
1274
611


## Load table of C, PDBW values

In [None]:
# Read the C / P_DBW lookup table from CSV (Table 2 of Mullison 2017, per the file name).
# NOTE(review): `dpath` is not assigned in any cell visible in this notebook; presumably it
# comes from the notebook executed via %run at the top -- TODO confirm.
cpdbw = pd.read_csv(dpath + 'typical_system_characteristics_table2_mullison2017.csv')

In [9]:
cpdbw.head()

Unnamed: 0,Instrument,C (25%) (dB),C (6%) (dB),P_DBW Battery (dB),P_DBW Power Supply (dB),Rayleigh Distance (m)
0,ChannelMaster 300,-143.44,-152.26,,15.1,2.69
1,ChannelMaster 600,-139.08,-147.28,,12.0,2.96
2,ChannelMaster 1200,-127.13,-137.17,,9.0,1.71
3,Explorer Phased Array,,-139.14,,9.0,1.67
4,Explorer Piston,-132.73,-140.95,,3.0,1.35


## Load table of kc, Er values

#### Nominal Kc values from TRDI field service email:
BBADCP: 0.45 dB/count  
WHADCP: 0.42 dB/count  
OS-II-75, SN1508: 0.373 (beam 1), 0.386 (beam 2), 0.388 (beam 3), 0.384 (beam 4) = 0.38275 (avg)  
OS-II-75, SN10656: 0.398 (beam 1), 0.389 (beam 2), 0.3988 (beam 3), 0.395 (beam 4) = 0.395 (avg)  
#### Nominal Kc values from Jerry Mullison email:
Format: min-max, avg, stdev  
OS150: 0.41-0.44, 0.42, 0.012   
OS75: 0.36-0.42, 0.39, 0.019  
OS38: 0.36-0.37, 0.37, 0.004  

#### Nominal Er values from Jerry Mullison email:
Format: min-max, avg, stdev  
OS150: 18-28, 22, 2.08  
OS75: 11-25, 19, 2.93  
OS38: 5-33, 14, 4.83  

#### Further questions:
- Are the Kc and Er values in Jerry Mullison's email from phase II or phase I OS?
- Where does the speed of sound c in the denominator go when comparing the 1998 TRDI field service note with Mullison 2017? What c should I use for the denominator: 1475.1 m/s, as used for calculating the depth-cell range R in the 1998 TRDI field service note?
- So I can just use the lowest measured E per cruise to define Er? Some papers do this, cite them. Or if I know that all cruises used the same ADCP, I could find the lowest measured E out of all of those cruises. (Er should be constant for a given ADCP according to Mullison 2017.)
- What year did TRDI switch from OS to OS-II?
- What years were all these instruments available?

In [10]:
# Mean of the four per-beam Kc values for each OS-II-75 serial number
kc_beams_sn1508 = [0.373, 0.386, 0.388, 0.384]
# NOTE(review): the markdown notes list beam 3 of SN10656 as 0.3988, while 0.398 is used
# here -- TODO confirm which value is correct.
kc_beams_sn10656 = [0.398, 0.389, 0.398, 0.395]
for beam_kc in (kc_beams_sn1508, kc_beams_sn10656):
    print(np.mean(beam_kc))

0.38275000000000003
0.395


## Load WOD data (T,S)

## Compute Sv

In [20]:
dfnow.iloc[0]

fname                                                       00804_short.nc
hardware_model                                           Ocean Surveyer 75
instrument_name                                                      OS-75
serial_numbers                                                        None
transmit_frequency                                                    None
phased_array                                                          None
cruise_beg_date                                        2004-06-15 06:33:10
blanking_interval                                                       8m
bin_length                                                              8m
transducer_beam_angle                                                 None
transmit_pulse_length                                                   8m
comments                 narrowband mode///mounted with the ADCP pointi...
biomass_dtmn                                                            no
geo_region               

Mean volume backscattering strength:  
$S_v = C + 10log((T_x + 273.16)*R^2) - L_{DBM} - P_{DBW} + 2 \alpha R + 10log(10^{k_c(E-E_r)/10} - 1)$

In [None]:
# Open one of the .nc files (00001_short.nc) so the following cells can inspect it.
# The dataset is left open on purpose: later cells read and plot from `nc`.
ncfile = dpath + 'JASADCP/ncfiles/00001_short.nc'
nc = xr.open_dataset(ncfile)

In [None]:
cols = df.columns.values

In [None]:
cols[0:-1]

In [None]:
nc

In [None]:
nc['depth']

In [None]:
nc['amp']

In [None]:
nc['amp'].plot()

In [None]:
nc['depth'].values

In [None]:
#nct['ADCP_CONFIG'].attrs['model_name']
# NOTE(review): `nct` is not assigned in any cell visible in this notebook (only `nc` is);
# presumably a leftover from a removed cell or a typo for `nc` -- TODO confirm.
nct['ADCP_CONFIG'].attrs

# TESTING/OLD

In [None]:
# Sorted listing of every entry in the JASADCP/ncfiles directory under dpath
fnames = sorted(os.listdir(dpath + 'JASADCP/ncfiles'))

In [None]:
# One slot per listed file, pre-filled with None so unmatched files stay empty
nc_counter = len(fnames)
geo_region = [None for _ in range(nc_counter)]

In [None]:
# Pull the GEOGRAPHIC_REGION entry out of each file's 'cruise_sonar_summary' attribute.
# Files whose summary has no match keep the None already stored in geo_region.
# Raw string: '\S' is a regex class, not a Python escape, so the non-raw literal
# triggered an invalid-escape-sequence warning. The pattern itself is unchanged.
geo_region_pattern = re.compile(r"GEOGRAPHIC_REGION *: *((?:\S+ )*\S+)")
for ifile, fname in enumerate(fnames):
    ncfile = dpath + 'JASADCP/ncfiles/' + fname
    # Only the attribute string is needed, so close the file as soon as it is read;
    # previously every dataset opened in this loop was left open.
    with xr.open_dataset(ncfile) as ncnow:
        strnow = ncnow.attrs['cruise_sonar_summary']
    geo_regionnow = geo_region_pattern.findall(strnow)
    if geo_regionnow:
        # The first match is the region text captured after the colon
        geo_region[ifile] = geo_regionnow[0]

In [None]:
# Two-column frame pairing each file name with its extracted region (None where absent).
# NOTE(review): this rebinds `df`, which earlier cells also read; re-running those cells
# afterwards would see this two-column frame instead.
fname_col = pd.Series(fnames, name='fname')
region_col = pd.Series(geo_region, name='geo_region')
df = pd.concat([fname_col, region_col], axis=1)