In [49]:
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import re
import xarray as xr

In [50]:
# - Set data path
dpath = '/opt/acoustic-variability/data/'

In [51]:
# - Set saved out figure path
figpath = '/opt/acoustic-variability/python/figures/'

# Compile JASADCP metadata

## Read JASADCP metadata into df

In [52]:
fnames = sorted(os.listdir(dpath + 'JASADCP/ncfiles'))

In [53]:
nc_counter = len(fnames)
hardware_model = [None]*nc_counter
serial_numbers = [None]*nc_counter
transmit_frequency = [None]*nc_counter
phased_array = [None]*nc_counter
cruise_beg_date = [None]*nc_counter
blanking_interval = [None]*nc_counter
bin_length = [None]*nc_counter
transducer_beam_angle = [None]*nc_counter
transmit_pulse_length = [None]*nc_counter
comments = [None]*nc_counter
biomass_dtmn = [None]*nc_counter

In [54]:
ifile = 0
for fname in fnames:
    ncfile = dpath + 'JASADCP/ncfiles/' + fname
    ncnow = xr.open_dataset(ncfile)
    strnow = ncnow.attrs['cruise_sonar_summary']
    # 1.) hardware_model
    hardware_modelnow = re.findall("HARDWARE MODEL *: *((?:\S+ )*\S+)", strnow)
    if hardware_modelnow:
        hardware_model[ifile] = hardware_modelnow[0]
    elif not hardware_modelnow: # if hardware_modelnow is empty
        manufacturernow = re.findall("MANUFACTURER *: *((?:\S+ )*\S+)", strnow)
        if manufacturernow:
            hardware_model[ifile] = manufacturernow[0]
    # 2.) serial_numbers
    serial_numbersnow = re.findall("SERIAL NUMBERS *: *((?:\S+ )*\S+)", strnow)
    if serial_numbersnow:
        serial_numbers[ifile] = serial_numbersnow[0]
    # 3.) transmit_frequency
    transmit_frequencynow = re.findall("TRANSMIT FREQUENCY *: *((?:\S+ )*\S+)", strnow)
    if transmit_frequencynow:
        transmit_frequency[ifile] = transmit_frequencynow[0]
    # 4.) phased_array 
    phased_arraynow = re.findall("phased.array", strnow, re.IGNORECASE)
    if phased_arraynow:
        sep = '///'; phased_arraynow = sep.join(phased_arraynow)
        phased_array[ifile] = phased_arraynow
    # 5.) cruise_beg_date
    cruise_beg_date[ifile] = ncnow['time'][0].values
    # 6.) blanking_interval
    blanking_intervalnow = re.findall("BLANKING INTERVAL *: *((?:\S+ )*\S+)", strnow)
    if blanking_intervalnow:
        blanking_interval[ifile] = blanking_intervalnow[0]
    # 7.) bin_length
    bin_lengthnow = re.findall("BIN LENGTH *: *((?:\S+ )*\S+)", strnow)
    if bin_lengthnow:
        bin_length[ifile] = bin_lengthnow[0]
    # 8.) transducer_beam_angle
    transducer_beam_anglenow = re.findall("TRANSDUCER BEAM ANGLE *: *((?:\S+ )*\S+)", strnow)
    if transducer_beam_anglenow:
        transducer_beam_angle[ifile] = transducer_beam_anglenow[0]
    # 9.) transmit_pulse_length
    transmit_pulse_lengthnow = re.findall("TRANSMIT PULSE LENGTH *: *((?:\S+ )*\S+)", strnow)
    if transmit_pulse_lengthnow:
        transmit_pulse_length[ifile] = transmit_pulse_lengthnow[0]
    # 10.) comments
    commentsnow = re.findall("COMMENTS *: *((?:\S+ )*\S+)", strnow)
    if commentsnow:
        sep = '///'; commentsnow = sep.join(commentsnow)
        comments[ifile] = commentsnow
    # 11.) biomass_dtmn
    biomass_dtmnnow = re.findall("BIOMASS DETERMINATION *: *((?:\S+ )*\S+)", strnow)
    if biomass_dtmnnow:
        biomass_dtmn[ifile] = biomass_dtmnnow[0]
    ifile = ifile+1

In [55]:
df = pd.concat(
    [pd.Series(hardware_model,name='hardware_model'), pd.Series(serial_numbers,name='serial_numbers'),
     pd.Series(transmit_frequency,name='transmit_frequency'),pd.Series(phased_array,name='phased_array'),
     pd.Series(cruise_beg_date,name='cruise_beg_date'),pd.Series(blanking_interval,name='blanking_interval'),
     pd.Series(bin_length,name='bin_length'),pd.Series(transducer_beam_angle,name='transducer_beam_angle'),
     pd.Series(transmit_pulse_length,name='transmit_pulse_length'),pd.Series(comments,name='comments')],
     axis=1)

In [56]:
# - Define fxns to describe bandwidth from comments and hardware_model

# --> Checks for hardware_model names containing NB = narrowband
#dfnow = df.dropna(subset=['hardware_model'])
#dfnow[dfnow['hardware_model'].str.contains('NB')]['hardware_model'].unique()
# --> Results are:
# array(['NB 150 (VM-150-18HP)', 'NB 150', 'VM-150 (NB)'], dtype=object)

def set_bandwidth_from_comments(row):
# I use "broadband" instead of "broad" here b/c we have some cruise comments that say "broad-scale"
    if row['comments']:
        if (re.search('broadband', row['comments'], re.IGNORECASE) and
                re.search('narro', row['comments'], re.IGNORECASE)):
            return 'both broad and narrowband?' 
        elif (re.search('broadband', row['comments'], re.IGNORECASE) and
                re.search('phased.array', row['comments'], re.IGNORECASE)):
            # this will also capture broadband+narro+phased.array
            return 'both broadband and phased array?' 
        elif re.search('broadband', row['comments'], re.IGNORECASE):
            return 'broadband'
        elif (re.search('narro', row['comments'], re.IGNORECASE) and
                re.search('phased.array', row['comments'], re.IGNORECASE)): 
            return 'both narrowband and phased array'
        elif re.search('narro', row['comments'], re.IGNORECASE):
            return 'narrowband'
        else:
            return 'unknown'
    else:
        return 'unknown'

def set_bandwidth_from_hardware_model(row):
    if row['hardware_model']:
        if re.search('broad', row['hardware_model'], re.IGNORECASE):
            return 'broadband'
        elif (re.search('narro', row['hardware_model'], re.IGNORECASE) or
                re.search('nb', row['hardware_model'], re.IGNORECASE)):
            return 'narrowband'
        elif re.search('phased.array', row['hardware_model'], re.IGNORECASE):
            return 'phased array'
        else:
            return 'unknown'
    else:
        return 'unknown'

def set_final_bandwidth(row):
    if row['bw_from_comments']==row['bw_from_hardware_model']:
        return row['bw_from_comments']
    elif row['bw_from_comments']=='unknown':
        return row['bw_from_hardware_model'] 
    elif row['bw_from_hardware_model']=='unknown':
        return row['bw_from_comments'] 
    else:
        return 'CHECK NC FILE' 

In [57]:
df['bw_from_comments']=df.apply(set_bandwidth_from_comments, axis=1)
df['bw_from_hardware_model']=df.apply(set_bandwidth_from_hardware_model, axis=1)
df['bandwidth']=df.apply(set_final_bandwidth, axis=1)

In [58]:
#pd.set_option('max_rows', 100)
#pd.set_option('display.max_colwidth', -1)
#df.head()

## Uniformly rename hardware_model names

In [59]:
# - Save out all unique hardware_model names + # of occurrences to look at in separate window
# --> use this output to create a legend of how to translate different instrument names to a uniform list of names
#df['hardware_model'].value_counts().to_csv('jasadcp_unique_instruments.csv', header=['hardware_model count'])

#### **List of uniform instrument names:**

**All names to become 'OS-38'**:  
'Ocean Surveyer 38', 'Ocean Surveyor 38'

**All names to become 'OS-75'**:  
'Ocean Surveyor 75', 'OS75 narrowband', 'Ocean Surveyer 75', 'Ocean Surveyor 75 narrowband', 'OS75 (Ocean Surveyor)', 'Ocean Surveyor 75 broadband', 'Ocean Surveyor OS75', 'OS75', 'Ocean Surveryor 75', 'Ocean Surveyor 75 narroband', 'Ocean Surveyor 75 Broadband', 'Ocean Surveyor 75 Narrowband', '75KHz Ocean Surveyor narrowband', 'Ocean Surveyor 75 kHz', 'RDI 75KHz Ocean Surveyor', 'Ocean Surveyor 75 kHz Phased Array', '75KHz Ocean Surveyor'

**All names to become 'OS-150'**:  
'Ocean Surveyer 150', 'Ocean Surveryor 150', 'Ocean Surveyor 150 narroband', 'Ocean Surveyor 150 narrowband', 'Ocean Surveyor 150 broadband'

**All names to become 'OS-II-38'**:  
'Ocean Surveyor II (OS-II 38)'

**All names to become 'OS-II-75'**:  
'OSII75S phased-array'

**All names to become 'VM-75'**:
'VM75 narrowband'

**All names to become 'VM-150'**:  
'VM-150', 'RD-VM150', 'VM-150 Narrowband', 'RD-VM150 Narrow band', 'VM-150 (NB)', 'VM-150 narrowband', 'RDI VM150 narrowband', 'VM150', 'RD-VM150 narrowband', 'RD-VM0150'

**All names to become 'VM-300'**:  
'VM-300', 'RD-VM300'

**All names to become 'VM-150-18HP'**:  
'NB 150 (VM-150-18HP)'

**All names to become 'Workhorse-300'**:  
'WorkHorse 300', 'Workhorse 300', 'Workhorse 300; 300 kHz' 

**All names to become 'Workhorse-1200'**:  
'Workhorse 1200'

**All names to become 'Workhorse-Mariner-300'**:  
'Workhorse Mariner (300 kHz)'

**All names to become 'Workhorse-Mariner-600'**:  
'WorkHorse Mariner 600'

**All names to become 'DCP4400A'**:  
'DCP4400A'

**All names to become (UNCLEAR)**:  
'Narrowband 150', 'NB 150', 'VM-150 and VM-300' (CHECKED NC FILES - CAN'T DTMN IF IT'S 150 OR 300 --> DISCARD), '150', 'narrowband 75 kHz', 'RDI', '150 narrowband', '150 kHz Narrowband', 'Broadband 150', 'Broad Band 150', 'RD-VM' (HAS ALL SERIAL NUMBERS, READ TRANSMIT FREQUENCY, TOO - IF 150 THEN VM-150, ETC.), '150 kHz hull mounted ADCP', 'Narrowband', 'Narrowband 300', '150 kHz', 'Direct-Read 150 kHz Narrowband' (HAS ALL SERIAL NUMBERS), 'Vessel-mounted 150 kHz Narrowband' (HAS ALL SERIAL NUMBERS), 'Narrow Band 150Khz', 'Vessel-mount 150 kHz Narrowband', '1) Narrow Band 150 kHz', '153.6 kHz hull mounted ADCP', '300 narrow band', '150 kHz broadband', '150 kHz narrow band', '"150 broad band, concave"', 'Vessel-Mount 150 kHz Narrowband' (HAS ALL SERIAL NUMBERS)

In [60]:
# - Define fxn to uniformly rename different original hardware_model names
def set_uniform_name_orig_hardware_model(row):
    if row['hardware_model']:
        if row['hardware_model'] in ['Ocean Surveyer 38', 'Ocean Surveyor 38']:
            return 'OS-38' 
        elif row['hardware_model'] in ['Ocean Surveyor 75', 'OS75 narrowband',
                                       'Ocean Surveyer 75', 'Ocean Surveyor 75 narrowband',
                                       'OS75 (Ocean Surveyor)', 'Ocean Surveyor 75 broadband',
                                       'Ocean Surveyor OS75', 'OS75', 'Ocean Surveryor 75',
                                       'Ocean Surveyor 75 narroband', 'Ocean Surveyor 75 Broadband',
                                       'Ocean Surveyor 75 Narrowband', '75KHz Ocean Surveyor narrowband',
                                       'Ocean Surveyor 75 kHz', 'RDI 75KHz Ocean Surveyor',
                                       'Ocean Surveyor 75 kHz Phased Array', '75KHz Ocean Surveyor']:
            return 'OS-75' 
        elif row['hardware_model'] in ['Ocean Surveyer 150', 'Ocean Surveryor 150',
                                       'Ocean Surveyor 150 narroband', 'Ocean Surveyor 150 narrowband',
                                       'Ocean Surveyor 150 broadband']:
            return 'OS-150'
        elif row['hardware_model'] in ['Ocean Surveyor II (OS-II 38)']:
            return 'OS-II-38'
        elif row['hardware_model'] in ['OSII75S phased-array']:
            return 'OS-II-75'
        elif row['hardware_model'] in ['VM75 narrowband']:
            return 'VM-75'
        elif row['hardware_model'] in ['VM-150', 'RD-VM150', 'VM-150 Narrowband', 'RD-VM150 Narrow band',
                                       'VM-150 (NB)', 'VM-150 narrowband', 'RDI VM150 narrowband',
                                       'VM150', 'RD-VM150 narrowband', 'RD-VM0150']:
            return 'VM-150'
        elif row['hardware_model'] in ['VM-300', 'RD-VM300']:
            return 'VM-300'
        elif row['hardware_model'] in ['RD-VM']:
            if re.search('150', row['transmit_frequency']):
                return 'VM-150'
        elif row['hardware_model'] in ['NB 150 (VM-150-18HP)']:
            return 'VM-150-18HP'
        elif row['hardware_model'] in ['WorkHorse 300', 'Workhorse 300', 'Workhorse 300; 300 kHz']:
            return 'Workhorse-300'
        elif row['hardware_model'] in ['Workhorse 1200']:
            return 'Workhorse-1200'
        elif row['hardware_model'] in ['Workhorse Mariner (300 kHz)']:
            return 'Workhorse-Mariner-300'
        elif row['hardware_model'] in ['WorkHorse Mariner 600']:
            return 'Workhorse-Mariner-600'
        else:
            return row['hardware_model'] + ' (UNCLEAR)' 
    else:
        return None

In [61]:
# - Create df w/ uniform instrument names
df['instrument_name']=df.apply(set_uniform_name_orig_hardware_model, axis=1)
cols = list(df.columns.values)
cols = [cols[0]]+['instrument_name']+cols[1:-1]
df = df[cols]

In [None]:
df_sn['instrument_name'].value_counts()

In [None]:
dfnow

In [69]:
dfnow = df.dropna(subset=['phased_array'])
dfnow[dfnow['bandwidth']=='CHECK NC FILE']

Unnamed: 0,hardware_model,instrument_name,serial_numbers,transmit_frequency,phased_array,cruise_beg_date,blanking_interval,bin_length,transducer_beam_angle,transmit_pulse_length,comments,bw_from_comments,bw_from_hardware_model,bandwidth
569,Ocean Surveyor 75 kHz Phased Array,OS-75,,75,Phased Array///Phased Array,2002-06-08 21:03:36,8 m,4 m,30.0,4 m,Configured for Broadband Mode,broadband,narrowband,CHECK NC FILE
570,Ocean Surveyor 75 kHz Phased Array,OS-75,,75,Phased Array///Phased Array,2002-08-07 16:39:30,8 m,4 m,30.0,4 m,Configured for Broadband Mode,broadband,narrowband,CHECK NC FILE
1310,OSII75S phased-array,OS-II-75,,75 kHz,phased-array,2008-09-10 16:11:32,8 m,16 m,,16 m,broadband mode,broadband,narrowband,CHECK NC FILE


In [63]:
# The following suggests that a phased array set up can be broadband
# --> Need to ask if that's true b/c Mullison (2017) implies that it's not:
# "Systems w/ piston transducers are capable of bandwidths of 25% and 6%, while phased
# array systems do not allow 25% bandwidth." Jerry also said that all OS
# instruments are narrowband...is that right?
df[df['bandwidth']=='CHECK NC FILE']

# The following also definitely confirms that for OS, there are lots of
# broadband+phased array AND narrowband+phased array
#dfnow = df.dropna(subset=['instrument_name'])
#dfnow[dfnow['instrument_name'].str.contains('OS')][['hardware_model',
#                                                   'instrument_name','comments','bw_from_comments',
#                                                  'bw_from_hardware_model','bandwidth']]

Unnamed: 0,hardware_model,instrument_name,serial_numbers,transmit_frequency,phased_array,cruise_beg_date,blanking_interval,bin_length,transducer_beam_angle,transmit_pulse_length,comments,bw_from_comments,bw_from_hardware_model,bandwidth
569,Ocean Surveyor 75 kHz Phased Array,OS-75,,75,Phased Array///Phased Array,2002-06-08 21:03:36.000000000,8 m,4 m,30.0,4 m,Configured for Broadband Mode,broadband,narrowband,CHECK NC FILE
570,Ocean Surveyor 75 kHz Phased Array,OS-75,,75,Phased Array///Phased Array,2002-08-07 16:39:30.000000000,8 m,4 m,30.0,4 m,Configured for Broadband Mode,broadband,narrowband,CHECK NC FILE
1007,Ocean Surveyor 75 Broadband,OS-75,,75 kHz,,2006-02-12 22:58:29.000000000,8 or 16 m,8 m,,8.33 m,"OS75 Narrowband in separate set, SAC 01064",narrowband,broadband,CHECK NC FILE
1008,Ocean Surveyor 75 Narrowband,OS-75,,75 kHz,,2006-02-12 22:58:29.000000000,16 m,16 m,,16 m,"OS75 Broadband in separate set, SAC 01063",broadband,narrowband,CHECK NC FILE
1009,Ocean Surveyor 75 Broadband,OS-75,,75 kHz,,2006-03-09 01:20:06.000000001,8 or 16 m,8 m,,8.33 m,"OS75 Narrowband in separate set, SAC 01066",narrowband,broadband,CHECK NC FILE
1010,Ocean Surveyor 75 Narrowband,OS-75,,75 kHz,,2006-03-09 01:20:06.000000001,16 m,16 m,,16 m,"OS75 broadband in separate set, SAC 01065",broadband,narrowband,CHECK NC FILE
1243,Ocean Surveyor 75 Narrowband,OS-75,,75 kHz,,2008-11-02 18:18:26.000000000,8 m,16 m,,16 m,"OS75 broadband in separate set, SAC 01301",broadband,narrowband,CHECK NC FILE
1244,Ocean Surveyor 75 Broadband,OS-75,,75 kHz,,2008-11-02 18:18:26.000000000,8 or 16 m,8 m,,8.33 m,"OS75 Narrowband in separate set, SAC 01300",narrowband,broadband,CHECK NC FILE
1310,OSII75S phased-array,OS-II-75,,75 kHz,phased-array,2008-09-10 16:11:32.000000000,8 m,16 m,,16 m,broadband mode,broadband,narrowband,CHECK NC FILE
1422,Ocean Surveyor 75 broadband,OS-75,,75 kHz,,2010-11-20 17:56:13.000000004,16 m,8 m,,8 m,"OS75 narrowband in separate set, SAC 01496",narrowband,broadband,CHECK NC FILE


## Create df w/ serial numbers

In [31]:
#df_sn = df.dropna(subset = ['serial_numbers'])
df_sn = df.dropna(subset = ['serial_numbers'])[['instrument_name','serial_numbers',
                                                'transmit_frequency','bw_from_comments',
                                                'bw_from_hardware_model','cruise_beg_date','comments']]
df_sn = df_sn[df_sn['serial_numbers'] != 'unconfirmed']

In [32]:
#pd.set_option('max_rows', 1000)
#pd.set_option('display.max_colwidth', -1)
df_sn.head()

Unnamed: 0,instrument_name,serial_numbers,transmit_frequency,bw_from_comments,bw_from_hardware_model,cruise_beg_date,comments
77,VM-150,"System 15, Transducer 199",153 kHz,unknown,narrowband,1992-11-18 23:02:04,"Only approximate GPS positions due to///Beams aligned fore-aft, not 45 degree///In general avoided tight screening of GPS to"
78,VM-150,"System 15, Transducer 199",153 kHz,unknown,narrowband,1993-01-08 00:06:04,"Beams aligned fore-aft, not 45 degree///In general avoided tight screening of GPS to"
89,VM-150,ADCP = 536 / XDUC = 449,153 kHz,unknown,unknown,1992-10-30 22:16:41,The only significant deviations from///vessel length///A user exit program automatically resets///Prior to 93/4/15 the gyrocompass///Bottom tracking is done for about///Data quality is dependent upon load
90,VM-150,ADCP = 536 / XDUC = 449,153 kHz,unknown,unknown,1993-03-27 07:11:19,The only significant deviations from///vessel length///A user exit program automatically resets///Prior to 93/4/15 the gyrocompass///Bottom tracking is done for about///Data quality is dependent upon load
91,VM-150,ADCP = 536 / XDUC = 449,153 kHz,unknown,unknown,1993-06-18 21:08:57,The only significant deviations from///vessel length///A user exit program automatically resets///Prior to 93/4/15 the gyrocompass///Bottom tracking is done for about///Data quality is dependent upon load


In [34]:
df.groupby(['instrument_name','transmit_frequency',
               'bw_from_comments','bw_from_hardware_model']
             ).size().reset_index().rename(columns={0:'count'})

Unnamed: 0,instrument_name,transmit_frequency,bw_from_comments,bw_from_hardware_model,count
0,150 (UNCLEAR),153.6 kHz,unknown,unknown,17
1,"150 broad band, concave (UNCLEAR)",150 kHz,unknown,broadband,1
2,150 narrowband (UNCLEAR),150 kHz,unknown,narrowband,8
3,300 narrow band (UNCLEAR),300 kHz,unknown,narrowband,1
4,Broad Band 150 (UNCLEAR),150 KHz,unknown,broadband,5
5,Broadband 150 (UNCLEAR),150 kHz,unknown,broadband,6
6,DCP4400A (UNCLEAR),115 kHz along 3 beams,unknown,unknown,1
7,NB 150 (UNCLEAR),150 KHz,unknown,narrowband,29
8,Narrowband (UNCLEAR),153.6 kHz,unknown,narrowband,4
9,Narrowband 150 (UNCLEAR),150 kHz,unknown,narrowband,57


In [30]:
df_sn.groupby(['instrument_name','serial_numbers','transmit_frequency',
               'bw_from_comments','bw_from_hardware_model']
             ).size().reset_index().rename(columns={0:'count'})

Unnamed: 0,instrument_name,serial_numbers,transmit_frequency,bw_from_comments,bw_from_hardware_model,count
0,OS-75,"S/N 1508 (in-use unit), S/N 10656 (spare unit)",76.8KHz,broadband,unknown,4
1,OS-75,"S/N 1508 (in-use unit), S/N 10656 (spare unit)",76.8KHz,narrowband,unknown,4
2,OS-75,"S/N 1508 (in-use unit), S/N 10656 (spare unit)",76.8KHz,unknown,unknown,10
3,VM-150,127,153 kHz,unknown,unknown,13
4,VM-150,177,150 kHz,unknown,unknown,2
5,VM-150,177 VM ADCP electronics (serial number 607),150 kHz,unknown,unknown,2
6,VM-150,28 (Transducer),153 kHz,unknown,unknown,1
7,VM-150,500,150 kHz,unknown,narrowband,4
8,VM-150,ADCP = 536 / XDUC = 449,153 kHz,unknown,unknown,29
9,VM-150,Ducer = 275,150kHz,unknown,unknown,5


In [24]:
df_sn['instrument_name'].value_counts()

VM-150                                         340
OS-75                                          18 
Direct-Read 150 kHz Narrowband (UNCLEAR)       3  
Vessel-mounted 150 kHz Narrowband (UNCLEAR)    3  
VM-300                                         2  
Vessel-Mount 150 kHz Narrowband (UNCLEAR)      1  
Name: instrument_name, dtype: int64

### Dealing w/ the unknown hardware_model names

In [None]:
# Check a few of the nc files to see if there is more info on the instrument;
# after printing out fnamesnow, use ncdump -h on tern to look at metadata of those files
# --> doesn't seem to be more info gleaned :( when I looked at a bunch of files
idxsnow = df[df['hardware_model']=='VM-150 and VM-300'].index.tolist()
fnamesnow = [fnames[i] for i in idxsnow] 
fnamesnow

#### Let's just try to make another spreadsheet for RDI that has some extra info. Maybe they can figure out which instrument it is from this extra info.

**Info to add when the hardware model is unknown:**  
'CHIEF SCIENTIST ON SHIP', 'PERSONNEL IN CHARGE',
'MANUFACTURER', 'HARDWARE MODEL' (already in df), 'SERIAL NUMBERS' (already in df),  
'TRANSMIT FREQUENCY' (already in df), 'TRANSDUCER CONFIGURATION',  
'DEPTH RANGE', 'BIN LENGTH' (already in df), 'NUMBER OF BINS',  
'TRANSMIT PULSE LENGTH' (already in df), 'BLANKING INTERVAL' (already in df), 'ENSEMBLE AVERAGING INTERVAL'

In [None]:
df.to_csv('jasadcp_metadata_for_TRDI.csv')

In [None]:
df.dtypes # datetime64 doesn't go to csv right

## Important JASADCP notes

In [27]:
# --> The following suggests that all VM instruments are narrowband:
dfnow = df.dropna(subset=['instrument_name'])
dfnow = dfnow[dfnow['instrument_name'].str.contains('VM')]
dfnow.groupby(['hardware_model','instrument_name','transmit_frequency','bw_from_comments','bw_from_hardware_model']).size().reset_index().rename(columns={0:'count'})

Unnamed: 0,hardware_model,instrument_name,transmit_frequency,bw_from_comments,bw_from_hardware_model,count
0,NB 150 (VM-150-18HP),VM-150-18HP,153.6 KHz,unknown,narrowband,18
1,RD-VM,VM-150,150kHz,unknown,unknown,5
2,RD-VM0150,VM-150,153 kHz,unknown,unknown,1
3,RD-VM150,VM-150,150 KHz,unknown,unknown,16
4,RD-VM150,VM-150,150 kHz except,unknown,unknown,1
5,RD-VM150,VM-150,153,unknown,unknown,3
6,RD-VM150,VM-150,153 kHz,unknown,unknown,135
7,RD-VM150 Narrow band,VM-150,153.6 kHz,unknown,narrowband,24
8,RD-VM300,VM-300,306.8 kHz,unknown,unknown,6
9,RD-VM300,VM-300,307 kHz,unknown,unknown,2


# Compile TAO mooring metadata

In [None]:
os.listdir(dpath + 'TAO_NDBC/ncfiles')

In [None]:
ncfile = dpath + 'TAO_NDBC/ncfiles/TAO_T0N170W_KA019-20151201_D_ADCP.nc'
nct = xr.open_dataset(ncfile)

In [None]:
nct['INTENSITY']

In [None]:
#nct['ADCP_CONFIG'].attrs['model_name']
nct['ADCP_CONFIG'].attrs

In [None]:
nct['INTENSITY'].plot()

# TESTING/OLD

In [None]:
#ncfile = dpath + 'JASADCP/ncfiles/01305_short.nc'
#ncfile = dpath + 'JASADCP/ncfiles/00200_short.nc'
ncfile = dpath + 'JASADCP/ncfiles/02000_short.nc'
ncj = xr.open_dataset(ncfile)
ncj.attrs['cruise_sonar_summary'].split('\n')
string = ncj.attrs['cruise_sonar_summary']

# USE THIS ONE!!!
# hardware_model, serial_numbers, transmit_frequency, phased_array,
# cruise_beg_date, blanking_interval, bin_length, transducer_beam_angle,
# transmit_pulse_length, ...C (from Mullison 2017 Table 2),
# transmit_power (from Mullison 2017 Table 2)??
hardware_model = re.findall("HARDWARE MODEL *: *((?:\w+ )*\w+)", string)
blanking_interval = re.findall("BLANKING INTERVAL *: *((?:\w+ )*\w+)", string)
phased_array = re.findall("(phased array)", string, re.IGNORECASE)[0]
cruise_beg_date = ncj.time[0]
# \w = [A-Za-z0-9]

In [None]:
print(df[df['phased_array']=='phased array']['hardware_model'].to_string())

In [None]:
fnames = []
for fname in os.listdir(dpath + 'JASADCP/ncfiles/'):
    if fname.endswith('.nc'):
        fnames.append(fname)

In [None]:
fnames[2236]

In [None]:
[i for i,x in enumerate(hardware_model) if not x]

In [None]:
# hardware_model troubleshooting
# 5, 10, 11
# 00573_short.nc - NB 150 (VM-150-18HP)
# 00139_short.nc - VM-150 
# 01305_short.nc - RD-VM150 Narrow band 
# 43
# 00726_short.nc - blank with name under MANUFACTURER 

In [None]:
ncfile = dpath + 'JASADCP/ncfiles/01872_short.nc'
ncj = xr.open_dataset(ncfile)
ncj.attrs['cruise_sonar_summary'].split('\n')