# read_raw

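This notebook reads the raw FlowMow2 Sentry data into Pandas dataframes and saves them to HDF5 files. Aside from assigning timestamps, the only processing done here is basic screening of the raw records: lines that do not match the expected record format are skipped, and SBE3 samples whose counts fall outside the accepted ranges are dropped.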

#### Setup

In [None]:
%matplotlib notebook
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import datetime as dt
import glob
import scipy.io as sio

#### Timestamp extractor function

In [None]:
def get_timestamp(line):
    """Parse the date/time stamp carried by a raw log line.

    The second and third space-separated fields of the stripped line are
    joined with a space and parsed with the format ``%Y/%m/%d %H:%M:%S.%f``.

    Parameters
    ----------
    line : str
        One raw log line.

    Returns
    -------
    timestamp : datetime.datetime
        Naive datetime exactly as written in the line.
    epoch : numpy.float64
        Unix time in seconds, computed by treating ``timestamp`` as UTC.

    Raises
    ------
    ValueError
        If the extracted text does not match the expected format.
    """
    tokens = line.strip().split(' ')
    stamp_text = ' '.join(tokens[1:3])
    timestamp = dt.datetime.strptime(stamp_text, '%Y/%m/%d %H:%M:%S.%f')

    # 'epoch' is unix time: tag the naive stamp as UTC before converting
    utc_stamp = timestamp.replace(tzinfo=dt.timezone.utc)
    epoch = np.float64(utc_stamp.timestamp())
    return timestamp, epoch

#### Import nav

In [None]:
%%time

# list of matlab rnv files
filenames = 'ashes_nav.list'

# one accumulator list per output column; every file appends its samples
timestamp = []
epoch = []
dive_number = []
lat = []
lon = []
depth = []
height = []
heading = []
pitch = []
roll = []

# loop through matlab files and extract nav data
with open(filenames, 'r') as f:
    for filename in f:
        nav_mat = sio.loadmat(filename.strip(), squeeze_me=True)

        # with squeeze_me=True each field of the 'rnv' struct is wrapped in a
        # 0-d object array, so .take(0) unwraps the actual data array
        rnv = nav_mat['rnv']
        rnv_time = rnv['t'].take(0)
        rnv_pos = rnv['pos'].take(0)
        nrows = len(rnv_time)

        # NOTE(review): assumes the dive number is the last three characters
        # of the 8th '/'-separated component of each listed path -- this
        # depends on the path layout used in the list file; confirm.
        dive = np.int64(filename.split('/')[7][-3:])

        # naive UTC datetimes, same values utcfromtimestamp() produced
        # (utcfromtimestamp is deprecated since Python 3.12)
        timestamp.extend(
            dt.datetime.fromtimestamp(t, tz=dt.timezone.utc).replace(tzinfo=None)
            for t in rnv_time
        )
        epoch.extend(rnv_time)
        dive_number.extend([dive] * nrows)
        lat.extend(rnv['lat'].take(0))
        lon.extend(rnv['lon'].take(0))
        depth.extend(rnv_pos[:, 2])
        height.extend(rnv['alt'].take(0))
        heading.extend(rnv_pos[:, 3])
        pitch.extend(rnv_pos[:, 4])
        roll.extend(rnv_pos[:, 5])

# convert to dataframe, fixing the column order at construction
nav = pd.DataFrame(
    {'timestamp': timestamp, 'epoch': epoch, 'dive_number': dive_number,
     'lat': lat, 'lon': lon, 'depth': depth, 'heading': heading,
     'pitch': pitch, 'roll': roll, 'height': height},
    columns=['timestamp', 'epoch', 'dive_number', 'lat', 'lon', 'depth',
             'height', 'heading', 'pitch', 'roll'])

# save to hdf5 ('key' passed by keyword: positional use is deprecated in pandas 2.x)
nav.to_hdf('nav.h5', key='table', append=False, data_columns=True)

#### Import Paros pressure sensor data

In [None]:
%%time

# list of paros DAT files
filenames = 'ashes_paros.list'

# loop through all files and append data to list
paros_list = []
with open(filenames, 'r') as f:
    for filename in f:
        # NOTE(review): assumes the dive number is the last three characters
        # of the 8th '/'-separated component of each listed path; confirm.
        dive_number = np.int64(filename.split('/')[7][-3:])
        with open(filename.strip(), 'r') as g:
            for line in g:
                record = line.strip()

                # keep only 'RAW' records that carry a 'P2=' reading
                if not record.startswith('RAW') or 'P2=' not in record:
                    continue

                # good lines from this instrument are length 58
                # (measured on the line as read, i.e. before stripping)
                if len(line) != 58:
                    continue

                timestamp, epoch = get_timestamp(line)

                # fourth field looks like '<name>=<tau>,<eta>'
                reading = record.split(' ')[3].split(',')
                tau_text = reading[0].split('=')[1]
                eta_text = reading[1]
                paros_list.append([timestamp, epoch, dive_number,
                                   np.float64(tau_text), np.float64(eta_text)])

# convert to dataframe (tau and eta are the pressure and temperature signal periods in microseconds)
paros = pd.DataFrame(paros_list, columns=['timestamp', 'epoch', 'dive_number', 'tau', 'eta'])

# save to hdf5 ('key' passed by keyword: positional use is deprecated in pandas 2.x)
paros.to_hdf('paros.h5', key='table', append=False, data_columns=True)

#### Import stinger GX3-25 microstrain IMU data

In [None]:
%%time

# list of ustrain_adv DAT files
filenames = 'ashes_ustrain_adv.list'

# loop through all files and append data to list
ustrain_adv_list = []
with open(filenames, 'r') as f:
    for filename in f:
        # NOTE(review): assumes the dive number is the last three characters
        # of the 8th '/'-separated component of each listed path; confirm.
        dive_number = np.int64(filename.split('/')[7][-3:])
        with open(filename.strip(), 'r') as g:
            for line in g:
                # keep only 'MSA3' records
                if not line.strip().startswith('MSA3'):
                    continue

                # good lines from this instrument will have 33 fields
                # (counted on the line as read, i.e. before stripping)
                if len(line.split(' ')) != 33:
                    continue

                timestamp, epoch = get_timestamp(line)

                # numeric payload: everything after the record tag, date and
                # time, excluding the final field of the stripped line
                values = [np.float64(field) for field in line.strip().split(' ')[3:-1]]
                ustrain_adv_list.append([timestamp, epoch, dive_number] + values)

# convert to dataframe
ustrain_adv = pd.DataFrame(ustrain_adv_list, columns=['timestamp','epoch','dive_number','a',
                                                      'b','c','d','e','f','g','h','i','j','k',
                                                      'l','m','n','o','p','q','r','s','t','u',
                                                      'v','w','x','y','z','aa','bb'])

# save to hdf5 ('key' passed by keyword: positional use is deprecated in pandas 2.x)
ustrain_adv.to_hdf('ustrain_adv.h5', key='table', append=False, data_columns=True)

#### Import chassis GX3-25 microstrain IMU data

In [None]:
%%time

# list of ustrain_chassis DAT files
filenames = 'ashes_ustrain_chassis.list'

# loop through all files and append data to list
ustrain_chassis_list = []
with open(filenames, 'r') as f:
    for filename in f:
        # NOTE(review): assumes the dive number is the last three characters
        # of the 8th '/'-separated component of each listed path; confirm.
        dive_number = np.int64(filename.split('/')[7][-3:])
        with open(filename.strip(), 'r') as g:
            for line in g:
                # keep only 'MSA3' records
                if not line.strip().startswith('MSA3'):
                    continue

                # good lines from this instrument will have 33 fields
                # (counted on the line as read, i.e. before stripping)
                if len(line.split(' ')) != 33:
                    continue

                timestamp, epoch = get_timestamp(line)

                # numeric payload: everything after the record tag, date and
                # time, excluding the final field of the stripped line
                values = [np.float64(field) for field in line.strip().split(' ')[3:-1]]
                ustrain_chassis_list.append([timestamp, epoch, dive_number] + values)

# convert to dataframe
ustrain_chassis = pd.DataFrame(ustrain_chassis_list, columns=['timestamp','epoch','dive_number','a',
                                                              'b','c','d','e','f','g','h','i','j','k',
                                                              'l','m','n','o','p','q','r','s','t','u',
                                                              'v','w','x','y','z','aa','bb'])

# save to hdf5 ('key' passed by keyword: positional use is deprecated in pandas 2.x)
ustrain_chassis.to_hdf('ustrain_chassis.h5', key='table', append=False, data_columns=True)

#### Import SBE3 data

In [None]:
%%time

# list of sbe3 DAT files
filenames = 'ashes_sbe3.list'

# accepted ranges (exclusive at both ends) for the two raw count channels;
# samples with either channel outside its range are dropped
COUNTS_0_RANGE = (500000, 815000)
COUNTS_1_RANGE = (450000, 770000)

# loop through all files and append data to list
sbe3_list = []
with open(filenames, 'r') as f:
    for filename in f:
        # NOTE(review): assumes the dive number is the last three characters
        # of the 8th '/'-separated component of each listed path; confirm.
        dive_number = np.int64(filename.split('/')[7][-3:])
        # undecodable bytes in the raw file are silently dropped
        with open(filename.strip(), 'r', encoding='utf-8', errors='ignore') as g:
            for line in g:
                record = line.strip()

                # keep only 'SBE3' records; good lines from this instrument
                # are length 58 (after stripping)
                if not record.startswith('SBE3') or len(record) != 58:
                    continue

                timestamp, epoch = get_timestamp(line)
                fields = record.split(' ')
                counts_0 = np.int64(fields[4])
                counts_1 = np.int64(fields[6])

                if (COUNTS_0_RANGE[0] < counts_0 < COUNTS_0_RANGE[1]
                        and COUNTS_1_RANGE[0] < counts_1 < COUNTS_1_RANGE[1]):
                    sbe3_list.append([timestamp, epoch, dive_number, counts_0, counts_1])

# convert to dataframe
sbe3 = pd.DataFrame(sbe3_list, columns=['timestamp','epoch','dive_number','counts_0','counts_1'])

# save to hdf5 ('key' passed by keyword: positional use is deprecated in pandas 2.x)
sbe3.to_hdf('sbe3.h5', key='table', append=False, data_columns=True)

In [None]:
# Plot the difference between consecutive SBE3 epoch values against row index.
# A dedicated figure/axes is created so re-running the cell never draws onto
# a figure left active by the interactive backend.
fig, ax = plt.subplots()
sbe3.epoch.diff().plot(ax=ax, linewidth=0, marker='.', markersize=0.1)
ax.set(title='SBE3 sample spacing', xlabel='sample index', ylabel='epoch difference (s)');