# read_raw

This notebook reads the raw FlowMow2 Sentry data into pandas DataFrames and saves them to an HDF5 file (`flowmow.h5`) using `HDFStore`. It does not do any processing on the data aside from parsing each record's timestamp, adding the matching Unix epoch time, and tagging each record with its dive number.

#### Setup

In [None]:
%matplotlib notebook
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import datetime as dt
import glob

#### Timestamp extractor function

In [None]:
def get_timestamp(line):
    """Parse the date and time fields of a raw log line.

    The line is stripped and split on single spaces; fields 1 and 2 are
    joined and parsed with the format ``%Y/%m/%d %H:%M:%S.%f``.

    Returns a tuple ``(timestamp, epoch)`` where ``timestamp`` is a naive
    ``datetime`` and ``epoch`` is the Unix time (float seconds) obtained by
    interpreting that datetime as UTC.
    """
    fields = line.strip().split(' ')
    stamp_text = ' '.join(fields[1:3])
    timestamp = dt.datetime.strptime(stamp_text, '%Y/%m/%d %H:%M:%S.%f')

    # attach UTC only for the epoch conversion; the returned datetime stays naive
    as_utc = timestamp.replace(tzinfo=dt.timezone.utc)
    epoch = as_utc.timestamp()
    return timestamp, epoch

#### Import SCC data

In [None]:
%%time
# Collect every *.scc file below the FlowMow data directory, in sorted order.
filenames = sorted(glob.glob('/home/tjc/research/flowmow/**/*.scc', recursive=True))

# Parse each file line by line; only lines that begin with 'SCC' are kept.
scc_list = []
for filename in filenames:
    # element 5 of the '/'-split path (presumably the dive directory - verify
    # against the data layout); its last three characters are the dive number
    dive_dir = filename.split('/')[5]
    dive_number = int(dive_dir[-3:])
    with open(filename, 'r') as f:
        for line in f:
            record = line.strip()
            if not record.startswith('SCC'):
                continue
            timestamp, epoch = get_timestamp(line)
            # every field after the tag, date and time is read as a float
            values = [np.float64(field) for field in record.split(' ')[3:]]
            scc_list.append([timestamp, epoch, dive_number] + values)

# Build the dataframe: three bookkeeping columns followed by the SCC fields.
scc = pd.DataFrame(
    scc_list,
    columns=['timestamp', 'epoch', 'dive_number',
             'lat', 'lon', 'depth', 'pressure', 'heading',
             'magx', 'magy', 'magz', 'obs', 'eh', 'aux1', 'aux2',
             'T1', 'C1', 'T2', 'C2', 'S1', 'S2', 'ss1',
             'depth_d', 'height', 'D1', 'D2'],
)

# Open the HDF5 store (used by the cells that follow) and save the SCC table.
store = pd.HDFStore('flowmow.h5')
store['scc'] = scc

#### Import Paros pressure sensor data

In [None]:
%%time
# Collect every Paros *.DAT file below the FlowMow data directory, in sorted order.
filenames = glob.glob('/home/tjc/research/flowmow/**/parosKinsey/*.DAT', recursive=True)
filenames.sort()

# Parse each file line by line; only 'RAW' lines that contain 'P2=' are kept.
# (The previous row counter was never read and has been dropped; the line is
# now stripped once per record instead of once per use.)
paros_list = []
for filename in filenames:
    # element 5 of the '/'-split path (presumably the dive directory - verify
    # against the data layout); its last three characters are the dive number
    dive_number = int(filename.split('/')[5][-3:])
    with open(filename, 'r') as f:
        for line in f:
            record = line.strip()
            if record.startswith('RAW') and 'P2=' in record:
                timestamp, epoch = get_timestamp(line)
                # field 3 has the form '<key>=<a>,<b>'
                parts = record.split(' ')[3].split(',')
                a = parts[0].split('=')[1]
                b = parts[1]
                # a and b are stored as the raw strings, with no numeric conversion
                paros_list.append([timestamp, epoch, dive_number, a, b])

# convert to dataframe
paros = pd.DataFrame(paros_list, columns=['timestamp', 'epoch', 'dive_number', 'a', 'b'])

# save into the HDF5 store opened by the SCC import cell
store['paros'] = paros

#### Import stinger GX3-25 microstrain IMU data

In [None]:
%%time
# Collect every stinger IMU *.DAT file below the FlowMow data directory, sorted.
filenames = sorted(glob.glob('/home/tjc/research/flowmow/**/ustrainAdv/*.DAT', recursive=True))

# Parse each file line by line; only lines that begin with 'MSA3' are kept.
ustrain_adv_list = []
for filename in filenames:
    # element 5 of the '/'-split path (presumably the dive directory - verify
    # against the data layout); its last three characters are the dive number
    dive_dir = filename.split('/')[5]
    dive_number = int(dive_dir[-3:])
    with open(filename, 'r') as f:
        for line in f:
            record = line.strip()
            if not record.startswith('MSA3'):
                continue
            timestamp, epoch = get_timestamp(line)
            # fields after the tag, date and time are read as floats;
            # the final field on the line is left out
            values = [np.float64(field) for field in record.split(' ')[3:-1]]
            ustrain_adv_list.append([timestamp, epoch, dive_number] + values)

# Build the dataframe: three bookkeeping columns followed by 28 placeholder-named fields.
ustrain_adv = pd.DataFrame(
    ustrain_adv_list,
    columns=['timestamp', 'epoch', 'dive_number',
             'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j',
             'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't',
             'u', 'v', 'w', 'x', 'y', 'z', 'aa', 'bb'],
)

# save into the HDF5 store opened by the SCC import cell
store['ustrain_adv'] = ustrain_adv

#### Import chassis GX3-25 microstrain IMU data

In [None]:
%%time
# Collect every chassis IMU *.DAT file below the FlowMow data directory, sorted.
filenames = sorted(glob.glob('/home/tjc/research/flowmow/**/ustrain-chassis/*.DAT', recursive=True))

# Parse each file line by line; only lines that begin with 'MSA3' are kept.
ustrain_chassis_list = []
for filename in filenames:
    # element 5 of the '/'-split path (presumably the dive directory - verify
    # against the data layout); its last three characters are the dive number
    dive_dir = filename.split('/')[5]
    dive_number = int(dive_dir[-3:])
    with open(filename, 'r') as f:
        for line in f:
            record = line.strip()
            if not record.startswith('MSA3'):
                continue
            timestamp, epoch = get_timestamp(line)
            # fields after the tag, date and time are read as floats;
            # the final field on the line is left out
            values = [np.float64(field) for field in record.split(' ')[3:-1]]
            ustrain_chassis_list.append([timestamp, epoch, dive_number] + values)

# Build the dataframe: three bookkeeping columns followed by 28 placeholder-named fields.
ustrain_chassis = pd.DataFrame(
    ustrain_chassis_list,
    columns=['timestamp', 'epoch', 'dive_number',
             'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j',
             'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't',
             'u', 'v', 'w', 'x', 'y', 'z', 'aa', 'bb'],
)

# save into the HDF5 store opened by the SCC import cell
store['ustrain_chassis'] = ustrain_chassis

#### Import SBE3 data

In [None]:
%%time
# Collect every SBE3 *.DAT file below the FlowMow data directory, sorted.
filenames = sorted(glob.glob('/home/tjc/research/flowmow/**/sbe3/*.DAT', recursive=True))

# Parse each file line by line; only lines that begin with 'RAW' are kept.
sbe3_list = []
for filename in filenames:
    # element 5 of the '/'-split path (presumably the dive directory - verify
    # against the data layout); its last three characters are the dive number
    dive_dir = filename.split('/')[5]
    dive_number = int(dive_dir[-3:])
    with open(filename, 'r') as f:
        for line in f:
            record = line.strip()
            if not record.startswith('RAW'):
                continue
            timestamp, epoch = get_timestamp(line)
            # fields 4 and 5 of the line are read as unsigned 32-bit integers
            counts = [np.uint32(field) for field in record.split(' ')[4:6]]
            sbe3_list.append([timestamp, epoch, dive_number] + counts)

# convert to dataframe
sbe3 = pd.DataFrame(
    sbe3_list,
    columns=['timestamp', 'epoch', 'dive_number', 'a', 'b'],
)

# save into the HDF5 store opened by the SCC import cell
store['sbe3'] = sbe3

#### Close store

In [None]:
# Push any buffered writes out to flowmow.h5 (fsync=True additionally asks for
# an OS-level fsync of the file handle) before closing the store.
store.flush(fsync=True)
store.close()
# Drop the name so later cells cannot accidentally use the closed store.
del store