# read_raw

This notebook reads the raw FlowMow Sentry data into pandas DataFrames and saves them to a single HDF5 file (`flowmow.h5`) using `HDFStore`.

#### Setup

In [None]:
%matplotlib notebook
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import datetime as dt
import glob
import subprocess
import os

#### Timestamp extractor function

In [None]:
def get_timestamp(line):
    """Parse the date and time fields of a raw log line.

    The line is stripped and split on single spaces; the second and third
    fields are joined with a space and parsed as ``%Y/%m/%d %H:%M:%S.%f``.

    Parameters
    ----------
    line : str
        One raw record, e.g. ``'<tag> <date> <time> ...'``.

    Returns
    -------
    tuple
        ``(timestamp, epoch)``: the naive ``datetime`` exactly as parsed, and
        the POSIX time in seconds obtained by treating that datetime as UTC.

    Raises
    ------
    ValueError
        If the two fields do not match the expected format.
    """
    tokens = line.strip().split(' ')
    stamp_text = ' '.join(tokens[1:3])
    timestamp = dt.datetime.strptime(stamp_text, '%Y/%m/%d %H:%M:%S.%f')
    # UTC is attached only for the epoch computation; the returned datetime stays naive.
    utc_stamp = timestamp.replace(tzinfo=dt.timezone.utc)
    return timestamp, utc_stamp.timestamp()

#### Import SCC data

In [None]:
%%time
filenames = glob.glob('/home/tjc/research/flowmow/**/*.scc', recursive=True)
filenames.sort()
scc_list = []
for filename in filenames:
    with open(filename, 'r') as f:
        for line in f:
            if 'SCC' in line.strip()[0:3]:
                timestamp, epoch = get_timestamp(line)
                scc_list.append([timestamp, epoch] + list(map(np.float64, line.strip().split(' ')[3:])))           
scc = pd.DataFrame(scc_list, columns=['timestamp','epoch','lat','lon','depth','pressure','heading',
                                      'magx','magy','magz','obs','eh','aux1','aux2','T1','C1',
                                      'T2','C2','S1','S2','ss1','depth_d','height','D1','D2'])

#### Use GMT to get UTM values for Sentry navigation

In [None]:
%%time
tmpfile = dt.datetime.now().strftime("utmx_utmy_%Y%m%d%H%M%S%f")
scc.to_csv(tmpfile, sep=',', columns = ['lon', 'lat'], header=False, index=False)
cmd = 'cat %s | gmt mapproject -Ju9/1 -R-132/-126/40/48 -F' % tmpfile
gmt_output = (subprocess.check_output(cmd, shell=True).decode('utf-8')).split('\n')
os.remove(tmpfile)
os.remove('gmt.history')

utm_x = []
utm_y = []
for i in gmt_output:
    try:
        utm_x.append(np.float64(i.split('\t')[0]))
        utm_y.append(np.float64(i.split('\t')[1]))
    except:
        pass
scc.insert(4, 'utm_x', utm_x)
scc.insert(5, 'utm_y', utm_y)

#### Save SCC to an HDF5 store

In [None]:
%%time
store = pd.HDFStore('flowmow.h5')
store['scc'] = scc
store.flush(fsync=True)

#### Import Paros pressure sensor data

In [None]:
%%time
filenames = glob.glob('/home/tjc/research/flowmow/**/parosKinsey/*.DAT', recursive=True)
filenames.sort()
paros_list = []
i = 0
for filename in filenames:
    with open(filename, 'r') as f:
        for line in f:
            if 'RAW' in line.strip()[0:3]:
                if 'P2=' in line.strip():
                    timestamp, epoch = get_timestamp(line)
                    a = line.strip().split(' ')[3].split(',')[0].split('=')[1]
                    b = line.strip().split(' ')[3].split(',')[1]
                    paros_list.append([timestamp,epoch,a,b])
                    i = i+1
paros = pd.DataFrame(paros_list, columns=['timestamp', 'epoch', 'a', 'b'])

#### Push paros to the store

In [None]:
%%time
store['paros'] = paros
store.flush(fsync=True)

#### Close store

In [None]:
# Close the HDF5 store that the earlier cells opened and wrote to.
store.close()