# Transform position data to `.wav` files

Position data for 2014-2018 has been stored in `.mat` files along with other data. For convenience, this notebook converts the position data to channels in a `.wav` file. The output filenames follow the naming scheme of the `.flac` files found elsewhere: each output filename matches the `.mat` filename with the two-digit year and underscore portions removed.

Channel order:

0. depth
1. pitch
2. roll
3. head(ing)



In [None]:
import sys
from pathlib import Path
import numpy as np
import pandas as pd
import scipy.io.wavfile as wavfile
from scipy.io import loadmat
from phonlab.utils import dir2df

import matplotlib.pyplot as plt

In [None]:
# Directory the source `.mat` files are loaded from; the generated `.wav`
# files are written into this same directory.
positiondir = Path('/Users/ronald/src/cetiprojects/combined_data/')

In [None]:
# List the `.mat` files in `positiondir` whose names match the pattern. The
# named group `tag` is read back later as `row.tag` to build output filenames.
# The pattern is a raw string so that `\d` reaches the regex engine untouched
# instead of being treated as an (invalid) string escape sequence.
matdf = dir2df(positiondir, fnpat=r'sw\d+_(?P<tag>\d+[a-z])')
matdf

In [None]:
# Columns for which the value in the first row applies to every row of a
# single .mat file (the list may not be exhaustive).
shared_cols = ('TagID', 'Whale', 'MovementFS', 'RecDur', 'TagOnLocal', 'TagOnUTC', 'Sunset')

# Build one dataframe per .mat file from its `whale` entry, remembering the
# source filename and the .wav path the position data will be written to.
dfs = []
for row in matdf.itertuples():
    mat = loadmat(positiondir / row.fname, simplify_cells=True)
    wdf = pd.DataFrame(mat['whale']).assign(
        fname=row.fname,
        outwav=positiondir / f'sw{row.tag}.dprh.wav',
    )
    # Copy the first row's value of each shared column down the whole file.
    first_row = wdf.iloc[0]
    for col in shared_cols:
        wdf[col] = first_row[col]
    dfs.append(wdf)

# Stack all files into one table with a fresh 0..n-1 index.
divedf = pd.concat(dfs, axis=0).reset_index(drop=True)

# Split TagID into year / tag / day / tagidx columns and append them on the right.
tagparts = divedf['TagID'].str.extract(r'sw(?P<year>\d+)_(?P<tag>(?P<day>\d+)(?P<tagidx>[a-z]))')
divedf = pd.concat([divedf, tagparts], axis=1)
divedf

In [None]:
def _is_missing(value):
    '''Return True when *value* is a lone NaN rather than an array of samples.'''
    arr = np.asarray(value)
    return arr.ndim == 0 and arr.dtype.kind == 'f' and bool(np.isnan(arr))


def _position_channels(first, missing_depth=-100, missing_angle=1000):
    '''
    Build the (samples, 4) depth/pitch/roll/head array from one dive row.

    NaN samples are replaced by `missing_depth` (before depth is negated) or
    `missing_angle`. A pitch/roll/head entry that is a lone NaN is replaced
    by a full column of `missing_angle`.

    Returns the array and the list of column names that were filled with
    placeholders. Raises ValueError if Depth is missing or not 1-D, or if
    the channels do not all have the same length.
    '''
    depth_raw = first['Depth']
    if _is_missing(depth_raw):
        raise ValueError('Depth is missing, so the number of samples is unknown')
    # Depth is negated on output, so a missing depth sample ends up as +100.
    depth = np.nan_to_num(np.asarray(depth_raw), nan=missing_depth) * -1
    if depth.ndim != 1:
        raise ValueError(f'expected a 1-D Depth array, got shape {depth.shape}')

    channels = [depth]
    filled = []
    for col in ('Pitch', 'Roll', 'Head'):
        if _is_missing(first[col]):
            channels.append(np.zeros(len(depth)) + missing_angle)
            filled.append(col)
        else:
            channels.append(np.nan_to_num(np.asarray(first[col]), nan=missing_angle))
    # np.stack raises ValueError when the channel lengths disagree.
    return np.stack(channels, axis=1), filled


def _movement_rate(value, default=25):
    '''Return *value* as an integer sample rate, or *default* when it is NaN.'''
    try:
        return int(value)
    except ValueError:
        # int(nan) raises ValueError; any other ValueError is a real problem.
        if isinstance(value, (float, np.floating)) and np.isnan(value):
            return default   # Default to 25Hz; all files appear to be 25Hz rate.
        raise


def tag2wav(tag):
    '''
    Convert position data in a dive dataframe associated with a tag to a
    .wav file.

    There are multiple dives in a single recording, and the position data
    is stored in the first dive. The channels written are depth (negated),
    pitch, roll and head, in that order. Missing samples, and pitch/roll/head
    channels that are missing entirely, are written as placeholder values.

    Problems with a tag are reported with `print` and the tag is skipped, so
    one bad tag does not abort a `groupby(...).apply(tag2wav)` run.

    Parameters: `tag` is the dataframe for one tag; it must carry a `.name`
    attribute (as set by `groupby.apply`) and the columns 'Depth', 'Pitch',
    'Roll', 'Head', 'MovementFS' and 'outwav'.

    Returns None.
    '''
    print(f'Working on {tag.name}')
    try:
        first = tag.iloc[0]
        d, filled = _position_channels(first)
    except Exception as e:
        print(f'Error reading positions for tag {tag.name}.\n\n{e}\n\n')
        return
    if filled:
        # Only announce placeholders when some were actually created.
        names = '/'.join(filled).lower()
        print(f'\n\nCreated placeholder values for missing {names} data for tag {tag.name}.\n\n')
    try:
        rate = _movement_rate(first['MovementFS'])
    except Exception as e:
        print(f'Could not get rate for tag {tag.name}.\n\n{e}\n\n')
        return
    try:
        wavfile.write(first['outwav'], rate, d)
    except Exception as e:
        print(f'Error writing .wav file for tag {tag.name} using rate {rate}.\n\n{e}\n\n')
        return
    print(f"Wrote {first['outwav']}")
    return
# Write one .wav per tag. `tag2wav` works by side effect (it writes a file and
# prints progress), so `d` only records that every group was visited.
by_tag = divedf.groupby('tag')
d = by_tag.apply(tag2wav, include_groups=False)

In [None]:
# Inspect the Depth entries of every dive belonging to recording sw14_092a.
divedf.loc[divedf['TagID'] == 'sw14_092a', 'Depth']